{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.666666666666667e-07, "loss": 2.3887, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.333333333333335e-07, "loss": 2.4072, "step": 2 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 2.2979, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.066666666666667e-06, "loss": 2.165, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.3333333333333334e-06, "loss": 2.6035, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.6000000000000001e-06, "loss": 2.1289, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.8666666666666669e-06, "loss": 2.3145, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.133333333333334e-06, "loss": 2.3779, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-06, "loss": 2.3086, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.666666666666667e-06, "loss": 2.3623, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.9333333333333338e-06, "loss": 2.6094, "step": 11 }, { "epoch": 0.0, "learning_rate": 3.2000000000000003e-06, "loss": 2.1338, "step": 12 }, { "epoch": 0.01, "learning_rate": 3.4666666666666672e-06, "loss": 2.3887, "step": 13 }, { "epoch": 0.01, "learning_rate": 3.7333333333333337e-06, "loss": 2.5645, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "loss": 2.3789, "step": 15 }, { "epoch": 0.01, "learning_rate": 4.266666666666668e-06, "loss": 2.3682, "step": 16 }, { "epoch": 0.01, "learning_rate": 4.533333333333334e-06, "loss": 2.2998, "step": 17 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-06, "loss": 2.4111, "step": 18 }, { "epoch": 0.01, "learning_rate": 5.0666666666666676e-06, "loss": 2.1943, "step": 19 }, { "epoch": 0.01, "learning_rate": 5.333333333333334e-06, "loss": 2.1924, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.600000000000001e-06, "loss": 2.2148, "step": 21 }, { "epoch": 0.01, "learning_rate": 5.8666666666666675e-06, "loss": 2.2256, "step": 22 }, { "epoch": 0.01, "learning_rate": 6.133333333333334e-06, "loss": 2.4062, "step": 23 }, { "epoch": 0.01, "learning_rate": 6.4000000000000006e-06, "loss": 2.3984, "step": 24 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-06, "loss": 2.2061, "step": 25 }, { "epoch": 0.01, "learning_rate": 6.9333333333333344e-06, "loss": 2.252, "step": 26 }, { "epoch": 0.01, "learning_rate": 7.2000000000000005e-06, "loss": 2.2881, "step": 27 }, { "epoch": 0.01, "learning_rate": 7.4666666666666675e-06, "loss": 2.498, "step": 28 }, { "epoch": 0.01, "learning_rate": 7.733333333333334e-06, "loss": 2.3789, "step": 29 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 2.1348, "step": 30 }, { "epoch": 0.01, "learning_rate": 8.266666666666667e-06, "loss": 2.2266, "step": 31 }, { "epoch": 0.01, "learning_rate": 8.533333333333335e-06, "loss": 2.4375, "step": 32 }, { "epoch": 0.01, "learning_rate": 8.8e-06, "loss": 2.1768, "step": 33 }, { "epoch": 0.01, "learning_rate": 9.066666666666667e-06, "loss": 2.0879, "step": 34 }, { "epoch": 0.01, "learning_rate": 9.333333333333334e-06, "loss": 2.3516, "step": 35 }, { "epoch": 0.01, "learning_rate": 9.600000000000001e-06, "loss": 2.2676, "step": 36 }, { "epoch": 0.01, "learning_rate": 9.866666666666668e-06, "loss": 2.21, "step": 37 }, { "epoch": 0.02, "learning_rate": 1.0133333333333335e-05, "loss": 2.3613, "step": 38 }, { "epoch": 0.02, "learning_rate": 1.04e-05, "loss": 2.1328, "step": 39 }, { "epoch": 0.02, "learning_rate": 1.0666666666666667e-05, "loss": 2.1348, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.0933333333333334e-05, "loss": 2.124, "step": 41 }, { "epoch": 0.02, "learning_rate": 1.1200000000000001e-05, "loss": 2.2021, "step": 42 }, { "epoch": 0.02, "learning_rate": 1.1466666666666668e-05, "loss": 2.2031, "step": 43 }, { "epoch": 0.02, "learning_rate": 1.1733333333333335e-05, "loss": 1.9912, "step": 44 }, { "epoch": 0.02, "learning_rate": 1.2e-05, "loss": 2.1504, "step": 45 }, { "epoch": 0.02, "learning_rate": 1.2266666666666667e-05, "loss": 2.0928, "step": 46 }, { "epoch": 0.02, "learning_rate": 1.2533333333333336e-05, "loss": 2.1074, "step": 47 }, { "epoch": 0.02, "learning_rate": 1.2800000000000001e-05, "loss": 1.9893, "step": 48 }, { "epoch": 0.02, "learning_rate": 1.3066666666666668e-05, "loss": 2.1797, "step": 49 }, { "epoch": 0.02, "learning_rate": 1.3333333333333333e-05, "loss": 2.1689, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.3600000000000002e-05, "loss": 1.8926, "step": 51 }, { "epoch": 0.02, "learning_rate": 1.3866666666666669e-05, "loss": 2.0645, "step": 52 }, { "epoch": 0.02, "learning_rate": 1.4133333333333334e-05, "loss": 2.0381, "step": 53 }, { "epoch": 0.02, "learning_rate": 1.4400000000000001e-05, "loss": 1.9053, "step": 54 }, { "epoch": 0.02, "learning_rate": 1.4666666666666666e-05, "loss": 2.0645, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.4933333333333335e-05, "loss": 2.0547, "step": 56 }, { "epoch": 0.02, "learning_rate": 1.5200000000000002e-05, "loss": 1.8281, "step": 57 }, { "epoch": 0.02, "learning_rate": 1.546666666666667e-05, "loss": 1.9805, "step": 58 }, { "epoch": 0.02, "learning_rate": 1.5733333333333334e-05, "loss": 1.834, "step": 59 }, { "epoch": 0.02, "learning_rate": 1.6000000000000003e-05, "loss": 1.9092, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.6266666666666668e-05, "loss": 1.9316, "step": 61 }, { "epoch": 0.02, "learning_rate": 1.6533333333333333e-05, "loss": 1.9014, "step": 62 }, { "epoch": 0.03, "learning_rate": 1.6800000000000002e-05, "loss": 1.835, "step": 63 }, { "epoch": 0.03, "learning_rate": 1.706666666666667e-05, "loss": 1.9072, "step": 64 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-05, "loss": 1.8965, "step": 65 }, { "epoch": 0.03, "learning_rate": 1.76e-05, "loss": 1.8955, "step": 66 }, { "epoch": 0.03, "learning_rate": 1.7866666666666666e-05, "loss": 1.751, "step": 67 }, { "epoch": 0.03, "learning_rate": 1.8133333333333335e-05, "loss": 1.8311, "step": 68 }, { "epoch": 0.03, "learning_rate": 1.8400000000000003e-05, "loss": 1.8838, "step": 69 }, { "epoch": 0.03, "learning_rate": 1.866666666666667e-05, "loss": 1.8311, "step": 70 }, { "epoch": 0.03, "learning_rate": 1.8933333333333334e-05, "loss": 1.7793, "step": 71 }, { "epoch": 0.03, "learning_rate": 1.9200000000000003e-05, "loss": 1.7803, "step": 72 }, { "epoch": 0.03, "learning_rate": 1.9466666666666668e-05, "loss": 1.7646, "step": 73 }, { "epoch": 0.03, "learning_rate": 1.9733333333333336e-05, "loss": 1.7891, "step": 74 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 1.7637, "step": 75 }, { "epoch": 0.03, "learning_rate": 1.9999991608372392e-05, "loss": 1.8818, "step": 76 }, { "epoch": 0.03, "learning_rate": 1.999996643350365e-05, "loss": 1.8125, "step": 77 }, { "epoch": 0.03, "learning_rate": 1.999992447543603e-05, "loss": 1.8926, "step": 78 }, { "epoch": 0.03, "learning_rate": 1.999986573423995e-05, "loss": 1.7178, "step": 79 }, { "epoch": 0.03, "learning_rate": 1.999979021001399e-05, "loss": 1.8164, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.999969790288491e-05, "loss": 1.7217, "step": 81 }, { "epoch": 0.03, "learning_rate": 1.999958881300763e-05, "loss": 1.7373, "step": 82 }, { "epoch": 0.03, "learning_rate": 1.9999462940565242e-05, "loss": 1.6904, "step": 83 }, { "epoch": 0.03, "learning_rate": 1.9999320285769e-05, "loss": 1.7148, "step": 84 }, { "epoch": 0.03, "learning_rate": 1.999916084885832e-05, "loss": 1.71, "step": 85 }, { "epoch": 0.03, "learning_rate": 1.999898463010079e-05, "loss": 1.6416, "step": 86 }, { "epoch": 0.03, "learning_rate": 1.9998791629792172e-05, "loss": 1.7617, "step": 87 }, { "epoch": 0.04, "learning_rate": 1.999858184825637e-05, "loss": 1.6875, "step": 88 }, { "epoch": 0.04, "learning_rate": 1.9998355285845473e-05, "loss": 1.6982, "step": 89 }, { "epoch": 0.04, "learning_rate": 1.9998111942939727e-05, "loss": 1.7324, "step": 90 }, { "epoch": 0.04, "learning_rate": 1.9997851819947537e-05, "loss": 1.7676, "step": 91 }, { "epoch": 0.04, "learning_rate": 1.999757491730548e-05, "loss": 1.6602, "step": 92 }, { "epoch": 0.04, "learning_rate": 1.999728123547828e-05, "loss": 1.6875, "step": 93 }, { "epoch": 0.04, "learning_rate": 1.9996970774958836e-05, "loss": 1.6797, "step": 94 }, { "epoch": 0.04, "learning_rate": 1.9996643536268202e-05, "loss": 1.6553, "step": 95 }, { "epoch": 0.04, "learning_rate": 1.999629951995559e-05, "loss": 1.6357, "step": 96 }, { "epoch": 0.04, "learning_rate": 1.9995938726598374e-05, "loss": 1.7236, "step": 97 }, { "epoch": 0.04, "learning_rate": 1.999556115680208e-05, "loss": 1.6641, "step": 98 }, { "epoch": 0.04, "learning_rate": 1.999516681120039e-05, "loss": 1.6797, "step": 99 }, { "epoch": 0.04, "learning_rate": 1.9994755690455154e-05, "loss": 1.7129, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.999432779525635e-05, "loss": 1.6865, "step": 101 }, { "epoch": 0.04, "learning_rate": 1.9993883126322142e-05, "loss": 1.6406, "step": 102 }, { "epoch": 0.04, "learning_rate": 1.9993421684398825e-05, "loss": 1.5762, "step": 103 }, { "epoch": 0.04, "learning_rate": 1.9992943470260845e-05, "loss": 1.626, "step": 104 }, { "epoch": 0.04, "learning_rate": 1.99924484847108e-05, "loss": 1.5605, "step": 105 }, { "epoch": 0.04, "learning_rate": 1.9991936728579438e-05, "loss": 1.6123, "step": 106 }, { "epoch": 0.04, "learning_rate": 1.999140820272566e-05, "loss": 1.6299, "step": 107 }, { "epoch": 0.04, "learning_rate": 1.9990862908036492e-05, "loss": 1.5742, "step": 108 }, { "epoch": 0.04, "learning_rate": 1.9990300845427123e-05, "loss": 1.6318, "step": 109 }, { "epoch": 0.04, "learning_rate": 1.998972201584088e-05, "loss": 1.6455, "step": 110 }, { "epoch": 0.04, "learning_rate": 1.998912642024922e-05, "loss": 1.5801, "step": 111 }, { "epoch": 0.04, "learning_rate": 1.998851405965175e-05, "loss": 1.6445, "step": 112 }, { "epoch": 0.05, "learning_rate": 1.9987884935076213e-05, "loss": 1.6709, "step": 113 }, { "epoch": 0.05, "learning_rate": 1.9987239047578482e-05, "loss": 1.6318, "step": 114 }, { "epoch": 0.05, "learning_rate": 1.9986576398242566e-05, "loss": 1.6201, "step": 115 }, { "epoch": 0.05, "learning_rate": 1.9985896988180607e-05, "loss": 1.6045, "step": 116 }, { "epoch": 0.05, "learning_rate": 1.9985200818532873e-05, "loss": 1.6523, "step": 117 }, { "epoch": 0.05, "learning_rate": 1.9984487890467773e-05, "loss": 1.5605, "step": 118 }, { "epoch": 0.05, "learning_rate": 1.9983758205181824e-05, "loss": 1.5469, "step": 119 }, { "epoch": 0.05, "learning_rate": 1.9983011763899674e-05, "loss": 1.6855, "step": 120 }, { "epoch": 0.05, "learning_rate": 1.9982248567874098e-05, "loss": 1.5693, "step": 121 }, { "epoch": 0.05, "learning_rate": 1.998146861838599e-05, "loss": 1.6172, "step": 122 }, { "epoch": 0.05, "learning_rate": 1.9980671916744356e-05, "loss": 1.585, "step": 123 }, { "epoch": 0.05, "learning_rate": 1.9979858464286317e-05, "loss": 1.5391, "step": 124 }, { "epoch": 0.05, "learning_rate": 1.997902826237712e-05, "loss": 1.6201, "step": 125 }, { "epoch": 0.05, "learning_rate": 1.9978181312410104e-05, "loss": 1.6162, "step": 126 }, { "epoch": 0.05, "learning_rate": 1.9977317615806738e-05, "loss": 1.5645, "step": 127 }, { "epoch": 0.05, "learning_rate": 1.9976437174016575e-05, "loss": 1.5996, "step": 128 }, { "epoch": 0.05, "learning_rate": 1.997553998851729e-05, "loss": 1.6934, "step": 129 }, { "epoch": 0.05, "learning_rate": 1.997462606081465e-05, "loss": 1.5195, "step": 130 }, { "epoch": 0.05, "learning_rate": 1.997369539244252e-05, "loss": 1.5293, "step": 131 }, { "epoch": 0.05, "learning_rate": 1.997274798496287e-05, "loss": 1.5215, "step": 132 }, { "epoch": 0.05, "learning_rate": 1.9971783839965756e-05, "loss": 1.5312, "step": 133 }, { "epoch": 0.05, "learning_rate": 1.997080295906933e-05, "loss": 1.6279, "step": 134 }, { "epoch": 0.05, "learning_rate": 1.9969805343919822e-05, "loss": 1.584, "step": 135 }, { "epoch": 0.05, "learning_rate": 1.996879099619156e-05, "loss": 1.5732, "step": 136 }, { "epoch": 0.05, "learning_rate": 1.9967759917586953e-05, "loss": 1.5596, "step": 137 }, { "epoch": 0.06, "learning_rate": 1.9966712109836476e-05, "loss": 1.6631, "step": 138 }, { "epoch": 0.06, "learning_rate": 1.9965647574698705e-05, "loss": 1.5898, "step": 139 }, { "epoch": 0.06, "learning_rate": 1.9964566313960265e-05, "loss": 1.5732, "step": 140 }, { "epoch": 0.06, "learning_rate": 1.9963468329435872e-05, "loss": 1.5234, "step": 141 }, { "epoch": 0.06, "learning_rate": 1.9962353622968296e-05, "loss": 1.6338, "step": 142 }, { "epoch": 0.06, "learning_rate": 1.996122219642838e-05, "loss": 1.4805, "step": 143 }, { "epoch": 0.06, "learning_rate": 1.9960074051715022e-05, "loss": 1.5469, "step": 144 }, { "epoch": 0.06, "learning_rate": 1.995890919075519e-05, "loss": 1.4883, "step": 145 }, { "epoch": 0.06, "learning_rate": 1.995772761550389e-05, "loss": 1.5527, "step": 146 }, { "epoch": 0.06, "learning_rate": 1.9956529327944198e-05, "loss": 1.6113, "step": 147 }, { "epoch": 0.06, "learning_rate": 1.9955314330087225e-05, "loss": 1.5625, "step": 148 }, { "epoch": 0.06, "learning_rate": 1.9954082623972143e-05, "loss": 1.5469, "step": 149 }, { "epoch": 0.06, "learning_rate": 1.995283421166614e-05, "loss": 1.6494, "step": 150 }, { "epoch": 0.06, "learning_rate": 1.9951569095264473e-05, "loss": 1.4512, "step": 151 }, { "epoch": 0.06, "learning_rate": 1.995028727689041e-05, "loss": 1.5938, "step": 152 }, { "epoch": 0.06, "learning_rate": 1.9948988758695263e-05, "loss": 1.5781, "step": 153 }, { "epoch": 0.06, "learning_rate": 1.994767354285837e-05, "loss": 1.5566, "step": 154 }, { "epoch": 0.06, "learning_rate": 1.9946341631587086e-05, "loss": 1.6162, "step": 155 }, { "epoch": 0.06, "learning_rate": 1.9944993027116798e-05, "loss": 1.4971, "step": 156 }, { "epoch": 0.06, "learning_rate": 1.9943627731710896e-05, "loss": 1.5029, "step": 157 }, { "epoch": 0.06, "learning_rate": 1.9942245747660797e-05, "loss": 1.5908, "step": 158 }, { "epoch": 0.06, "learning_rate": 1.9940847077285918e-05, "loss": 1.6309, "step": 159 }, { "epoch": 0.06, "learning_rate": 1.9939431722933678e-05, "loss": 1.4551, "step": 160 }, { "epoch": 0.06, "learning_rate": 1.993799968697951e-05, "loss": 1.5078, "step": 161 }, { "epoch": 0.06, "learning_rate": 1.9936550971826835e-05, "loss": 1.6074, "step": 162 }, { "epoch": 0.07, "learning_rate": 1.9935085579907064e-05, "loss": 1.5283, "step": 163 }, { "epoch": 0.07, "learning_rate": 1.9933603513679604e-05, "loss": 1.5361, "step": 164 }, { "epoch": 0.07, "learning_rate": 1.9932104775631847e-05, "loss": 1.5898, "step": 165 }, { "epoch": 0.07, "learning_rate": 1.993058936827916e-05, "loss": 1.623, "step": 166 }, { "epoch": 0.07, "learning_rate": 1.9929057294164894e-05, "loss": 1.4795, "step": 167 }, { "epoch": 0.07, "learning_rate": 1.992750855586036e-05, "loss": 1.5469, "step": 168 }, { "epoch": 0.07, "learning_rate": 1.9925943155964857e-05, "loss": 1.5449, "step": 169 }, { "epoch": 0.07, "learning_rate": 1.9924361097105624e-05, "loss": 1.5332, "step": 170 }, { "epoch": 0.07, "learning_rate": 1.992276238193788e-05, "loss": 1.5674, "step": 171 }, { "epoch": 0.07, "learning_rate": 1.9921147013144782e-05, "loss": 1.4453, "step": 172 }, { "epoch": 0.07, "learning_rate": 1.9919514993437445e-05, "loss": 1.5254, "step": 173 }, { "epoch": 0.07, "learning_rate": 1.9917866325554936e-05, "loss": 1.5918, "step": 174 }, { "epoch": 0.07, "learning_rate": 1.9916201012264255e-05, "loss": 1.5117, "step": 175 }, { "epoch": 0.07, "learning_rate": 1.991451905636033e-05, "loss": 1.5625, "step": 176 }, { "epoch": 0.07, "learning_rate": 1.9912820460666046e-05, "loss": 1.4229, "step": 177 }, { "epoch": 0.07, "learning_rate": 1.9911105228032186e-05, "loss": 1.5327, "step": 178 }, { "epoch": 0.07, "learning_rate": 1.9909373361337475e-05, "loss": 1.5527, "step": 179 }, { "epoch": 0.07, "learning_rate": 1.990762486348855e-05, "loss": 1.5342, "step": 180 }, { "epoch": 0.07, "learning_rate": 1.990585973741996e-05, "loss": 1.5166, "step": 181 }, { "epoch": 0.07, "learning_rate": 1.9904077986094153e-05, "loss": 1.459, "step": 182 }, { "epoch": 0.07, "learning_rate": 1.9902279612501494e-05, "loss": 1.4951, "step": 183 }, { "epoch": 0.07, "learning_rate": 1.9900464619660243e-05, "loss": 1.5332, "step": 184 }, { "epoch": 0.07, "learning_rate": 1.989863301061654e-05, "loss": 1.5078, "step": 185 }, { "epoch": 0.07, "learning_rate": 1.989678478844443e-05, "loss": 1.583, "step": 186 }, { "epoch": 0.07, "learning_rate": 1.9894919956245825e-05, "loss": 1.4678, "step": 187 }, { "epoch": 0.08, "learning_rate": 1.9893038517150526e-05, "loss": 1.4844, "step": 188 }, { "epoch": 0.08, "learning_rate": 1.9891140474316197e-05, "loss": 1.4414, "step": 189 }, { "epoch": 0.08, "learning_rate": 1.9889225830928365e-05, "loss": 1.5059, "step": 190 }, { "epoch": 0.08, "learning_rate": 1.9887294590200437e-05, "loss": 1.5557, "step": 191 }, { "epoch": 0.08, "learning_rate": 1.988534675537366e-05, "loss": 1.5342, "step": 192 }, { "epoch": 0.08, "learning_rate": 1.988338232971713e-05, "loss": 1.582, "step": 193 }, { "epoch": 0.08, "learning_rate": 1.9881401316527795e-05, "loss": 1.4492, "step": 194 }, { "epoch": 0.08, "learning_rate": 1.987940371913044e-05, "loss": 1.5225, "step": 195 }, { "epoch": 0.08, "learning_rate": 1.9877389540877686e-05, "loss": 1.4824, "step": 196 }, { "epoch": 0.08, "learning_rate": 1.9875358785149982e-05, "loss": 1.4932, "step": 197 }, { "epoch": 0.08, "learning_rate": 1.987331145535559e-05, "loss": 1.4893, "step": 198 }, { "epoch": 0.08, "learning_rate": 1.98712475549306e-05, "loss": 1.5342, "step": 199 }, { "epoch": 0.08, "learning_rate": 1.9869167087338908e-05, "loss": 1.5449, "step": 200 }, { "epoch": 0.08, "learning_rate": 1.9867070056072215e-05, "loss": 1.4482, "step": 201 }, { "epoch": 0.08, "learning_rate": 1.9864956464650027e-05, "loss": 1.4668, "step": 202 }, { "epoch": 0.08, "learning_rate": 1.986282631661963e-05, "loss": 1.5068, "step": 203 }, { "epoch": 0.08, "learning_rate": 1.9860679615556112e-05, "loss": 1.5068, "step": 204 }, { "epoch": 0.08, "learning_rate": 1.9858516365062334e-05, "loss": 1.4492, "step": 205 }, { "epoch": 0.08, "learning_rate": 1.9856336568768936e-05, "loss": 1.4688, "step": 206 }, { "epoch": 0.08, "learning_rate": 1.9854140230334323e-05, "loss": 1.4629, "step": 207 }, { "epoch": 0.08, "learning_rate": 1.985192735344467e-05, "loss": 1.4648, "step": 208 }, { "epoch": 0.08, "learning_rate": 1.98496979418139e-05, "loss": 1.4629, "step": 209 }, { "epoch": 0.08, "learning_rate": 1.9847451999183692e-05, "loss": 1.459, "step": 210 }, { "epoch": 0.08, "learning_rate": 1.9845189529323473e-05, "loss": 1.5215, "step": 211 }, { "epoch": 0.08, "learning_rate": 1.98429105360304e-05, "loss": 1.5762, "step": 212 }, { "epoch": 0.09, "learning_rate": 1.9840615023129372e-05, "loss": 1.5059, "step": 213 }, { "epoch": 0.09, "learning_rate": 1.9838302994473e-05, "loss": 1.4678, "step": 214 }, { "epoch": 0.09, "learning_rate": 1.9835974453941623e-05, "loss": 1.5098, "step": 215 }, { "epoch": 0.09, "learning_rate": 1.9833629405443283e-05, "loss": 1.4102, "step": 216 }, { "epoch": 0.09, "learning_rate": 1.983126785291375e-05, "loss": 1.5117, "step": 217 }, { "epoch": 0.09, "learning_rate": 1.9828889800316467e-05, "loss": 1.4414, "step": 218 }, { "epoch": 0.09, "learning_rate": 1.982649525164258e-05, "loss": 1.4365, "step": 219 }, { "epoch": 0.09, "learning_rate": 1.9824084210910924e-05, "loss": 1.5479, "step": 220 }, { "epoch": 0.09, "learning_rate": 1.9821656682168013e-05, "loss": 1.5049, "step": 221 }, { "epoch": 0.09, "learning_rate": 1.9819212669488026e-05, "loss": 1.5117, "step": 222 }, { "epoch": 0.09, "learning_rate": 1.9816752176972815e-05, "loss": 1.4863, "step": 223 }, { "epoch": 0.09, "learning_rate": 1.9814275208751882e-05, "loss": 1.4473, "step": 224 }, { "epoch": 0.09, "learning_rate": 1.9811781768982392e-05, "loss": 1.5156, "step": 225 }, { "epoch": 0.09, "learning_rate": 1.9809271861849147e-05, "loss": 1.5137, "step": 226 }, { "epoch": 0.09, "learning_rate": 1.9806745491564588e-05, "loss": 1.5439, "step": 227 }, { "epoch": 0.09, "learning_rate": 1.9804202662368782e-05, "loss": 1.5742, "step": 228 }, { "epoch": 0.09, "learning_rate": 1.980164337852943e-05, "loss": 1.5176, "step": 229 }, { "epoch": 0.09, "learning_rate": 1.9799067644341844e-05, "loss": 1.4697, "step": 230 }, { "epoch": 0.09, "learning_rate": 1.9796475464128943e-05, "loss": 1.5898, "step": 231 }, { "epoch": 0.09, "learning_rate": 1.9793866842241245e-05, "loss": 1.4834, "step": 232 }, { "epoch": 0.09, "learning_rate": 1.9791241783056874e-05, "loss": 1.4023, "step": 233 }, { "epoch": 0.09, "learning_rate": 1.9788600290981525e-05, "loss": 1.5107, "step": 234 }, { "epoch": 0.09, "learning_rate": 1.978594237044849e-05, "loss": 1.4971, "step": 235 }, { "epoch": 0.09, "learning_rate": 1.9783268025918622e-05, "loss": 1.4707, "step": 236 }, { "epoch": 0.09, "learning_rate": 1.9780577261880336e-05, "loss": 1.4785, "step": 237 }, { "epoch": 0.1, "learning_rate": 1.977787008284962e-05, "loss": 1.5693, "step": 238 }, { "epoch": 0.1, "learning_rate": 1.9775146493369996e-05, "loss": 1.5488, "step": 239 }, { "epoch": 0.1, "learning_rate": 1.977240649801253e-05, "loss": 1.4307, "step": 240 }, { "epoch": 0.1, "learning_rate": 1.9769650101375835e-05, "loss": 1.4492, "step": 241 }, { "epoch": 0.1, "learning_rate": 1.9766877308086038e-05, "loss": 1.4287, "step": 242 }, { "epoch": 0.1, "learning_rate": 1.9764088122796785e-05, "loss": 1.457, "step": 243 }, { "epoch": 0.1, "learning_rate": 1.976128255018924e-05, "loss": 1.4707, "step": 244 }, { "epoch": 0.1, "learning_rate": 1.9758460594972068e-05, "loss": 1.418, "step": 245 }, { "epoch": 0.1, "learning_rate": 1.975562226188143e-05, "loss": 1.4404, "step": 246 }, { "epoch": 0.1, "learning_rate": 1.9752767555680967e-05, "loss": 1.3896, "step": 247 }, { "epoch": 0.1, "learning_rate": 1.9749896481161807e-05, "loss": 1.4492, "step": 248 }, { "epoch": 0.1, "learning_rate": 1.9747009043142556e-05, "loss": 1.4912, "step": 249 }, { "epoch": 0.1, "learning_rate": 1.9744105246469264e-05, "loss": 1.4277, "step": 250 }, { "epoch": 0.1, "learning_rate": 1.974118509601545e-05, "loss": 1.5195, "step": 251 }, { "epoch": 0.1, "learning_rate": 1.9738248596682078e-05, "loss": 1.4629, "step": 252 }, { "epoch": 0.1, "learning_rate": 1.973529575339755e-05, "loss": 1.5527, "step": 253 }, { "epoch": 0.1, "learning_rate": 1.9732326571117703e-05, "loss": 1.4619, "step": 254 }, { "epoch": 0.1, "learning_rate": 1.9729341054825783e-05, "loss": 1.4971, "step": 255 }, { "epoch": 0.1, "learning_rate": 1.9726339209532462e-05, "loss": 1.4619, "step": 256 }, { "epoch": 0.1, "learning_rate": 1.9723321040275816e-05, "loss": 1.4854, "step": 257 }, { "epoch": 0.1, "learning_rate": 1.972028655212131e-05, "loss": 1.418, "step": 258 }, { "epoch": 0.1, "learning_rate": 1.9717235750161808e-05, "loss": 1.4893, "step": 259 }, { "epoch": 0.1, "learning_rate": 1.9714168639517543e-05, "loss": 1.4707, "step": 260 }, { "epoch": 0.1, "learning_rate": 1.971108522533613e-05, "loss": 1.4756, "step": 261 }, { "epoch": 0.1, "learning_rate": 1.9707985512792544e-05, "loss": 1.4414, "step": 262 }, { "epoch": 0.11, "learning_rate": 1.9704869507089105e-05, "loss": 1.4072, "step": 263 }, { "epoch": 0.11, "learning_rate": 1.970173721345549e-05, "loss": 1.4658, "step": 264 }, { "epoch": 0.11, "learning_rate": 1.9698588637148705e-05, "loss": 1.3789, "step": 265 }, { "epoch": 0.11, "learning_rate": 1.9695423783453086e-05, "loss": 1.4922, "step": 266 }, { "epoch": 0.11, "learning_rate": 1.9692242657680286e-05, "loss": 1.4883, "step": 267 }, { "epoch": 0.11, "learning_rate": 1.9689045265169272e-05, "loss": 1.4434, "step": 268 }, { "epoch": 0.11, "learning_rate": 1.9685831611286312e-05, "loss": 1.4072, "step": 269 }, { "epoch": 0.11, "learning_rate": 1.9682601701424958e-05, "loss": 1.4424, "step": 270 }, { "epoch": 0.11, "learning_rate": 1.9679355541006056e-05, "loss": 1.46, "step": 271 }, { "epoch": 0.11, "learning_rate": 1.9676093135477713e-05, "loss": 1.4756, "step": 272 }, { "epoch": 0.11, "learning_rate": 1.9672814490315312e-05, "loss": 1.4062, "step": 273 }, { "epoch": 0.11, "learning_rate": 1.9669519611021485e-05, "loss": 1.4102, "step": 274 }, { "epoch": 0.11, "learning_rate": 1.9666208503126115e-05, "loss": 1.4141, "step": 275 }, { "epoch": 0.11, "learning_rate": 1.9662881172186313e-05, "loss": 1.4082, "step": 276 }, { "epoch": 0.11, "learning_rate": 1.9659537623786428e-05, "loss": 1.3984, "step": 277 }, { "epoch": 0.11, "learning_rate": 1.9656177863538025e-05, "loss": 1.3682, "step": 278 }, { "epoch": 0.11, "learning_rate": 1.965280189707987e-05, "loss": 1.5684, "step": 279 }, { "epoch": 0.11, "learning_rate": 1.9649409730077934e-05, "loss": 1.4336, "step": 280 }, { "epoch": 0.11, "learning_rate": 1.9646001368225382e-05, "loss": 1.4062, "step": 281 }, { "epoch": 0.11, "learning_rate": 1.9642576817242553e-05, "loss": 1.4375, "step": 282 }, { "epoch": 0.11, "learning_rate": 1.9639136082876954e-05, "loss": 1.5215, "step": 283 }, { "epoch": 0.11, "learning_rate": 1.9635679170903258e-05, "loss": 1.4434, "step": 284 }, { "epoch": 0.11, "learning_rate": 1.9632206087123296e-05, "loss": 1.4619, "step": 285 }, { "epoch": 0.11, "learning_rate": 1.962871683736603e-05, "loss": 1.4385, "step": 286 }, { "epoch": 0.11, "learning_rate": 1.962521142748755e-05, "loss": 1.418, "step": 287 }, { "epoch": 0.12, "learning_rate": 1.9621689863371083e-05, "loss": 1.4531, "step": 288 }, { "epoch": 0.12, "learning_rate": 1.9618152150926953e-05, "loss": 1.5449, "step": 289 }, { "epoch": 0.12, "learning_rate": 1.9614598296092603e-05, "loss": 1.4639, "step": 290 }, { "epoch": 0.12, "learning_rate": 1.9611028304832547e-05, "loss": 1.5107, "step": 291 }, { "epoch": 0.12, "learning_rate": 1.9607442183138403e-05, "loss": 1.3916, "step": 292 }, { "epoch": 0.12, "learning_rate": 1.960383993702884e-05, "loss": 1.4541, "step": 293 }, { "epoch": 0.12, "learning_rate": 1.9600221572549607e-05, "loss": 1.4658, "step": 294 }, { "epoch": 0.12, "learning_rate": 1.9596587095773496e-05, "loss": 1.5107, "step": 295 }, { "epoch": 0.12, "learning_rate": 1.959293651280034e-05, "loss": 1.418, "step": 296 }, { "epoch": 0.12, "learning_rate": 1.958926982975701e-05, "loss": 1.4102, "step": 297 }, { "epoch": 0.12, "learning_rate": 1.958558705279739e-05, "loss": 1.4307, "step": 298 }, { "epoch": 0.12, "learning_rate": 1.9581888188102375e-05, "loss": 1.4922, "step": 299 }, { "epoch": 0.12, "learning_rate": 1.957817324187987e-05, "loss": 1.3721, "step": 300 }, { "epoch": 0.12, "learning_rate": 1.9574442220364768e-05, "loss": 1.4785, "step": 301 }, { "epoch": 0.12, "learning_rate": 1.9570695129818928e-05, "loss": 1.4287, "step": 302 }, { "epoch": 0.12, "learning_rate": 1.956693197653119e-05, "loss": 1.4043, "step": 303 }, { "epoch": 0.12, "learning_rate": 1.9563152766817356e-05, "loss": 1.4287, "step": 304 }, { "epoch": 0.12, "learning_rate": 1.9559357507020163e-05, "loss": 1.4355, "step": 305 }, { "epoch": 0.12, "learning_rate": 1.9555546203509297e-05, "loss": 1.3369, "step": 306 }, { "epoch": 0.12, "learning_rate": 1.9551718862681363e-05, "loss": 1.4941, "step": 307 }, { "epoch": 0.12, "learning_rate": 1.9547875490959884e-05, "loss": 1.5059, "step": 308 }, { "epoch": 0.12, "learning_rate": 1.9544016094795294e-05, "loss": 1.457, "step": 309 }, { "epoch": 0.12, "learning_rate": 1.9540140680664915e-05, "loss": 1.4805, "step": 310 }, { "epoch": 0.12, "learning_rate": 1.953624925507295e-05, "loss": 1.4268, "step": 311 }, { "epoch": 0.12, "learning_rate": 1.953234182455048e-05, "loss": 1.4521, "step": 312 }, { "epoch": 0.13, "learning_rate": 1.9528418395655443e-05, "loss": 1.4668, "step": 313 }, { "epoch": 0.13, "learning_rate": 1.952447897497263e-05, "loss": 1.4131, "step": 314 }, { "epoch": 0.13, "learning_rate": 1.952052356911368e-05, "loss": 1.3779, "step": 315 }, { "epoch": 0.13, "learning_rate": 1.9516552184717036e-05, "loss": 1.4385, "step": 316 }, { "epoch": 0.13, "learning_rate": 1.951256482844799e-05, "loss": 1.5264, "step": 317 }, { "epoch": 0.13, "learning_rate": 1.9508561506998613e-05, "loss": 1.4365, "step": 318 }, { "epoch": 0.13, "learning_rate": 1.950454222708778e-05, "loss": 1.4131, "step": 319 }, { "epoch": 0.13, "learning_rate": 1.950050699546116e-05, "loss": 1.4629, "step": 320 }, { "epoch": 0.13, "learning_rate": 1.949645581889118e-05, "loss": 1.4492, "step": 321 }, { "epoch": 0.13, "learning_rate": 1.9492388704177036e-05, "loss": 1.4746, "step": 322 }, { "epoch": 0.13, "learning_rate": 1.9488305658144666e-05, "loss": 1.4463, "step": 323 }, { "epoch": 0.13, "learning_rate": 1.9484206687646753e-05, "loss": 1.5, "step": 324 }, { "epoch": 0.13, "learning_rate": 1.9480091799562706e-05, "loss": 1.3857, "step": 325 }, { "epoch": 0.13, "learning_rate": 1.9475961000798645e-05, "loss": 1.4424, "step": 326 }, { "epoch": 0.13, "learning_rate": 1.947181429828739e-05, "loss": 1.4512, "step": 327 }, { "epoch": 0.13, "learning_rate": 1.9467651698988464e-05, "loss": 1.4072, "step": 328 }, { "epoch": 0.13, "learning_rate": 1.9463473209888063e-05, "loss": 1.4346, "step": 329 }, { "epoch": 0.13, "learning_rate": 1.9459278837999048e-05, "loss": 1.4668, "step": 330 }, { "epoch": 0.13, "learning_rate": 1.9455068590360943e-05, "loss": 1.4326, "step": 331 }, { "epoch": 0.13, "learning_rate": 1.9450842474039914e-05, "loss": 1.4971, "step": 332 }, { "epoch": 0.13, "learning_rate": 1.944660049612876e-05, "loss": 1.4229, "step": 333 }, { "epoch": 0.13, "learning_rate": 1.9442342663746903e-05, "loss": 1.3584, "step": 334 }, { "epoch": 0.13, "learning_rate": 1.9438068984040366e-05, "loss": 1.377, "step": 335 }, { "epoch": 0.13, "learning_rate": 1.943377946418178e-05, "loss": 1.4355, "step": 336 }, { "epoch": 0.13, "learning_rate": 1.942947411137035e-05, "loss": 1.458, "step": 337 }, { "epoch": 0.14, "learning_rate": 1.942515293283187e-05, "loss": 1.4248, "step": 338 }, { "epoch": 0.14, "learning_rate": 1.9420815935818673e-05, "loss": 1.3672, "step": 339 }, { "epoch": 0.14, "learning_rate": 1.9416463127609655e-05, "loss": 1.4746, "step": 340 }, { "epoch": 0.14, "learning_rate": 1.941209451551025e-05, "loss": 1.4346, "step": 341 }, { "epoch": 0.14, "learning_rate": 1.9407710106852405e-05, "loss": 1.4072, "step": 342 }, { "epoch": 0.14, "learning_rate": 1.940330990899459e-05, "loss": 1.418, "step": 343 }, { "epoch": 0.14, "learning_rate": 1.9398893929321763e-05, "loss": 1.5625, "step": 344 }, { "epoch": 0.14, "learning_rate": 1.9394462175245382e-05, "loss": 1.4102, "step": 345 }, { "epoch": 0.14, "learning_rate": 1.939001465420337e-05, "loss": 1.4521, "step": 346 }, { "epoch": 0.14, "learning_rate": 1.9385551373660113e-05, "loss": 1.4062, "step": 347 }, { "epoch": 0.14, "learning_rate": 1.9381072341106453e-05, "loss": 1.4824, "step": 348 }, { "epoch": 0.14, "learning_rate": 1.937657756405966e-05, "loss": 1.4004, "step": 349 }, { "epoch": 0.14, "learning_rate": 1.937206705006344e-05, "loss": 1.3926, "step": 350 }, { "epoch": 0.14, "learning_rate": 1.9367540806687894e-05, "loss": 1.3867, "step": 351 }, { "epoch": 0.14, "learning_rate": 1.9362998841529542e-05, "loss": 1.3926, "step": 352 }, { "epoch": 0.14, "learning_rate": 1.935844116221127e-05, "loss": 1.4209, "step": 353 }, { "epoch": 0.14, "learning_rate": 1.9353867776382357e-05, "loss": 1.4102, "step": 354 }, { "epoch": 0.14, "learning_rate": 1.9349278691718426e-05, "loss": 1.4541, "step": 355 }, { "epoch": 0.14, "learning_rate": 1.934467391592146e-05, "loss": 1.3955, "step": 356 }, { "epoch": 0.14, "learning_rate": 1.9340053456719768e-05, "loss": 1.4619, "step": 357 }, { "epoch": 0.14, "learning_rate": 1.9335417321867988e-05, "loss": 1.4102, "step": 358 }, { "epoch": 0.14, "learning_rate": 1.9330765519147058e-05, "loss": 1.4248, "step": 359 }, { "epoch": 0.14, "learning_rate": 1.9326098056364224e-05, "loss": 1.4355, "step": 360 }, { "epoch": 0.14, "learning_rate": 1.9321414941353006e-05, "loss": 1.4854, "step": 361 }, { "epoch": 0.14, "learning_rate": 1.931671618197319e-05, "loss": 1.4688, "step": 362 }, { "epoch": 0.15, "learning_rate": 1.931200178611083e-05, "loss": 1.4258, "step": 363 }, { "epoch": 0.15, "learning_rate": 1.9307271761678214e-05, "loss": 1.4229, "step": 364 }, { "epoch": 0.15, "learning_rate": 1.9302526116613863e-05, "loss": 1.5088, "step": 365 }, { "epoch": 0.15, "learning_rate": 1.9297764858882516e-05, "loss": 1.5566, "step": 366 }, { "epoch": 0.15, "learning_rate": 1.9292987996475113e-05, "loss": 1.4375, "step": 367 }, { "epoch": 0.15, "learning_rate": 1.928819553740878e-05, "loss": 1.5156, "step": 368 }, { "epoch": 0.15, "learning_rate": 1.9283387489726827e-05, "loss": 1.4072, "step": 369 }, { "epoch": 0.15, "learning_rate": 1.9278563861498726e-05, "loss": 1.4883, "step": 370 }, { "epoch": 0.15, "learning_rate": 1.9273724660820086e-05, "loss": 1.5059, "step": 371 }, { "epoch": 0.15, "learning_rate": 1.9268869895812673e-05, "loss": 1.4268, "step": 372 }, { "epoch": 0.15, "learning_rate": 1.9263999574624357e-05, "loss": 1.4385, "step": 373 }, { "epoch": 0.15, "learning_rate": 1.925911370542912e-05, "loss": 1.4102, "step": 374 }, { "epoch": 0.15, "learning_rate": 1.9254212296427043e-05, "loss": 1.5, "step": 375 }, { "epoch": 0.15, "learning_rate": 1.9249295355844286e-05, "loss": 1.4131, "step": 376 }, { "epoch": 0.15, "learning_rate": 1.9244362891933077e-05, "loss": 1.3721, "step": 377 }, { "epoch": 0.15, "learning_rate": 1.9239414912971697e-05, "loss": 1.4131, "step": 378 }, { "epoch": 0.15, "learning_rate": 1.923445142726446e-05, "loss": 1.4053, "step": 379 }, { "epoch": 0.15, "learning_rate": 1.922947244314172e-05, "loss": 1.4434, "step": 380 }, { "epoch": 0.15, "learning_rate": 1.922447796895982e-05, "loss": 1.4307, "step": 381 }, { "epoch": 0.15, "learning_rate": 1.9219468013101123e-05, "loss": 1.4932, "step": 382 }, { "epoch": 0.15, "learning_rate": 1.9214442583973965e-05, "loss": 1.458, "step": 383 }, { "epoch": 0.15, "learning_rate": 1.920940169001265e-05, "loss": 1.4062, "step": 384 }, { "epoch": 0.15, "learning_rate": 1.9204345339677442e-05, "loss": 1.4492, "step": 385 }, { "epoch": 0.15, "learning_rate": 1.919927354145454e-05, "loss": 1.5049, "step": 386 }, { "epoch": 0.15, "learning_rate": 1.919418630385607e-05, "loss": 1.4668, "step": 387 }, { "epoch": 0.16, "learning_rate": 1.9189083635420077e-05, "loss": 1.4297, "step": 388 }, { "epoch": 0.16, "learning_rate": 1.9183965544710495e-05, "loss": 1.4932, "step": 389 }, { "epoch": 0.16, "learning_rate": 1.9178832040317153e-05, "loss": 1.46, "step": 390 }, { "epoch": 0.16, "learning_rate": 1.9173683130855737e-05, "loss": 1.4316, "step": 391 }, { "epoch": 0.16, "learning_rate": 1.9168518824967797e-05, "loss": 1.3564, "step": 392 }, { "epoch": 0.16, "learning_rate": 1.916333913132072e-05, "loss": 1.3408, "step": 393 }, { "epoch": 0.16, "learning_rate": 1.915814405860771e-05, "loss": 1.4385, "step": 394 }, { "epoch": 0.16, "learning_rate": 1.91529336155478e-05, "loss": 1.3857, "step": 395 }, { "epoch": 0.16, "learning_rate": 1.9147707810885798e-05, "loss": 1.4033, "step": 396 }, { "epoch": 0.16, "learning_rate": 1.9142466653392317e-05, "loss": 1.4424, "step": 397 }, { "epoch": 0.16, "learning_rate": 1.913721015186372e-05, "loss": 1.4443, "step": 398 }, { "epoch": 0.16, "learning_rate": 1.913193831512213e-05, "loss": 1.5137, "step": 399 }, { "epoch": 0.16, "learning_rate": 1.9126651152015404e-05, "loss": 1.4707, "step": 400 }, { "epoch": 0.16, "learning_rate": 1.912134867141712e-05, "loss": 1.4033, "step": 401 }, { "epoch": 0.16, "learning_rate": 1.911603088222657e-05, "loss": 1.4209, "step": 402 }, { "epoch": 0.16, "learning_rate": 1.9110697793368733e-05, "loss": 1.499, "step": 403 }, { "epoch": 0.16, "learning_rate": 1.9105349413794272e-05, "loss": 1.4219, "step": 404 }, { "epoch": 0.16, "learning_rate": 1.9099985752479505e-05, "loss": 1.4053, "step": 405 }, { "epoch": 0.16, "learning_rate": 1.9094606818426403e-05, "loss": 1.4355, "step": 406 }, { "epoch": 0.16, "learning_rate": 1.908921262066257e-05, "loss": 1.3486, "step": 407 }, { "epoch": 0.16, "learning_rate": 1.9083803168241225e-05, "loss": 1.4854, "step": 408 }, { "epoch": 0.16, "learning_rate": 1.9078378470241183e-05, "loss": 1.4424, "step": 409 }, { "epoch": 0.16, "learning_rate": 1.9072938535766864e-05, "loss": 1.4609, "step": 410 }, { "epoch": 0.16, "learning_rate": 1.9067483373948245e-05, "loss": 1.4482, "step": 411 }, { "epoch": 0.16, "learning_rate": 1.906201299394086e-05, "loss": 1.3643, "step": 412 }, { "epoch": 0.17, "learning_rate": 1.9056527404925788e-05, "loss": 1.3994, "step": 413 }, { "epoch": 0.17, "learning_rate": 1.9051026616109637e-05, "loss": 1.415, "step": 414 }, { "epoch": 0.17, "learning_rate": 1.904551063672452e-05, "loss": 1.501, "step": 415 }, { "epoch": 0.17, "learning_rate": 1.9039979476028044e-05, "loss": 1.3438, "step": 416 }, { "epoch": 0.17, "learning_rate": 1.90344331433033e-05, "loss": 1.4131, "step": 417 }, { "epoch": 0.17, "learning_rate": 1.9028871647858836e-05, "loss": 1.3994, "step": 418 }, { "epoch": 0.17, "learning_rate": 1.9023294999028654e-05, "loss": 1.3545, "step": 419 }, { "epoch": 0.17, "learning_rate": 1.9017703206172187e-05, "loss": 1.4658, "step": 420 }, { "epoch": 0.17, "learning_rate": 1.9012096278674283e-05, "loss": 1.4551, "step": 421 }, { "epoch": 0.17, "learning_rate": 1.900647422594519e-05, "loss": 1.3691, "step": 422 }, { "epoch": 0.17, "learning_rate": 1.900083705742054e-05, "loss": 1.3613, "step": 423 }, { "epoch": 0.17, "learning_rate": 1.8995184782561343e-05, "loss": 1.4658, "step": 424 }, { "epoch": 0.17, "learning_rate": 1.8989517410853956e-05, "loss": 1.4512, "step": 425 }, { "epoch": 0.17, "learning_rate": 1.8983834951810068e-05, "loss": 1.4238, "step": 426 }, { "epoch": 0.17, "learning_rate": 1.89781374149667e-05, "loss": 1.4619, "step": 427 }, { "epoch": 0.17, "learning_rate": 1.897242480988617e-05, "loss": 1.3477, "step": 428 }, { "epoch": 0.17, "learning_rate": 1.8966697146156092e-05, "loss": 1.4473, "step": 429 }, { "epoch": 0.17, "learning_rate": 1.896095443338935e-05, "loss": 1.3896, "step": 430 }, { "epoch": 0.17, "learning_rate": 1.895519668122408e-05, "loss": 1.3984, "step": 431 }, { "epoch": 0.17, "learning_rate": 1.894942389932367e-05, "loss": 1.4043, "step": 432 }, { "epoch": 0.17, "learning_rate": 1.8943636097376728e-05, "loss": 1.3613, "step": 433 }, { "epoch": 0.17, "learning_rate": 1.8937833285097067e-05, "loss": 1.4355, "step": 434 }, { "epoch": 0.17, "learning_rate": 1.8932015472223692e-05, "loss": 1.3398, "step": 435 }, { "epoch": 0.17, "learning_rate": 1.8926182668520794e-05, "loss": 1.4189, "step": 436 }, { "epoch": 0.17, "learning_rate": 1.892033488377771e-05, "loss": 1.3545, "step": 437 }, { "epoch": 0.18, "learning_rate": 1.891447212780893e-05, "loss": 1.5049, "step": 438 }, { "epoch": 0.18, "learning_rate": 1.8908594410454068e-05, "loss": 1.4639, "step": 439 }, { "epoch": 0.18, "learning_rate": 1.8902701741577844e-05, "loss": 1.4023, "step": 440 }, { "epoch": 0.18, "learning_rate": 1.8896794131070073e-05, "loss": 1.4229, "step": 441 }, { "epoch": 0.18, "learning_rate": 1.8890871588845653e-05, "loss": 1.4385, "step": 442 }, { "epoch": 0.18, "learning_rate": 1.8884934124844534e-05, "loss": 1.3867, "step": 443 }, { "epoch": 0.18, "learning_rate": 1.8878981749031718e-05, "loss": 1.373, "step": 444 }, { "epoch": 0.18, "learning_rate": 1.8873014471397225e-05, "loss": 1.3525, "step": 445 }, { "epoch": 0.18, "learning_rate": 1.886703230195609e-05, "loss": 1.5332, "step": 446 }, { "epoch": 0.18, "learning_rate": 1.8861035250748343e-05, "loss": 1.4326, "step": 447 }, { "epoch": 0.18, "learning_rate": 1.8855023327838984e-05, "loss": 1.3838, "step": 448 }, { "epoch": 0.18, "learning_rate": 1.8848996543317982e-05, "loss": 1.4014, "step": 449 }, { "epoch": 0.18, "learning_rate": 1.8842954907300236e-05, "loss": 1.4082, "step": 450 }, { "epoch": 0.18, "learning_rate": 1.8836898429925586e-05, "loss": 1.3896, "step": 451 }, { "epoch": 0.18, "learning_rate": 1.883082712135877e-05, "loss": 1.3457, "step": 452 }, { "epoch": 0.18, "learning_rate": 1.8824740991789417e-05, "loss": 1.4199, "step": 453 }, { "epoch": 0.18, "learning_rate": 1.8818640051432036e-05, "loss": 1.4014, "step": 454 }, { "epoch": 0.18, "learning_rate": 1.881252431052599e-05, "loss": 1.4346, "step": 455 }, { "epoch": 0.18, "learning_rate": 1.8806393779335483e-05, "loss": 1.3828, "step": 456 }, { "epoch": 0.18, "learning_rate": 1.8800248468149545e-05, "loss": 1.4082, "step": 457 }, { "epoch": 0.18, "learning_rate": 1.8794088387282e-05, "loss": 1.4326, "step": 458 }, { "epoch": 0.18, "learning_rate": 1.8787913547071485e-05, "loss": 1.3809, "step": 459 }, { "epoch": 0.18, "learning_rate": 1.8781723957881374e-05, "loss": 1.4482, "step": 460 }, { "epoch": 0.18, "learning_rate": 1.8775519630099822e-05, "loss": 1.3574, "step": 461 }, { "epoch": 0.18, "learning_rate": 1.876930057413971e-05, "loss": 1.4219, "step": 462 }, { "epoch": 0.19, "learning_rate": 1.8763066800438638e-05, "loss": 1.3574, "step": 463 }, { "epoch": 0.19, "learning_rate": 1.875681831945891e-05, "loss": 1.4336, "step": 464 }, { "epoch": 0.19, "learning_rate": 1.87505551416875e-05, "loss": 1.4502, "step": 465 }, { "epoch": 0.19, "learning_rate": 1.874427727763607e-05, "loss": 1.4619, "step": 466 }, { "epoch": 0.19, "learning_rate": 1.873798473784092e-05, "loss": 1.3135, "step": 467 }, { "epoch": 0.19, "learning_rate": 1.8731677532862975e-05, "loss": 1.4795, "step": 468 }, { "epoch": 0.19, "learning_rate": 1.872535567328778e-05, "loss": 1.3867, "step": 469 }, { "epoch": 0.19, "learning_rate": 1.871901916972547e-05, "loss": 1.4121, "step": 470 }, { "epoch": 0.19, "learning_rate": 1.8712668032810767e-05, "loss": 1.4492, "step": 471 }, { "epoch": 0.19, "learning_rate": 1.870630227320294e-05, "loss": 1.3682, "step": 472 }, { "epoch": 0.19, "learning_rate": 1.8699921901585814e-05, "loss": 1.4053, "step": 473 }, { "epoch": 0.19, "learning_rate": 1.8693526928667724e-05, "loss": 1.3916, "step": 474 }, { "epoch": 0.19, "learning_rate": 1.8687117365181514e-05, "loss": 1.4316, "step": 475 }, { "epoch": 0.19, "learning_rate": 1.868069322188452e-05, "loss": 1.3076, "step": 476 }, { "epoch": 0.19, "learning_rate": 1.8674254509558544e-05, "loss": 1.3984, "step": 477 }, { "epoch": 0.19, "learning_rate": 1.8667801239009845e-05, "loss": 1.4004, "step": 478 }, { "epoch": 0.19, "learning_rate": 1.866133342106911e-05, "loss": 1.3994, "step": 479 }, { "epoch": 0.19, "learning_rate": 1.865485106659145e-05, "loss": 1.3867, "step": 480 }, { "epoch": 0.19, "learning_rate": 1.864835418645635e-05, "loss": 1.4326, "step": 481 }, { "epoch": 0.19, "learning_rate": 1.86418427915677e-05, "loss": 1.3486, "step": 482 }, { "epoch": 0.19, "learning_rate": 1.863531689285374e-05, "loss": 1.4521, "step": 483 }, { "epoch": 0.19, "learning_rate": 1.8628776501267052e-05, "loss": 1.4434, "step": 484 }, { "epoch": 0.19, "learning_rate": 1.862222162778454e-05, "loss": 1.3779, "step": 485 }, { "epoch": 0.19, "learning_rate": 1.861565228340742e-05, "loss": 1.4199, "step": 486 }, { "epoch": 0.19, "learning_rate": 1.8609068479161182e-05, "loss": 1.4375, "step": 487 }, { "epoch": 0.2, "learning_rate": 1.8602470226095602e-05, "loss": 1.373, "step": 488 }, { "epoch": 0.2, "learning_rate": 1.8595857535284692e-05, "loss": 1.4316, "step": 489 }, { "epoch": 0.2, "learning_rate": 1.85892304178267e-05, "loss": 1.4111, "step": 490 }, { "epoch": 0.2, "learning_rate": 1.8582588884844086e-05, "loss": 1.3984, "step": 491 }, { "epoch": 0.2, "learning_rate": 1.8575932947483503e-05, "loss": 1.4316, "step": 492 }, { "epoch": 0.2, "learning_rate": 1.8569262616915784e-05, "loss": 1.2969, "step": 493 }, { "epoch": 0.2, "learning_rate": 1.8562577904335913e-05, "loss": 1.3887, "step": 494 }, { "epoch": 0.2, "learning_rate": 1.8555878820963014e-05, "loss": 1.416, "step": 495 }, { "epoch": 0.2, "learning_rate": 1.8549165378040328e-05, "loss": 1.4561, "step": 496 }, { "epoch": 0.2, "learning_rate": 1.8542437586835202e-05, "loss": 1.3652, "step": 497 }, { "epoch": 0.2, "learning_rate": 1.8535695458639056e-05, "loss": 1.3721, "step": 498 }, { "epoch": 0.2, "learning_rate": 1.8528939004767377e-05, "loss": 1.3525, "step": 499 }, { "epoch": 0.2, "learning_rate": 1.8522168236559693e-05, "loss": 1.4189, "step": 500 }, { "epoch": 0.2, "learning_rate": 1.851538316537956e-05, "loss": 1.4209, "step": 501 }, { "epoch": 0.2, "learning_rate": 1.8508583802614534e-05, "loss": 1.4385, "step": 502 }, { "epoch": 0.2, "learning_rate": 1.8501770159676157e-05, "loss": 1.4746, "step": 503 }, { "epoch": 0.2, "learning_rate": 1.849494224799994e-05, "loss": 1.457, "step": 504 }, { "epoch": 0.2, "learning_rate": 1.8488100079045345e-05, "loss": 1.4688, "step": 505 }, { "epoch": 0.2, "learning_rate": 1.848124366429576e-05, "loss": 1.3564, "step": 506 }, { "epoch": 0.2, "learning_rate": 1.8474373015258472e-05, "loss": 1.4443, "step": 507 }, { "epoch": 0.2, "learning_rate": 1.846748814346468e-05, "loss": 1.4297, "step": 508 }, { "epoch": 0.2, "learning_rate": 1.846058906046943e-05, "loss": 1.4023, "step": 509 }, { "epoch": 0.2, "learning_rate": 1.8453675777851627e-05, "loss": 1.3867, "step": 510 }, { "epoch": 0.2, "learning_rate": 1.844674830721402e-05, "loss": 1.4014, "step": 511 }, { "epoch": 0.2, "learning_rate": 1.843980666018315e-05, "loss": 1.3984, "step": 512 }, { "epoch": 0.21, "learning_rate": 1.8432850848409367e-05, "loss": 1.4531, "step": 513 }, { "epoch": 0.21, "learning_rate": 1.8425880883566784e-05, "loss": 1.4619, "step": 514 }, { "epoch": 0.21, "learning_rate": 1.8418896777353272e-05, "loss": 1.3965, "step": 515 }, { "epoch": 0.21, "learning_rate": 1.8411898541490433e-05, "loss": 1.334, "step": 516 }, { "epoch": 0.21, "learning_rate": 1.840488618772359e-05, "loss": 1.4189, "step": 517 }, { "epoch": 0.21, "learning_rate": 1.8397859727821747e-05, "loss": 1.4033, "step": 518 }, { "epoch": 0.21, "learning_rate": 1.83908191735776e-05, "loss": 1.4453, "step": 519 }, { "epoch": 0.21, "learning_rate": 1.8383764536807486e-05, "loss": 1.46, "step": 520 }, { "epoch": 0.21, "learning_rate": 1.8376695829351378e-05, "loss": 1.4932, "step": 521 }, { "epoch": 0.21, "learning_rate": 1.8369613063072875e-05, "loss": 1.3574, "step": 522 }, { "epoch": 0.21, "learning_rate": 1.8362516249859164e-05, "loss": 1.4463, "step": 523 }, { "epoch": 0.21, "learning_rate": 1.8355405401621e-05, "loss": 1.4922, "step": 524 }, { "epoch": 0.21, "learning_rate": 1.8348280530292712e-05, "loss": 1.3535, "step": 525 }, { "epoch": 0.21, "learning_rate": 1.834114164783215e-05, "loss": 1.4766, "step": 526 }, { "epoch": 0.21, "learning_rate": 1.8333988766220676e-05, "loss": 1.4707, "step": 527 }, { "epoch": 0.21, "learning_rate": 1.832682189746316e-05, "loss": 1.4375, "step": 528 }, { "epoch": 0.21, "learning_rate": 1.831964105358794e-05, "loss": 1.4736, "step": 529 }, { "epoch": 0.21, "learning_rate": 1.831244624664681e-05, "loss": 1.3779, "step": 530 }, { "epoch": 0.21, "learning_rate": 1.8305237488714995e-05, "loss": 1.4082, "step": 531 }, { "epoch": 0.21, "learning_rate": 1.8298014791891138e-05, "loss": 1.4062, "step": 532 }, { "epoch": 0.21, "learning_rate": 1.829077816829728e-05, "loss": 1.4209, "step": 533 }, { "epoch": 0.21, "learning_rate": 1.8283527630078827e-05, "loss": 1.4053, "step": 534 }, { "epoch": 0.21, "learning_rate": 1.827626318940454e-05, "loss": 1.3584, "step": 535 }, { "epoch": 0.21, "learning_rate": 1.8268984858466524e-05, "loss": 1.4375, "step": 536 }, { "epoch": 0.21, "learning_rate": 1.8261692649480174e-05, "loss": 1.334, "step": 537 }, { "epoch": 0.22, "learning_rate": 1.8254386574684205e-05, "loss": 1.3594, "step": 538 }, { "epoch": 0.22, "learning_rate": 1.824706664634058e-05, "loss": 1.5459, "step": 539 }, { "epoch": 0.22, "learning_rate": 1.8239732876734525e-05, "loss": 1.3613, "step": 540 }, { "epoch": 0.22, "learning_rate": 1.823238527817449e-05, "loss": 1.4756, "step": 541 }, { "epoch": 0.22, "learning_rate": 1.822502386299214e-05, "loss": 1.3936, "step": 542 }, { "epoch": 0.22, "learning_rate": 1.8217648643542326e-05, "loss": 1.3623, "step": 543 }, { "epoch": 0.22, "learning_rate": 1.8210259632203063e-05, "loss": 1.3477, "step": 544 }, { "epoch": 0.22, "learning_rate": 1.8202856841375517e-05, "loss": 1.4717, "step": 545 }, { "epoch": 0.22, "learning_rate": 1.819544028348399e-05, "loss": 1.4043, "step": 546 }, { "epoch": 0.22, "learning_rate": 1.818800997097587e-05, "loss": 1.4883, "step": 547 }, { "epoch": 0.22, "learning_rate": 1.8180565916321646e-05, "loss": 1.4229, "step": 548 }, { "epoch": 0.22, "learning_rate": 1.817310813201486e-05, "loss": 1.374, "step": 549 }, { "epoch": 0.22, "learning_rate": 1.816563663057211e-05, "loss": 1.458, "step": 550 }, { "epoch": 0.22, "learning_rate": 1.8158151424533002e-05, "loss": 1.4473, "step": 551 }, { "epoch": 0.22, "learning_rate": 1.8150652526460146e-05, "loss": 1.4561, "step": 552 }, { "epoch": 0.22, "learning_rate": 1.8143139948939138e-05, "loss": 1.3682, "step": 553 }, { "epoch": 0.22, "learning_rate": 1.8135613704578525e-05, "loss": 1.3135, "step": 554 }, { "epoch": 0.22, "learning_rate": 1.81280738060098e-05, "loss": 1.3945, "step": 555 }, { "epoch": 0.22, "learning_rate": 1.8120520265887364e-05, "loss": 1.4326, "step": 556 }, { "epoch": 0.22, "learning_rate": 1.8112953096888517e-05, "loss": 1.4072, "step": 557 }, { "epoch": 0.22, "learning_rate": 1.810537231171343e-05, "loss": 1.4609, "step": 558 }, { "epoch": 0.22, "learning_rate": 1.809777792308513e-05, "loss": 1.4219, "step": 559 }, { "epoch": 0.22, "learning_rate": 1.8090169943749477e-05, "loss": 1.4355, "step": 560 }, { "epoch": 0.22, "learning_rate": 1.808254838647513e-05, "loss": 1.4941, "step": 561 }, { "epoch": 0.22, "learning_rate": 1.8074913264053547e-05, "loss": 1.3311, "step": 562 }, { "epoch": 0.23, "learning_rate": 1.8067264589298945e-05, "loss": 1.4482, "step": 563 }, { "epoch": 0.23, "learning_rate": 1.8059602375048294e-05, "loss": 1.3672, "step": 564 }, { "epoch": 0.23, "learning_rate": 1.8051926634161282e-05, "loss": 1.4277, "step": 565 }, { "epoch": 0.23, "learning_rate": 1.8044237379520305e-05, "loss": 1.3555, "step": 566 }, { "epoch": 0.23, "learning_rate": 1.8036534624030428e-05, "loss": 1.3926, "step": 567 }, { "epoch": 0.23, "learning_rate": 1.802881838061939e-05, "loss": 1.3418, "step": 568 }, { "epoch": 0.23, "learning_rate": 1.802108866223755e-05, "loss": 1.4443, "step": 569 }, { "epoch": 0.23, "learning_rate": 1.8013345481857903e-05, "loss": 1.373, "step": 570 }, { "epoch": 0.23, "learning_rate": 1.8005588852476018e-05, "loss": 1.4268, "step": 571 }, { "epoch": 0.23, "learning_rate": 1.7997818787110043e-05, "loss": 1.4395, "step": 572 }, { "epoch": 0.23, "learning_rate": 1.7990035298800682e-05, "loss": 1.4219, "step": 573 }, { "epoch": 0.23, "learning_rate": 1.798223840061116e-05, "loss": 1.376, "step": 574 }, { "epoch": 0.23, "learning_rate": 1.797442810562721e-05, "loss": 1.4727, "step": 575 }, { "epoch": 0.23, "learning_rate": 1.796660442695705e-05, "loss": 1.459, "step": 576 }, { "epoch": 0.23, "learning_rate": 1.795876737773136e-05, "loss": 1.4785, "step": 577 }, { "epoch": 0.23, "learning_rate": 1.795091697110326e-05, "loss": 1.3594, "step": 578 }, { "epoch": 0.23, "learning_rate": 1.7943053220248284e-05, "loss": 1.4092, "step": 579 }, { "epoch": 0.23, "learning_rate": 1.793517613836437e-05, "loss": 1.4541, "step": 580 }, { "epoch": 0.23, "learning_rate": 1.7927285738671825e-05, "loss": 1.4531, "step": 581 }, { "epoch": 0.23, "learning_rate": 1.7919382034413306e-05, "loss": 1.4102, "step": 582 }, { "epoch": 0.23, "learning_rate": 1.7911465038853805e-05, "loss": 1.3701, "step": 583 }, { "epoch": 0.23, "learning_rate": 1.7903534765280616e-05, "loss": 1.4736, "step": 584 }, { "epoch": 0.23, "learning_rate": 1.7895591227003316e-05, "loss": 1.3691, "step": 585 }, { "epoch": 0.23, "learning_rate": 1.7887634437353754e-05, "loss": 1.3496, "step": 586 }, { "epoch": 0.23, "learning_rate": 1.7879664409686007e-05, "loss": 1.4561, "step": 587 }, { "epoch": 0.24, "learning_rate": 1.7871681157376382e-05, "loss": 1.4307, "step": 588 }, { "epoch": 0.24, "learning_rate": 1.7863684693823375e-05, "loss": 1.4453, "step": 589 }, { "epoch": 0.24, "learning_rate": 1.7855675032447648e-05, "loss": 1.4336, "step": 590 }, { "epoch": 0.24, "learning_rate": 1.7847652186692025e-05, "loss": 1.3633, "step": 591 }, { "epoch": 0.24, "learning_rate": 1.7839616170021452e-05, "loss": 1.2949, "step": 592 }, { "epoch": 0.24, "learning_rate": 1.7831566995922983e-05, "loss": 1.335, "step": 593 }, { "epoch": 0.24, "learning_rate": 1.782350467790575e-05, "loss": 1.4102, "step": 594 }, { "epoch": 0.24, "learning_rate": 1.7815429229500946e-05, "loss": 1.3105, "step": 595 }, { "epoch": 0.24, "learning_rate": 1.78073406642618e-05, "loss": 1.4746, "step": 596 }, { "epoch": 0.24, "learning_rate": 1.779923899576357e-05, "loss": 1.334, "step": 597 }, { "epoch": 0.24, "learning_rate": 1.7791124237603477e-05, "loss": 1.376, "step": 598 }, { "epoch": 0.24, "learning_rate": 1.7782996403400737e-05, "loss": 1.3945, "step": 599 }, { "epoch": 0.24, "learning_rate": 1.7774855506796497e-05, "loss": 1.4199, "step": 600 }, { "epoch": 0.24, "learning_rate": 1.776670156145383e-05, "loss": 1.4082, "step": 601 }, { "epoch": 0.24, "learning_rate": 1.775853458105772e-05, "loss": 1.4014, "step": 602 }, { "epoch": 0.24, "learning_rate": 1.7750354579315004e-05, "loss": 1.4424, "step": 603 }, { "epoch": 0.24, "learning_rate": 1.77421615699544e-05, "loss": 1.3945, "step": 604 }, { "epoch": 0.24, "learning_rate": 1.7733955566726438e-05, "loss": 1.3662, "step": 605 }, { "epoch": 0.24, "learning_rate": 1.772573658340347e-05, "loss": 1.3555, "step": 606 }, { "epoch": 0.24, "learning_rate": 1.7717504633779618e-05, "loss": 1.3652, "step": 607 }, { "epoch": 0.24, "learning_rate": 1.7709259731670774e-05, "loss": 1.3818, "step": 608 }, { "epoch": 0.24, "learning_rate": 1.770100189091457e-05, "loss": 1.4297, "step": 609 }, { "epoch": 0.24, "learning_rate": 1.7692731125370355e-05, "loss": 1.3555, "step": 610 }, { "epoch": 0.24, "learning_rate": 1.7684447448919156e-05, "loss": 1.4287, "step": 611 }, { "epoch": 0.24, "learning_rate": 1.7676150875463688e-05, "loss": 1.3848, "step": 612 }, { "epoch": 0.25, "learning_rate": 1.7667841418928292e-05, "loss": 1.4072, "step": 613 }, { "epoch": 0.25, "learning_rate": 1.765951909325895e-05, "loss": 1.3564, "step": 614 }, { "epoch": 0.25, "learning_rate": 1.7651183912423228e-05, "loss": 1.4326, "step": 615 }, { "epoch": 0.25, "learning_rate": 1.764283589041028e-05, "loss": 1.4199, "step": 616 }, { "epoch": 0.25, "learning_rate": 1.7634475041230796e-05, "loss": 1.4863, "step": 617 }, { "epoch": 0.25, "learning_rate": 1.7626101378917004e-05, "loss": 1.3281, "step": 618 }, { "epoch": 0.25, "learning_rate": 1.761771491752264e-05, "loss": 1.3623, "step": 619 }, { "epoch": 0.25, "learning_rate": 1.7609315671122912e-05, "loss": 1.3916, "step": 620 }, { "epoch": 0.25, "learning_rate": 1.760090365381449e-05, "loss": 1.4209, "step": 621 }, { "epoch": 0.25, "learning_rate": 1.759247887971548e-05, "loss": 1.4082, "step": 622 }, { "epoch": 0.25, "learning_rate": 1.7584041362965397e-05, "loss": 1.3516, "step": 623 }, { "epoch": 0.25, "learning_rate": 1.7575591117725132e-05, "loss": 1.3799, "step": 624 }, { "epoch": 0.25, "learning_rate": 1.7567128158176955e-05, "loss": 1.4043, "step": 625 }, { "epoch": 0.25, "learning_rate": 1.7558652498524464e-05, "loss": 1.3711, "step": 626 }, { "epoch": 0.25, "learning_rate": 1.7550164152992573e-05, "loss": 1.4336, "step": 627 }, { "epoch": 0.25, "learning_rate": 1.7541663135827493e-05, "loss": 1.3174, "step": 628 }, { "epoch": 0.25, "learning_rate": 1.75331494612967e-05, "loss": 1.3594, "step": 629 }, { "epoch": 0.25, "learning_rate": 1.7524623143688905e-05, "loss": 1.417, "step": 630 }, { "epoch": 0.25, "learning_rate": 1.7516084197314044e-05, "loss": 1.3721, "step": 631 }, { "epoch": 0.25, "learning_rate": 1.7507532636503256e-05, "loss": 1.416, "step": 632 }, { "epoch": 0.25, "learning_rate": 1.749896847560884e-05, "loss": 1.3857, "step": 633 }, { "epoch": 0.25, "learning_rate": 1.7490391729004242e-05, "loss": 1.3379, "step": 634 }, { "epoch": 0.25, "learning_rate": 1.748180241108404e-05, "loss": 1.3799, "step": 635 }, { "epoch": 0.25, "learning_rate": 1.7473200536263905e-05, "loss": 1.3418, "step": 636 }, { "epoch": 0.25, "learning_rate": 1.746458611898058e-05, "loss": 1.3789, "step": 637 }, { "epoch": 0.26, "learning_rate": 1.7455959173691863e-05, "loss": 1.3574, "step": 638 }, { "epoch": 0.26, "learning_rate": 1.744731971487658e-05, "loss": 1.4443, "step": 639 }, { "epoch": 0.26, "learning_rate": 1.7438667757034547e-05, "loss": 1.334, "step": 640 }, { "epoch": 0.26, "learning_rate": 1.743000331468657e-05, "loss": 1.4082, "step": 641 }, { "epoch": 0.26, "learning_rate": 1.7421326402374406e-05, "loss": 1.3994, "step": 642 }, { "epoch": 0.26, "learning_rate": 1.7412637034660735e-05, "loss": 1.3271, "step": 643 }, { "epoch": 0.26, "learning_rate": 1.740393522612915e-05, "loss": 1.4023, "step": 644 }, { "epoch": 0.26, "learning_rate": 1.739522099138411e-05, "loss": 1.3633, "step": 645 }, { "epoch": 0.26, "learning_rate": 1.7386494345050944e-05, "loss": 1.3643, "step": 646 }, { "epoch": 0.26, "learning_rate": 1.73777553017758e-05, "loss": 1.3506, "step": 647 }, { "epoch": 0.26, "learning_rate": 1.7369003876225644e-05, "loss": 1.3779, "step": 648 }, { "epoch": 0.26, "learning_rate": 1.7360240083088213e-05, "loss": 1.4336, "step": 649 }, { "epoch": 0.26, "learning_rate": 1.7351463937072008e-05, "loss": 1.3984, "step": 650 }, { "epoch": 0.26, "learning_rate": 1.734267545290625e-05, "loss": 1.416, "step": 651 }, { "epoch": 0.26, "learning_rate": 1.7333874645340886e-05, "loss": 1.3916, "step": 652 }, { "epoch": 0.26, "learning_rate": 1.7325061529146528e-05, "loss": 1.3975, "step": 653 }, { "epoch": 0.26, "learning_rate": 1.7316236119114466e-05, "loss": 1.4092, "step": 654 }, { "epoch": 0.26, "learning_rate": 1.7307398430056595e-05, "loss": 1.3711, "step": 655 }, { "epoch": 0.26, "learning_rate": 1.7298548476805446e-05, "loss": 1.4229, "step": 656 }, { "epoch": 0.26, "learning_rate": 1.7289686274214116e-05, "loss": 1.3867, "step": 657 }, { "epoch": 0.26, "learning_rate": 1.7280811837156268e-05, "loss": 1.4219, "step": 658 }, { "epoch": 0.26, "learning_rate": 1.7271925180526094e-05, "loss": 1.3301, "step": 659 }, { "epoch": 0.26, "learning_rate": 1.72630263192383e-05, "loss": 1.3164, "step": 660 }, { "epoch": 0.26, "learning_rate": 1.7254115268228073e-05, "loss": 1.332, "step": 661 }, { "epoch": 0.26, "learning_rate": 1.724519204245105e-05, "loss": 1.3936, "step": 662 }, { "epoch": 0.27, "learning_rate": 1.723625665688331e-05, "loss": 1.335, "step": 663 }, { "epoch": 0.27, "learning_rate": 1.7227309126521347e-05, "loss": 1.3203, "step": 664 }, { "epoch": 0.27, "learning_rate": 1.7218349466382024e-05, "loss": 1.3398, "step": 665 }, { "epoch": 0.27, "learning_rate": 1.7209377691502565e-05, "loss": 1.3828, "step": 666 }, { "epoch": 0.27, "learning_rate": 1.720039381694053e-05, "loss": 1.4053, "step": 667 }, { "epoch": 0.27, "learning_rate": 1.7191397857773787e-05, "loss": 1.3994, "step": 668 }, { "epoch": 0.27, "learning_rate": 1.7182389829100484e-05, "loss": 1.3643, "step": 669 }, { "epoch": 0.27, "learning_rate": 1.7173369746039026e-05, "loss": 1.3633, "step": 670 }, { "epoch": 0.27, "learning_rate": 1.7164337623728044e-05, "loss": 1.4482, "step": 671 }, { "epoch": 0.27, "learning_rate": 1.7155293477326385e-05, "loss": 1.4521, "step": 672 }, { "epoch": 0.27, "learning_rate": 1.714623732201307e-05, "loss": 1.3643, "step": 673 }, { "epoch": 0.27, "learning_rate": 1.713716917298727e-05, "loss": 1.4111, "step": 674 }, { "epoch": 0.27, "learning_rate": 1.7128089045468294e-05, "loss": 1.4355, "step": 675 }, { "epoch": 0.27, "learning_rate": 1.7118996954695553e-05, "loss": 1.3184, "step": 676 }, { "epoch": 0.27, "learning_rate": 1.7109892915928535e-05, "loss": 1.4141, "step": 677 }, { "epoch": 0.27, "learning_rate": 1.7100776944446783e-05, "loss": 1.3594, "step": 678 }, { "epoch": 0.27, "learning_rate": 1.709164905554986e-05, "loss": 1.4268, "step": 679 }, { "epoch": 0.27, "learning_rate": 1.7082509264557333e-05, "loss": 1.3691, "step": 680 }, { "epoch": 0.27, "learning_rate": 1.7073357586808753e-05, "loss": 1.2998, "step": 681 }, { "epoch": 0.27, "learning_rate": 1.706419403766361e-05, "loss": 1.4463, "step": 682 }, { "epoch": 0.27, "learning_rate": 1.7055018632501326e-05, "loss": 1.3945, "step": 683 }, { "epoch": 0.27, "learning_rate": 1.7045831386721213e-05, "loss": 1.3428, "step": 684 }, { "epoch": 0.27, "learning_rate": 1.7036632315742464e-05, "loss": 1.3936, "step": 685 }, { "epoch": 0.27, "learning_rate": 1.7027421435004114e-05, "loss": 1.3994, "step": 686 }, { "epoch": 0.27, "learning_rate": 1.7018198759965018e-05, "loss": 1.4004, "step": 687 }, { "epoch": 0.28, "learning_rate": 1.7008964306103823e-05, "loss": 1.3838, "step": 688 }, { "epoch": 0.28, "learning_rate": 1.6999718088918956e-05, "loss": 1.3799, "step": 689 }, { "epoch": 0.28, "learning_rate": 1.6990460123928577e-05, "loss": 1.4492, "step": 690 }, { "epoch": 0.28, "learning_rate": 1.698119042667056e-05, "loss": 1.4658, "step": 691 }, { "epoch": 0.28, "learning_rate": 1.6971909012702483e-05, "loss": 1.3584, "step": 692 }, { "epoch": 0.28, "learning_rate": 1.6962615897601573e-05, "loss": 1.4229, "step": 693 }, { "epoch": 0.28, "learning_rate": 1.6953311096964706e-05, "loss": 1.4004, "step": 694 }, { "epoch": 0.28, "learning_rate": 1.6943994626408365e-05, "loss": 1.3008, "step": 695 }, { "epoch": 0.28, "learning_rate": 1.6934666501568618e-05, "loss": 1.3955, "step": 696 }, { "epoch": 0.28, "learning_rate": 1.69253267381011e-05, "loss": 1.3408, "step": 697 }, { "epoch": 0.28, "learning_rate": 1.6915975351680968e-05, "loss": 1.3389, "step": 698 }, { "epoch": 0.28, "learning_rate": 1.69066123580029e-05, "loss": 1.4854, "step": 699 }, { "epoch": 0.28, "learning_rate": 1.6897237772781046e-05, "loss": 1.3857, "step": 700 }, { "epoch": 0.28, "learning_rate": 1.6887851611749005e-05, "loss": 1.3672, "step": 701 }, { "epoch": 0.28, "learning_rate": 1.6878453890659815e-05, "loss": 1.418, "step": 702 }, { "epoch": 0.28, "learning_rate": 1.686904462528591e-05, "loss": 1.4326, "step": 703 }, { "epoch": 0.28, "learning_rate": 1.68596238314191e-05, "loss": 1.3438, "step": 704 }, { "epoch": 0.28, "learning_rate": 1.6850191524870548e-05, "loss": 1.4014, "step": 705 }, { "epoch": 0.28, "learning_rate": 1.6840747721470733e-05, "loss": 1.415, "step": 706 }, { "epoch": 0.28, "learning_rate": 1.6831292437069425e-05, "loss": 1.3604, "step": 707 }, { "epoch": 0.28, "learning_rate": 1.6821825687535675e-05, "loss": 1.4131, "step": 708 }, { "epoch": 0.28, "learning_rate": 1.6812347488757774e-05, "loss": 1.4326, "step": 709 }, { "epoch": 0.28, "learning_rate": 1.6802857856643214e-05, "loss": 1.4238, "step": 710 }, { "epoch": 0.28, "learning_rate": 1.6793356807118695e-05, "loss": 1.4473, "step": 711 }, { "epoch": 0.28, "learning_rate": 1.6783844356130073e-05, "loss": 1.3525, "step": 712 }, { "epoch": 0.29, "learning_rate": 1.677432051964233e-05, "loss": 1.3975, "step": 713 }, { "epoch": 0.29, "learning_rate": 1.6764785313639568e-05, "loss": 1.3643, "step": 714 }, { "epoch": 0.29, "learning_rate": 1.6755238754124965e-05, "loss": 1.4092, "step": 715 }, { "epoch": 0.29, "learning_rate": 1.6745680857120757e-05, "loss": 1.4023, "step": 716 }, { "epoch": 0.29, "learning_rate": 1.6736111638668203e-05, "loss": 1.3955, "step": 717 }, { "epoch": 0.29, "learning_rate": 1.6726531114827572e-05, "loss": 1.3652, "step": 718 }, { "epoch": 0.29, "learning_rate": 1.6716939301678098e-05, "loss": 1.3252, "step": 719 }, { "epoch": 0.29, "learning_rate": 1.6707336215317968e-05, "loss": 1.3496, "step": 720 }, { "epoch": 0.29, "learning_rate": 1.6697721871864286e-05, "loss": 1.3984, "step": 721 }, { "epoch": 0.29, "learning_rate": 1.6688096287453048e-05, "loss": 1.4434, "step": 722 }, { "epoch": 0.29, "learning_rate": 1.6678459478239116e-05, "loss": 1.3975, "step": 723 }, { "epoch": 0.29, "learning_rate": 1.6668811460396202e-05, "loss": 1.4072, "step": 724 }, { "epoch": 0.29, "learning_rate": 1.665915225011681e-05, "loss": 1.4199, "step": 725 }, { "epoch": 0.29, "learning_rate": 1.664948186361225e-05, "loss": 1.335, "step": 726 }, { "epoch": 0.29, "learning_rate": 1.663980031711257e-05, "loss": 1.3535, "step": 727 }, { "epoch": 0.29, "learning_rate": 1.6630107626866558e-05, "loss": 1.3779, "step": 728 }, { "epoch": 0.29, "learning_rate": 1.6620403809141707e-05, "loss": 1.3379, "step": 729 }, { "epoch": 0.29, "learning_rate": 1.6610688880224178e-05, "loss": 1.3604, "step": 730 }, { "epoch": 0.29, "learning_rate": 1.6600962856418782e-05, "loss": 1.3623, "step": 731 }, { "epoch": 0.29, "learning_rate": 1.6591225754048963e-05, "loss": 1.3711, "step": 732 }, { "epoch": 0.29, "learning_rate": 1.6581477589456737e-05, "loss": 1.3359, "step": 733 }, { "epoch": 0.29, "learning_rate": 1.6571718379002705e-05, "loss": 1.375, "step": 734 }, { "epoch": 0.29, "learning_rate": 1.6561948139065997e-05, "loss": 1.416, "step": 735 }, { "epoch": 0.29, "learning_rate": 1.6552166886044253e-05, "loss": 1.4482, "step": 736 }, { "epoch": 0.29, "learning_rate": 1.6542374636353605e-05, "loss": 1.4004, "step": 737 }, { "epoch": 0.3, "learning_rate": 1.653257140642863e-05, "loss": 1.4043, "step": 738 }, { "epoch": 0.3, "learning_rate": 1.6522757212722346e-05, "loss": 1.4199, "step": 739 }, { "epoch": 0.3, "learning_rate": 1.6512932071706153e-05, "loss": 1.3203, "step": 740 }, { "epoch": 0.3, "learning_rate": 1.650309599986985e-05, "loss": 1.3848, "step": 741 }, { "epoch": 0.3, "learning_rate": 1.6493249013721558e-05, "loss": 1.3506, "step": 742 }, { "epoch": 0.3, "learning_rate": 1.6483391129787725e-05, "loss": 1.3164, "step": 743 }, { "epoch": 0.3, "learning_rate": 1.64735223646131e-05, "loss": 1.3867, "step": 744 }, { "epoch": 0.3, "learning_rate": 1.646364273476067e-05, "loss": 1.3359, "step": 745 }, { "epoch": 0.3, "learning_rate": 1.6453752256811676e-05, "loss": 1.3594, "step": 746 }, { "epoch": 0.3, "learning_rate": 1.644385094736556e-05, "loss": 1.4307, "step": 747 }, { "epoch": 0.3, "learning_rate": 1.6433938823039942e-05, "loss": 1.3418, "step": 748 }, { "epoch": 0.3, "learning_rate": 1.642401590047059e-05, "loss": 1.3389, "step": 749 }, { "epoch": 0.3, "learning_rate": 1.6414082196311402e-05, "loss": 1.3584, "step": 750 }, { "epoch": 0.3, "learning_rate": 1.6404137727234366e-05, "loss": 1.417, "step": 751 }, { "epoch": 0.3, "learning_rate": 1.639418250992954e-05, "loss": 1.4189, "step": 752 }, { "epoch": 0.3, "learning_rate": 1.6384216561105014e-05, "loss": 1.3555, "step": 753 }, { "epoch": 0.3, "learning_rate": 1.63742398974869e-05, "loss": 1.3965, "step": 754 }, { "epoch": 0.3, "learning_rate": 1.6364252535819284e-05, "loss": 1.376, "step": 755 }, { "epoch": 0.3, "learning_rate": 1.635425449286421e-05, "loss": 1.3633, "step": 756 }, { "epoch": 0.3, "learning_rate": 1.6344245785401653e-05, "loss": 1.3682, "step": 757 }, { "epoch": 0.3, "learning_rate": 1.6334226430229475e-05, "loss": 1.3672, "step": 758 }, { "epoch": 0.3, "learning_rate": 1.632419644416342e-05, "loss": 1.415, "step": 759 }, { "epoch": 0.3, "learning_rate": 1.6314155844037074e-05, "loss": 1.2754, "step": 760 }, { "epoch": 0.3, "learning_rate": 1.6304104646701818e-05, "loss": 1.4844, "step": 761 }, { "epoch": 0.3, "learning_rate": 1.629404286902685e-05, "loss": 1.3936, "step": 762 }, { "epoch": 0.31, "learning_rate": 1.62839705278991e-05, "loss": 1.3496, "step": 763 }, { "epoch": 0.31, "learning_rate": 1.627388764022323e-05, "loss": 1.3984, "step": 764 }, { "epoch": 0.31, "learning_rate": 1.626379422292162e-05, "loss": 1.3398, "step": 765 }, { "epoch": 0.31, "learning_rate": 1.6253690292934303e-05, "loss": 1.3906, "step": 766 }, { "epoch": 0.31, "learning_rate": 1.624357586721896e-05, "loss": 1.3721, "step": 767 }, { "epoch": 0.31, "learning_rate": 1.6233450962750895e-05, "loss": 1.4004, "step": 768 }, { "epoch": 0.31, "learning_rate": 1.622331559652299e-05, "loss": 1.3086, "step": 769 }, { "epoch": 0.31, "learning_rate": 1.6213169785545688e-05, "loss": 1.4141, "step": 770 }, { "epoch": 0.31, "learning_rate": 1.6203013546846967e-05, "loss": 1.4297, "step": 771 }, { "epoch": 0.31, "learning_rate": 1.61928468974723e-05, "loss": 1.3613, "step": 772 }, { "epoch": 0.31, "learning_rate": 1.618266985448463e-05, "loss": 1.3379, "step": 773 }, { "epoch": 0.31, "learning_rate": 1.6172482434964353e-05, "loss": 1.417, "step": 774 }, { "epoch": 0.31, "learning_rate": 1.6162284656009276e-05, "loss": 1.3936, "step": 775 }, { "epoch": 0.31, "learning_rate": 1.6152076534734585e-05, "loss": 1.3555, "step": 776 }, { "epoch": 0.31, "learning_rate": 1.6141858088272838e-05, "loss": 1.3506, "step": 777 }, { "epoch": 0.31, "learning_rate": 1.6131629333773908e-05, "loss": 1.4326, "step": 778 }, { "epoch": 0.31, "learning_rate": 1.612139028840498e-05, "loss": 1.3398, "step": 779 }, { "epoch": 0.31, "learning_rate": 1.6111140969350504e-05, "loss": 1.416, "step": 780 }, { "epoch": 0.31, "learning_rate": 1.610088139381217e-05, "loss": 1.4248, "step": 781 }, { "epoch": 0.31, "learning_rate": 1.609061157900889e-05, "loss": 1.3184, "step": 782 }, { "epoch": 0.31, "learning_rate": 1.6080331542176754e-05, "loss": 1.2988, "step": 783 }, { "epoch": 0.31, "learning_rate": 1.6070041300569014e-05, "loss": 1.375, "step": 784 }, { "epoch": 0.31, "learning_rate": 1.6059740871456035e-05, "loss": 1.4229, "step": 785 }, { "epoch": 0.31, "learning_rate": 1.60494302721253e-05, "loss": 1.3926, "step": 786 }, { "epoch": 0.31, "learning_rate": 1.603910951988135e-05, "loss": 1.4062, "step": 787 }, { "epoch": 0.32, "learning_rate": 1.602877863204576e-05, "loss": 1.3496, "step": 788 }, { "epoch": 0.32, "learning_rate": 1.6018437625957135e-05, "loss": 1.457, "step": 789 }, { "epoch": 0.32, "learning_rate": 1.6008086518971037e-05, "loss": 1.3525, "step": 790 }, { "epoch": 0.32, "learning_rate": 1.599772532846e-05, "loss": 1.3564, "step": 791 }, { "epoch": 0.32, "learning_rate": 1.598735407181347e-05, "loss": 1.3965, "step": 792 }, { "epoch": 0.32, "learning_rate": 1.5976972766437796e-05, "loss": 1.3203, "step": 793 }, { "epoch": 0.32, "learning_rate": 1.596658142975618e-05, "loss": 1.4414, "step": 794 }, { "epoch": 0.32, "learning_rate": 1.5956180079208684e-05, "loss": 1.3389, "step": 795 }, { "epoch": 0.32, "learning_rate": 1.5945768732252144e-05, "loss": 1.3447, "step": 796 }, { "epoch": 0.32, "learning_rate": 1.5935347406360192e-05, "loss": 1.3672, "step": 797 }, { "epoch": 0.32, "learning_rate": 1.5924916119023214e-05, "loss": 1.373, "step": 798 }, { "epoch": 0.32, "learning_rate": 1.5914474887748297e-05, "loss": 1.3359, "step": 799 }, { "epoch": 0.32, "learning_rate": 1.5904023730059227e-05, "loss": 1.373, "step": 800 }, { "epoch": 0.32, "learning_rate": 1.589356266349645e-05, "loss": 1.3887, "step": 801 }, { "epoch": 0.32, "learning_rate": 1.5883091705617045e-05, "loss": 1.3896, "step": 802 }, { "epoch": 0.32, "learning_rate": 1.5872610873994685e-05, "loss": 1.4629, "step": 803 }, { "epoch": 0.32, "learning_rate": 1.5862120186219614e-05, "loss": 1.4111, "step": 804 }, { "epoch": 0.32, "learning_rate": 1.5851619659898623e-05, "loss": 1.3398, "step": 805 }, { "epoch": 0.32, "learning_rate": 1.5841109312655017e-05, "loss": 1.3848, "step": 806 }, { "epoch": 0.32, "learning_rate": 1.5830589162128574e-05, "loss": 1.3564, "step": 807 }, { "epoch": 0.32, "learning_rate": 1.582005922597553e-05, "loss": 1.3887, "step": 808 }, { "epoch": 0.32, "learning_rate": 1.580951952186856e-05, "loss": 1.4561, "step": 809 }, { "epoch": 0.32, "learning_rate": 1.57989700674967e-05, "loss": 1.3682, "step": 810 }, { "epoch": 0.32, "learning_rate": 1.578841088056538e-05, "loss": 1.4717, "step": 811 }, { "epoch": 0.32, "learning_rate": 1.5777841978796348e-05, "loss": 1.3701, "step": 812 }, { "epoch": 0.33, "learning_rate": 1.5767263379927663e-05, "loss": 1.4473, "step": 813 }, { "epoch": 0.33, "learning_rate": 1.5756675101713657e-05, "loss": 1.3662, "step": 814 }, { "epoch": 0.33, "learning_rate": 1.5746077161924905e-05, "loss": 1.3623, "step": 815 }, { "epoch": 0.33, "learning_rate": 1.573546957834821e-05, "loss": 1.3564, "step": 816 }, { "epoch": 0.33, "learning_rate": 1.572485236878654e-05, "loss": 1.3779, "step": 817 }, { "epoch": 0.33, "learning_rate": 1.5714225551059027e-05, "loss": 1.3438, "step": 818 }, { "epoch": 0.33, "learning_rate": 1.570358914300094e-05, "loss": 1.3408, "step": 819 }, { "epoch": 0.33, "learning_rate": 1.5692943162463628e-05, "loss": 1.3555, "step": 820 }, { "epoch": 0.33, "learning_rate": 1.5682287627314513e-05, "loss": 1.3242, "step": 821 }, { "epoch": 0.33, "learning_rate": 1.5671622555437055e-05, "loss": 1.4229, "step": 822 }, { "epoch": 0.33, "learning_rate": 1.566094796473071e-05, "loss": 1.418, "step": 823 }, { "epoch": 0.33, "learning_rate": 1.565026387311092e-05, "loss": 1.4023, "step": 824 }, { "epoch": 0.33, "learning_rate": 1.5639570298509067e-05, "loss": 1.3809, "step": 825 }, { "epoch": 0.33, "learning_rate": 1.562886725887245e-05, "loss": 1.3838, "step": 826 }, { "epoch": 0.33, "learning_rate": 1.5618154772164257e-05, "loss": 1.4004, "step": 827 }, { "epoch": 0.33, "learning_rate": 1.5607432856363523e-05, "loss": 1.3906, "step": 828 }, { "epoch": 0.33, "learning_rate": 1.559670152946512e-05, "loss": 1.3379, "step": 829 }, { "epoch": 0.33, "learning_rate": 1.5585960809479698e-05, "loss": 1.377, "step": 830 }, { "epoch": 0.33, "learning_rate": 1.5575210714433687e-05, "loss": 1.3906, "step": 831 }, { "epoch": 0.33, "learning_rate": 1.5564451262369247e-05, "loss": 1.3809, "step": 832 }, { "epoch": 0.33, "learning_rate": 1.5553682471344237e-05, "loss": 1.459, "step": 833 }, { "epoch": 0.33, "learning_rate": 1.5542904359432198e-05, "loss": 1.3633, "step": 834 }, { "epoch": 0.33, "learning_rate": 1.5532116944722308e-05, "loss": 1.3301, "step": 835 }, { "epoch": 0.33, "learning_rate": 1.5521320245319364e-05, "loss": 1.377, "step": 836 }, { "epoch": 0.33, "learning_rate": 1.5510514279343736e-05, "loss": 1.3564, "step": 837 }, { "epoch": 0.34, "learning_rate": 1.5499699064931354e-05, "loss": 1.3691, "step": 838 }, { "epoch": 0.34, "learning_rate": 1.5488874620233674e-05, "loss": 1.3701, "step": 839 }, { "epoch": 0.34, "learning_rate": 1.547804096341763e-05, "loss": 1.4004, "step": 840 }, { "epoch": 0.34, "learning_rate": 1.5467198112665632e-05, "loss": 1.3867, "step": 841 }, { "epoch": 0.34, "learning_rate": 1.5456346086175508e-05, "loss": 1.3271, "step": 842 }, { "epoch": 0.34, "learning_rate": 1.5445484902160494e-05, "loss": 1.3496, "step": 843 }, { "epoch": 0.34, "learning_rate": 1.543461457884919e-05, "loss": 1.374, "step": 844 }, { "epoch": 0.34, "learning_rate": 1.5423735134485537e-05, "loss": 1.3848, "step": 845 }, { "epoch": 0.34, "learning_rate": 1.541284658732878e-05, "loss": 1.3613, "step": 846 }, { "epoch": 0.34, "learning_rate": 1.540194895565346e-05, "loss": 1.3672, "step": 847 }, { "epoch": 0.34, "learning_rate": 1.5391042257749338e-05, "loss": 1.4375, "step": 848 }, { "epoch": 0.34, "learning_rate": 1.5380126511921404e-05, "loss": 1.3594, "step": 849 }, { "epoch": 0.34, "learning_rate": 1.536920173648984e-05, "loss": 1.4277, "step": 850 }, { "epoch": 0.34, "learning_rate": 1.5358267949789968e-05, "loss": 1.3545, "step": 851 }, { "epoch": 0.34, "learning_rate": 1.5347325170172246e-05, "loss": 1.3857, "step": 852 }, { "epoch": 0.34, "learning_rate": 1.533637341600221e-05, "loss": 1.4404, "step": 853 }, { "epoch": 0.34, "learning_rate": 1.532541270566049e-05, "loss": 1.2666, "step": 854 }, { "epoch": 0.34, "learning_rate": 1.5314443057542703e-05, "loss": 1.4336, "step": 855 }, { "epoch": 0.34, "learning_rate": 1.5303464490059506e-05, "loss": 1.3672, "step": 856 }, { "epoch": 0.34, "learning_rate": 1.5292477021636498e-05, "loss": 1.4229, "step": 857 }, { "epoch": 0.34, "learning_rate": 1.528148067071423e-05, "loss": 1.4287, "step": 858 }, { "epoch": 0.34, "learning_rate": 1.5270475455748165e-05, "loss": 1.4375, "step": 859 }, { "epoch": 0.34, "learning_rate": 1.5259461395208628e-05, "loss": 1.4482, "step": 860 }, { "epoch": 0.34, "learning_rate": 1.5248438507580806e-05, "loss": 1.4248, "step": 861 }, { "epoch": 0.34, "learning_rate": 1.5237406811364682e-05, "loss": 1.3613, "step": 862 }, { "epoch": 0.35, "learning_rate": 1.5226366325075042e-05, "loss": 1.3525, "step": 863 }, { "epoch": 0.35, "learning_rate": 1.5215317067241415e-05, "loss": 1.3643, "step": 864 }, { "epoch": 0.35, "learning_rate": 1.5204259056408046e-05, "loss": 1.4473, "step": 865 }, { "epoch": 0.35, "learning_rate": 1.5193192311133884e-05, "loss": 1.4385, "step": 866 }, { "epoch": 0.35, "learning_rate": 1.5182116849992528e-05, "loss": 1.3486, "step": 867 }, { "epoch": 0.35, "learning_rate": 1.5171032691572207e-05, "loss": 1.3965, "step": 868 }, { "epoch": 0.35, "learning_rate": 1.5159939854475743e-05, "loss": 1.4121, "step": 869 }, { "epoch": 0.35, "learning_rate": 1.5148838357320537e-05, "loss": 1.3379, "step": 870 }, { "epoch": 0.35, "learning_rate": 1.5137728218738504e-05, "loss": 1.4297, "step": 871 }, { "epoch": 0.35, "learning_rate": 1.512660945737608e-05, "loss": 1.3906, "step": 872 }, { "epoch": 0.35, "learning_rate": 1.5115482091894164e-05, "loss": 1.3662, "step": 873 }, { "epoch": 0.35, "learning_rate": 1.5104346140968096e-05, "loss": 1.4346, "step": 874 }, { "epoch": 0.35, "learning_rate": 1.5093201623287631e-05, "loss": 1.4062, "step": 875 }, { "epoch": 0.35, "learning_rate": 1.5082048557556892e-05, "loss": 1.458, "step": 876 }, { "epoch": 0.35, "learning_rate": 1.507088696249436e-05, "loss": 1.4121, "step": 877 }, { "epoch": 0.35, "learning_rate": 1.505971685683282e-05, "loss": 1.3506, "step": 878 }, { "epoch": 0.35, "learning_rate": 1.5048538259319347e-05, "loss": 1.4092, "step": 879 }, { "epoch": 0.35, "learning_rate": 1.5037351188715265e-05, "loss": 1.4463, "step": 880 }, { "epoch": 0.35, "learning_rate": 1.5026155663796123e-05, "loss": 1.3486, "step": 881 }, { "epoch": 0.35, "learning_rate": 1.5014951703351655e-05, "loss": 1.3799, "step": 882 }, { "epoch": 0.35, "learning_rate": 1.500373932618575e-05, "loss": 1.4248, "step": 883 }, { "epoch": 0.35, "learning_rate": 1.4992518551116436e-05, "loss": 1.4268, "step": 884 }, { "epoch": 0.35, "learning_rate": 1.4981289396975818e-05, "loss": 1.4258, "step": 885 }, { "epoch": 0.35, "learning_rate": 1.4970051882610073e-05, "loss": 1.4092, "step": 886 }, { "epoch": 0.35, "learning_rate": 1.4958806026879411e-05, "loss": 1.4189, "step": 887 }, { "epoch": 0.36, "learning_rate": 1.4947551848658036e-05, "loss": 1.3643, "step": 888 }, { "epoch": 0.36, "learning_rate": 1.4936289366834123e-05, "loss": 1.4131, "step": 889 }, { "epoch": 0.36, "learning_rate": 1.4925018600309784e-05, "loss": 1.3701, "step": 890 }, { "epoch": 0.36, "learning_rate": 1.4913739568001034e-05, "loss": 1.4375, "step": 891 }, { "epoch": 0.36, "learning_rate": 1.4902452288837761e-05, "loss": 1.3867, "step": 892 }, { "epoch": 0.36, "learning_rate": 1.4891156781763692e-05, "loss": 1.3867, "step": 893 }, { "epoch": 0.36, "learning_rate": 1.4879853065736366e-05, "loss": 1.3828, "step": 894 }, { "epoch": 0.36, "learning_rate": 1.4868541159727097e-05, "loss": 1.4033, "step": 895 }, { "epoch": 0.36, "learning_rate": 1.485722108272095e-05, "loss": 1.3213, "step": 896 }, { "epoch": 0.36, "learning_rate": 1.4845892853716692e-05, "loss": 1.376, "step": 897 }, { "epoch": 0.36, "learning_rate": 1.4834556491726781e-05, "loss": 1.3682, "step": 898 }, { "epoch": 0.36, "learning_rate": 1.482321201577733e-05, "loss": 1.3438, "step": 899 }, { "epoch": 0.36, "learning_rate": 1.4811859444908053e-05, "loss": 1.3652, "step": 900 }, { "epoch": 0.36, "learning_rate": 1.4800498798172263e-05, "loss": 1.4521, "step": 901 }, { "epoch": 0.36, "learning_rate": 1.478913009463682e-05, "loss": 1.4121, "step": 902 }, { "epoch": 0.36, "learning_rate": 1.4777753353382121e-05, "loss": 1.335, "step": 903 }, { "epoch": 0.36, "learning_rate": 1.4766368593502028e-05, "loss": 1.3359, "step": 904 }, { "epoch": 0.36, "learning_rate": 1.4754975834103877e-05, "loss": 1.3008, "step": 905 }, { "epoch": 0.36, "learning_rate": 1.474357509430843e-05, "loss": 1.3545, "step": 906 }, { "epoch": 0.36, "learning_rate": 1.473216639324984e-05, "loss": 1.4346, "step": 907 }, { "epoch": 0.36, "learning_rate": 1.472074975007562e-05, "loss": 1.3594, "step": 908 }, { "epoch": 0.36, "learning_rate": 1.4709325183946613e-05, "loss": 1.4209, "step": 909 }, { "epoch": 0.36, "learning_rate": 1.4697892714036959e-05, "loss": 1.3711, "step": 910 }, { "epoch": 0.36, "learning_rate": 1.4686452359534067e-05, "loss": 1.4062, "step": 911 }, { "epoch": 0.36, "learning_rate": 1.467500413963857e-05, "loss": 1.3896, "step": 912 }, { "epoch": 0.37, "learning_rate": 1.4663548073564316e-05, "loss": 1.3379, "step": 913 }, { "epoch": 0.37, "learning_rate": 1.4652084180538304e-05, "loss": 1.374, "step": 914 }, { "epoch": 0.37, "learning_rate": 1.4640612479800686e-05, "loss": 1.4141, "step": 915 }, { "epoch": 0.37, "learning_rate": 1.4629132990604706e-05, "loss": 1.3701, "step": 916 }, { "epoch": 0.37, "learning_rate": 1.4617645732216686e-05, "loss": 1.3232, "step": 917 }, { "epoch": 0.37, "learning_rate": 1.4606150723915984e-05, "loss": 1.3887, "step": 918 }, { "epoch": 0.37, "learning_rate": 1.4594647984994966e-05, "loss": 1.3174, "step": 919 }, { "epoch": 0.37, "learning_rate": 1.4583137534758968e-05, "loss": 1.3926, "step": 920 }, { "epoch": 0.37, "learning_rate": 1.4571619392526279e-05, "loss": 1.3232, "step": 921 }, { "epoch": 0.37, "learning_rate": 1.456009357762809e-05, "loss": 1.3359, "step": 922 }, { "epoch": 0.37, "learning_rate": 1.4548560109408465e-05, "loss": 1.333, "step": 923 }, { "epoch": 0.37, "learning_rate": 1.4537019007224324e-05, "loss": 1.3936, "step": 924 }, { "epoch": 0.37, "learning_rate": 1.4525470290445392e-05, "loss": 1.3447, "step": 925 }, { "epoch": 0.37, "learning_rate": 1.4513913978454169e-05, "loss": 1.3086, "step": 926 }, { "epoch": 0.37, "learning_rate": 1.4502350090645919e-05, "loss": 1.3994, "step": 927 }, { "epoch": 0.37, "learning_rate": 1.4490778646428601e-05, "loss": 1.4473, "step": 928 }, { "epoch": 0.37, "learning_rate": 1.4479199665222869e-05, "loss": 1.3857, "step": 929 }, { "epoch": 0.37, "learning_rate": 1.4467613166462024e-05, "loss": 1.3125, "step": 930 }, { "epoch": 0.37, "learning_rate": 1.445601916959198e-05, "loss": 1.3926, "step": 931 }, { "epoch": 0.37, "learning_rate": 1.4444417694071242e-05, "loss": 1.3789, "step": 932 }, { "epoch": 0.37, "learning_rate": 1.4432808759370853e-05, "loss": 1.335, "step": 933 }, { "epoch": 0.37, "learning_rate": 1.4421192384974396e-05, "loss": 1.3828, "step": 934 }, { "epoch": 0.37, "learning_rate": 1.4409568590377918e-05, "loss": 1.4111, "step": 935 }, { "epoch": 0.37, "learning_rate": 1.439793739508994e-05, "loss": 1.332, "step": 936 }, { "epoch": 0.37, "learning_rate": 1.4386298818631388e-05, "loss": 1.3555, "step": 937 }, { "epoch": 0.38, "learning_rate": 1.437465288053558e-05, "loss": 1.418, "step": 938 }, { "epoch": 0.38, "learning_rate": 1.4362999600348198e-05, "loss": 1.3955, "step": 939 }, { "epoch": 0.38, "learning_rate": 1.4351338997627233e-05, "loss": 1.2959, "step": 940 }, { "epoch": 0.38, "learning_rate": 1.433967109194298e-05, "loss": 1.4229, "step": 941 }, { "epoch": 0.38, "learning_rate": 1.4327995902877972e-05, "loss": 1.3936, "step": 942 }, { "epoch": 0.38, "learning_rate": 1.4316313450026986e-05, "loss": 1.3096, "step": 943 }, { "epoch": 0.38, "learning_rate": 1.4304623752996974e-05, "loss": 1.4189, "step": 944 }, { "epoch": 0.38, "learning_rate": 1.429292683140706e-05, "loss": 1.4082, "step": 945 }, { "epoch": 0.38, "learning_rate": 1.428122270488848e-05, "loss": 1.3691, "step": 946 }, { "epoch": 0.38, "learning_rate": 1.4269511393084572e-05, "loss": 1.375, "step": 947 }, { "epoch": 0.38, "learning_rate": 1.4257792915650728e-05, "loss": 1.3555, "step": 948 }, { "epoch": 0.38, "learning_rate": 1.4246067292254367e-05, "loss": 1.4004, "step": 949 }, { "epoch": 0.38, "learning_rate": 1.4234334542574906e-05, "loss": 1.4717, "step": 950 }, { "epoch": 0.38, "learning_rate": 1.4222594686303707e-05, "loss": 1.374, "step": 951 }, { "epoch": 0.38, "learning_rate": 1.4210847743144087e-05, "loss": 1.3936, "step": 952 }, { "epoch": 0.38, "learning_rate": 1.4199093732811227e-05, "loss": 1.3691, "step": 953 }, { "epoch": 0.38, "learning_rate": 1.4187332675032189e-05, "loss": 1.3955, "step": 954 }, { "epoch": 0.38, "learning_rate": 1.4175564589545853e-05, "loss": 1.3779, "step": 955 }, { "epoch": 0.38, "learning_rate": 1.4163789496102902e-05, "loss": 1.3223, "step": 956 }, { "epoch": 0.38, "learning_rate": 1.4152007414465771e-05, "loss": 1.4609, "step": 957 }, { "epoch": 0.38, "learning_rate": 1.4140218364408634e-05, "loss": 1.4248, "step": 958 }, { "epoch": 0.38, "learning_rate": 1.4128422365717346e-05, "loss": 1.3193, "step": 959 }, { "epoch": 0.38, "learning_rate": 1.411661943818944e-05, "loss": 1.3555, "step": 960 }, { "epoch": 0.38, "learning_rate": 1.4104809601634069e-05, "loss": 1.418, "step": 961 }, { "epoch": 0.38, "learning_rate": 1.409299287587198e-05, "loss": 1.3545, "step": 962 }, { "epoch": 0.39, "learning_rate": 1.4081169280735488e-05, "loss": 1.3955, "step": 963 }, { "epoch": 0.39, "learning_rate": 1.4069338836068434e-05, "loss": 1.3408, "step": 964 }, { "epoch": 0.39, "learning_rate": 1.4057501561726157e-05, "loss": 1.3662, "step": 965 }, { "epoch": 0.39, "learning_rate": 1.404565747757545e-05, "loss": 1.417, "step": 966 }, { "epoch": 0.39, "learning_rate": 1.403380660349455e-05, "loss": 1.3838, "step": 967 }, { "epoch": 0.39, "learning_rate": 1.4021948959373075e-05, "loss": 1.29, "step": 968 }, { "epoch": 0.39, "learning_rate": 1.4010084565112018e-05, "loss": 1.3955, "step": 969 }, { "epoch": 0.39, "learning_rate": 1.3998213440623691e-05, "loss": 1.3828, "step": 970 }, { "epoch": 0.39, "learning_rate": 1.3986335605831707e-05, "loss": 1.3701, "step": 971 }, { "epoch": 0.39, "learning_rate": 1.3974451080670934e-05, "loss": 1.3994, "step": 972 }, { "epoch": 0.39, "learning_rate": 1.3962559885087482e-05, "loss": 1.4629, "step": 973 }, { "epoch": 0.39, "learning_rate": 1.3950662039038643e-05, "loss": 1.3789, "step": 974 }, { "epoch": 0.39, "learning_rate": 1.3938757562492873e-05, "loss": 1.2637, "step": 975 }, { "epoch": 0.39, "learning_rate": 1.3926846475429767e-05, "loss": 1.3379, "step": 976 }, { "epoch": 0.39, "learning_rate": 1.3914928797839996e-05, "loss": 1.3848, "step": 977 }, { "epoch": 0.39, "learning_rate": 1.3903004549725313e-05, "loss": 1.3809, "step": 978 }, { "epoch": 0.39, "learning_rate": 1.3891073751098481e-05, "loss": 1.4199, "step": 979 }, { "epoch": 0.39, "learning_rate": 1.3879136421983265e-05, "loss": 1.3115, "step": 980 }, { "epoch": 0.39, "learning_rate": 1.3867192582414393e-05, "loss": 1.3369, "step": 981 }, { "epoch": 0.39, "learning_rate": 1.3855242252437511e-05, "loss": 1.416, "step": 982 }, { "epoch": 0.39, "learning_rate": 1.3843285452109166e-05, "loss": 1.3584, "step": 983 }, { "epoch": 0.39, "learning_rate": 1.3831322201496757e-05, "loss": 1.3584, "step": 984 }, { "epoch": 0.39, "learning_rate": 1.3819352520678519e-05, "loss": 1.3662, "step": 985 }, { "epoch": 0.39, "learning_rate": 1.3807376429743467e-05, "loss": 1.3604, "step": 986 }, { "epoch": 0.39, "learning_rate": 1.3795393948791382e-05, "loss": 1.3623, "step": 987 }, { "epoch": 0.4, "learning_rate": 1.3783405097932772e-05, "loss": 1.3809, "step": 988 }, { "epoch": 0.4, "learning_rate": 1.3771409897288823e-05, "loss": 1.3857, "step": 989 }, { "epoch": 0.4, "learning_rate": 1.3759408366991391e-05, "loss": 1.3867, "step": 990 }, { "epoch": 0.4, "learning_rate": 1.3747400527182952e-05, "loss": 1.3916, "step": 991 }, { "epoch": 0.4, "learning_rate": 1.373538639801657e-05, "loss": 1.3545, "step": 992 }, { "epoch": 0.4, "learning_rate": 1.3723365999655859e-05, "loss": 1.375, "step": 993 }, { "epoch": 0.4, "learning_rate": 1.3711339352274969e-05, "loss": 1.416, "step": 994 }, { "epoch": 0.4, "learning_rate": 1.3699306476058523e-05, "loss": 1.377, "step": 995 }, { "epoch": 0.4, "learning_rate": 1.3687267391201604e-05, "loss": 1.4004, "step": 996 }, { "epoch": 0.4, "learning_rate": 1.3675222117909716e-05, "loss": 1.4131, "step": 997 }, { "epoch": 0.4, "learning_rate": 1.366317067639875e-05, "loss": 1.4043, "step": 998 }, { "epoch": 0.4, "learning_rate": 1.3651113086894951e-05, "loss": 1.3496, "step": 999 }, { "epoch": 0.4, "learning_rate": 1.3639049369634878e-05, "loss": 1.3203, "step": 1000 }, { "epoch": 0.4, "learning_rate": 1.3626979544865369e-05, "loss": 1.4268, "step": 1001 }, { "epoch": 0.4, "learning_rate": 1.3614903632843523e-05, "loss": 1.3789, "step": 1002 }, { "epoch": 0.4, "learning_rate": 1.3602821653836654e-05, "loss": 1.3154, "step": 1003 }, { "epoch": 0.4, "learning_rate": 1.3590733628122253e-05, "loss": 1.3311, "step": 1004 }, { "epoch": 0.4, "learning_rate": 1.357863957598796e-05, "loss": 1.3945, "step": 1005 }, { "epoch": 0.4, "learning_rate": 1.3566539517731536e-05, "loss": 1.376, "step": 1006 }, { "epoch": 0.4, "learning_rate": 1.3554433473660818e-05, "loss": 1.4082, "step": 1007 }, { "epoch": 0.4, "learning_rate": 1.354232146409368e-05, "loss": 1.4199, "step": 1008 }, { "epoch": 0.4, "learning_rate": 1.353020350935803e-05, "loss": 1.3877, "step": 1009 }, { "epoch": 0.4, "learning_rate": 1.3518079629791725e-05, "loss": 1.332, "step": 1010 }, { "epoch": 0.4, "learning_rate": 1.3505949845742599e-05, "loss": 1.4316, "step": 1011 }, { "epoch": 0.4, "learning_rate": 1.3493814177568365e-05, "loss": 1.3506, "step": 1012 }, { "epoch": 0.41, "learning_rate": 1.3481672645636627e-05, "loss": 1.3906, "step": 1013 }, { "epoch": 0.41, "learning_rate": 1.3469525270324835e-05, "loss": 1.4141, "step": 1014 }, { "epoch": 0.41, "learning_rate": 1.345737207202023e-05, "loss": 1.374, "step": 1015 }, { "epoch": 0.41, "learning_rate": 1.3445213071119841e-05, "loss": 1.3457, "step": 1016 }, { "epoch": 0.41, "learning_rate": 1.3433048288030424e-05, "loss": 1.3359, "step": 1017 }, { "epoch": 0.41, "learning_rate": 1.342087774316845e-05, "loss": 1.3535, "step": 1018 }, { "epoch": 0.41, "learning_rate": 1.3408701456960052e-05, "loss": 1.4385, "step": 1019 }, { "epoch": 0.41, "learning_rate": 1.3396519449841006e-05, "loss": 1.3779, "step": 1020 }, { "epoch": 0.41, "learning_rate": 1.338433174225668e-05, "loss": 1.3818, "step": 1021 }, { "epoch": 0.41, "learning_rate": 1.3372138354662018e-05, "loss": 1.3525, "step": 1022 }, { "epoch": 0.41, "learning_rate": 1.3359939307521494e-05, "loss": 1.3828, "step": 1023 }, { "epoch": 0.41, "learning_rate": 1.3347734621309076e-05, "loss": 1.3301, "step": 1024 }, { "epoch": 0.41, "learning_rate": 1.3335524316508208e-05, "loss": 1.3477, "step": 1025 }, { "epoch": 0.41, "learning_rate": 1.3323308413611748e-05, "loss": 1.3838, "step": 1026 }, { "epoch": 0.41, "learning_rate": 1.3311086933121961e-05, "loss": 1.3975, "step": 1027 }, { "epoch": 0.41, "learning_rate": 1.3298859895550473e-05, "loss": 1.4033, "step": 1028 }, { "epoch": 0.41, "learning_rate": 1.3286627321418229e-05, "loss": 1.3242, "step": 1029 }, { "epoch": 0.41, "learning_rate": 1.3274389231255466e-05, "loss": 1.2695, "step": 1030 }, { "epoch": 0.41, "learning_rate": 1.3262145645601693e-05, "loss": 1.3838, "step": 1031 }, { "epoch": 0.41, "learning_rate": 1.3249896585005628e-05, "loss": 1.3271, "step": 1032 }, { "epoch": 0.41, "learning_rate": 1.3237642070025183e-05, "loss": 1.3643, "step": 1033 }, { "epoch": 0.41, "learning_rate": 1.322538212122742e-05, "loss": 1.4111, "step": 1034 }, { "epoch": 0.41, "learning_rate": 1.3213116759188525e-05, "loss": 1.4199, "step": 1035 }, { "epoch": 0.41, "learning_rate": 1.320084600449377e-05, "loss": 1.3799, "step": 1036 }, { "epoch": 0.41, "learning_rate": 1.3188569877737474e-05, "loss": 1.3701, "step": 1037 }, { "epoch": 0.42, "learning_rate": 1.3176288399522975e-05, "loss": 1.3477, "step": 1038 }, { "epoch": 0.42, "learning_rate": 1.3164001590462592e-05, "loss": 1.3232, "step": 1039 }, { "epoch": 0.42, "learning_rate": 1.3151709471177589e-05, "loss": 1.3486, "step": 1040 }, { "epoch": 0.42, "learning_rate": 1.3139412062298141e-05, "loss": 1.3408, "step": 1041 }, { "epoch": 0.42, "learning_rate": 1.312710938446331e-05, "loss": 1.3721, "step": 1042 }, { "epoch": 0.42, "learning_rate": 1.3114801458320988e-05, "loss": 1.3262, "step": 1043 }, { "epoch": 0.42, "learning_rate": 1.3102488304527883e-05, "loss": 1.3105, "step": 1044 }, { "epoch": 0.42, "learning_rate": 1.3090169943749475e-05, "loss": 1.4014, "step": 1045 }, { "epoch": 0.42, "learning_rate": 1.3077846396659986e-05, "loss": 1.3389, "step": 1046 }, { "epoch": 0.42, "learning_rate": 1.3065517683942339e-05, "loss": 1.3584, "step": 1047 }, { "epoch": 0.42, "learning_rate": 1.3053183826288124e-05, "loss": 1.4189, "step": 1048 }, { "epoch": 0.42, "learning_rate": 1.3040844844397573e-05, "loss": 1.3271, "step": 1049 }, { "epoch": 0.42, "learning_rate": 1.3028500758979507e-05, "loss": 1.3564, "step": 1050 }, { "epoch": 0.42, "learning_rate": 1.3016151590751332e-05, "loss": 1.2969, "step": 1051 }, { "epoch": 0.42, "learning_rate": 1.3003797360438961e-05, "loss": 1.3486, "step": 1052 }, { "epoch": 0.42, "learning_rate": 1.2991438088776818e-05, "loss": 1.3896, "step": 1053 }, { "epoch": 0.42, "learning_rate": 1.2979073796507786e-05, "loss": 1.3311, "step": 1054 }, { "epoch": 0.42, "learning_rate": 1.296670450438317e-05, "loss": 1.3584, "step": 1055 }, { "epoch": 0.42, "learning_rate": 1.2954330233162669e-05, "loss": 1.333, "step": 1056 }, { "epoch": 0.42, "learning_rate": 1.2941951003614337e-05, "loss": 1.3486, "step": 1057 }, { "epoch": 0.42, "learning_rate": 1.2929566836514556e-05, "loss": 1.3506, "step": 1058 }, { "epoch": 0.42, "learning_rate": 1.291717775264798e-05, "loss": 1.3379, "step": 1059 }, { "epoch": 0.42, "learning_rate": 1.2904783772807534e-05, "loss": 1.416, "step": 1060 }, { "epoch": 0.42, "learning_rate": 1.2892384917794347e-05, "loss": 1.3428, "step": 1061 }, { "epoch": 0.42, "learning_rate": 1.2879981208417735e-05, "loss": 1.3906, "step": 1062 }, { "epoch": 0.43, "learning_rate": 1.2867572665495156e-05, "loss": 1.4678, "step": 1063 }, { "epoch": 0.43, "learning_rate": 1.285515930985219e-05, "loss": 1.3057, "step": 1064 }, { "epoch": 0.43, "learning_rate": 1.2842741162322487e-05, "loss": 1.3594, "step": 1065 }, { "epoch": 0.43, "learning_rate": 1.2830318243747736e-05, "loss": 1.3086, "step": 1066 }, { "epoch": 0.43, "learning_rate": 1.2817890574977648e-05, "loss": 1.3779, "step": 1067 }, { "epoch": 0.43, "learning_rate": 1.2805458176869885e-05, "loss": 1.3018, "step": 1068 }, { "epoch": 0.43, "learning_rate": 1.2793021070290065e-05, "loss": 1.3926, "step": 1069 }, { "epoch": 0.43, "learning_rate": 1.2780579276111702e-05, "loss": 1.3818, "step": 1070 }, { "epoch": 0.43, "learning_rate": 1.2768132815216174e-05, "loss": 1.3867, "step": 1071 }, { "epoch": 0.43, "learning_rate": 1.2755681708492696e-05, "loss": 1.4404, "step": 1072 }, { "epoch": 0.43, "learning_rate": 1.2743225976838277e-05, "loss": 1.373, "step": 1073 }, { "epoch": 0.43, "learning_rate": 1.2730765641157689e-05, "loss": 1.3984, "step": 1074 }, { "epoch": 0.43, "learning_rate": 1.2718300722363431e-05, "loss": 1.3408, "step": 1075 }, { "epoch": 0.43, "learning_rate": 1.2705831241375695e-05, "loss": 1.3691, "step": 1076 }, { "epoch": 0.43, "learning_rate": 1.2693357219122331e-05, "loss": 1.4531, "step": 1077 }, { "epoch": 0.43, "learning_rate": 1.2680878676538804e-05, "loss": 1.3848, "step": 1078 }, { "epoch": 0.43, "learning_rate": 1.2668395634568175e-05, "loss": 1.3906, "step": 1079 }, { "epoch": 0.43, "learning_rate": 1.2655908114161053e-05, "loss": 1.3604, "step": 1080 }, { "epoch": 0.43, "learning_rate": 1.2643416136275557e-05, "loss": 1.3418, "step": 1081 }, { "epoch": 0.43, "learning_rate": 1.2630919721877299e-05, "loss": 1.3975, "step": 1082 }, { "epoch": 0.43, "learning_rate": 1.261841889193932e-05, "loss": 1.3379, "step": 1083 }, { "epoch": 0.43, "learning_rate": 1.2605913667442096e-05, "loss": 1.3584, "step": 1084 }, { "epoch": 0.43, "learning_rate": 1.2593404069373452e-05, "loss": 1.3311, "step": 1085 }, { "epoch": 0.43, "learning_rate": 1.2580890118728572e-05, "loss": 1.3818, "step": 1086 }, { "epoch": 0.43, "learning_rate": 1.2568371836509936e-05, "loss": 1.3711, "step": 1087 }, { "epoch": 0.44, "learning_rate": 1.2555849243727298e-05, "loss": 1.3564, "step": 1088 }, { "epoch": 0.44, "learning_rate": 1.2543322361397648e-05, "loss": 1.3076, "step": 1089 }, { "epoch": 0.44, "learning_rate": 1.2530791210545163e-05, "loss": 1.3584, "step": 1090 }, { "epoch": 0.44, "learning_rate": 1.2518255812201203e-05, "loss": 1.4033, "step": 1091 }, { "epoch": 0.44, "learning_rate": 1.2505716187404242e-05, "loss": 1.335, "step": 1092 }, { "epoch": 0.44, "learning_rate": 1.2493172357199856e-05, "loss": 1.3877, "step": 1093 }, { "epoch": 0.44, "learning_rate": 1.2480624342640673e-05, "loss": 1.3965, "step": 1094 }, { "epoch": 0.44, "learning_rate": 1.2468072164786342e-05, "loss": 1.4268, "step": 1095 }, { "epoch": 0.44, "learning_rate": 1.2455515844703512e-05, "loss": 1.3408, "step": 1096 }, { "epoch": 0.44, "learning_rate": 1.2442955403465768e-05, "loss": 1.4404, "step": 1097 }, { "epoch": 0.44, "learning_rate": 1.2430390862153625e-05, "loss": 1.3633, "step": 1098 }, { "epoch": 0.44, "learning_rate": 1.2417822241854466e-05, "loss": 1.29, "step": 1099 }, { "epoch": 0.44, "learning_rate": 1.2405249563662539e-05, "loss": 1.3799, "step": 1100 }, { "epoch": 0.44, "learning_rate": 1.2392672848678877e-05, "loss": 1.2793, "step": 1101 }, { "epoch": 0.44, "learning_rate": 1.238009211801131e-05, "loss": 1.2832, "step": 1102 }, { "epoch": 0.44, "learning_rate": 1.2367507392774398e-05, "loss": 1.4277, "step": 1103 }, { "epoch": 0.44, "learning_rate": 1.2354918694089406e-05, "loss": 1.3799, "step": 1104 }, { "epoch": 0.44, "learning_rate": 1.2342326043084268e-05, "loss": 1.3477, "step": 1105 }, { "epoch": 0.44, "learning_rate": 1.2329729460893552e-05, "loss": 1.3369, "step": 1106 }, { "epoch": 0.44, "learning_rate": 1.2317128968658424e-05, "loss": 1.3477, "step": 1107 }, { "epoch": 0.44, "learning_rate": 1.2304524587526609e-05, "loss": 1.4258, "step": 1108 }, { "epoch": 0.44, "learning_rate": 1.2291916338652365e-05, "loss": 1.3799, "step": 1109 }, { "epoch": 0.44, "learning_rate": 1.2279304243196438e-05, "loss": 1.3389, "step": 1110 }, { "epoch": 0.44, "learning_rate": 1.2266688322326024e-05, "loss": 1.3164, "step": 1111 }, { "epoch": 0.44, "learning_rate": 1.225406859721475e-05, "loss": 1.2949, "step": 1112 }, { "epoch": 0.45, "learning_rate": 1.2241445089042623e-05, "loss": 1.3428, "step": 1113 }, { "epoch": 0.45, "learning_rate": 1.2228817818995998e-05, "loss": 1.3271, "step": 1114 }, { "epoch": 0.45, "learning_rate": 1.2216186808267544e-05, "loss": 1.3848, "step": 1115 }, { "epoch": 0.45, "learning_rate": 1.2203552078056209e-05, "loss": 1.3369, "step": 1116 }, { "epoch": 0.45, "learning_rate": 1.2190913649567185e-05, "loss": 1.3496, "step": 1117 }, { "epoch": 0.45, "learning_rate": 1.2178271544011864e-05, "loss": 1.3926, "step": 1118 }, { "epoch": 0.45, "learning_rate": 1.2165625782607817e-05, "loss": 1.4287, "step": 1119 }, { "epoch": 0.45, "learning_rate": 1.215297638657875e-05, "loss": 1.3711, "step": 1120 }, { "epoch": 0.45, "learning_rate": 1.2140323377154467e-05, "loss": 1.3242, "step": 1121 }, { "epoch": 0.45, "learning_rate": 1.2127666775570837e-05, "loss": 1.4043, "step": 1122 }, { "epoch": 0.45, "learning_rate": 1.211500660306975e-05, "loss": 1.4277, "step": 1123 }, { "epoch": 0.45, "learning_rate": 1.210234288089911e-05, "loss": 1.2852, "step": 1124 }, { "epoch": 0.45, "learning_rate": 1.2089675630312755e-05, "loss": 1.3301, "step": 1125 }, { "epoch": 0.45, "learning_rate": 1.2077004872570454e-05, "loss": 1.4277, "step": 1126 }, { "epoch": 0.45, "learning_rate": 1.206433062893787e-05, "loss": 1.3271, "step": 1127 }, { "epoch": 0.45, "learning_rate": 1.2051652920686505e-05, "loss": 1.3594, "step": 1128 }, { "epoch": 0.45, "learning_rate": 1.2038971769093685e-05, "loss": 1.3184, "step": 1129 }, { "epoch": 0.45, "learning_rate": 1.2026287195442503e-05, "loss": 1.334, "step": 1130 }, { "epoch": 0.45, "learning_rate": 1.201359922102181e-05, "loss": 1.3486, "step": 1131 }, { "epoch": 0.45, "learning_rate": 1.200090786712615e-05, "loss": 1.3633, "step": 1132 }, { "epoch": 0.45, "learning_rate": 1.1988213155055754e-05, "loss": 1.3359, "step": 1133 }, { "epoch": 0.45, "learning_rate": 1.1975515106116472e-05, "loss": 1.3838, "step": 1134 }, { "epoch": 0.45, "learning_rate": 1.1962813741619777e-05, "loss": 1.3721, "step": 1135 }, { "epoch": 0.45, "learning_rate": 1.1950109082882681e-05, "loss": 1.3242, "step": 1136 }, { "epoch": 0.45, "learning_rate": 1.193740115122774e-05, "loss": 1.3926, "step": 1137 }, { "epoch": 0.46, "learning_rate": 1.1924689967983006e-05, "loss": 1.4043, "step": 1138 }, { "epoch": 0.46, "learning_rate": 1.191197555448197e-05, "loss": 1.3301, "step": 1139 }, { "epoch": 0.46, "learning_rate": 1.189925793206357e-05, "loss": 1.3799, "step": 1140 }, { "epoch": 0.46, "learning_rate": 1.1886537122072106e-05, "loss": 1.4971, "step": 1141 }, { "epoch": 0.46, "learning_rate": 1.187381314585725e-05, "loss": 1.3477, "step": 1142 }, { "epoch": 0.46, "learning_rate": 1.1861086024773963e-05, "loss": 1.3682, "step": 1143 }, { "epoch": 0.46, "learning_rate": 1.1848355780182502e-05, "loss": 1.373, "step": 1144 }, { "epoch": 0.46, "learning_rate": 1.1835622433448361e-05, "loss": 1.3232, "step": 1145 }, { "epoch": 0.46, "learning_rate": 1.1822886005942244e-05, "loss": 1.3516, "step": 1146 }, { "epoch": 0.46, "learning_rate": 1.1810146519040023e-05, "loss": 1.3438, "step": 1147 }, { "epoch": 0.46, "learning_rate": 1.1797403994122698e-05, "loss": 1.3115, "step": 1148 }, { "epoch": 0.46, "learning_rate": 1.178465845257638e-05, "loss": 1.3252, "step": 1149 }, { "epoch": 0.46, "learning_rate": 1.177190991579223e-05, "loss": 1.3164, "step": 1150 }, { "epoch": 0.46, "learning_rate": 1.1759158405166446e-05, "loss": 1.3525, "step": 1151 }, { "epoch": 0.46, "learning_rate": 1.1746403942100215e-05, "loss": 1.3398, "step": 1152 }, { "epoch": 0.46, "learning_rate": 1.1733646547999678e-05, "loss": 1.3545, "step": 1153 }, { "epoch": 0.46, "learning_rate": 1.1720886244275893e-05, "loss": 1.4229, "step": 1154 }, { "epoch": 0.46, "learning_rate": 1.1708123052344803e-05, "loss": 1.4199, "step": 1155 }, { "epoch": 0.46, "learning_rate": 1.1695356993627203e-05, "loss": 1.4209, "step": 1156 }, { "epoch": 0.46, "learning_rate": 1.1682588089548692e-05, "loss": 1.3262, "step": 1157 }, { "epoch": 0.46, "learning_rate": 1.1669816361539647e-05, "loss": 1.2959, "step": 1158 }, { "epoch": 0.46, "learning_rate": 1.1657041831035186e-05, "loss": 1.4258, "step": 1159 }, { "epoch": 0.46, "learning_rate": 1.164426451947513e-05, "loss": 1.3613, "step": 1160 }, { "epoch": 0.46, "learning_rate": 1.1631484448303964e-05, "loss": 1.3965, "step": 1161 }, { "epoch": 0.46, "learning_rate": 1.1618701638970815e-05, "loss": 1.3438, "step": 1162 }, { "epoch": 0.47, "learning_rate": 1.1605916112929388e-05, "loss": 1.4355, "step": 1163 }, { "epoch": 0.47, "learning_rate": 1.1593127891637968e-05, "loss": 1.3535, "step": 1164 }, { "epoch": 0.47, "learning_rate": 1.1580336996559343e-05, "loss": 1.4678, "step": 1165 }, { "epoch": 0.47, "learning_rate": 1.156754344916081e-05, "loss": 1.3887, "step": 1166 }, { "epoch": 0.47, "learning_rate": 1.1554747270914098e-05, "loss": 1.3682, "step": 1167 }, { "epoch": 0.47, "learning_rate": 1.1541948483295358e-05, "loss": 1.4141, "step": 1168 }, { "epoch": 0.47, "learning_rate": 1.1529147107785129e-05, "loss": 1.416, "step": 1169 }, { "epoch": 0.47, "learning_rate": 1.151634316586828e-05, "loss": 1.3027, "step": 1170 }, { "epoch": 0.47, "learning_rate": 1.1503536679034e-05, "loss": 1.4043, "step": 1171 }, { "epoch": 0.47, "learning_rate": 1.1490727668775735e-05, "loss": 1.293, "step": 1172 }, { "epoch": 0.47, "learning_rate": 1.147791615659118e-05, "loss": 1.3672, "step": 1173 }, { "epoch": 0.47, "learning_rate": 1.1465102163982218e-05, "loss": 1.3037, "step": 1174 }, { "epoch": 0.47, "learning_rate": 1.1452285712454905e-05, "loss": 1.3438, "step": 1175 }, { "epoch": 0.47, "learning_rate": 1.1439466823519414e-05, "loss": 1.3428, "step": 1176 }, { "epoch": 0.47, "learning_rate": 1.1426645518690015e-05, "loss": 1.3779, "step": 1177 }, { "epoch": 0.47, "learning_rate": 1.1413821819485035e-05, "loss": 1.3643, "step": 1178 }, { "epoch": 0.47, "learning_rate": 1.140099574742681e-05, "loss": 1.3477, "step": 1179 }, { "epoch": 0.47, "learning_rate": 1.138816732404167e-05, "loss": 1.3623, "step": 1180 }, { "epoch": 0.47, "learning_rate": 1.1375336570859877e-05, "loss": 1.3652, "step": 1181 }, { "epoch": 0.47, "learning_rate": 1.136250350941562e-05, "loss": 1.4131, "step": 1182 }, { "epoch": 0.47, "learning_rate": 1.1349668161246945e-05, "loss": 1.4668, "step": 1183 }, { "epoch": 0.47, "learning_rate": 1.1336830547895752e-05, "loss": 1.3662, "step": 1184 }, { "epoch": 0.47, "learning_rate": 1.1323990690907734e-05, "loss": 1.3955, "step": 1185 }, { "epoch": 0.47, "learning_rate": 1.1311148611832346e-05, "loss": 1.3379, "step": 1186 }, { "epoch": 0.47, "learning_rate": 1.129830433222278e-05, "loss": 1.4795, "step": 1187 }, { "epoch": 0.48, "learning_rate": 1.128545787363592e-05, "loss": 1.3877, "step": 1188 }, { "epoch": 0.48, "learning_rate": 1.1272609257632305e-05, "loss": 1.4121, "step": 1189 }, { "epoch": 0.48, "learning_rate": 1.1259758505776092e-05, "loss": 1.3379, "step": 1190 }, { "epoch": 0.48, "learning_rate": 1.1246905639635029e-05, "loss": 1.3154, "step": 1191 }, { "epoch": 0.48, "learning_rate": 1.1234050680780407e-05, "loss": 1.2949, "step": 1192 }, { "epoch": 0.48, "learning_rate": 1.1221193650787032e-05, "loss": 1.3809, "step": 1193 }, { "epoch": 0.48, "learning_rate": 1.1208334571233186e-05, "loss": 1.3486, "step": 1194 }, { "epoch": 0.48, "learning_rate": 1.119547346370059e-05, "loss": 1.3613, "step": 1195 }, { "epoch": 0.48, "learning_rate": 1.118261034977437e-05, "loss": 1.334, "step": 1196 }, { "epoch": 0.48, "learning_rate": 1.116974525104302e-05, "loss": 1.3398, "step": 1197 }, { "epoch": 0.48, "learning_rate": 1.1156878189098357e-05, "loss": 1.3574, "step": 1198 }, { "epoch": 0.48, "learning_rate": 1.114400918553551e-05, "loss": 1.2822, "step": 1199 }, { "epoch": 0.48, "learning_rate": 1.1131138261952845e-05, "loss": 1.3369, "step": 1200 }, { "epoch": 0.48, "learning_rate": 1.1118265439951968e-05, "loss": 1.3389, "step": 1201 }, { "epoch": 0.48, "learning_rate": 1.110539074113766e-05, "loss": 1.3652, "step": 1202 }, { "epoch": 0.48, "learning_rate": 1.1092514187117865e-05, "loss": 1.3281, "step": 1203 }, { "epoch": 0.48, "learning_rate": 1.1079635799503625e-05, "loss": 1.3125, "step": 1204 }, { "epoch": 0.48, "learning_rate": 1.1066755599909065e-05, "loss": 1.3516, "step": 1205 }, { "epoch": 0.48, "learning_rate": 1.1053873609951362e-05, "loss": 1.3447, "step": 1206 }, { "epoch": 0.48, "learning_rate": 1.1040989851250678e-05, "loss": 1.3291, "step": 1207 }, { "epoch": 0.48, "learning_rate": 1.1028104345430161e-05, "loss": 1.2764, "step": 1208 }, { "epoch": 0.48, "learning_rate": 1.1015217114115884e-05, "loss": 1.3975, "step": 1209 }, { "epoch": 0.48, "learning_rate": 1.1002328178936813e-05, "loss": 1.3477, "step": 1210 }, { "epoch": 0.48, "learning_rate": 1.0989437561524776e-05, "loss": 1.3457, "step": 1211 }, { "epoch": 0.48, "learning_rate": 1.097654528351443e-05, "loss": 1.3311, "step": 1212 }, { "epoch": 0.49, "learning_rate": 1.0963651366543214e-05, "loss": 1.3848, "step": 1213 }, { "epoch": 0.49, "learning_rate": 1.095075583225131e-05, "loss": 1.3887, "step": 1214 }, { "epoch": 0.49, "learning_rate": 1.0937858702281631e-05, "loss": 1.3252, "step": 1215 }, { "epoch": 0.49, "learning_rate": 1.0924959998279754e-05, "loss": 1.3604, "step": 1216 }, { "epoch": 0.49, "learning_rate": 1.0912059741893908e-05, "loss": 1.2646, "step": 1217 }, { "epoch": 0.49, "learning_rate": 1.089915795477492e-05, "loss": 1.4258, "step": 1218 }, { "epoch": 0.49, "learning_rate": 1.0886254658576186e-05, "loss": 1.3535, "step": 1219 }, { "epoch": 0.49, "learning_rate": 1.087334987495364e-05, "loss": 1.377, "step": 1220 }, { "epoch": 0.49, "learning_rate": 1.0860443625565712e-05, "loss": 1.334, "step": 1221 }, { "epoch": 0.49, "learning_rate": 1.0847535932073288e-05, "loss": 1.3291, "step": 1222 }, { "epoch": 0.49, "learning_rate": 1.0834626816139678e-05, "loss": 1.3984, "step": 1223 }, { "epoch": 0.49, "learning_rate": 1.0821716299430577e-05, "loss": 1.376, "step": 1224 }, { "epoch": 0.49, "learning_rate": 1.0808804403614044e-05, "loss": 1.3516, "step": 1225 }, { "epoch": 0.49, "learning_rate": 1.0795891150360435e-05, "loss": 1.3291, "step": 1226 }, { "epoch": 0.49, "learning_rate": 1.0782976561342398e-05, "loss": 1.3086, "step": 1227 }, { "epoch": 0.49, "learning_rate": 1.0770060658234815e-05, "loss": 1.3926, "step": 1228 }, { "epoch": 0.49, "learning_rate": 1.0757143462714777e-05, "loss": 1.3555, "step": 1229 }, { "epoch": 0.49, "learning_rate": 1.0744224996461541e-05, "loss": 1.3545, "step": 1230 }, { "epoch": 0.49, "learning_rate": 1.0731305281156499e-05, "loss": 1.3633, "step": 1231 }, { "epoch": 0.49, "learning_rate": 1.0718384338483141e-05, "loss": 1.3438, "step": 1232 }, { "epoch": 0.49, "learning_rate": 1.0705462190127011e-05, "loss": 1.2803, "step": 1233 }, { "epoch": 0.49, "learning_rate": 1.0692538857775685e-05, "loss": 1.3066, "step": 1234 }, { "epoch": 0.49, "learning_rate": 1.0679614363118718e-05, "loss": 1.3564, "step": 1235 }, { "epoch": 0.49, "learning_rate": 1.066668872784762e-05, "loss": 1.3184, "step": 1236 }, { "epoch": 0.49, "learning_rate": 1.0653761973655819e-05, "loss": 1.3047, "step": 1237 }, { "epoch": 0.5, "learning_rate": 1.0640834122238606e-05, "loss": 1.3389, "step": 1238 }, { "epoch": 0.5, "learning_rate": 1.0627905195293135e-05, "loss": 1.4209, "step": 1239 }, { "epoch": 0.5, "learning_rate": 1.061497521451835e-05, "loss": 1.3223, "step": 1240 }, { "epoch": 0.5, "learning_rate": 1.0602044201614965e-05, "loss": 1.3369, "step": 1241 }, { "epoch": 0.5, "learning_rate": 1.0589112178285432e-05, "loss": 1.3818, "step": 1242 }, { "epoch": 0.5, "learning_rate": 1.0576179166233895e-05, "loss": 1.4443, "step": 1243 }, { "epoch": 0.5, "learning_rate": 1.056324518716616e-05, "loss": 1.3652, "step": 1244 }, { "epoch": 0.5, "learning_rate": 1.055031026278965e-05, "loss": 1.3125, "step": 1245 }, { "epoch": 0.5, "learning_rate": 1.0537374414813384e-05, "loss": 1.3789, "step": 1246 }, { "epoch": 0.5, "learning_rate": 1.0524437664947918e-05, "loss": 1.4316, "step": 1247 }, { "epoch": 0.5, "learning_rate": 1.051150003490534e-05, "loss": 1.3301, "step": 1248 }, { "epoch": 0.5, "learning_rate": 1.0498561546399194e-05, "loss": 1.3311, "step": 1249 }, { "epoch": 0.5, "learning_rate": 1.0485622221144485e-05, "loss": 1.4053, "step": 1250 }, { "epoch": 0.5, "learning_rate": 1.0472682080857606e-05, "loss": 1.4355, "step": 1251 }, { "epoch": 0.5, "learning_rate": 1.0459741147256325e-05, "loss": 1.3486, "step": 1252 }, { "epoch": 0.5, "learning_rate": 1.044679944205975e-05, "loss": 1.3398, "step": 1253 }, { "epoch": 0.5, "learning_rate": 1.043385698698826e-05, "loss": 1.4033, "step": 1254 }, { "epoch": 0.5, "learning_rate": 1.0420913803763522e-05, "loss": 1.3115, "step": 1255 }, { "epoch": 0.5, "learning_rate": 1.04079699141084e-05, "loss": 1.3564, "step": 1256 }, { "epoch": 0.5, "learning_rate": 1.0395025339746965e-05, "loss": 1.3037, "step": 1257 }, { "epoch": 0.5, "learning_rate": 1.0382080102404417e-05, "loss": 1.3262, "step": 1258 }, { "epoch": 0.5, "learning_rate": 1.0369134223807082e-05, "loss": 1.4072, "step": 1259 }, { "epoch": 0.5, "learning_rate": 1.0356187725682359e-05, "loss": 1.4062, "step": 1260 }, { "epoch": 0.5, "learning_rate": 1.0343240629758683e-05, "loss": 1.3545, "step": 1261 }, { "epoch": 0.5, "learning_rate": 1.0330292957765502e-05, "loss": 1.3867, "step": 1262 }, { "epoch": 0.51, "learning_rate": 1.0317344731433217e-05, "loss": 1.3359, "step": 1263 }, { "epoch": 0.51, "learning_rate": 1.0304395972493172e-05, "loss": 1.3418, "step": 1264 }, { "epoch": 0.51, "learning_rate": 1.0291446702677598e-05, "loss": 1.3193, "step": 1265 }, { "epoch": 0.51, "learning_rate": 1.0278496943719585e-05, "loss": 1.3584, "step": 1266 }, { "epoch": 0.51, "learning_rate": 1.0265546717353041e-05, "loss": 1.4424, "step": 1267 }, { "epoch": 0.51, "learning_rate": 1.0252596045312666e-05, "loss": 1.376, "step": 1268 }, { "epoch": 0.51, "learning_rate": 1.02396449493339e-05, "loss": 1.3721, "step": 1269 }, { "epoch": 0.51, "learning_rate": 1.02266934511529e-05, "loss": 1.3389, "step": 1270 }, { "epoch": 0.51, "learning_rate": 1.0213741572506497e-05, "loss": 1.3232, "step": 1271 }, { "epoch": 0.51, "learning_rate": 1.0200789335132157e-05, "loss": 1.2627, "step": 1272 }, { "epoch": 0.51, "learning_rate": 1.0187836760767954e-05, "loss": 1.2822, "step": 1273 }, { "epoch": 0.51, "learning_rate": 1.0174883871152517e-05, "loss": 1.4062, "step": 1274 }, { "epoch": 0.51, "learning_rate": 1.0161930688025018e-05, "loss": 1.3994, "step": 1275 }, { "epoch": 0.51, "learning_rate": 1.014897723312511e-05, "loss": 1.3574, "step": 1276 }, { "epoch": 0.51, "learning_rate": 1.013602352819291e-05, "loss": 1.3516, "step": 1277 }, { "epoch": 0.51, "learning_rate": 1.0123069594968952e-05, "loss": 1.3701, "step": 1278 }, { "epoch": 0.51, "learning_rate": 1.0110115455194157e-05, "loss": 1.4238, "step": 1279 }, { "epoch": 0.51, "learning_rate": 1.0097161130609774e-05, "loss": 1.3662, "step": 1280 }, { "epoch": 0.51, "learning_rate": 1.0084206642957393e-05, "loss": 1.3496, "step": 1281 }, { "epoch": 0.51, "learning_rate": 1.0071252013978852e-05, "loss": 1.3975, "step": 1282 }, { "epoch": 0.51, "learning_rate": 1.0058297265416234e-05, "loss": 1.3359, "step": 1283 }, { "epoch": 0.51, "learning_rate": 1.0045342419011832e-05, "loss": 1.3594, "step": 1284 }, { "epoch": 0.51, "learning_rate": 1.003238749650809e-05, "loss": 1.3096, "step": 1285 }, { "epoch": 0.51, "learning_rate": 1.0019432519647585e-05, "loss": 1.3203, "step": 1286 }, { "epoch": 0.51, "learning_rate": 1.0006477510172984e-05, "loss": 1.4092, "step": 1287 }, { "epoch": 0.52, "learning_rate": 9.993522489827016e-06, "loss": 1.4521, "step": 1288 }, { "epoch": 0.52, "learning_rate": 9.980567480352417e-06, "loss": 1.4512, "step": 1289 }, { "epoch": 0.52, "learning_rate": 9.967612503491915e-06, "loss": 1.3857, "step": 1290 }, { "epoch": 0.52, "learning_rate": 9.954657580988171e-06, "loss": 1.3369, "step": 1291 }, { "epoch": 0.52, "learning_rate": 9.941702734583771e-06, "loss": 1.3418, "step": 1292 }, { "epoch": 0.52, "learning_rate": 9.928747986021153e-06, "loss": 1.3877, "step": 1293 }, { "epoch": 0.52, "learning_rate": 9.91579335704261e-06, "loss": 1.3271, "step": 1294 }, { "epoch": 0.52, "learning_rate": 9.90283886939023e-06, "loss": 1.3506, "step": 1295 }, { "epoch": 0.52, "learning_rate": 9.88988454480585e-06, "loss": 1.4131, "step": 1296 }, { "epoch": 0.52, "learning_rate": 9.876930405031047e-06, "loss": 1.3354, "step": 1297 }, { "epoch": 0.52, "learning_rate": 9.86397647180709e-06, "loss": 1.3916, "step": 1298 }, { "epoch": 0.52, "learning_rate": 9.851022766874892e-06, "loss": 1.3516, "step": 1299 }, { "epoch": 0.52, "learning_rate": 9.838069311974986e-06, "loss": 1.416, "step": 1300 }, { "epoch": 0.52, "learning_rate": 9.825116128847488e-06, "loss": 1.3018, "step": 1301 }, { "epoch": 0.52, "learning_rate": 9.812163239232051e-06, "loss": 1.2241, "step": 1302 }, { "epoch": 0.52, "learning_rate": 9.799210664867844e-06, "loss": 1.3936, "step": 1303 }, { "epoch": 0.52, "learning_rate": 9.786258427493505e-06, "loss": 1.3662, "step": 1304 }, { "epoch": 0.52, "learning_rate": 9.773306548847102e-06, "loss": 1.3262, "step": 1305 }, { "epoch": 0.52, "learning_rate": 9.760355050666102e-06, "loss": 1.3467, "step": 1306 }, { "epoch": 0.52, "learning_rate": 9.747403954687334e-06, "loss": 1.3867, "step": 1307 }, { "epoch": 0.52, "learning_rate": 9.734453282646962e-06, "loss": 1.3516, "step": 1308 }, { "epoch": 0.52, "learning_rate": 9.721503056280418e-06, "loss": 1.4238, "step": 1309 }, { "epoch": 0.52, "learning_rate": 9.708553297322407e-06, "loss": 1.3408, "step": 1310 }, { "epoch": 0.52, "learning_rate": 9.69560402750683e-06, "loss": 1.3184, "step": 1311 }, { "epoch": 0.52, "learning_rate": 9.682655268566783e-06, "loss": 1.4082, "step": 1312 }, { "epoch": 0.53, "learning_rate": 9.669707042234502e-06, "loss": 1.3691, "step": 1313 }, { "epoch": 0.53, "learning_rate": 9.656759370241318e-06, "loss": 1.2607, "step": 1314 }, { "epoch": 0.53, "learning_rate": 9.643812274317644e-06, "loss": 1.3154, "step": 1315 }, { "epoch": 0.53, "learning_rate": 9.630865776192918e-06, "loss": 1.334, "step": 1316 }, { "epoch": 0.53, "learning_rate": 9.617919897595586e-06, "loss": 1.3701, "step": 1317 }, { "epoch": 0.53, "learning_rate": 9.604974660253039e-06, "loss": 1.3418, "step": 1318 }, { "epoch": 0.53, "learning_rate": 9.592030085891602e-06, "loss": 1.4326, "step": 1319 }, { "epoch": 0.53, "learning_rate": 9.579086196236483e-06, "loss": 1.2822, "step": 1320 }, { "epoch": 0.53, "learning_rate": 9.56614301301174e-06, "loss": 1.3809, "step": 1321 }, { "epoch": 0.53, "learning_rate": 9.553200557940254e-06, "loss": 1.3311, "step": 1322 }, { "epoch": 0.53, "learning_rate": 9.540258852743676e-06, "loss": 1.3379, "step": 1323 }, { "epoch": 0.53, "learning_rate": 9.527317919142398e-06, "loss": 1.374, "step": 1324 }, { "epoch": 0.53, "learning_rate": 9.514377778855521e-06, "loss": 1.3232, "step": 1325 }, { "epoch": 0.53, "learning_rate": 9.501438453600808e-06, "loss": 1.3408, "step": 1326 }, { "epoch": 0.53, "learning_rate": 9.488499965094664e-06, "loss": 1.3262, "step": 1327 }, { "epoch": 0.53, "learning_rate": 9.475562335052086e-06, "loss": 1.417, "step": 1328 }, { "epoch": 0.53, "learning_rate": 9.462625585186621e-06, "loss": 1.3223, "step": 1329 }, { "epoch": 0.53, "learning_rate": 9.449689737210352e-06, "loss": 1.376, "step": 1330 }, { "epoch": 0.53, "learning_rate": 9.436754812833843e-06, "loss": 1.3145, "step": 1331 }, { "epoch": 0.53, "learning_rate": 9.423820833766108e-06, "loss": 1.3584, "step": 1332 }, { "epoch": 0.53, "learning_rate": 9.410887821714571e-06, "loss": 1.3594, "step": 1333 }, { "epoch": 0.53, "learning_rate": 9.39795579838504e-06, "loss": 1.3916, "step": 1334 }, { "epoch": 0.53, "learning_rate": 9.385024785481653e-06, "loss": 1.3311, "step": 1335 }, { "epoch": 0.53, "learning_rate": 9.372094804706867e-06, "loss": 1.3984, "step": 1336 }, { "epoch": 0.53, "learning_rate": 9.359165877761396e-06, "loss": 1.3809, "step": 1337 }, { "epoch": 0.54, "learning_rate": 9.346238026344186e-06, "loss": 1.3574, "step": 1338 }, { "epoch": 0.54, "learning_rate": 9.333311272152385e-06, "loss": 1.3789, "step": 1339 }, { "epoch": 0.54, "learning_rate": 9.320385636881283e-06, "loss": 1.3691, "step": 1340 }, { "epoch": 0.54, "learning_rate": 9.307461142224318e-06, "loss": 1.3457, "step": 1341 }, { "epoch": 0.54, "learning_rate": 9.29453780987299e-06, "loss": 1.3145, "step": 1342 }, { "epoch": 0.54, "learning_rate": 9.281615661516866e-06, "loss": 1.3682, "step": 1343 }, { "epoch": 0.54, "learning_rate": 9.268694718843503e-06, "loss": 1.3926, "step": 1344 }, { "epoch": 0.54, "learning_rate": 9.255775003538462e-06, "loss": 1.4248, "step": 1345 }, { "epoch": 0.54, "learning_rate": 9.242856537285227e-06, "loss": 1.2715, "step": 1346 }, { "epoch": 0.54, "learning_rate": 9.229939341765188e-06, "loss": 1.3496, "step": 1347 }, { "epoch": 0.54, "learning_rate": 9.217023438657606e-06, "loss": 1.3369, "step": 1348 }, { "epoch": 0.54, "learning_rate": 9.204108849639565e-06, "loss": 1.3838, "step": 1349 }, { "epoch": 0.54, "learning_rate": 9.19119559638596e-06, "loss": 1.2881, "step": 1350 }, { "epoch": 0.54, "learning_rate": 9.178283700569424e-06, "loss": 1.3643, "step": 1351 }, { "epoch": 0.54, "learning_rate": 9.165373183860329e-06, "loss": 1.335, "step": 1352 }, { "epoch": 0.54, "learning_rate": 9.152464067926717e-06, "loss": 1.3506, "step": 1353 }, { "epoch": 0.54, "learning_rate": 9.139556374434288e-06, "loss": 1.3887, "step": 1354 }, { "epoch": 0.54, "learning_rate": 9.126650125046361e-06, "loss": 1.3164, "step": 1355 }, { "epoch": 0.54, "learning_rate": 9.113745341423816e-06, "loss": 1.3389, "step": 1356 }, { "epoch": 0.54, "learning_rate": 9.100842045225084e-06, "loss": 1.3652, "step": 1357 }, { "epoch": 0.54, "learning_rate": 9.087940258106093e-06, "loss": 1.3135, "step": 1358 }, { "epoch": 0.54, "learning_rate": 9.075040001720247e-06, "loss": 1.3369, "step": 1359 }, { "epoch": 0.54, "learning_rate": 9.062141297718372e-06, "loss": 1.3721, "step": 1360 }, { "epoch": 0.54, "learning_rate": 9.049244167748694e-06, "loss": 1.4238, "step": 1361 }, { "epoch": 0.54, "learning_rate": 9.036348633456791e-06, "loss": 1.3486, "step": 1362 }, { "epoch": 0.55, "learning_rate": 9.023454716485572e-06, "loss": 1.3047, "step": 1363 }, { "epoch": 0.55, "learning_rate": 9.010562438475225e-06, "loss": 1.3936, "step": 1364 }, { "epoch": 0.55, "learning_rate": 8.99767182106319e-06, "loss": 1.3066, "step": 1365 }, { "epoch": 0.55, "learning_rate": 8.984782885884119e-06, "loss": 1.4043, "step": 1366 }, { "epoch": 0.55, "learning_rate": 8.971895654569842e-06, "loss": 1.417, "step": 1367 }, { "epoch": 0.55, "learning_rate": 8.959010148749324e-06, "loss": 1.4229, "step": 1368 }, { "epoch": 0.55, "learning_rate": 8.94612639004864e-06, "loss": 1.4258, "step": 1369 }, { "epoch": 0.55, "learning_rate": 8.933244400090937e-06, "loss": 1.3369, "step": 1370 }, { "epoch": 0.55, "learning_rate": 8.92036420049638e-06, "loss": 1.3916, "step": 1371 }, { "epoch": 0.55, "learning_rate": 8.907485812882137e-06, "loss": 1.3643, "step": 1372 }, { "epoch": 0.55, "learning_rate": 8.89460925886234e-06, "loss": 1.4326, "step": 1373 }, { "epoch": 0.55, "learning_rate": 8.881734560048037e-06, "loss": 1.293, "step": 1374 }, { "epoch": 0.55, "learning_rate": 8.868861738047158e-06, "loss": 1.2617, "step": 1375 }, { "epoch": 0.55, "learning_rate": 8.855990814464497e-06, "loss": 1.2637, "step": 1376 }, { "epoch": 0.55, "learning_rate": 8.843121810901643e-06, "loss": 1.3691, "step": 1377 }, { "epoch": 0.55, "learning_rate": 8.830254748956983e-06, "loss": 1.3564, "step": 1378 }, { "epoch": 0.55, "learning_rate": 8.817389650225631e-06, "loss": 1.3896, "step": 1379 }, { "epoch": 0.55, "learning_rate": 8.804526536299413e-06, "loss": 1.3311, "step": 1380 }, { "epoch": 0.55, "learning_rate": 8.79166542876682e-06, "loss": 1.3096, "step": 1381 }, { "epoch": 0.55, "learning_rate": 8.778806349212968e-06, "loss": 1.3428, "step": 1382 }, { "epoch": 0.55, "learning_rate": 8.765949319219595e-06, "loss": 1.3301, "step": 1383 }, { "epoch": 0.55, "learning_rate": 8.753094360364973e-06, "loss": 1.3486, "step": 1384 }, { "epoch": 0.55, "learning_rate": 8.740241494223911e-06, "loss": 1.3887, "step": 1385 }, { "epoch": 0.55, "learning_rate": 8.727390742367698e-06, "loss": 1.3857, "step": 1386 }, { "epoch": 0.55, "learning_rate": 8.71454212636408e-06, "loss": 1.3926, "step": 1387 }, { "epoch": 0.56, "learning_rate": 8.701695667777221e-06, "loss": 1.4453, "step": 1388 }, { "epoch": 0.56, "learning_rate": 8.688851388167658e-06, "loss": 1.3359, "step": 1389 }, { "epoch": 0.56, "learning_rate": 8.676009309092273e-06, "loss": 1.333, "step": 1390 }, { "epoch": 0.56, "learning_rate": 8.663169452104248e-06, "loss": 1.3291, "step": 1391 }, { "epoch": 0.56, "learning_rate": 8.650331838753057e-06, "loss": 1.3799, "step": 1392 }, { "epoch": 0.56, "learning_rate": 8.637496490584385e-06, "loss": 1.3604, "step": 1393 }, { "epoch": 0.56, "learning_rate": 8.624663429140128e-06, "loss": 1.3721, "step": 1394 }, { "epoch": 0.56, "learning_rate": 8.611832675958335e-06, "loss": 1.4238, "step": 1395 }, { "epoch": 0.56, "learning_rate": 8.599004252573191e-06, "loss": 1.3252, "step": 1396 }, { "epoch": 0.56, "learning_rate": 8.586178180514968e-06, "loss": 1.3535, "step": 1397 }, { "epoch": 0.56, "learning_rate": 8.573354481309986e-06, "loss": 1.3828, "step": 1398 }, { "epoch": 0.56, "learning_rate": 8.560533176480588e-06, "loss": 1.3965, "step": 1399 }, { "epoch": 0.56, "learning_rate": 8.5477142875451e-06, "loss": 1.3457, "step": 1400 }, { "epoch": 0.56, "learning_rate": 8.534897836017784e-06, "loss": 1.3418, "step": 1401 }, { "epoch": 0.56, "learning_rate": 8.522083843408823e-06, "loss": 1.3604, "step": 1402 }, { "epoch": 0.56, "learning_rate": 8.50927233122427e-06, "loss": 1.3701, "step": 1403 }, { "epoch": 0.56, "learning_rate": 8.496463320966004e-06, "loss": 1.3506, "step": 1404 }, { "epoch": 0.56, "learning_rate": 8.48365683413172e-06, "loss": 1.3613, "step": 1405 }, { "epoch": 0.56, "learning_rate": 8.470852892214875e-06, "loss": 1.4062, "step": 1406 }, { "epoch": 0.56, "learning_rate": 8.458051516704644e-06, "loss": 1.3955, "step": 1407 }, { "epoch": 0.56, "learning_rate": 8.445252729085907e-06, "loss": 1.3525, "step": 1408 }, { "epoch": 0.56, "learning_rate": 8.432456550839196e-06, "loss": 1.3701, "step": 1409 }, { "epoch": 0.56, "learning_rate": 8.419663003440657e-06, "loss": 1.3564, "step": 1410 }, { "epoch": 0.56, "learning_rate": 8.406872108362034e-06, "loss": 1.3682, "step": 1411 }, { "epoch": 0.56, "learning_rate": 8.394083887070614e-06, "loss": 1.2861, "step": 1412 }, { "epoch": 0.57, "learning_rate": 8.38129836102919e-06, "loss": 1.3916, "step": 1413 }, { "epoch": 0.57, "learning_rate": 8.36851555169604e-06, "loss": 1.3975, "step": 1414 }, { "epoch": 0.57, "learning_rate": 8.355735480524874e-06, "loss": 1.3721, "step": 1415 }, { "epoch": 0.57, "learning_rate": 8.342958168964816e-06, "loss": 1.3506, "step": 1416 }, { "epoch": 0.57, "learning_rate": 8.330183638460356e-06, "loss": 1.3418, "step": 1417 }, { "epoch": 0.57, "learning_rate": 8.317411910451313e-06, "loss": 1.3496, "step": 1418 }, { "epoch": 0.57, "learning_rate": 8.304643006372797e-06, "loss": 1.3516, "step": 1419 }, { "epoch": 0.57, "learning_rate": 8.291876947655197e-06, "loss": 1.3506, "step": 1420 }, { "epoch": 0.57, "learning_rate": 8.27911375572411e-06, "loss": 1.3418, "step": 1421 }, { "epoch": 0.57, "learning_rate": 8.266353452000326e-06, "loss": 1.4775, "step": 1422 }, { "epoch": 0.57, "learning_rate": 8.25359605789979e-06, "loss": 1.2861, "step": 1423 }, { "epoch": 0.57, "learning_rate": 8.240841594833554e-06, "loss": 1.3389, "step": 1424 }, { "epoch": 0.57, "learning_rate": 8.228090084207773e-06, "loss": 1.3799, "step": 1425 }, { "epoch": 0.57, "learning_rate": 8.215341547423624e-06, "loss": 1.333, "step": 1426 }, { "epoch": 0.57, "learning_rate": 8.202596005877307e-06, "loss": 1.3301, "step": 1427 }, { "epoch": 0.57, "learning_rate": 8.189853480959982e-06, "loss": 1.3662, "step": 1428 }, { "epoch": 0.57, "learning_rate": 8.177113994057756e-06, "loss": 1.3496, "step": 1429 }, { "epoch": 0.57, "learning_rate": 8.16437756655164e-06, "loss": 1.4043, "step": 1430 }, { "epoch": 0.57, "learning_rate": 8.1516442198175e-06, "loss": 1.3271, "step": 1431 }, { "epoch": 0.57, "learning_rate": 8.138913975226044e-06, "loss": 1.3545, "step": 1432 }, { "epoch": 0.57, "learning_rate": 8.126186854142752e-06, "loss": 1.3613, "step": 1433 }, { "epoch": 0.57, "learning_rate": 8.113462877927893e-06, "loss": 1.3281, "step": 1434 }, { "epoch": 0.57, "learning_rate": 8.100742067936432e-06, "loss": 1.3711, "step": 1435 }, { "epoch": 0.57, "learning_rate": 8.088024445518033e-06, "loss": 1.3896, "step": 1436 }, { "epoch": 0.57, "learning_rate": 8.075310032017e-06, "loss": 1.3877, "step": 1437 }, { "epoch": 0.58, "learning_rate": 8.062598848772261e-06, "loss": 1.3682, "step": 1438 }, { "epoch": 0.58, "learning_rate": 8.049890917117322e-06, "loss": 1.3398, "step": 1439 }, { "epoch": 0.58, "learning_rate": 8.037186258380226e-06, "loss": 1.3477, "step": 1440 }, { "epoch": 0.58, "learning_rate": 8.02448489388353e-06, "loss": 1.2598, "step": 1441 }, { "epoch": 0.58, "learning_rate": 8.01178684494425e-06, "loss": 1.4199, "step": 1442 }, { "epoch": 0.58, "learning_rate": 7.999092132873851e-06, "loss": 1.3203, "step": 1443 }, { "epoch": 0.58, "learning_rate": 7.986400778978192e-06, "loss": 1.4346, "step": 1444 }, { "epoch": 0.58, "learning_rate": 7.9737128045575e-06, "loss": 1.3623, "step": 1445 }, { "epoch": 0.58, "learning_rate": 7.96102823090632e-06, "loss": 1.2451, "step": 1446 }, { "epoch": 0.58, "learning_rate": 7.948347079313494e-06, "loss": 1.3223, "step": 1447 }, { "epoch": 0.58, "learning_rate": 7.935669371062132e-06, "loss": 1.3652, "step": 1448 }, { "epoch": 0.58, "learning_rate": 7.922995127429547e-06, "loss": 1.3379, "step": 1449 }, { "epoch": 0.58, "learning_rate": 7.91032436968725e-06, "loss": 1.3262, "step": 1450 }, { "epoch": 0.58, "learning_rate": 7.897657119100896e-06, "loss": 1.3574, "step": 1451 }, { "epoch": 0.58, "learning_rate": 7.88499339693025e-06, "loss": 1.3506, "step": 1452 }, { "epoch": 0.58, "learning_rate": 7.872333224429166e-06, "loss": 1.3867, "step": 1453 }, { "epoch": 0.58, "learning_rate": 7.859676622845535e-06, "loss": 1.332, "step": 1454 }, { "epoch": 0.58, "learning_rate": 7.847023613421251e-06, "loss": 1.3545, "step": 1455 }, { "epoch": 0.58, "learning_rate": 7.834374217392188e-06, "loss": 1.3945, "step": 1456 }, { "epoch": 0.58, "learning_rate": 7.82172845598814e-06, "loss": 1.3535, "step": 1457 }, { "epoch": 0.58, "learning_rate": 7.80908635043282e-06, "loss": 1.373, "step": 1458 }, { "epoch": 0.58, "learning_rate": 7.796447921943793e-06, "loss": 1.3672, "step": 1459 }, { "epoch": 0.58, "learning_rate": 7.78381319173246e-06, "loss": 1.377, "step": 1460 }, { "epoch": 0.58, "learning_rate": 7.771182181004005e-06, "loss": 1.3672, "step": 1461 }, { "epoch": 0.58, "learning_rate": 7.758554910957378e-06, "loss": 1.3896, "step": 1462 }, { "epoch": 0.59, "learning_rate": 7.745931402785252e-06, "loss": 1.3623, "step": 1463 }, { "epoch": 0.59, "learning_rate": 7.733311677673979e-06, "loss": 1.3047, "step": 1464 }, { "epoch": 0.59, "learning_rate": 7.720695756803569e-06, "loss": 1.3506, "step": 1465 }, { "epoch": 0.59, "learning_rate": 7.708083661347637e-06, "loss": 1.3652, "step": 1466 }, { "epoch": 0.59, "learning_rate": 7.695475412473393e-06, "loss": 1.376, "step": 1467 }, { "epoch": 0.59, "learning_rate": 7.682871031341579e-06, "loss": 1.3281, "step": 1468 }, { "epoch": 0.59, "learning_rate": 7.670270539106452e-06, "loss": 1.3447, "step": 1469 }, { "epoch": 0.59, "learning_rate": 7.657673956915735e-06, "loss": 1.3232, "step": 1470 }, { "epoch": 0.59, "learning_rate": 7.645081305910596e-06, "loss": 1.377, "step": 1471 }, { "epoch": 0.59, "learning_rate": 7.632492607225604e-06, "loss": 1.3037, "step": 1472 }, { "epoch": 0.59, "learning_rate": 7.619907881988692e-06, "loss": 1.3594, "step": 1473 }, { "epoch": 0.59, "learning_rate": 7.607327151321127e-06, "loss": 1.3643, "step": 1474 }, { "epoch": 0.59, "learning_rate": 7.594750436337467e-06, "loss": 1.291, "step": 1475 }, { "epoch": 0.59, "learning_rate": 7.582177758145532e-06, "loss": 1.3369, "step": 1476 }, { "epoch": 0.59, "learning_rate": 7.569609137846376e-06, "loss": 1.332, "step": 1477 }, { "epoch": 0.59, "learning_rate": 7.557044596534234e-06, "loss": 1.292, "step": 1478 }, { "epoch": 0.59, "learning_rate": 7.544484155296492e-06, "loss": 1.3789, "step": 1479 }, { "epoch": 0.59, "learning_rate": 7.531927835213657e-06, "loss": 1.3281, "step": 1480 }, { "epoch": 0.59, "learning_rate": 7.519375657359331e-06, "loss": 1.3789, "step": 1481 }, { "epoch": 0.59, "learning_rate": 7.506827642800146e-06, "loss": 1.3779, "step": 1482 }, { "epoch": 0.59, "learning_rate": 7.49428381259576e-06, "loss": 1.3467, "step": 1483 }, { "epoch": 0.59, "learning_rate": 7.4817441877988005e-06, "loss": 1.2686, "step": 1484 }, { "epoch": 0.59, "learning_rate": 7.469208789454838e-06, "loss": 1.3877, "step": 1485 }, { "epoch": 0.59, "learning_rate": 7.456677638602355e-06, "loss": 1.3389, "step": 1486 }, { "epoch": 0.59, "learning_rate": 7.444150756272704e-06, "loss": 1.2559, "step": 1487 }, { "epoch": 0.6, "learning_rate": 7.431628163490067e-06, "loss": 1.2646, "step": 1488 }, { "epoch": 0.6, "learning_rate": 7.419109881271434e-06, "loss": 1.4336, "step": 1489 }, { "epoch": 0.6, "learning_rate": 7.40659593062655e-06, "loss": 1.3369, "step": 1490 }, { "epoch": 0.6, "learning_rate": 7.394086332557907e-06, "loss": 1.333, "step": 1491 }, { "epoch": 0.6, "learning_rate": 7.38158110806068e-06, "loss": 1.3486, "step": 1492 }, { "epoch": 0.6, "learning_rate": 7.3690802781227056e-06, "loss": 1.3584, "step": 1493 }, { "epoch": 0.6, "learning_rate": 7.356583863724442e-06, "loss": 1.3701, "step": 1494 }, { "epoch": 0.6, "learning_rate": 7.344091885838949e-06, "loss": 1.3887, "step": 1495 }, { "epoch": 0.6, "learning_rate": 7.331604365431826e-06, "loss": 1.3857, "step": 1496 }, { "epoch": 0.6, "learning_rate": 7.319121323461198e-06, "loss": 1.332, "step": 1497 }, { "epoch": 0.6, "learning_rate": 7.3066427808776754e-06, "loss": 1.3125, "step": 1498 }, { "epoch": 0.6, "learning_rate": 7.294168758624307e-06, "loss": 1.3604, "step": 1499 }, { "epoch": 0.6, "learning_rate": 7.2816992776365714e-06, "loss": 1.3682, "step": 1500 }, { "epoch": 0.6, "learning_rate": 7.269234358842314e-06, "loss": 1.3613, "step": 1501 }, { "epoch": 0.6, "learning_rate": 7.256774023161728e-06, "loss": 1.3828, "step": 1502 }, { "epoch": 0.6, "learning_rate": 7.244318291507308e-06, "loss": 1.4023, "step": 1503 }, { "epoch": 0.6, "learning_rate": 7.231867184783826e-06, "loss": 1.3545, "step": 1504 }, { "epoch": 0.6, "learning_rate": 7.219420723888301e-06, "loss": 1.3389, "step": 1505 }, { "epoch": 0.6, "learning_rate": 7.2069789297099355e-06, "loss": 1.3301, "step": 1506 }, { "epoch": 0.6, "learning_rate": 7.194541823130119e-06, "loss": 1.3604, "step": 1507 }, { "epoch": 0.6, "learning_rate": 7.182109425022357e-06, "loss": 1.3652, "step": 1508 }, { "epoch": 0.6, "learning_rate": 7.169681756252265e-06, "loss": 1.4395, "step": 1509 }, { "epoch": 0.6, "learning_rate": 7.157258837677514e-06, "loss": 1.3271, "step": 1510 }, { "epoch": 0.6, "learning_rate": 7.144840690147812e-06, "loss": 1.3262, "step": 1511 }, { "epoch": 0.6, "learning_rate": 7.132427334504846e-06, "loss": 1.4014, "step": 1512 }, { "epoch": 0.61, "learning_rate": 7.120018791582266e-06, "loss": 1.3027, "step": 1513 }, { "epoch": 0.61, "learning_rate": 7.107615082205654e-06, "loss": 1.3418, "step": 1514 }, { "epoch": 0.61, "learning_rate": 7.095216227192467e-06, "loss": 1.3115, "step": 1515 }, { "epoch": 0.61, "learning_rate": 7.082822247352024e-06, "loss": 1.3584, "step": 1516 }, { "epoch": 0.61, "learning_rate": 7.070433163485451e-06, "loss": 1.4111, "step": 1517 }, { "epoch": 0.61, "learning_rate": 7.0580489963856646e-06, "loss": 1.2637, "step": 1518 }, { "epoch": 0.61, "learning_rate": 7.045669766837333e-06, "loss": 1.374, "step": 1519 }, { "epoch": 0.61, "learning_rate": 7.033295495616834e-06, "loss": 1.4062, "step": 1520 }, { "epoch": 0.61, "learning_rate": 7.020926203492218e-06, "loss": 1.3379, "step": 1521 }, { "epoch": 0.61, "learning_rate": 7.008561911223186e-06, "loss": 1.4131, "step": 1522 }, { "epoch": 0.61, "learning_rate": 6.9962026395610416e-06, "loss": 1.3828, "step": 1523 }, { "epoch": 0.61, "learning_rate": 6.983848409248672e-06, "loss": 1.3262, "step": 1524 }, { "epoch": 0.61, "learning_rate": 6.971499241020495e-06, "loss": 1.2734, "step": 1525 }, { "epoch": 0.61, "learning_rate": 6.959155155602433e-06, "loss": 1.3662, "step": 1526 }, { "epoch": 0.61, "learning_rate": 6.946816173711878e-06, "loss": 1.4238, "step": 1527 }, { "epoch": 0.61, "learning_rate": 6.934482316057663e-06, "loss": 1.3398, "step": 1528 }, { "epoch": 0.61, "learning_rate": 6.922153603340016e-06, "loss": 1.3389, "step": 1529 }, { "epoch": 0.61, "learning_rate": 6.909830056250527e-06, "loss": 1.2969, "step": 1530 }, { "epoch": 0.61, "learning_rate": 6.897511695472124e-06, "loss": 1.3545, "step": 1531 }, { "epoch": 0.61, "learning_rate": 6.885198541679016e-06, "loss": 1.3281, "step": 1532 }, { "epoch": 0.61, "learning_rate": 6.872890615536694e-06, "loss": 1.3467, "step": 1533 }, { "epoch": 0.61, "learning_rate": 6.860587937701862e-06, "loss": 1.3604, "step": 1534 }, { "epoch": 0.61, "learning_rate": 6.848290528822417e-06, "loss": 1.374, "step": 1535 }, { "epoch": 0.61, "learning_rate": 6.835998409537412e-06, "loss": 1.4102, "step": 1536 }, { "epoch": 0.61, "learning_rate": 6.823711600477025e-06, "loss": 1.3613, "step": 1537 }, { "epoch": 0.62, "learning_rate": 6.811430122262529e-06, "loss": 1.3223, "step": 1538 }, { "epoch": 0.62, "learning_rate": 6.799153995506234e-06, "loss": 1.3486, "step": 1539 }, { "epoch": 0.62, "learning_rate": 6.786883240811479e-06, "loss": 1.3438, "step": 1540 }, { "epoch": 0.62, "learning_rate": 6.774617878772582e-06, "loss": 1.3223, "step": 1541 }, { "epoch": 0.62, "learning_rate": 6.76235792997482e-06, "loss": 1.3682, "step": 1542 }, { "epoch": 0.62, "learning_rate": 6.750103414994374e-06, "loss": 1.3184, "step": 1543 }, { "epoch": 0.62, "learning_rate": 6.737854354398308e-06, "loss": 1.2861, "step": 1544 }, { "epoch": 0.62, "learning_rate": 6.725610768744535e-06, "loss": 1.3887, "step": 1545 }, { "epoch": 0.62, "learning_rate": 6.713372678581773e-06, "loss": 1.459, "step": 1546 }, { "epoch": 0.62, "learning_rate": 6.7011401044495304e-06, "loss": 1.3555, "step": 1547 }, { "epoch": 0.62, "learning_rate": 6.68891306687804e-06, "loss": 1.3477, "step": 1548 }, { "epoch": 0.62, "learning_rate": 6.676691586388255e-06, "loss": 1.3838, "step": 1549 }, { "epoch": 0.62, "learning_rate": 6.664475683491797e-06, "loss": 1.3584, "step": 1550 }, { "epoch": 0.62, "learning_rate": 6.652265378690923e-06, "loss": 1.3096, "step": 1551 }, { "epoch": 0.62, "learning_rate": 6.6400606924785095e-06, "loss": 1.3701, "step": 1552 }, { "epoch": 0.62, "learning_rate": 6.627861645337984e-06, "loss": 1.3818, "step": 1553 }, { "epoch": 0.62, "learning_rate": 6.615668257743322e-06, "loss": 1.3779, "step": 1554 }, { "epoch": 0.62, "learning_rate": 6.603480550158995e-06, "loss": 1.2734, "step": 1555 }, { "epoch": 0.62, "learning_rate": 6.591298543039949e-06, "loss": 1.3818, "step": 1556 }, { "epoch": 0.62, "learning_rate": 6.579122256831551e-06, "loss": 1.3799, "step": 1557 }, { "epoch": 0.62, "learning_rate": 6.566951711969581e-06, "loss": 1.3428, "step": 1558 }, { "epoch": 0.62, "learning_rate": 6.554786928880165e-06, "loss": 1.2852, "step": 1559 }, { "epoch": 0.62, "learning_rate": 6.542627927979772e-06, "loss": 1.2734, "step": 1560 }, { "epoch": 0.62, "learning_rate": 6.530474729675167e-06, "loss": 1.3193, "step": 1561 }, { "epoch": 0.62, "learning_rate": 6.518327354363374e-06, "loss": 1.3682, "step": 1562 }, { "epoch": 0.63, "learning_rate": 6.506185822431639e-06, "loss": 1.3516, "step": 1563 }, { "epoch": 0.63, "learning_rate": 6.494050154257408e-06, "loss": 1.4121, "step": 1564 }, { "epoch": 0.63, "learning_rate": 6.481920370208274e-06, "loss": 1.3369, "step": 1565 }, { "epoch": 0.63, "learning_rate": 6.469796490641974e-06, "loss": 1.2471, "step": 1566 }, { "epoch": 0.63, "learning_rate": 6.4576785359063225e-06, "loss": 1.3643, "step": 1567 }, { "epoch": 0.63, "learning_rate": 6.445566526339187e-06, "loss": 1.3838, "step": 1568 }, { "epoch": 0.63, "learning_rate": 6.4334604822684645e-06, "loss": 1.2812, "step": 1569 }, { "epoch": 0.63, "learning_rate": 6.421360424012039e-06, "loss": 1.3223, "step": 1570 }, { "epoch": 0.63, "learning_rate": 6.409266371877751e-06, "loss": 1.3809, "step": 1571 }, { "epoch": 0.63, "learning_rate": 6.397178346163348e-06, "loss": 1.3184, "step": 1572 }, { "epoch": 0.63, "learning_rate": 6.38509636715648e-06, "loss": 1.3389, "step": 1573 }, { "epoch": 0.63, "learning_rate": 6.373020455134633e-06, "loss": 1.3594, "step": 1574 }, { "epoch": 0.63, "learning_rate": 6.360950630365126e-06, "loss": 1.4238, "step": 1575 }, { "epoch": 0.63, "learning_rate": 6.3488869131050505e-06, "loss": 1.3271, "step": 1576 }, { "epoch": 0.63, "learning_rate": 6.33682932360125e-06, "loss": 1.3467, "step": 1577 }, { "epoch": 0.63, "learning_rate": 6.324777882090287e-06, "loss": 1.3457, "step": 1578 }, { "epoch": 0.63, "learning_rate": 6.3127326087983974e-06, "loss": 1.3174, "step": 1579 }, { "epoch": 0.63, "learning_rate": 6.300693523941481e-06, "loss": 1.3398, "step": 1580 }, { "epoch": 0.63, "learning_rate": 6.2886606477250345e-06, "loss": 1.3564, "step": 1581 }, { "epoch": 0.63, "learning_rate": 6.276634000344144e-06, "loss": 1.3809, "step": 1582 }, { "epoch": 0.63, "learning_rate": 6.264613601983435e-06, "loss": 1.4248, "step": 1583 }, { "epoch": 0.63, "learning_rate": 6.2525994728170495e-06, "loss": 1.3574, "step": 1584 }, { "epoch": 0.63, "learning_rate": 6.2405916330086106e-06, "loss": 1.3877, "step": 1585 }, { "epoch": 0.63, "learning_rate": 6.2285901027111806e-06, "loss": 1.3037, "step": 1586 }, { "epoch": 0.63, "learning_rate": 6.216594902067233e-06, "loss": 1.3701, "step": 1587 }, { "epoch": 0.64, "learning_rate": 6.204606051208617e-06, "loss": 1.3555, "step": 1588 }, { "epoch": 0.64, "learning_rate": 6.192623570256535e-06, "loss": 1.3682, "step": 1589 }, { "epoch": 0.64, "learning_rate": 6.180647479321484e-06, "loss": 1.3809, "step": 1590 }, { "epoch": 0.64, "learning_rate": 6.168677798503246e-06, "loss": 1.3232, "step": 1591 }, { "epoch": 0.64, "learning_rate": 6.156714547890838e-06, "loss": 1.3691, "step": 1592 }, { "epoch": 0.64, "learning_rate": 6.14475774756249e-06, "loss": 1.3135, "step": 1593 }, { "epoch": 0.64, "learning_rate": 6.13280741758561e-06, "loss": 1.332, "step": 1594 }, { "epoch": 0.64, "learning_rate": 6.120863578016736e-06, "loss": 1.3838, "step": 1595 }, { "epoch": 0.64, "learning_rate": 6.108926248901521e-06, "loss": 1.373, "step": 1596 }, { "epoch": 0.64, "learning_rate": 6.0969954502746916e-06, "loss": 1.3311, "step": 1597 }, { "epoch": 0.64, "learning_rate": 6.0850712021600044e-06, "loss": 1.3867, "step": 1598 }, { "epoch": 0.64, "learning_rate": 6.073153524570236e-06, "loss": 1.4053, "step": 1599 }, { "epoch": 0.64, "learning_rate": 6.061242437507131e-06, "loss": 1.3008, "step": 1600 }, { "epoch": 0.64, "learning_rate": 6.049337960961362e-06, "loss": 1.3311, "step": 1601 }, { "epoch": 0.64, "learning_rate": 6.037440114912521e-06, "loss": 1.4219, "step": 1602 }, { "epoch": 0.64, "learning_rate": 6.0255489193290675e-06, "loss": 1.3623, "step": 1603 }, { "epoch": 0.64, "learning_rate": 6.013664394168297e-06, "loss": 1.3477, "step": 1604 }, { "epoch": 0.64, "learning_rate": 6.00178655937631e-06, "loss": 1.3154, "step": 1605 }, { "epoch": 0.64, "learning_rate": 5.989915434887985e-06, "loss": 1.4199, "step": 1606 }, { "epoch": 0.64, "learning_rate": 5.9780510406269245e-06, "loss": 1.3691, "step": 1607 }, { "epoch": 0.64, "learning_rate": 5.966193396505452e-06, "loss": 1.292, "step": 1608 }, { "epoch": 0.64, "learning_rate": 5.954342522424553e-06, "loss": 1.3164, "step": 1609 }, { "epoch": 0.64, "learning_rate": 5.942498438273849e-06, "loss": 1.373, "step": 1610 }, { "epoch": 0.64, "learning_rate": 5.930661163931572e-06, "loss": 1.3066, "step": 1611 }, { "epoch": 0.64, "learning_rate": 5.918830719264514e-06, "loss": 1.3203, "step": 1612 }, { "epoch": 0.65, "learning_rate": 5.9070071241280235e-06, "loss": 1.3721, "step": 1613 }, { "epoch": 0.65, "learning_rate": 5.895190398365935e-06, "loss": 1.3916, "step": 1614 }, { "epoch": 0.65, "learning_rate": 5.8833805618105635e-06, "loss": 1.3076, "step": 1615 }, { "epoch": 0.65, "learning_rate": 5.871577634282655e-06, "loss": 1.3301, "step": 1616 }, { "epoch": 0.65, "learning_rate": 5.8597816355913685e-06, "loss": 1.3574, "step": 1617 }, { "epoch": 0.65, "learning_rate": 5.84799258553423e-06, "loss": 1.4072, "step": 1618 }, { "epoch": 0.65, "learning_rate": 5.836210503897099e-06, "loss": 1.375, "step": 1619 }, { "epoch": 0.65, "learning_rate": 5.82443541045415e-06, "loss": 1.3027, "step": 1620 }, { "epoch": 0.65, "learning_rate": 5.812667324967813e-06, "loss": 1.3643, "step": 1621 }, { "epoch": 0.65, "learning_rate": 5.800906267188773e-06, "loss": 1.334, "step": 1622 }, { "epoch": 0.65, "learning_rate": 5.789152256855917e-06, "loss": 1.3516, "step": 1623 }, { "epoch": 0.65, "learning_rate": 5.777405313696294e-06, "loss": 1.3525, "step": 1624 }, { "epoch": 0.65, "learning_rate": 5.765665457425102e-06, "loss": 1.3477, "step": 1625 }, { "epoch": 0.65, "learning_rate": 5.753932707745635e-06, "loss": 1.3477, "step": 1626 }, { "epoch": 0.65, "learning_rate": 5.742207084349274e-06, "loss": 1.3594, "step": 1627 }, { "epoch": 0.65, "learning_rate": 5.73048860691543e-06, "loss": 1.3076, "step": 1628 }, { "epoch": 0.65, "learning_rate": 5.718777295111524e-06, "loss": 1.3047, "step": 1629 }, { "epoch": 0.65, "learning_rate": 5.707073168592943e-06, "loss": 1.29, "step": 1630 }, { "epoch": 0.65, "learning_rate": 5.695376247003025e-06, "loss": 1.3828, "step": 1631 }, { "epoch": 0.65, "learning_rate": 5.683686549973018e-06, "loss": 1.3848, "step": 1632 }, { "epoch": 0.65, "learning_rate": 5.672004097122033e-06, "loss": 1.3936, "step": 1633 }, { "epoch": 0.65, "learning_rate": 5.6603289080570274e-06, "loss": 1.3281, "step": 1634 }, { "epoch": 0.65, "learning_rate": 5.648661002372769e-06, "loss": 1.3145, "step": 1635 }, { "epoch": 0.65, "learning_rate": 5.637000399651804e-06, "loss": 1.3779, "step": 1636 }, { "epoch": 0.65, "learning_rate": 5.625347119464422e-06, "loss": 1.3887, "step": 1637 }, { "epoch": 0.66, "learning_rate": 5.613701181368618e-06, "loss": 1.3496, "step": 1638 }, { "epoch": 0.66, "learning_rate": 5.602062604910064e-06, "loss": 1.376, "step": 1639 }, { "epoch": 0.66, "learning_rate": 5.590431409622081e-06, "loss": 1.3721, "step": 1640 }, { "epoch": 0.66, "learning_rate": 5.5788076150256075e-06, "loss": 1.332, "step": 1641 }, { "epoch": 0.66, "learning_rate": 5.567191240629151e-06, "loss": 1.3545, "step": 1642 }, { "epoch": 0.66, "learning_rate": 5.555582305928766e-06, "loss": 1.3613, "step": 1643 }, { "epoch": 0.66, "learning_rate": 5.5439808304080225e-06, "loss": 1.3613, "step": 1644 }, { "epoch": 0.66, "learning_rate": 5.5323868335379775e-06, "loss": 1.3799, "step": 1645 }, { "epoch": 0.66, "learning_rate": 5.520800334777132e-06, "loss": 1.3535, "step": 1646 }, { "epoch": 0.66, "learning_rate": 5.509221353571404e-06, "loss": 1.3818, "step": 1647 }, { "epoch": 0.66, "learning_rate": 5.497649909354084e-06, "loss": 1.375, "step": 1648 }, { "epoch": 0.66, "learning_rate": 5.486086021545829e-06, "loss": 1.3662, "step": 1649 }, { "epoch": 0.66, "learning_rate": 5.4745297095546125e-06, "loss": 1.3936, "step": 1650 }, { "epoch": 0.66, "learning_rate": 5.4629809927756794e-06, "loss": 1.3545, "step": 1651 }, { "epoch": 0.66, "learning_rate": 5.451439890591539e-06, "loss": 1.3721, "step": 1652 }, { "epoch": 0.66, "learning_rate": 5.439906422371914e-06, "loss": 1.3818, "step": 1653 }, { "epoch": 0.66, "learning_rate": 5.42838060747372e-06, "loss": 1.3125, "step": 1654 }, { "epoch": 0.66, "learning_rate": 5.416862465241033e-06, "loss": 1.248, "step": 1655 }, { "epoch": 0.66, "learning_rate": 5.405352015005039e-06, "loss": 1.3838, "step": 1656 }, { "epoch": 0.66, "learning_rate": 5.3938492760840176e-06, "loss": 1.375, "step": 1657 }, { "epoch": 0.66, "learning_rate": 5.382354267783316e-06, "loss": 1.3652, "step": 1658 }, { "epoch": 0.66, "learning_rate": 5.370867009395294e-06, "loss": 1.3652, "step": 1659 }, { "epoch": 0.66, "learning_rate": 5.359387520199317e-06, "loss": 1.3281, "step": 1660 }, { "epoch": 0.66, "learning_rate": 5.3479158194617e-06, "loss": 1.3604, "step": 1661 }, { "epoch": 0.66, "learning_rate": 5.336451926435688e-06, "loss": 1.3516, "step": 1662 }, { "epoch": 0.67, "learning_rate": 5.32499586036143e-06, "loss": 1.4355, "step": 1663 }, { "epoch": 0.67, "learning_rate": 5.313547640465937e-06, "loss": 1.3223, "step": 1664 }, { "epoch": 0.67, "learning_rate": 5.302107285963045e-06, "loss": 1.3633, "step": 1665 }, { "epoch": 0.67, "learning_rate": 5.2906748160533895e-06, "loss": 1.3477, "step": 1666 }, { "epoch": 0.67, "learning_rate": 5.279250249924384e-06, "loss": 1.3262, "step": 1667 }, { "epoch": 0.67, "learning_rate": 5.26783360675016e-06, "loss": 1.3701, "step": 1668 }, { "epoch": 0.67, "learning_rate": 5.2564249056915704e-06, "loss": 1.3301, "step": 1669 }, { "epoch": 0.67, "learning_rate": 5.245024165896126e-06, "loss": 1.4053, "step": 1670 }, { "epoch": 0.67, "learning_rate": 5.2336314064979766e-06, "loss": 1.417, "step": 1671 }, { "epoch": 0.67, "learning_rate": 5.222246646617886e-06, "loss": 1.3584, "step": 1672 }, { "epoch": 0.67, "learning_rate": 5.210869905363178e-06, "loss": 1.3447, "step": 1673 }, { "epoch": 0.67, "learning_rate": 5.199501201827741e-06, "loss": 1.3467, "step": 1674 }, { "epoch": 0.67, "learning_rate": 5.18814055509195e-06, "loss": 1.3506, "step": 1675 }, { "epoch": 0.67, "learning_rate": 5.1767879842226745e-06, "loss": 1.374, "step": 1676 }, { "epoch": 0.67, "learning_rate": 5.165443508273218e-06, "loss": 1.3486, "step": 1677 }, { "epoch": 0.67, "learning_rate": 5.154107146283311e-06, "loss": 1.3291, "step": 1678 }, { "epoch": 0.67, "learning_rate": 5.1427789172790565e-06, "loss": 1.335, "step": 1679 }, { "epoch": 0.67, "learning_rate": 5.131458840272905e-06, "loss": 1.4062, "step": 1680 }, { "epoch": 0.67, "learning_rate": 5.120146934263638e-06, "loss": 1.3203, "step": 1681 }, { "epoch": 0.67, "learning_rate": 5.10884321823631e-06, "loss": 1.334, "step": 1682 }, { "epoch": 0.67, "learning_rate": 5.097547711162243e-06, "loss": 1.3281, "step": 1683 }, { "epoch": 0.67, "learning_rate": 5.086260431998967e-06, "loss": 1.4199, "step": 1684 }, { "epoch": 0.67, "learning_rate": 5.074981399690219e-06, "loss": 1.3154, "step": 1685 }, { "epoch": 0.67, "learning_rate": 5.0637106331658815e-06, "loss": 1.4375, "step": 1686 }, { "epoch": 0.67, "learning_rate": 5.0524481513419675e-06, "loss": 1.373, "step": 1687 }, { "epoch": 0.68, "learning_rate": 5.041193973120595e-06, "loss": 1.3701, "step": 1688 }, { "epoch": 0.68, "learning_rate": 5.02994811738993e-06, "loss": 1.3027, "step": 1689 }, { "epoch": 0.68, "learning_rate": 5.018710603024187e-06, "loss": 1.3691, "step": 1690 }, { "epoch": 0.68, "learning_rate": 5.007481448883567e-06, "loss": 1.3652, "step": 1691 }, { "epoch": 0.68, "learning_rate": 4.99626067381425e-06, "loss": 1.374, "step": 1692 }, { "epoch": 0.68, "learning_rate": 4.985048296648346e-06, "loss": 1.3867, "step": 1693 }, { "epoch": 0.68, "learning_rate": 4.973844336203879e-06, "loss": 1.3271, "step": 1694 }, { "epoch": 0.68, "learning_rate": 4.9626488112847384e-06, "loss": 1.3242, "step": 1695 }, { "epoch": 0.68, "learning_rate": 4.951461740680655e-06, "loss": 1.3516, "step": 1696 }, { "epoch": 0.68, "learning_rate": 4.9402831431671834e-06, "loss": 1.3682, "step": 1697 }, { "epoch": 0.68, "learning_rate": 4.929113037505642e-06, "loss": 1.3076, "step": 1698 }, { "epoch": 0.68, "learning_rate": 4.91795144244311e-06, "loss": 1.3867, "step": 1699 }, { "epoch": 0.68, "learning_rate": 4.9067983767123736e-06, "loss": 1.3076, "step": 1700 }, { "epoch": 0.68, "learning_rate": 4.895653859031906e-06, "loss": 1.4102, "step": 1701 }, { "epoch": 0.68, "learning_rate": 4.884517908105837e-06, "loss": 1.3457, "step": 1702 }, { "epoch": 0.68, "learning_rate": 4.873390542623922e-06, "loss": 1.4248, "step": 1703 }, { "epoch": 0.68, "learning_rate": 4.8622717812615e-06, "loss": 1.3408, "step": 1704 }, { "epoch": 0.68, "learning_rate": 4.851161642679466e-06, "loss": 1.3057, "step": 1705 }, { "epoch": 0.68, "learning_rate": 4.840060145524254e-06, "loss": 1.3828, "step": 1706 }, { "epoch": 0.68, "learning_rate": 4.828967308427795e-06, "loss": 1.3213, "step": 1707 }, { "epoch": 0.68, "learning_rate": 4.817883150007474e-06, "loss": 1.3174, "step": 1708 }, { "epoch": 0.68, "learning_rate": 4.806807688866119e-06, "loss": 1.3525, "step": 1709 }, { "epoch": 0.68, "learning_rate": 4.795740943591955e-06, "loss": 1.3936, "step": 1710 }, { "epoch": 0.68, "learning_rate": 4.784682932758588e-06, "loss": 1.3662, "step": 1711 }, { "epoch": 0.68, "learning_rate": 4.77363367492496e-06, "loss": 1.251, "step": 1712 }, { "epoch": 0.69, "learning_rate": 4.7625931886353215e-06, "loss": 1.3809, "step": 1713 }, { "epoch": 0.69, "learning_rate": 4.7515614924192026e-06, "loss": 1.3516, "step": 1714 }, { "epoch": 0.69, "learning_rate": 4.740538604791371e-06, "loss": 1.3604, "step": 1715 }, { "epoch": 0.69, "learning_rate": 4.729524544251837e-06, "loss": 1.3389, "step": 1716 }, { "epoch": 0.69, "learning_rate": 4.718519329285771e-06, "loss": 1.2842, "step": 1717 }, { "epoch": 0.69, "learning_rate": 4.707522978363508e-06, "loss": 1.3438, "step": 1718 }, { "epoch": 0.69, "learning_rate": 4.696535509940499e-06, "loss": 1.3418, "step": 1719 }, { "epoch": 0.69, "learning_rate": 4.685556942457296e-06, "loss": 1.3516, "step": 1720 }, { "epoch": 0.69, "learning_rate": 4.674587294339513e-06, "loss": 1.2803, "step": 1721 }, { "epoch": 0.69, "learning_rate": 4.663626583997789e-06, "loss": 1.2666, "step": 1722 }, { "epoch": 0.69, "learning_rate": 4.652674829827762e-06, "loss": 1.3242, "step": 1723 }, { "epoch": 0.69, "learning_rate": 4.641732050210032e-06, "loss": 1.291, "step": 1724 }, { "epoch": 0.69, "learning_rate": 4.630798263510162e-06, "loss": 1.3867, "step": 1725 }, { "epoch": 0.69, "learning_rate": 4.619873488078597e-06, "loss": 1.2871, "step": 1726 }, { "epoch": 0.69, "learning_rate": 4.608957742250667e-06, "loss": 1.3125, "step": 1727 }, { "epoch": 0.69, "learning_rate": 4.598051044346542e-06, "loss": 1.3555, "step": 1728 }, { "epoch": 0.69, "learning_rate": 4.587153412671217e-06, "loss": 1.2905, "step": 1729 }, { "epoch": 0.69, "learning_rate": 4.576264865514467e-06, "loss": 1.3652, "step": 1730 }, { "epoch": 0.69, "learning_rate": 4.565385421150817e-06, "loss": 1.3564, "step": 1731 }, { "epoch": 0.69, "learning_rate": 4.554515097839511e-06, "loss": 1.3926, "step": 1732 }, { "epoch": 0.69, "learning_rate": 4.543653913824496e-06, "loss": 1.3965, "step": 1733 }, { "epoch": 0.69, "learning_rate": 4.53280188733437e-06, "loss": 1.3945, "step": 1734 }, { "epoch": 0.69, "learning_rate": 4.521959036582372e-06, "loss": 1.3115, "step": 1735 }, { "epoch": 0.69, "learning_rate": 4.511125379766332e-06, "loss": 1.4268, "step": 1736 }, { "epoch": 0.69, "learning_rate": 4.500300935068647e-06, "loss": 1.3281, "step": 1737 }, { "epoch": 0.7, "learning_rate": 4.489485720656266e-06, "loss": 1.4121, "step": 1738 }, { "epoch": 0.7, "learning_rate": 4.478679754680639e-06, "loss": 1.3564, "step": 1739 }, { "epoch": 0.7, "learning_rate": 4.467883055277696e-06, "loss": 1.249, "step": 1740 }, { "epoch": 0.7, "learning_rate": 4.457095640567804e-06, "loss": 1.2969, "step": 1741 }, { "epoch": 0.7, "learning_rate": 4.4463175286557654e-06, "loss": 1.2627, "step": 1742 }, { "epoch": 0.7, "learning_rate": 4.435548737630756e-06, "loss": 1.3682, "step": 1743 }, { "epoch": 0.7, "learning_rate": 4.4247892855663164e-06, "loss": 1.3496, "step": 1744 }, { "epoch": 0.7, "learning_rate": 4.414039190520308e-06, "loss": 1.3604, "step": 1745 }, { "epoch": 0.7, "learning_rate": 4.403298470534885e-06, "loss": 1.3066, "step": 1746 }, { "epoch": 0.7, "learning_rate": 4.39256714363648e-06, "loss": 1.3438, "step": 1747 }, { "epoch": 0.7, "learning_rate": 4.3818452278357445e-06, "loss": 1.3174, "step": 1748 }, { "epoch": 0.7, "learning_rate": 4.371132741127553e-06, "loss": 1.3232, "step": 1749 }, { "epoch": 0.7, "learning_rate": 4.360429701490935e-06, "loss": 1.3711, "step": 1750 }, { "epoch": 0.7, "learning_rate": 4.349736126889084e-06, "loss": 1.3408, "step": 1751 }, { "epoch": 0.7, "learning_rate": 4.339052035269291e-06, "loss": 1.2803, "step": 1752 }, { "epoch": 0.7, "learning_rate": 4.328377444562948e-06, "loss": 1.3555, "step": 1753 }, { "epoch": 0.7, "learning_rate": 4.31771237268549e-06, "loss": 1.3086, "step": 1754 }, { "epoch": 0.7, "learning_rate": 4.307056837536373e-06, "loss": 1.4492, "step": 1755 }, { "epoch": 0.7, "learning_rate": 4.296410856999062e-06, "loss": 1.3389, "step": 1756 }, { "epoch": 0.7, "learning_rate": 4.2857744489409725e-06, "loss": 1.374, "step": 1757 }, { "epoch": 0.7, "learning_rate": 4.2751476312134655e-06, "loss": 1.3887, "step": 1758 }, { "epoch": 0.7, "learning_rate": 4.264530421651792e-06, "loss": 1.3115, "step": 1759 }, { "epoch": 0.7, "learning_rate": 4.2539228380750955e-06, "loss": 1.3008, "step": 1760 }, { "epoch": 0.7, "learning_rate": 4.243324898286349e-06, "loss": 1.3184, "step": 1761 }, { "epoch": 0.7, "learning_rate": 4.2327366200723404e-06, "loss": 1.3896, "step": 1762 }, { "epoch": 0.71, "learning_rate": 4.222158021203657e-06, "loss": 1.291, "step": 1763 }, { "epoch": 0.71, "learning_rate": 4.211589119434622e-06, "loss": 1.3867, "step": 1764 }, { "epoch": 0.71, "learning_rate": 4.201029932503303e-06, "loss": 1.3525, "step": 1765 }, { "epoch": 0.71, "learning_rate": 4.190480478131443e-06, "loss": 1.3252, "step": 1766 }, { "epoch": 0.71, "learning_rate": 4.179940774024469e-06, "loss": 1.376, "step": 1767 }, { "epoch": 0.71, "learning_rate": 4.169410837871427e-06, "loss": 1.3271, "step": 1768 }, { "epoch": 0.71, "learning_rate": 4.158890687344986e-06, "loss": 1.2217, "step": 1769 }, { "epoch": 0.71, "learning_rate": 4.14838034010138e-06, "loss": 1.3193, "step": 1770 }, { "epoch": 0.71, "learning_rate": 4.137879813780388e-06, "loss": 1.3848, "step": 1771 }, { "epoch": 0.71, "learning_rate": 4.127389126005319e-06, "loss": 1.3682, "step": 1772 }, { "epoch": 0.71, "learning_rate": 4.116908294382956e-06, "loss": 1.3604, "step": 1773 }, { "epoch": 0.71, "learning_rate": 4.10643733650355e-06, "loss": 1.3203, "step": 1774 }, { "epoch": 0.71, "learning_rate": 4.095976269940777e-06, "loss": 1.377, "step": 1775 }, { "epoch": 0.71, "learning_rate": 4.085525112251706e-06, "loss": 1.374, "step": 1776 }, { "epoch": 0.71, "learning_rate": 4.0750838809767875e-06, "loss": 1.3369, "step": 1777 }, { "epoch": 0.71, "learning_rate": 4.0646525936398086e-06, "loss": 1.3535, "step": 1778 }, { "epoch": 0.71, "learning_rate": 4.054231267747862e-06, "loss": 1.3203, "step": 1779 }, { "epoch": 0.71, "learning_rate": 4.043819920791322e-06, "loss": 1.3218, "step": 1780 }, { "epoch": 0.71, "learning_rate": 4.033418570243819e-06, "loss": 1.3027, "step": 1781 }, { "epoch": 0.71, "learning_rate": 4.0230272335622065e-06, "loss": 1.3828, "step": 1782 }, { "epoch": 0.71, "learning_rate": 4.012645928186533e-06, "loss": 1.3672, "step": 1783 }, { "epoch": 0.71, "learning_rate": 4.002274671540006e-06, "loss": 1.376, "step": 1784 }, { "epoch": 0.71, "learning_rate": 3.991913481028965e-06, "loss": 1.3906, "step": 1785 }, { "epoch": 0.71, "learning_rate": 3.981562374042867e-06, "loss": 1.3271, "step": 1786 }, { "epoch": 0.71, "learning_rate": 3.971221367954239e-06, "loss": 1.3438, "step": 1787 }, { "epoch": 0.72, "learning_rate": 3.960890480118653e-06, "loss": 1.3096, "step": 1788 }, { "epoch": 0.72, "learning_rate": 3.950569727874704e-06, "loss": 1.2998, "step": 1789 }, { "epoch": 0.72, "learning_rate": 3.940259128543967e-06, "loss": 1.3301, "step": 1790 }, { "epoch": 0.72, "learning_rate": 3.9299586994309905e-06, "loss": 1.3516, "step": 1791 }, { "epoch": 0.72, "learning_rate": 3.919668457823248e-06, "loss": 1.4082, "step": 1792 }, { "epoch": 0.72, "learning_rate": 3.909388420991113e-06, "loss": 1.3037, "step": 1793 }, { "epoch": 0.72, "learning_rate": 3.899118606187832e-06, "loss": 1.3584, "step": 1794 }, { "epoch": 0.72, "learning_rate": 3.888859030649498e-06, "loss": 1.3721, "step": 1795 }, { "epoch": 0.72, "learning_rate": 3.878609711595022e-06, "loss": 1.3252, "step": 1796 }, { "epoch": 0.72, "learning_rate": 3.8683706662260945e-06, "loss": 1.4033, "step": 1797 }, { "epoch": 0.72, "learning_rate": 3.858141911727168e-06, "loss": 1.3457, "step": 1798 }, { "epoch": 0.72, "learning_rate": 3.847923465265418e-06, "loss": 1.3408, "step": 1799 }, { "epoch": 0.72, "learning_rate": 3.837715343990727e-06, "loss": 1.2969, "step": 1800 }, { "epoch": 0.72, "learning_rate": 3.8275175650356485e-06, "loss": 1.3389, "step": 1801 }, { "epoch": 0.72, "learning_rate": 3.817330145515374e-06, "loss": 1.3525, "step": 1802 }, { "epoch": 0.72, "learning_rate": 3.807153102527704e-06, "loss": 1.334, "step": 1803 }, { "epoch": 0.72, "learning_rate": 3.7969864531530344e-06, "loss": 1.4092, "step": 1804 }, { "epoch": 0.72, "learning_rate": 3.7868302144543146e-06, "loss": 1.2578, "step": 1805 }, { "epoch": 0.72, "learning_rate": 3.7766844034770155e-06, "loss": 1.4717, "step": 1806 }, { "epoch": 0.72, "learning_rate": 3.766549037249112e-06, "loss": 1.3564, "step": 1807 }, { "epoch": 0.72, "learning_rate": 3.7564241327810436e-06, "loss": 1.3877, "step": 1808 }, { "epoch": 0.72, "learning_rate": 3.7463097070656995e-06, "loss": 1.4502, "step": 1809 }, { "epoch": 0.72, "learning_rate": 3.736205777078381e-06, "loss": 1.375, "step": 1810 }, { "epoch": 0.72, "learning_rate": 3.72611235977677e-06, "loss": 1.46, "step": 1811 }, { "epoch": 0.72, "learning_rate": 3.7160294721009026e-06, "loss": 1.4434, "step": 1812 }, { "epoch": 0.73, "learning_rate": 3.705957130973149e-06, "loss": 1.3281, "step": 1813 }, { "epoch": 0.73, "learning_rate": 3.69589535329818e-06, "loss": 1.3281, "step": 1814 }, { "epoch": 0.73, "learning_rate": 3.685844155962931e-06, "loss": 1.373, "step": 1815 }, { "epoch": 0.73, "learning_rate": 3.675803555836582e-06, "loss": 1.3721, "step": 1816 }, { "epoch": 0.73, "learning_rate": 3.6657735697705267e-06, "loss": 1.2773, "step": 1817 }, { "epoch": 0.73, "learning_rate": 3.6557542145983495e-06, "loss": 1.3311, "step": 1818 }, { "epoch": 0.73, "learning_rate": 3.6457455071357918e-06, "loss": 1.2959, "step": 1819 }, { "epoch": 0.73, "learning_rate": 3.63574746418072e-06, "loss": 1.3887, "step": 1820 }, { "epoch": 0.73, "learning_rate": 3.625760102513103e-06, "loss": 1.3359, "step": 1821 }, { "epoch": 0.73, "learning_rate": 3.6157834388949907e-06, "loss": 1.3984, "step": 1822 }, { "epoch": 0.73, "learning_rate": 3.6058174900704646e-06, "loss": 1.4102, "step": 1823 }, { "epoch": 0.73, "learning_rate": 3.595862272765638e-06, "loss": 1.3477, "step": 1824 }, { "epoch": 0.73, "learning_rate": 3.585917803688603e-06, "loss": 1.3262, "step": 1825 }, { "epoch": 0.73, "learning_rate": 3.5759840995294136e-06, "loss": 1.3506, "step": 1826 }, { "epoch": 0.73, "learning_rate": 3.5660611769600604e-06, "loss": 1.3252, "step": 1827 }, { "epoch": 0.73, "learning_rate": 3.556149052634443e-06, "loss": 1.3232, "step": 1828 }, { "epoch": 0.73, "learning_rate": 3.546247743188328e-06, "loss": 1.3779, "step": 1829 }, { "epoch": 0.73, "learning_rate": 3.536357265239333e-06, "loss": 1.3086, "step": 1830 }, { "epoch": 0.73, "learning_rate": 3.5264776353869046e-06, "loss": 1.3682, "step": 1831 }, { "epoch": 0.73, "learning_rate": 3.5166088702122738e-06, "loss": 1.3242, "step": 1832 }, { "epoch": 0.73, "learning_rate": 3.5067509862784455e-06, "loss": 1.3379, "step": 1833 }, { "epoch": 0.73, "learning_rate": 3.4969040001301513e-06, "loss": 1.2764, "step": 1834 }, { "epoch": 0.73, "learning_rate": 3.487067928293848e-06, "loss": 1.3457, "step": 1835 }, { "epoch": 0.73, "learning_rate": 3.4772427872776606e-06, "loss": 1.3711, "step": 1836 }, { "epoch": 0.73, "learning_rate": 3.4674285935713715e-06, "loss": 1.4023, "step": 1837 }, { "epoch": 0.74, "learning_rate": 3.4576253636463996e-06, "loss": 1.3379, "step": 1838 }, { "epoch": 0.74, "learning_rate": 3.4478331139557475e-06, "loss": 1.3486, "step": 1839 }, { "epoch": 0.74, "learning_rate": 3.4380518609340076e-06, "loss": 1.293, "step": 1840 }, { "epoch": 0.74, "learning_rate": 3.428281620997296e-06, "loss": 1.3633, "step": 1841 }, { "epoch": 0.74, "learning_rate": 3.418522410543266e-06, "loss": 1.2871, "step": 1842 }, { "epoch": 0.74, "learning_rate": 3.4087742459510396e-06, "loss": 1.3232, "step": 1843 }, { "epoch": 0.74, "learning_rate": 3.3990371435812185e-06, "loss": 1.3477, "step": 1844 }, { "epoch": 0.74, "learning_rate": 3.3893111197758276e-06, "loss": 1.4326, "step": 1845 }, { "epoch": 0.74, "learning_rate": 3.3795961908582965e-06, "loss": 1.3096, "step": 1846 }, { "epoch": 0.74, "learning_rate": 3.3698923731334453e-06, "loss": 1.3574, "step": 1847 }, { "epoch": 0.74, "learning_rate": 3.360199682887433e-06, "loss": 1.2939, "step": 1848 }, { "epoch": 0.74, "learning_rate": 3.3505181363877536e-06, "loss": 1.3662, "step": 1849 }, { "epoch": 0.74, "learning_rate": 3.3408477498831917e-06, "loss": 1.3887, "step": 1850 }, { "epoch": 0.74, "learning_rate": 3.3311885396038002e-06, "loss": 1.3906, "step": 1851 }, { "epoch": 0.74, "learning_rate": 3.321540521760883e-06, "loss": 1.3604, "step": 1852 }, { "epoch": 0.74, "learning_rate": 3.3119037125469553e-06, "loss": 1.3301, "step": 1853 }, { "epoch": 0.74, "learning_rate": 3.3022781281357184e-06, "loss": 1.3311, "step": 1854 }, { "epoch": 0.74, "learning_rate": 3.2926637846820366e-06, "loss": 1.3779, "step": 1855 }, { "epoch": 0.74, "learning_rate": 3.2830606983219038e-06, "loss": 1.2646, "step": 1856 }, { "epoch": 0.74, "learning_rate": 3.2734688851724273e-06, "loss": 1.3027, "step": 1857 }, { "epoch": 0.74, "learning_rate": 3.2638883613317974e-06, "loss": 1.3604, "step": 1858 }, { "epoch": 0.74, "learning_rate": 3.2543191428792466e-06, "loss": 1.3535, "step": 1859 }, { "epoch": 0.74, "learning_rate": 3.2447612458750365e-06, "loss": 1.3223, "step": 1860 }, { "epoch": 0.74, "learning_rate": 3.2352146863604317e-06, "loss": 1.3174, "step": 1861 }, { "epoch": 0.74, "learning_rate": 3.2256794803576707e-06, "loss": 1.3682, "step": 1862 }, { "epoch": 0.75, "learning_rate": 3.2161556438699303e-06, "loss": 1.3984, "step": 1863 }, { "epoch": 0.75, "learning_rate": 3.2066431928813068e-06, "loss": 1.3418, "step": 1864 }, { "epoch": 0.75, "learning_rate": 3.197142143356787e-06, "loss": 1.2812, "step": 1865 }, { "epoch": 0.75, "learning_rate": 3.1876525112422283e-06, "loss": 1.3896, "step": 1866 }, { "epoch": 0.75, "learning_rate": 3.178174312464326e-06, "loss": 1.4102, "step": 1867 }, { "epoch": 0.75, "learning_rate": 3.1687075629305787e-06, "loss": 1.3164, "step": 1868 }, { "epoch": 0.75, "learning_rate": 3.1592522785292714e-06, "loss": 1.3848, "step": 1869 }, { "epoch": 0.75, "learning_rate": 3.1498084751294523e-06, "loss": 1.3652, "step": 1870 }, { "epoch": 0.75, "learning_rate": 3.1403761685809007e-06, "loss": 1.3828, "step": 1871 }, { "epoch": 0.75, "learning_rate": 3.130955374714094e-06, "loss": 1.3115, "step": 1872 }, { "epoch": 0.75, "learning_rate": 3.1215461093401913e-06, "loss": 1.333, "step": 1873 }, { "epoch": 0.75, "learning_rate": 3.1121483882509996e-06, "loss": 1.3008, "step": 1874 }, { "epoch": 0.75, "learning_rate": 3.1027622272189572e-06, "loss": 1.416, "step": 1875 }, { "epoch": 0.75, "learning_rate": 3.0933876419971008e-06, "loss": 1.3145, "step": 1876 }, { "epoch": 0.75, "learning_rate": 3.0840246483190338e-06, "loss": 1.3506, "step": 1877 }, { "epoch": 0.75, "learning_rate": 3.074673261898903e-06, "loss": 1.29, "step": 1878 }, { "epoch": 0.75, "learning_rate": 3.065333498431381e-06, "loss": 1.3008, "step": 1879 }, { "epoch": 0.75, "learning_rate": 3.0560053735916372e-06, "loss": 1.3535, "step": 1880 }, { "epoch": 0.75, "learning_rate": 3.0466889030352976e-06, "loss": 1.2773, "step": 1881 }, { "epoch": 0.75, "learning_rate": 3.037384102398431e-06, "loss": 1.3604, "step": 1882 }, { "epoch": 0.75, "learning_rate": 3.0280909872975194e-06, "loss": 1.3379, "step": 1883 }, { "epoch": 0.75, "learning_rate": 3.0188095733294388e-06, "loss": 1.3945, "step": 1884 }, { "epoch": 0.75, "learning_rate": 3.009539876071427e-06, "loss": 1.3643, "step": 1885 }, { "epoch": 0.75, "learning_rate": 3.0002819110810475e-06, "loss": 1.3945, "step": 1886 }, { "epoch": 0.75, "learning_rate": 2.9910356938961782e-06, "loss": 1.3203, "step": 1887 }, { "epoch": 0.76, "learning_rate": 2.981801240034985e-06, "loss": 1.3438, "step": 1888 }, { "epoch": 0.76, "learning_rate": 2.9725785649958895e-06, "loss": 1.2832, "step": 1889 }, { "epoch": 0.76, "learning_rate": 2.9633676842575386e-06, "loss": 1.3018, "step": 1890 }, { "epoch": 0.76, "learning_rate": 2.9541686132787907e-06, "loss": 1.3506, "step": 1891 }, { "epoch": 0.76, "learning_rate": 2.944981367498677e-06, "loss": 1.3818, "step": 1892 }, { "epoch": 0.76, "learning_rate": 2.93580596233639e-06, "loss": 1.4189, "step": 1893 }, { "epoch": 0.76, "learning_rate": 2.9266424131912495e-06, "loss": 1.3359, "step": 1894 }, { "epoch": 0.76, "learning_rate": 2.9174907354426696e-06, "loss": 1.3447, "step": 1895 }, { "epoch": 0.76, "learning_rate": 2.9083509444501433e-06, "loss": 1.3838, "step": 1896 }, { "epoch": 0.76, "learning_rate": 2.899223055553221e-06, "loss": 1.3301, "step": 1897 }, { "epoch": 0.76, "learning_rate": 2.890107084071465e-06, "loss": 1.3174, "step": 1898 }, { "epoch": 0.76, "learning_rate": 2.881003045304448e-06, "loss": 1.3301, "step": 1899 }, { "epoch": 0.76, "learning_rate": 2.8719109545317102e-06, "loss": 1.3916, "step": 1900 }, { "epoch": 0.76, "learning_rate": 2.8628308270127335e-06, "loss": 1.3281, "step": 1901 }, { "epoch": 0.76, "learning_rate": 2.853762677986932e-06, "loss": 1.29, "step": 1902 }, { "epoch": 0.76, "learning_rate": 2.844706522673616e-06, "loss": 1.3477, "step": 1903 }, { "epoch": 0.76, "learning_rate": 2.835662376271957e-06, "loss": 1.3389, "step": 1904 }, { "epoch": 0.76, "learning_rate": 2.8266302539609747e-06, "loss": 1.3623, "step": 1905 }, { "epoch": 0.76, "learning_rate": 2.8176101708995174e-06, "loss": 1.3594, "step": 1906 }, { "epoch": 0.76, "learning_rate": 2.808602142226212e-06, "loss": 1.3477, "step": 1907 }, { "epoch": 0.76, "learning_rate": 2.7996061830594714e-06, "loss": 1.2881, "step": 1908 }, { "epoch": 0.76, "learning_rate": 2.7906223084974405e-06, "loss": 1.2808, "step": 1909 }, { "epoch": 0.76, "learning_rate": 2.78165053361798e-06, "loss": 1.4268, "step": 1910 }, { "epoch": 0.76, "learning_rate": 2.772690873478656e-06, "loss": 1.3105, "step": 1911 }, { "epoch": 0.76, "learning_rate": 2.7637433431166903e-06, "loss": 1.4043, "step": 1912 }, { "epoch": 0.77, "learning_rate": 2.754807957548955e-06, "loss": 1.3721, "step": 1913 }, { "epoch": 0.77, "learning_rate": 2.745884731771931e-06, "loss": 1.3477, "step": 1914 }, { "epoch": 0.77, "learning_rate": 2.736973680761702e-06, "loss": 1.3438, "step": 1915 }, { "epoch": 0.77, "learning_rate": 2.728074819473908e-06, "loss": 1.4014, "step": 1916 }, { "epoch": 0.77, "learning_rate": 2.7191881628437335e-06, "loss": 1.4355, "step": 1917 }, { "epoch": 0.77, "learning_rate": 2.7103137257858867e-06, "loss": 1.3389, "step": 1918 }, { "epoch": 0.77, "learning_rate": 2.7014515231945557e-06, "loss": 1.2871, "step": 1919 }, { "epoch": 0.77, "learning_rate": 2.692601569943407e-06, "loss": 1.375, "step": 1920 }, { "epoch": 0.77, "learning_rate": 2.683763880885538e-06, "loss": 1.3115, "step": 1921 }, { "epoch": 0.77, "learning_rate": 2.674938470853472e-06, "loss": 1.4014, "step": 1922 }, { "epoch": 0.77, "learning_rate": 2.6661253546591158e-06, "loss": 1.3623, "step": 1923 }, { "epoch": 0.77, "learning_rate": 2.6573245470937527e-06, "loss": 1.332, "step": 1924 }, { "epoch": 0.77, "learning_rate": 2.648536062927999e-06, "loss": 1.4072, "step": 1925 }, { "epoch": 0.77, "learning_rate": 2.639759916911788e-06, "loss": 1.333, "step": 1926 }, { "epoch": 0.77, "learning_rate": 2.6309961237743587e-06, "loss": 1.3271, "step": 1927 }, { "epoch": 0.77, "learning_rate": 2.6222446982242e-06, "loss": 1.29, "step": 1928 }, { "epoch": 0.77, "learning_rate": 2.61350565494906e-06, "loss": 1.2998, "step": 1929 }, { "epoch": 0.77, "learning_rate": 2.604779008615895e-06, "loss": 1.3047, "step": 1930 }, { "epoch": 0.77, "learning_rate": 2.5960647738708553e-06, "loss": 1.248, "step": 1931 }, { "epoch": 0.77, "learning_rate": 2.5873629653392653e-06, "loss": 1.3252, "step": 1932 }, { "epoch": 0.77, "learning_rate": 2.578673597625597e-06, "loss": 1.29, "step": 1933 }, { "epoch": 0.77, "learning_rate": 2.569996685313434e-06, "loss": 1.3955, "step": 1934 }, { "epoch": 0.77, "learning_rate": 2.5613322429654573e-06, "loss": 1.3477, "step": 1935 }, { "epoch": 0.77, "learning_rate": 2.5526802851234268e-06, "loss": 1.3076, "step": 1936 }, { "epoch": 0.77, "learning_rate": 2.5440408263081385e-06, "loss": 1.3906, "step": 1937 }, { "epoch": 0.78, "learning_rate": 2.535413881019423e-06, "loss": 1.3359, "step": 1938 }, { "epoch": 0.78, "learning_rate": 2.526799463736099e-06, "loss": 1.3857, "step": 1939 }, { "epoch": 0.78, "learning_rate": 2.5181975889159615e-06, "loss": 1.3477, "step": 1940 }, { "epoch": 0.78, "learning_rate": 2.509608270995758e-06, "loss": 1.3828, "step": 1941 }, { "epoch": 0.78, "learning_rate": 2.501031524391163e-06, "loss": 1.3125, "step": 1942 }, { "epoch": 0.78, "learning_rate": 2.492467363496747e-06, "loss": 1.3301, "step": 1943 }, { "epoch": 0.78, "learning_rate": 2.483915802685959e-06, "loss": 1.3594, "step": 1944 }, { "epoch": 0.78, "learning_rate": 2.475376856311097e-06, "loss": 1.4307, "step": 1945 }, { "epoch": 0.78, "learning_rate": 2.4668505387033025e-06, "loss": 1.3486, "step": 1946 }, { "epoch": 0.78, "learning_rate": 2.458336864172508e-06, "loss": 1.3809, "step": 1947 }, { "epoch": 0.78, "learning_rate": 2.44983584700743e-06, "loss": 1.3184, "step": 1948 }, { "epoch": 0.78, "learning_rate": 2.4413475014755396e-06, "loss": 1.3223, "step": 1949 }, { "epoch": 0.78, "learning_rate": 2.432871841823047e-06, "loss": 1.3018, "step": 1950 }, { "epoch": 0.78, "learning_rate": 2.42440888227487e-06, "loss": 1.3408, "step": 1951 }, { "epoch": 0.78, "learning_rate": 2.415958637034609e-06, "loss": 1.3799, "step": 1952 }, { "epoch": 0.78, "learning_rate": 2.407521120284523e-06, "loss": 1.3447, "step": 1953 }, { "epoch": 0.78, "learning_rate": 2.3990963461855075e-06, "loss": 1.3457, "step": 1954 }, { "epoch": 0.78, "learning_rate": 2.390684328877089e-06, "loss": 1.3008, "step": 1955 }, { "epoch": 0.78, "learning_rate": 2.3822850824773623e-06, "loss": 1.3047, "step": 1956 }, { "epoch": 0.78, "learning_rate": 2.3738986210829997e-06, "loss": 1.417, "step": 1957 }, { "epoch": 0.78, "learning_rate": 2.3655249587692073e-06, "loss": 1.2354, "step": 1958 }, { "epoch": 0.78, "learning_rate": 2.3571641095897223e-06, "loss": 1.3926, "step": 1959 }, { "epoch": 0.78, "learning_rate": 2.3488160875767717e-06, "loss": 1.3301, "step": 1960 }, { "epoch": 0.78, "learning_rate": 2.340480906741053e-06, "loss": 1.373, "step": 1961 }, { "epoch": 0.78, "learning_rate": 2.332158581071712e-06, "loss": 1.2471, "step": 1962 }, { "epoch": 0.79, "learning_rate": 2.323849124536315e-06, "loss": 1.3145, "step": 1963 }, { "epoch": 0.79, "learning_rate": 2.3155525510808453e-06, "loss": 1.3721, "step": 1964 }, { "epoch": 0.79, "learning_rate": 2.307268874629649e-06, "loss": 1.3027, "step": 1965 }, { "epoch": 0.79, "learning_rate": 2.2989981090854306e-06, "loss": 1.3613, "step": 1966 }, { "epoch": 0.79, "learning_rate": 2.2907402683292268e-06, "loss": 1.4346, "step": 1967 }, { "epoch": 0.79, "learning_rate": 2.2824953662203832e-06, "loss": 1.4297, "step": 1968 }, { "epoch": 0.79, "learning_rate": 2.2742634165965317e-06, "loss": 1.3926, "step": 1969 }, { "epoch": 0.79, "learning_rate": 2.266044433273562e-06, "loss": 1.2734, "step": 1970 }, { "epoch": 0.79, "learning_rate": 2.2578384300456014e-06, "loss": 1.3242, "step": 1971 }, { "epoch": 0.79, "learning_rate": 2.249645420684998e-06, "loss": 1.3701, "step": 1972 }, { "epoch": 0.79, "learning_rate": 2.2414654189422845e-06, "loss": 1.3379, "step": 1973 }, { "epoch": 0.79, "learning_rate": 2.233298438546172e-06, "loss": 1.3623, "step": 1974 }, { "epoch": 0.79, "learning_rate": 2.2251444932035094e-06, "loss": 1.4121, "step": 1975 }, { "epoch": 0.79, "learning_rate": 2.2170035965992674e-06, "loss": 1.4004, "step": 1976 }, { "epoch": 0.79, "learning_rate": 2.2088757623965263e-06, "loss": 1.376, "step": 1977 }, { "epoch": 0.79, "learning_rate": 2.2007610042364337e-06, "loss": 1.2939, "step": 1978 }, { "epoch": 0.79, "learning_rate": 2.1926593357382e-06, "loss": 1.3604, "step": 1979 }, { "epoch": 0.79, "learning_rate": 2.184570770499056e-06, "loss": 1.3467, "step": 1980 }, { "epoch": 0.79, "learning_rate": 2.176495322094254e-06, "loss": 1.3115, "step": 1981 }, { "epoch": 0.79, "learning_rate": 2.1684330040770183e-06, "loss": 1.3535, "step": 1982 }, { "epoch": 0.79, "learning_rate": 2.1603838299785486e-06, "loss": 1.3799, "step": 1983 }, { "epoch": 0.79, "learning_rate": 2.1523478133079776e-06, "loss": 1.3418, "step": 1984 }, { "epoch": 0.79, "learning_rate": 2.1443249675523536e-06, "loss": 1.4219, "step": 1985 }, { "epoch": 0.79, "learning_rate": 2.1363153061766297e-06, "loss": 1.3213, "step": 1986 }, { "epoch": 0.79, "learning_rate": 2.128318842623618e-06, "loss": 1.4141, "step": 1987 }, { "epoch": 0.8, "learning_rate": 2.1203355903139934e-06, "loss": 1.3125, "step": 1988 }, { "epoch": 0.8, "learning_rate": 2.112365562646248e-06, "loss": 1.2861, "step": 1989 }, { "epoch": 0.8, "learning_rate": 2.1044087729966856e-06, "loss": 1.3633, "step": 1990 }, { "epoch": 0.8, "learning_rate": 2.0964652347193894e-06, "loss": 1.3594, "step": 1991 }, { "epoch": 0.8, "learning_rate": 2.088534961146197e-06, "loss": 1.2959, "step": 1992 }, { "epoch": 0.8, "learning_rate": 2.0806179655866964e-06, "loss": 1.2793, "step": 1993 }, { "epoch": 0.8, "learning_rate": 2.072714261328177e-06, "loss": 1.3105, "step": 1994 }, { "epoch": 0.8, "learning_rate": 2.064823861635633e-06, "loss": 1.3506, "step": 1995 }, { "epoch": 0.8, "learning_rate": 2.0569467797517173e-06, "loss": 1.335, "step": 1996 }, { "epoch": 0.8, "learning_rate": 2.0490830288967443e-06, "loss": 1.3281, "step": 1997 }, { "epoch": 0.8, "learning_rate": 2.041232622268642e-06, "loss": 1.3838, "step": 1998 }, { "epoch": 0.8, "learning_rate": 2.033395573042952e-06, "loss": 1.3457, "step": 1999 }, { "epoch": 0.8, "learning_rate": 2.025571894372794e-06, "loss": 1.3096, "step": 2000 }, { "epoch": 0.8, "learning_rate": 2.017761599388842e-06, "loss": 1.3623, "step": 2001 }, { "epoch": 0.8, "learning_rate": 2.0099647011993217e-06, "loss": 1.3359, "step": 2002 }, { "epoch": 0.8, "learning_rate": 2.00218121288996e-06, "loss": 1.2852, "step": 2003 }, { "epoch": 0.8, "learning_rate": 1.994411147523987e-06, "loss": 1.3652, "step": 2004 }, { "epoch": 0.8, "learning_rate": 1.9866545181421016e-06, "loss": 1.3213, "step": 2005 }, { "epoch": 0.8, "learning_rate": 1.97891133776245e-06, "loss": 1.2754, "step": 2006 }, { "epoch": 0.8, "learning_rate": 1.971181619380611e-06, "loss": 1.3271, "step": 2007 }, { "epoch": 0.8, "learning_rate": 1.963465375969572e-06, "loss": 1.3555, "step": 2008 }, { "epoch": 0.8, "learning_rate": 1.955762620479699e-06, "loss": 1.3652, "step": 2009 }, { "epoch": 0.8, "learning_rate": 1.9480733658387175e-06, "loss": 1.3564, "step": 2010 }, { "epoch": 0.8, "learning_rate": 1.940397624951709e-06, "loss": 1.4688, "step": 2011 }, { "epoch": 0.8, "learning_rate": 1.9327354107010566e-06, "loss": 1.377, "step": 2012 }, { "epoch": 0.81, "learning_rate": 1.9250867359464575e-06, "loss": 1.3428, "step": 2013 }, { "epoch": 0.81, "learning_rate": 1.9174516135248745e-06, "loss": 1.4102, "step": 2014 }, { "epoch": 0.81, "learning_rate": 1.9098300562505266e-06, "loss": 1.2744, "step": 2015 }, { "epoch": 0.81, "learning_rate": 1.902222076914869e-06, "loss": 1.3535, "step": 2016 }, { "epoch": 0.81, "learning_rate": 1.894627688286571e-06, "loss": 1.3535, "step": 2017 }, { "epoch": 0.81, "learning_rate": 1.8870469031114868e-06, "loss": 1.3564, "step": 2018 }, { "epoch": 0.81, "learning_rate": 1.8794797341126403e-06, "loss": 1.2695, "step": 2019 }, { "epoch": 0.81, "learning_rate": 1.8719261939902023e-06, "loss": 1.3545, "step": 2020 }, { "epoch": 0.81, "learning_rate": 1.8643862954214754e-06, "loss": 1.3438, "step": 2021 }, { "epoch": 0.81, "learning_rate": 1.8568600510608659e-06, "loss": 1.3516, "step": 2022 }, { "epoch": 0.81, "learning_rate": 1.8493474735398575e-06, "loss": 1.29, "step": 2023 }, { "epoch": 0.81, "learning_rate": 1.8418485754670013e-06, "loss": 1.376, "step": 2024 }, { "epoch": 0.81, "learning_rate": 1.8343633694278895e-06, "loss": 1.3457, "step": 2025 }, { "epoch": 0.81, "learning_rate": 1.8268918679851388e-06, "loss": 1.3994, "step": 2026 }, { "epoch": 0.81, "learning_rate": 1.8194340836783565e-06, "loss": 1.3994, "step": 2027 }, { "epoch": 0.81, "learning_rate": 1.8119900290241331e-06, "loss": 1.3027, "step": 2028 }, { "epoch": 0.81, "learning_rate": 1.8045597165160134e-06, "loss": 1.2783, "step": 2029 }, { "epoch": 0.81, "learning_rate": 1.7971431586244814e-06, "loss": 1.3496, "step": 2030 }, { "epoch": 0.81, "learning_rate": 1.7897403677969405e-06, "loss": 1.4033, "step": 2031 }, { "epoch": 0.81, "learning_rate": 1.7823513564576788e-06, "loss": 1.4072, "step": 2032 }, { "epoch": 0.81, "learning_rate": 1.774976137007861e-06, "loss": 1.3115, "step": 2033 }, { "epoch": 0.81, "learning_rate": 1.7676147218255092e-06, "loss": 1.3447, "step": 2034 }, { "epoch": 0.81, "learning_rate": 1.7602671232654755e-06, "loss": 1.3545, "step": 2035 }, { "epoch": 0.81, "learning_rate": 1.7529333536594217e-06, "loss": 1.3027, "step": 2036 }, { "epoch": 0.81, "learning_rate": 1.7456134253157976e-06, "loss": 1.2871, "step": 2037 }, { "epoch": 0.82, "learning_rate": 1.7383073505198255e-06, "loss": 1.3564, "step": 2038 }, { "epoch": 0.82, "learning_rate": 1.7310151415334798e-06, "loss": 1.2598, "step": 2039 }, { "epoch": 0.82, "learning_rate": 1.723736810595461e-06, "loss": 1.4189, "step": 2040 }, { "epoch": 0.82, "learning_rate": 1.7164723699211782e-06, "loss": 1.3262, "step": 2041 }, { "epoch": 0.82, "learning_rate": 1.709221831702723e-06, "loss": 1.3594, "step": 2042 }, { "epoch": 0.82, "learning_rate": 1.7019852081088616e-06, "loss": 1.2754, "step": 2043 }, { "epoch": 0.82, "learning_rate": 1.6947625112850074e-06, "loss": 1.2744, "step": 2044 }, { "epoch": 0.82, "learning_rate": 1.687553753353195e-06, "loss": 1.332, "step": 2045 }, { "epoch": 0.82, "learning_rate": 1.680358946412064e-06, "loss": 1.3604, "step": 2046 }, { "epoch": 0.82, "learning_rate": 1.6731781025368422e-06, "loss": 1.3975, "step": 2047 }, { "epoch": 0.82, "learning_rate": 1.6660112337793256e-06, "loss": 1.4023, "step": 2048 }, { "epoch": 0.82, "learning_rate": 1.6588583521678536e-06, "loss": 1.3818, "step": 2049 }, { "epoch": 0.82, "learning_rate": 1.6517194697072903e-06, "loss": 1.4004, "step": 2050 }, { "epoch": 0.82, "learning_rate": 1.644594598378999e-06, "loss": 1.3115, "step": 2051 }, { "epoch": 0.82, "learning_rate": 1.6374837501408403e-06, "loss": 1.2812, "step": 2052 }, { "epoch": 0.82, "learning_rate": 1.6303869369271264e-06, "loss": 1.3506, "step": 2053 }, { "epoch": 0.82, "learning_rate": 1.6233041706486253e-06, "loss": 1.335, "step": 2054 }, { "epoch": 0.82, "learning_rate": 1.6162354631925203e-06, "loss": 1.3428, "step": 2055 }, { "epoch": 0.82, "learning_rate": 1.609180826422404e-06, "loss": 1.3584, "step": 2056 }, { "epoch": 0.82, "learning_rate": 1.602140272178253e-06, "loss": 1.4062, "step": 2057 }, { "epoch": 0.82, "learning_rate": 1.5951138122764132e-06, "loss": 1.3672, "step": 2058 }, { "epoch": 0.82, "learning_rate": 1.58810145850957e-06, "loss": 1.3965, "step": 2059 }, { "epoch": 0.82, "learning_rate": 1.5811032226467304e-06, "loss": 1.3291, "step": 2060 }, { "epoch": 0.82, "learning_rate": 1.5741191164332192e-06, "loss": 1.3438, "step": 2061 }, { "epoch": 0.82, "learning_rate": 1.5671491515906355e-06, "loss": 1.3252, "step": 2062 }, { "epoch": 0.83, "learning_rate": 1.5601933398168523e-06, "loss": 1.3389, "step": 2063 }, { "epoch": 0.83, "learning_rate": 1.5532516927859853e-06, "loss": 1.3262, "step": 2064 }, { "epoch": 0.83, "learning_rate": 1.5463242221483742e-06, "loss": 1.415, "step": 2065 }, { "epoch": 0.83, "learning_rate": 1.5394109395305757e-06, "loss": 1.3047, "step": 2066 }, { "epoch": 0.83, "learning_rate": 1.5325118565353237e-06, "loss": 1.3369, "step": 2067 }, { "epoch": 0.83, "learning_rate": 1.5256269847415283e-06, "loss": 1.3154, "step": 2068 }, { "epoch": 0.83, "learning_rate": 1.5187563357042423e-06, "loss": 1.293, "step": 2069 }, { "epoch": 0.83, "learning_rate": 1.511899920954656e-06, "loss": 1.3545, "step": 2070 }, { "epoch": 0.83, "learning_rate": 1.5050577520000608e-06, "loss": 1.3584, "step": 2071 }, { "epoch": 0.83, "learning_rate": 1.498229840323847e-06, "loss": 1.3311, "step": 2072 }, { "epoch": 0.83, "learning_rate": 1.4914161973854714e-06, "loss": 1.3584, "step": 2073 }, { "epoch": 0.83, "learning_rate": 1.4846168346204425e-06, "loss": 1.2891, "step": 2074 }, { "epoch": 0.83, "learning_rate": 1.4778317634403082e-06, "loss": 1.417, "step": 2075 }, { "epoch": 0.83, "learning_rate": 1.4710609952326239e-06, "loss": 1.3848, "step": 2076 }, { "epoch": 0.83, "learning_rate": 1.464304541360946e-06, "loss": 1.3032, "step": 2077 }, { "epoch": 0.83, "learning_rate": 1.457562413164799e-06, "loss": 1.3662, "step": 2078 }, { "epoch": 0.83, "learning_rate": 1.4508346219596725e-06, "loss": 1.3711, "step": 2079 }, { "epoch": 0.83, "learning_rate": 1.4441211790369892e-06, "loss": 1.3535, "step": 2080 }, { "epoch": 0.83, "learning_rate": 1.4374220956640895e-06, "loss": 1.3164, "step": 2081 }, { "epoch": 0.83, "learning_rate": 1.4307373830842174e-06, "loss": 1.3545, "step": 2082 }, { "epoch": 0.83, "learning_rate": 1.424067052516499e-06, "loss": 1.3535, "step": 2083 }, { "epoch": 0.83, "learning_rate": 1.4174111151559188e-06, "loss": 1.3906, "step": 2084 }, { "epoch": 0.83, "learning_rate": 1.4107695821733026e-06, "loss": 1.3555, "step": 2085 }, { "epoch": 0.83, "learning_rate": 1.4041424647153112e-06, "loss": 1.3867, "step": 2086 }, { "epoch": 0.83, "learning_rate": 1.3975297739043992e-06, "loss": 1.3477, "step": 2087 }, { "epoch": 0.84, "learning_rate": 1.3909315208388185e-06, "loss": 1.3643, "step": 2088 }, { "epoch": 0.84, "learning_rate": 1.3843477165925846e-06, "loss": 1.2656, "step": 2089 }, { "epoch": 0.84, "learning_rate": 1.3777783722154603e-06, "loss": 1.3174, "step": 2090 }, { "epoch": 0.84, "learning_rate": 1.3712234987329486e-06, "loss": 1.3311, "step": 2091 }, { "epoch": 0.84, "learning_rate": 1.3646831071462606e-06, "loss": 1.3193, "step": 2092 }, { "epoch": 0.84, "learning_rate": 1.3581572084323014e-06, "loss": 1.3691, "step": 2093 }, { "epoch": 0.84, "learning_rate": 1.3516458135436539e-06, "loss": 1.3311, "step": 2094 }, { "epoch": 0.84, "learning_rate": 1.3451489334085555e-06, "loss": 1.3086, "step": 2095 }, { "epoch": 0.84, "learning_rate": 1.3386665789308885e-06, "loss": 1.4375, "step": 2096 }, { "epoch": 0.84, "learning_rate": 1.3321987609901553e-06, "loss": 1.374, "step": 2097 }, { "epoch": 0.84, "learning_rate": 1.325745490441458e-06, "loss": 1.3096, "step": 2098 }, { "epoch": 0.84, "learning_rate": 1.3193067781154835e-06, "loss": 1.3164, "step": 2099 }, { "epoch": 0.84, "learning_rate": 1.3128826348184886e-06, "loss": 1.3291, "step": 2100 }, { "epoch": 0.84, "learning_rate": 1.3064730713322793e-06, "loss": 1.3643, "step": 2101 }, { "epoch": 0.84, "learning_rate": 1.3000780984141881e-06, "loss": 1.2842, "step": 2102 }, { "epoch": 0.84, "learning_rate": 1.2936977267970597e-06, "loss": 1.4531, "step": 2103 }, { "epoch": 0.84, "learning_rate": 1.2873319671892337e-06, "loss": 1.3115, "step": 2104 }, { "epoch": 0.84, "learning_rate": 1.2809808302745298e-06, "loss": 1.3486, "step": 2105 }, { "epoch": 0.84, "learning_rate": 1.2746443267122233e-06, "loss": 1.3193, "step": 2106 }, { "epoch": 0.84, "learning_rate": 1.2683224671370286e-06, "loss": 1.3301, "step": 2107 }, { "epoch": 0.84, "learning_rate": 1.262015262159082e-06, "loss": 1.2471, "step": 2108 }, { "epoch": 0.84, "learning_rate": 1.255722722363929e-06, "loss": 1.3232, "step": 2109 }, { "epoch": 0.84, "learning_rate": 1.249444858312502e-06, "loss": 1.3799, "step": 2110 }, { "epoch": 0.84, "learning_rate": 1.2431816805410968e-06, "loss": 1.2949, "step": 2111 }, { "epoch": 0.84, "learning_rate": 1.2369331995613664e-06, "loss": 1.3252, "step": 2112 }, { "epoch": 0.85, "learning_rate": 1.2306994258602922e-06, "loss": 1.3145, "step": 2113 }, { "epoch": 0.85, "learning_rate": 1.2244803699001785e-06, "loss": 1.3525, "step": 2114 }, { "epoch": 0.85, "learning_rate": 1.218276042118629e-06, "loss": 1.3701, "step": 2115 }, { "epoch": 0.85, "learning_rate": 1.2120864529285203e-06, "loss": 1.293, "step": 2116 }, { "epoch": 0.85, "learning_rate": 1.2059116127179993e-06, "loss": 1.3154, "step": 2117 }, { "epoch": 0.85, "learning_rate": 1.199751531850457e-06, "loss": 1.4414, "step": 2118 }, { "epoch": 0.85, "learning_rate": 1.1936062206645183e-06, "loss": 1.2939, "step": 2119 }, { "epoch": 0.85, "learning_rate": 1.1874756894740137e-06, "loss": 1.3281, "step": 2120 }, { "epoch": 0.85, "learning_rate": 1.1813599485679684e-06, "loss": 1.3301, "step": 2121 }, { "epoch": 0.85, "learning_rate": 1.1752590082105863e-06, "loss": 1.3281, "step": 2122 }, { "epoch": 0.85, "learning_rate": 1.1691728786412315e-06, "loss": 1.4111, "step": 2123 }, { "epoch": 0.85, "learning_rate": 1.1631015700744153e-06, "loss": 1.29, "step": 2124 }, { "epoch": 0.85, "learning_rate": 1.1570450926997657e-06, "loss": 1.4111, "step": 2125 }, { "epoch": 0.85, "learning_rate": 1.1510034566820205e-06, "loss": 1.3945, "step": 2126 }, { "epoch": 0.85, "learning_rate": 1.144976672161019e-06, "loss": 1.3232, "step": 2127 }, { "epoch": 0.85, "learning_rate": 1.1389647492516598e-06, "loss": 1.3721, "step": 2128 }, { "epoch": 0.85, "learning_rate": 1.132967698043913e-06, "loss": 1.3428, "step": 2129 }, { "epoch": 0.85, "learning_rate": 1.1269855286027798e-06, "loss": 1.3467, "step": 2130 }, { "epoch": 0.85, "learning_rate": 1.1210182509682854e-06, "loss": 1.3477, "step": 2131 }, { "epoch": 0.85, "learning_rate": 1.1150658751554667e-06, "loss": 1.3203, "step": 2132 }, { "epoch": 0.85, "learning_rate": 1.1091284111543499e-06, "loss": 1.3711, "step": 2133 }, { "epoch": 0.85, "learning_rate": 1.1032058689299297e-06, "loss": 1.3857, "step": 2134 }, { "epoch": 0.85, "learning_rate": 1.0972982584221592e-06, "loss": 1.3037, "step": 2135 }, { "epoch": 0.85, "learning_rate": 1.0914055895459353e-06, "loss": 1.3809, "step": 2136 }, { "epoch": 0.85, "learning_rate": 1.08552787219107e-06, "loss": 1.2568, "step": 2137 }, { "epoch": 0.86, "learning_rate": 1.0796651162222916e-06, "loss": 1.3633, "step": 2138 }, { "epoch": 0.86, "learning_rate": 1.07381733147921e-06, "loss": 1.3486, "step": 2139 }, { "epoch": 0.86, "learning_rate": 1.067984527776309e-06, "loss": 1.3652, "step": 2140 }, { "epoch": 0.86, "learning_rate": 1.062166714902938e-06, "loss": 1.3555, "step": 2141 }, { "epoch": 0.86, "learning_rate": 1.0563639026232742e-06, "loss": 1.3125, "step": 2142 }, { "epoch": 0.86, "learning_rate": 1.0505761006763315e-06, "loss": 1.3145, "step": 2143 }, { "epoch": 0.86, "learning_rate": 1.044803318775922e-06, "loss": 1.3652, "step": 2144 }, { "epoch": 0.86, "learning_rate": 1.0390455666106547e-06, "loss": 1.3438, "step": 2145 }, { "epoch": 0.86, "learning_rate": 1.0333028538439093e-06, "loss": 1.3643, "step": 2146 }, { "epoch": 0.86, "learning_rate": 1.027575190113832e-06, "loss": 1.418, "step": 2147 }, { "epoch": 0.86, "learning_rate": 1.021862585033304e-06, "loss": 1.3721, "step": 2148 }, { "epoch": 0.86, "learning_rate": 1.0161650481899344e-06, "loss": 1.2949, "step": 2149 }, { "epoch": 0.86, "learning_rate": 1.010482589146048e-06, "loss": 1.3271, "step": 2150 }, { "epoch": 0.86, "learning_rate": 1.0048152174386584e-06, "loss": 1.376, "step": 2151 }, { "epoch": 0.86, "learning_rate": 9.991629425794624e-07, "loss": 1.3672, "step": 2152 }, { "epoch": 0.86, "learning_rate": 9.935257740548143e-07, "loss": 1.3379, "step": 2153 }, { "epoch": 0.86, "learning_rate": 9.879037213257214e-07, "loss": 1.3369, "step": 2154 }, { "epoch": 0.86, "learning_rate": 9.822967938278172e-07, "loss": 1.3281, "step": 2155 }, { "epoch": 0.86, "learning_rate": 9.767050009713476e-07, "loss": 1.3652, "step": 2156 }, { "epoch": 0.86, "learning_rate": 9.711283521411674e-07, "loss": 1.3857, "step": 2157 }, { "epoch": 0.86, "learning_rate": 9.655668566967026e-07, "loss": 1.3271, "step": 2158 }, { "epoch": 0.86, "learning_rate": 9.600205239719584e-07, "loss": 1.2861, "step": 2159 }, { "epoch": 0.86, "learning_rate": 9.544893632754816e-07, "loss": 1.292, "step": 2160 }, { "epoch": 0.86, "learning_rate": 9.489733838903648e-07, "loss": 1.2725, "step": 2161 }, { "epoch": 0.86, "learning_rate": 9.434725950742119e-07, "loss": 1.3633, "step": 2162 }, { "epoch": 0.87, "learning_rate": 9.379870060591434e-07, "loss": 1.3076, "step": 2163 }, { "epoch": 0.87, "learning_rate": 9.325166260517593e-07, "loss": 1.3438, "step": 2164 }, { "epoch": 0.87, "learning_rate": 9.270614642331377e-07, "loss": 1.4463, "step": 2165 }, { "epoch": 0.87, "learning_rate": 9.216215297588182e-07, "loss": 1.3262, "step": 2166 }, { "epoch": 0.87, "learning_rate": 9.161968317587788e-07, "loss": 1.4668, "step": 2167 }, { "epoch": 0.87, "learning_rate": 9.107873793374322e-07, "loss": 1.4189, "step": 2168 }, { "epoch": 0.87, "learning_rate": 9.053931815735995e-07, "loss": 1.2656, "step": 2169 }, { "epoch": 0.87, "learning_rate": 9.000142475204965e-07, "loss": 1.2969, "step": 2170 }, { "epoch": 0.87, "learning_rate": 8.946505862057286e-07, "loss": 1.3438, "step": 2171 }, { "epoch": 0.87, "learning_rate": 8.893022066312674e-07, "loss": 1.3691, "step": 2172 }, { "epoch": 0.87, "learning_rate": 8.839691177734322e-07, "loss": 1.3262, "step": 2173 }, { "epoch": 0.87, "learning_rate": 8.786513285828835e-07, "loss": 1.3281, "step": 2174 }, { "epoch": 0.87, "learning_rate": 8.733488479845997e-07, "loss": 1.4277, "step": 2175 }, { "epoch": 0.87, "learning_rate": 8.680616848778711e-07, "loss": 1.3125, "step": 2176 }, { "epoch": 0.87, "learning_rate": 8.627898481362817e-07, "loss": 1.3369, "step": 2177 }, { "epoch": 0.87, "learning_rate": 8.575333466076863e-07, "loss": 1.3291, "step": 2178 }, { "epoch": 0.87, "learning_rate": 8.522921891142034e-07, "loss": 1.29, "step": 2179 }, { "epoch": 0.87, "learning_rate": 8.470663844522053e-07, "loss": 1.3359, "step": 2180 }, { "epoch": 0.87, "learning_rate": 8.418559413922933e-07, "loss": 1.3701, "step": 2181 }, { "epoch": 0.87, "learning_rate": 8.366608686792854e-07, "loss": 1.3018, "step": 2182 }, { "epoch": 0.87, "learning_rate": 8.31481175032206e-07, "loss": 1.3799, "step": 2183 }, { "epoch": 0.87, "learning_rate": 8.263168691442624e-07, "loss": 1.4326, "step": 2184 }, { "epoch": 0.87, "learning_rate": 8.211679596828481e-07, "loss": 1.3047, "step": 2185 }, { "epoch": 0.87, "learning_rate": 8.160344552895061e-07, "loss": 1.377, "step": 2186 }, { "epoch": 0.87, "learning_rate": 8.109163645799267e-07, "loss": 1.3252, "step": 2187 }, { "epoch": 0.88, "learning_rate": 8.058136961439333e-07, "loss": 1.3379, "step": 2188 }, { "epoch": 0.88, "learning_rate": 8.007264585454632e-07, "loss": 1.3398, "step": 2189 }, { "epoch": 0.88, "learning_rate": 7.956546603225601e-07, "loss": 1.2861, "step": 2190 }, { "epoch": 0.88, "learning_rate": 7.905983099873504e-07, "loss": 1.3564, "step": 2191 }, { "epoch": 0.88, "learning_rate": 7.855574160260371e-07, "loss": 1.2793, "step": 2192 }, { "epoch": 0.88, "learning_rate": 7.805319868988759e-07, "loss": 1.3223, "step": 2193 }, { "epoch": 0.88, "learning_rate": 7.755220310401812e-07, "loss": 1.3496, "step": 2194 }, { "epoch": 0.88, "learning_rate": 7.705275568582848e-07, "loss": 1.3281, "step": 2195 }, { "epoch": 0.88, "learning_rate": 7.655485727355416e-07, "loss": 1.3438, "step": 2196 }, { "epoch": 0.88, "learning_rate": 7.60585087028305e-07, "loss": 1.3291, "step": 2197 }, { "epoch": 0.88, "learning_rate": 7.556371080669222e-07, "loss": 1.3115, "step": 2198 }, { "epoch": 0.88, "learning_rate": 7.507046441557142e-07, "loss": 1.3398, "step": 2199 }, { "epoch": 0.88, "learning_rate": 7.457877035729588e-07, "loss": 1.3086, "step": 2200 }, { "epoch": 0.88, "learning_rate": 7.408862945708839e-07, "loss": 1.3535, "step": 2201 }, { "epoch": 0.88, "learning_rate": 7.360004253756459e-07, "loss": 1.2617, "step": 2202 }, { "epoch": 0.88, "learning_rate": 7.311301041873276e-07, "loss": 1.375, "step": 2203 }, { "epoch": 0.88, "learning_rate": 7.262753391799127e-07, "loss": 1.3145, "step": 2204 }, { "epoch": 0.88, "learning_rate": 7.21436138501278e-07, "loss": 1.3818, "step": 2205 }, { "epoch": 0.88, "learning_rate": 7.166125102731735e-07, "loss": 1.3271, "step": 2206 }, { "epoch": 0.88, "learning_rate": 7.118044625912213e-07, "loss": 1.3105, "step": 2207 }, { "epoch": 0.88, "learning_rate": 7.070120035248906e-07, "loss": 1.334, "step": 2208 }, { "epoch": 0.88, "learning_rate": 7.022351411174866e-07, "loss": 1.3115, "step": 2209 }, { "epoch": 0.88, "learning_rate": 6.974738833861383e-07, "loss": 1.3311, "step": 2210 }, { "epoch": 0.88, "learning_rate": 6.927282383217893e-07, "loss": 1.3496, "step": 2211 }, { "epoch": 0.88, "learning_rate": 6.879982138891717e-07, "loss": 1.3174, "step": 2212 }, { "epoch": 0.89, "learning_rate": 6.83283818026812e-07, "loss": 1.3379, "step": 2213 }, { "epoch": 0.89, "learning_rate": 6.785850586469989e-07, "loss": 1.3818, "step": 2214 }, { "epoch": 0.89, "learning_rate": 6.739019436357774e-07, "loss": 1.4268, "step": 2215 }, { "epoch": 0.89, "learning_rate": 6.692344808529427e-07, "loss": 1.3535, "step": 2216 }, { "epoch": 0.89, "learning_rate": 6.645826781320141e-07, "loss": 1.3145, "step": 2217 }, { "epoch": 0.89, "learning_rate": 6.599465432802332e-07, "loss": 1.3438, "step": 2218 }, { "epoch": 0.89, "learning_rate": 6.553260840785414e-07, "loss": 1.3184, "step": 2219 }, { "epoch": 0.89, "learning_rate": 6.507213082815745e-07, "loss": 1.3945, "step": 2220 }, { "epoch": 0.89, "learning_rate": 6.461322236176438e-07, "loss": 1.3652, "step": 2221 }, { "epoch": 0.89, "learning_rate": 6.415588377887305e-07, "loss": 1.3574, "step": 2222 }, { "epoch": 0.89, "learning_rate": 6.370011584704617e-07, "loss": 1.2988, "step": 2223 }, { "epoch": 0.89, "learning_rate": 6.324591933121072e-07, "loss": 1.3154, "step": 2224 }, { "epoch": 0.89, "learning_rate": 6.279329499365649e-07, "loss": 1.3867, "step": 2225 }, { "epoch": 0.89, "learning_rate": 6.234224359403407e-07, "loss": 1.3672, "step": 2226 }, { "epoch": 0.89, "learning_rate": 6.1892765889355e-07, "loss": 1.373, "step": 2227 }, { "epoch": 0.89, "learning_rate": 6.144486263398886e-07, "loss": 1.3711, "step": 2228 }, { "epoch": 0.89, "learning_rate": 6.099853457966342e-07, "loss": 1.3848, "step": 2229 }, { "epoch": 0.89, "learning_rate": 6.055378247546217e-07, "loss": 1.377, "step": 2230 }, { "epoch": 0.89, "learning_rate": 6.01106070678239e-07, "loss": 1.374, "step": 2231 }, { "epoch": 0.89, "learning_rate": 5.966900910054141e-07, "loss": 1.3398, "step": 2232 }, { "epoch": 0.89, "learning_rate": 5.922898931475973e-07, "loss": 1.3525, "step": 2233 }, { "epoch": 0.89, "learning_rate": 5.879054844897536e-07, "loss": 1.334, "step": 2234 }, { "epoch": 0.89, "learning_rate": 5.835368723903456e-07, "loss": 1.3213, "step": 2235 }, { "epoch": 0.89, "learning_rate": 5.791840641813295e-07, "loss": 1.292, "step": 2236 }, { "epoch": 0.89, "learning_rate": 5.748470671681328e-07, "loss": 1.3701, "step": 2237 }, { "epoch": 0.9, "learning_rate": 5.705258886296494e-07, "loss": 1.3242, "step": 2238 }, { "epoch": 0.9, "learning_rate": 5.662205358182226e-07, "loss": 1.3271, "step": 2239 }, { "epoch": 0.9, "learning_rate": 5.619310159596358e-07, "loss": 1.3594, "step": 2240 }, { "epoch": 0.9, "learning_rate": 5.576573362531001e-07, "loss": 1.4395, "step": 2241 }, { "epoch": 0.9, "learning_rate": 5.533995038712403e-07, "loss": 1.2793, "step": 2242 }, { "epoch": 0.9, "learning_rate": 5.491575259600879e-07, "loss": 1.3203, "step": 2243 }, { "epoch": 0.9, "learning_rate": 5.449314096390601e-07, "loss": 1.2949, "step": 2244 }, { "epoch": 0.9, "learning_rate": 5.407211620009545e-07, "loss": 1.3525, "step": 2245 }, { "epoch": 0.9, "learning_rate": 5.365267901119398e-07, "loss": 1.3477, "step": 2246 }, { "epoch": 0.9, "learning_rate": 5.323483010115382e-07, "loss": 1.3389, "step": 2247 }, { "epoch": 0.9, "learning_rate": 5.281857017126124e-07, "loss": 1.375, "step": 2248 }, { "epoch": 0.9, "learning_rate": 5.240389992013606e-07, "loss": 1.4053, "step": 2249 }, { "epoch": 0.9, "learning_rate": 5.199082004372958e-07, "loss": 1.3408, "step": 2250 }, { "epoch": 0.9, "learning_rate": 5.157933123532466e-07, "loss": 1.3193, "step": 2251 }, { "epoch": 0.9, "learning_rate": 5.116943418553355e-07, "loss": 1.3516, "step": 2252 }, { "epoch": 0.9, "learning_rate": 5.076112958229673e-07, "loss": 1.3564, "step": 2253 }, { "epoch": 0.9, "learning_rate": 5.035441811088204e-07, "loss": 1.2627, "step": 2254 }, { "epoch": 0.9, "learning_rate": 4.994930045388414e-07, "loss": 1.3643, "step": 2255 }, { "epoch": 0.9, "learning_rate": 4.954577729122212e-07, "loss": 1.3359, "step": 2256 }, { "epoch": 0.9, "learning_rate": 4.914384930013927e-07, "loss": 1.3213, "step": 2257 }, { "epoch": 0.9, "learning_rate": 4.874351715520154e-07, "loss": 1.3408, "step": 2258 }, { "epoch": 0.9, "learning_rate": 4.834478152829658e-07, "loss": 1.3818, "step": 2259 }, { "epoch": 0.9, "learning_rate": 4.794764308863242e-07, "loss": 1.3662, "step": 2260 }, { "epoch": 0.9, "learning_rate": 4.755210250273701e-07, "loss": 1.3242, "step": 2261 }, { "epoch": 0.9, "learning_rate": 4.715816043445609e-07, "loss": 1.4004, "step": 2262 }, { "epoch": 0.91, "learning_rate": 4.676581754495235e-07, "loss": 1.3525, "step": 2263 }, { "epoch": 0.91, "learning_rate": 4.6375074492705173e-07, "loss": 1.3604, "step": 2264 }, { "epoch": 0.91, "learning_rate": 4.5985931933508757e-07, "loss": 1.2998, "step": 2265 }, { "epoch": 0.91, "learning_rate": 4.559839052047066e-07, "loss": 1.3574, "step": 2266 }, { "epoch": 0.91, "learning_rate": 4.521245090401172e-07, "loss": 1.3291, "step": 2267 }, { "epoch": 0.91, "learning_rate": 4.482811373186402e-07, "loss": 1.3379, "step": 2268 }, { "epoch": 0.91, "learning_rate": 4.4445379649070587e-07, "loss": 1.417, "step": 2269 }, { "epoch": 0.91, "learning_rate": 4.406424929798403e-07, "loss": 1.3623, "step": 2270 }, { "epoch": 0.91, "learning_rate": 4.368472331826479e-07, "loss": 1.2822, "step": 2271 }, { "epoch": 0.91, "learning_rate": 4.3306802346881116e-07, "loss": 1.3799, "step": 2272 }, { "epoch": 0.91, "learning_rate": 4.2930487018107425e-07, "loss": 1.3428, "step": 2273 }, { "epoch": 0.91, "learning_rate": 4.2555777963523506e-07, "loss": 1.373, "step": 2274 }, { "epoch": 0.91, "learning_rate": 4.218267581201296e-07, "loss": 1.3242, "step": 2275 }, { "epoch": 0.91, "learning_rate": 4.1811181189762684e-07, "loss": 1.3877, "step": 2276 }, { "epoch": 0.91, "learning_rate": 4.1441294720261373e-07, "loss": 1.2891, "step": 2277 }, { "epoch": 0.91, "learning_rate": 4.107301702429922e-07, "loss": 1.3242, "step": 2278 }, { "epoch": 0.91, "learning_rate": 4.070634871996615e-07, "loss": 1.3496, "step": 2279 }, { "epoch": 0.91, "learning_rate": 4.034129042265067e-07, "loss": 1.2793, "step": 2280 }, { "epoch": 0.91, "learning_rate": 3.9977842745039464e-07, "loss": 1.3311, "step": 2281 }, { "epoch": 0.91, "learning_rate": 3.961600629711615e-07, "loss": 1.3076, "step": 2282 }, { "epoch": 0.91, "learning_rate": 3.925578168616007e-07, "loss": 1.3555, "step": 2283 }, { "epoch": 0.91, "learning_rate": 3.889716951674549e-07, "loss": 1.3467, "step": 2284 }, { "epoch": 0.91, "learning_rate": 3.8540170390740097e-07, "loss": 1.4111, "step": 2285 }, { "epoch": 0.91, "learning_rate": 3.8184784907304704e-07, "loss": 1.3623, "step": 2286 }, { "epoch": 0.91, "learning_rate": 3.783101366289199e-07, "loss": 1.3408, "step": 2287 }, { "epoch": 0.92, "learning_rate": 3.747885725124523e-07, "loss": 1.2783, "step": 2288 }, { "epoch": 0.92, "learning_rate": 3.712831626339752e-07, "loss": 1.3174, "step": 2289 }, { "epoch": 0.92, "learning_rate": 3.67793912876705e-07, "loss": 1.3262, "step": 2290 }, { "epoch": 0.92, "learning_rate": 3.643208290967415e-07, "loss": 1.4238, "step": 2291 }, { "epoch": 0.92, "learning_rate": 3.608639171230488e-07, "loss": 1.375, "step": 2292 }, { "epoch": 0.92, "learning_rate": 3.5742318275745147e-07, "loss": 1.2891, "step": 2293 }, { "epoch": 0.92, "learning_rate": 3.5399863177462024e-07, "loss": 1.4072, "step": 2294 }, { "epoch": 0.92, "learning_rate": 3.5059026992206645e-07, "loss": 1.3564, "step": 2295 }, { "epoch": 0.92, "learning_rate": 3.4719810292013214e-07, "loss": 1.416, "step": 2296 }, { "epoch": 0.92, "learning_rate": 3.438221364619776e-07, "loss": 1.4033, "step": 2297 }, { "epoch": 0.92, "learning_rate": 3.404623762135728e-07, "loss": 1.3125, "step": 2298 }, { "epoch": 0.92, "learning_rate": 3.371188278136883e-07, "loss": 1.3604, "step": 2299 }, { "epoch": 0.92, "learning_rate": 3.3379149687388866e-07, "loss": 1.4014, "step": 2300 }, { "epoch": 0.92, "learning_rate": 3.3048038897851576e-07, "loss": 1.3594, "step": 2301 }, { "epoch": 0.92, "learning_rate": 3.271855096846899e-07, "loss": 1.2588, "step": 2302 }, { "epoch": 0.92, "learning_rate": 3.2390686452228983e-07, "loss": 1.2891, "step": 2303 }, { "epoch": 0.92, "learning_rate": 3.2064445899394723e-07, "loss": 1.3242, "step": 2304 }, { "epoch": 0.92, "learning_rate": 3.1739829857504235e-07, "loss": 1.3584, "step": 2305 }, { "epoch": 0.92, "learning_rate": 3.1416838871368925e-07, "loss": 1.3311, "step": 2306 }, { "epoch": 0.92, "learning_rate": 3.1095473483072733e-07, "loss": 1.3848, "step": 2307 }, { "epoch": 0.92, "learning_rate": 3.0775734231971443e-07, "loss": 1.3271, "step": 2308 }, { "epoch": 0.92, "learning_rate": 3.045762165469168e-07, "loss": 1.3662, "step": 2309 }, { "epoch": 0.92, "learning_rate": 3.0141136285129825e-07, "loss": 1.3213, "step": 2310 }, { "epoch": 0.92, "learning_rate": 2.982627865445109e-07, "loss": 1.3477, "step": 2311 }, { "epoch": 0.92, "learning_rate": 2.951304929108956e-07, "loss": 1.4131, "step": 2312 }, { "epoch": 0.93, "learning_rate": 2.9201448720745706e-07, "loss": 1.3184, "step": 2313 }, { "epoch": 0.93, "learning_rate": 2.8891477466386987e-07, "loss": 1.3379, "step": 2314 }, { "epoch": 0.93, "learning_rate": 2.8583136048245697e-07, "loss": 1.3359, "step": 2315 }, { "epoch": 0.93, "learning_rate": 2.827642498381955e-07, "loss": 1.3975, "step": 2316 }, { "epoch": 0.93, "learning_rate": 2.7971344787869114e-07, "loss": 1.3047, "step": 2317 }, { "epoch": 0.93, "learning_rate": 2.76678959724187e-07, "loss": 1.2812, "step": 2318 }, { "epoch": 0.93, "learning_rate": 2.7366079046753925e-07, "loss": 1.3281, "step": 2319 }, { "epoch": 0.93, "learning_rate": 2.706589451742181e-07, "loss": 1.3672, "step": 2320 }, { "epoch": 0.93, "learning_rate": 2.6767342888229907e-07, "loss": 1.3779, "step": 2321 }, { "epoch": 0.93, "learning_rate": 2.647042466024485e-07, "loss": 1.3652, "step": 2322 }, { "epoch": 0.93, "learning_rate": 2.617514033179236e-07, "loss": 1.4014, "step": 2323 }, { "epoch": 0.93, "learning_rate": 2.588149039845533e-07, "loss": 1.3799, "step": 2324 }, { "epoch": 0.93, "learning_rate": 2.5589475353073987e-07, "loss": 1.3203, "step": 2325 }, { "epoch": 0.93, "learning_rate": 2.5299095685744734e-07, "loss": 1.3125, "step": 2326 }, { "epoch": 0.93, "learning_rate": 2.5010351883819283e-07, "loss": 1.3691, "step": 2327 }, { "epoch": 0.93, "learning_rate": 2.472324443190355e-07, "loss": 1.3193, "step": 2328 }, { "epoch": 0.93, "learning_rate": 2.4437773811857304e-07, "loss": 1.333, "step": 2329 }, { "epoch": 0.93, "learning_rate": 2.4153940502793185e-07, "loss": 1.3311, "step": 2330 }, { "epoch": 0.93, "learning_rate": 2.387174498107614e-07, "loss": 1.3076, "step": 2331 }, { "epoch": 0.93, "learning_rate": 2.359118772032176e-07, "loss": 1.2998, "step": 2332 }, { "epoch": 0.93, "learning_rate": 2.3312269191396619e-07, "loss": 1.377, "step": 2333 }, { "epoch": 0.93, "learning_rate": 2.30349898624167e-07, "loss": 1.373, "step": 2334 }, { "epoch": 0.93, "learning_rate": 2.2759350198746978e-07, "loss": 1.3672, "step": 2335 }, { "epoch": 0.93, "learning_rate": 2.2485350663000727e-07, "loss": 1.3457, "step": 2336 }, { "epoch": 0.93, "learning_rate": 2.2212991715038324e-07, "loss": 1.4043, "step": 2337 }, { "epoch": 0.94, "learning_rate": 2.1942273811966563e-07, "loss": 1.292, "step": 2338 }, { "epoch": 0.94, "learning_rate": 2.1673197408138115e-07, "loss": 1.3623, "step": 2339 }, { "epoch": 0.94, "learning_rate": 2.1405762955151178e-07, "loss": 1.3232, "step": 2340 }, { "epoch": 0.94, "learning_rate": 2.1139970901847607e-07, "loss": 1.3281, "step": 2341 }, { "epoch": 0.94, "learning_rate": 2.0875821694313014e-07, "loss": 1.373, "step": 2342 }, { "epoch": 0.94, "learning_rate": 2.0613315775875665e-07, "loss": 1.3291, "step": 2343 }, { "epoch": 0.94, "learning_rate": 2.0352453587105914e-07, "loss": 1.3457, "step": 2344 }, { "epoch": 0.94, "learning_rate": 2.009323556581566e-07, "loss": 1.2881, "step": 2345 }, { "epoch": 0.94, "learning_rate": 1.9835662147057012e-07, "loss": 1.3828, "step": 2346 }, { "epoch": 0.94, "learning_rate": 1.9579733763121943e-07, "loss": 1.292, "step": 2347 }, { "epoch": 0.94, "learning_rate": 1.932545084354154e-07, "loss": 1.3389, "step": 2348 }, { "epoch": 0.94, "learning_rate": 1.9072813815085523e-07, "loss": 1.3525, "step": 2349 }, { "epoch": 0.94, "learning_rate": 1.8821823101760949e-07, "loss": 1.332, "step": 2350 }, { "epoch": 0.94, "learning_rate": 1.857247912481197e-07, "loss": 1.3232, "step": 2351 }, { "epoch": 0.94, "learning_rate": 1.8324782302718835e-07, "loss": 1.3301, "step": 2352 }, { "epoch": 0.94, "learning_rate": 1.8078733051197561e-07, "loss": 1.3496, "step": 2353 }, { "epoch": 0.94, "learning_rate": 1.7834331783198933e-07, "loss": 1.3232, "step": 2354 }, { "epoch": 0.94, "learning_rate": 1.7591578908907724e-07, "loss": 1.3682, "step": 2355 }, { "epoch": 0.94, "learning_rate": 1.735047483574215e-07, "loss": 1.3486, "step": 2356 }, { "epoch": 0.94, "learning_rate": 1.7111019968353625e-07, "loss": 1.2988, "step": 2357 }, { "epoch": 0.94, "learning_rate": 1.687321470862524e-07, "loss": 1.3105, "step": 2358 }, { "epoch": 0.94, "learning_rate": 1.6637059455671623e-07, "loss": 1.4023, "step": 2359 }, { "epoch": 0.94, "learning_rate": 1.6402554605838173e-07, "loss": 1.3818, "step": 2360 }, { "epoch": 0.94, "learning_rate": 1.6169700552700284e-07, "loss": 1.3535, "step": 2361 }, { "epoch": 0.94, "learning_rate": 1.5938497687062905e-07, "loss": 1.2529, "step": 2362 }, { "epoch": 0.95, "learning_rate": 1.5708946396959856e-07, "loss": 1.311, "step": 2363 }, { "epoch": 0.95, "learning_rate": 1.5481047067652744e-07, "loss": 1.3315, "step": 2364 }, { "epoch": 0.95, "learning_rate": 1.5254800081630828e-07, "loss": 1.2969, "step": 2365 }, { "epoch": 0.95, "learning_rate": 1.5030205818610255e-07, "loss": 1.2773, "step": 2366 }, { "epoch": 0.95, "learning_rate": 1.4807264655533282e-07, "loss": 1.2646, "step": 2367 }, { "epoch": 0.95, "learning_rate": 1.4585976966567826e-07, "loss": 1.2637, "step": 2368 }, { "epoch": 0.95, "learning_rate": 1.4366343123106697e-07, "loss": 1.3311, "step": 2369 }, { "epoch": 0.95, "learning_rate": 1.4148363493766803e-07, "loss": 1.333, "step": 2370 }, { "epoch": 0.95, "learning_rate": 1.3932038444389063e-07, "loss": 1.2549, "step": 2371 }, { "epoch": 0.95, "learning_rate": 1.3717368338037163e-07, "loss": 1.3252, "step": 2372 }, { "epoch": 0.95, "learning_rate": 1.3504353534997682e-07, "loss": 1.3477, "step": 2373 }, { "epoch": 0.95, "learning_rate": 1.3292994392778535e-07, "loss": 1.3145, "step": 2374 }, { "epoch": 0.95, "learning_rate": 1.30832912661093e-07, "loss": 1.3027, "step": 2375 }, { "epoch": 0.95, "learning_rate": 1.287524450694011e-07, "loss": 1.3408, "step": 2376 }, { "epoch": 0.95, "learning_rate": 1.2668854464441104e-07, "loss": 1.3975, "step": 2377 }, { "epoch": 0.95, "learning_rate": 1.246412148500198e-07, "loss": 1.3672, "step": 2378 }, { "epoch": 0.95, "learning_rate": 1.2261045912231318e-07, "loss": 1.3418, "step": 2379 }, { "epoch": 0.95, "learning_rate": 1.2059628086956044e-07, "loss": 1.376, "step": 2380 }, { "epoch": 0.95, "learning_rate": 1.1859868347220749e-07, "loss": 1.2637, "step": 2381 }, { "epoch": 0.95, "learning_rate": 1.1661767028287363e-07, "loss": 1.3027, "step": 2382 }, { "epoch": 0.95, "learning_rate": 1.1465324462634375e-07, "loss": 1.3799, "step": 2383 }, { "epoch": 0.95, "learning_rate": 1.1270540979956501e-07, "loss": 1.3428, "step": 2384 }, { "epoch": 0.95, "learning_rate": 1.1077416907163573e-07, "loss": 1.3613, "step": 2385 }, { "epoch": 0.95, "learning_rate": 1.0885952568380764e-07, "loss": 1.373, "step": 2386 }, { "epoch": 0.95, "learning_rate": 1.0696148284947694e-07, "loss": 1.3477, "step": 2387 }, { "epoch": 0.96, "learning_rate": 1.0508004375417546e-07, "loss": 1.3506, "step": 2388 }, { "epoch": 0.96, "learning_rate": 1.032152115555718e-07, "loss": 1.2881, "step": 2389 }, { "epoch": 0.96, "learning_rate": 1.0136698938346012e-07, "loss": 1.3164, "step": 2390 }, { "epoch": 0.96, "learning_rate": 9.953538033975918e-08, "loss": 1.3711, "step": 2391 }, { "epoch": 0.96, "learning_rate": 9.772038749850665e-08, "loss": 1.4043, "step": 2392 }, { "epoch": 0.96, "learning_rate": 9.59220139058492e-08, "loss": 1.3555, "step": 2393 }, { "epoch": 0.96, "learning_rate": 9.414026258004583e-08, "loss": 1.3379, "step": 2394 }, { "epoch": 0.96, "learning_rate": 9.237513651145224e-08, "loss": 1.4414, "step": 2395 }, { "epoch": 0.96, "learning_rate": 9.062663866252541e-08, "loss": 1.3516, "step": 2396 }, { "epoch": 0.96, "learning_rate": 8.889477196781571e-08, "loss": 1.3457, "step": 2397 }, { "epoch": 0.96, "learning_rate": 8.717953933395695e-08, "loss": 1.3428, "step": 2398 }, { "epoch": 0.96, "learning_rate": 8.548094363966974e-08, "loss": 1.3545, "step": 2399 }, { "epoch": 0.96, "learning_rate": 8.379898773574924e-08, "loss": 1.3623, "step": 2400 }, { "epoch": 0.96, "learning_rate": 8.213367444506515e-08, "loss": 1.3672, "step": 2401 }, { "epoch": 0.96, "learning_rate": 8.04850065625551e-08, "loss": 1.3213, "step": 2402 }, { "epoch": 0.96, "learning_rate": 7.885298685522235e-08, "loss": 1.3906, "step": 2403 }, { "epoch": 0.96, "learning_rate": 7.723761806212371e-08, "loss": 1.3389, "step": 2404 }, { "epoch": 0.96, "learning_rate": 7.563890289437825e-08, "loss": 1.3408, "step": 2405 }, { "epoch": 0.96, "learning_rate": 7.405684403514635e-08, "loss": 1.3984, "step": 2406 }, { "epoch": 0.96, "learning_rate": 7.24914441396396e-08, "loss": 1.3496, "step": 2407 }, { "epoch": 0.96, "learning_rate": 7.094270583510976e-08, "loss": 1.3691, "step": 2408 }, { "epoch": 0.96, "learning_rate": 6.941063172084094e-08, "loss": 1.2969, "step": 2409 }, { "epoch": 0.96, "learning_rate": 6.78952243681541e-08, "loss": 1.3428, "step": 2410 }, { "epoch": 0.96, "learning_rate": 6.639648632039697e-08, "loss": 1.335, "step": 2411 }, { "epoch": 0.96, "learning_rate": 6.491442009293858e-08, "loss": 1.3701, "step": 2412 }, { "epoch": 0.97, "learning_rate": 6.344902817316811e-08, "loss": 1.3154, "step": 2413 }, { "epoch": 0.97, "learning_rate": 6.200031302049048e-08, "loss": 1.3467, "step": 2414 }, { "epoch": 0.97, "learning_rate": 6.056827706632185e-08, "loss": 1.3506, "step": 2415 }, { "epoch": 0.97, "learning_rate": 5.915292271408524e-08, "loss": 1.3613, "step": 2416 }, { "epoch": 0.97, "learning_rate": 5.7754252339204955e-08, "loss": 1.332, "step": 2417 }, { "epoch": 0.97, "learning_rate": 5.637226828910436e-08, "loss": 1.3721, "step": 2418 }, { "epoch": 0.97, "learning_rate": 5.5006972883204776e-08, "loss": 1.2988, "step": 2419 }, { "epoch": 0.97, "learning_rate": 5.365836841291439e-08, "loss": 1.293, "step": 2420 }, { "epoch": 0.97, "learning_rate": 5.232645714163265e-08, "loss": 1.373, "step": 2421 }, { "epoch": 0.97, "learning_rate": 5.1011241304738115e-08, "loss": 1.3418, "step": 2422 }, { "epoch": 0.97, "learning_rate": 4.9712723109590636e-08, "loss": 1.3213, "step": 2423 }, { "epoch": 0.97, "learning_rate": 4.843090473552914e-08, "loss": 1.3779, "step": 2424 }, { "epoch": 0.97, "learning_rate": 4.716578833386054e-08, "loss": 1.333, "step": 2425 }, { "epoch": 0.97, "learning_rate": 4.5917376027861945e-08, "loss": 1.332, "step": 2426 }, { "epoch": 0.97, "learning_rate": 4.468566991277512e-08, "loss": 1.3535, "step": 2427 }, { "epoch": 0.97, "learning_rate": 4.347067205580424e-08, "loss": 1.3682, "step": 2428 }, { "epoch": 0.97, "learning_rate": 4.2272384496112597e-08, "loss": 1.416, "step": 2429 }, { "epoch": 0.97, "learning_rate": 4.109080924481479e-08, "loss": 1.2559, "step": 2430 }, { "epoch": 0.97, "learning_rate": 3.9925948284980086e-08, "loss": 1.4053, "step": 2431 }, { "epoch": 0.97, "learning_rate": 3.877780357162353e-08, "loss": 1.3574, "step": 2432 }, { "epoch": 0.97, "learning_rate": 3.764637703170593e-08, "loss": 1.2871, "step": 2433 }, { "epoch": 0.97, "learning_rate": 3.653167056413054e-08, "loss": 1.3486, "step": 2434 }, { "epoch": 0.97, "learning_rate": 3.543368603973529e-08, "loss": 1.3857, "step": 2435 }, { "epoch": 0.97, "learning_rate": 3.435242530129723e-08, "loss": 1.3779, "step": 2436 }, { "epoch": 0.97, "learning_rate": 3.3287890163523626e-08, "loss": 1.3994, "step": 2437 }, { "epoch": 0.98, "learning_rate": 3.224008241304977e-08, "loss": 1.3633, "step": 2438 }, { "epoch": 0.98, "learning_rate": 3.120900380844116e-08, "loss": 1.3721, "step": 2439 }, { "epoch": 0.98, "learning_rate": 3.019465608018024e-08, "loss": 1.3418, "step": 2440 }, { "epoch": 0.98, "learning_rate": 2.9197040930674102e-08, "loss": 1.333, "step": 2441 }, { "epoch": 0.98, "learning_rate": 2.8216160034244544e-08, "loss": 1.4141, "step": 2442 }, { "epoch": 0.98, "learning_rate": 2.7252015037131373e-08, "loss": 1.4014, "step": 2443 }, { "epoch": 0.98, "learning_rate": 2.6304607557481322e-08, "loss": 1.3447, "step": 2444 }, { "epoch": 0.98, "learning_rate": 2.537393918535358e-08, "loss": 1.333, "step": 2445 }, { "epoch": 0.98, "learning_rate": 2.4460011482713153e-08, "loss": 1.3779, "step": 2446 }, { "epoch": 0.98, "learning_rate": 2.3562825983427517e-08, "loss": 1.3311, "step": 2447 }, { "epoch": 0.98, "learning_rate": 2.2682384193266625e-08, "loss": 1.3594, "step": 2448 }, { "epoch": 0.98, "learning_rate": 2.1818687589896248e-08, "loss": 1.3652, "step": 2449 }, { "epoch": 0.98, "learning_rate": 2.0971737622883515e-08, "loss": 1.3838, "step": 2450 }, { "epoch": 0.98, "learning_rate": 2.01415357136836e-08, "loss": 1.3877, "step": 2451 }, { "epoch": 0.98, "learning_rate": 1.93280832556475e-08, "loss": 1.3789, "step": 2452 }, { "epoch": 0.98, "learning_rate": 1.8531381614013133e-08, "loss": 1.3564, "step": 2453 }, { "epoch": 0.98, "learning_rate": 1.7751432125903133e-08, "loss": 1.3584, "step": 2454 }, { "epoch": 0.98, "learning_rate": 1.698823610032929e-08, "loss": 1.4004, "step": 2455 }, { "epoch": 0.98, "learning_rate": 1.6241794818180333e-08, "loss": 1.3818, "step": 2456 }, { "epoch": 0.98, "learning_rate": 1.5512109532229703e-08, "loss": 1.334, "step": 2457 }, { "epoch": 0.98, "learning_rate": 1.4799181467125557e-08, "loss": 1.3428, "step": 2458 }, { "epoch": 0.98, "learning_rate": 1.4103011819395218e-08, "loss": 1.3711, "step": 2459 }, { "epoch": 0.98, "learning_rate": 1.3423601757436289e-08, "loss": 1.1953, "step": 2460 }, { "epoch": 0.98, "learning_rate": 1.276095242151998e-08, "loss": 1.3623, "step": 2461 }, { "epoch": 0.98, "learning_rate": 1.2115064923787778e-08, "loss": 1.4033, "step": 2462 }, { "epoch": 0.99, "learning_rate": 1.1485940348249235e-08, "loss": 1.3447, "step": 2463 }, { "epoch": 0.99, "learning_rate": 1.087357975078085e-08, "loss": 1.3516, "step": 2464 }, { "epoch": 0.99, "learning_rate": 1.0277984159122734e-08, "loss": 1.2812, "step": 2465 }, { "epoch": 0.99, "learning_rate": 9.699154572877511e-09, "loss": 1.2891, "step": 2466 }, { "epoch": 0.99, "learning_rate": 9.137091963510314e-09, "loss": 1.3242, "step": 2467 }, { "epoch": 0.99, "learning_rate": 8.591797274344338e-09, "loss": 1.4082, "step": 2468 }, { "epoch": 0.99, "learning_rate": 8.063271420563068e-09, "loss": 1.2891, "step": 2469 }, { "epoch": 0.99, "learning_rate": 7.551515289203615e-09, "loss": 1.3096, "step": 2470 }, { "epoch": 0.99, "learning_rate": 7.056529739158935e-09, "loss": 1.3818, "step": 2471 }, { "epoch": 0.99, "learning_rate": 6.5783156011778315e-09, "loss": 1.3066, "step": 2472 }, { "epoch": 0.99, "learning_rate": 6.116873677858293e-09, "loss": 1.4131, "step": 2473 }, { "epoch": 0.99, "learning_rate": 5.6722047436497115e-09, "loss": 1.4346, "step": 2474 }, { "epoch": 0.99, "learning_rate": 5.2443095448506674e-09, "loss": 1.3262, "step": 2475 }, { "epoch": 0.99, "learning_rate": 4.833188799610033e-09, "loss": 1.2656, "step": 2476 }, { "epoch": 0.99, "learning_rate": 4.438843197922538e-09, "loss": 1.376, "step": 2477 }, { "epoch": 0.99, "learning_rate": 4.061273401627653e-09, "loss": 1.3066, "step": 2478 }, { "epoch": 0.99, "learning_rate": 3.7004800444095933e-09, "loss": 1.2832, "step": 2479 }, { "epoch": 0.99, "learning_rate": 3.3564637317984318e-09, "loss": 1.3506, "step": 2480 }, { "epoch": 0.99, "learning_rate": 3.0292250411645406e-09, "loss": 1.3213, "step": 2481 }, { "epoch": 0.99, "learning_rate": 2.7187645217219283e-09, "loss": 1.4131, "step": 2482 }, { "epoch": 0.99, "learning_rate": 2.4250826945226847e-09, "loss": 1.334, "step": 2483 }, { "epoch": 0.99, "learning_rate": 2.148180052462534e-09, "loss": 1.3457, "step": 2484 }, { "epoch": 0.99, "learning_rate": 1.888057060274173e-09, "loss": 1.3701, "step": 2485 }, { "epoch": 0.99, "learning_rate": 1.6447141545272717e-09, "loss": 1.2969, "step": 2486 }, { "epoch": 0.99, "learning_rate": 1.4181517436306913e-09, "loss": 1.2705, "step": 2487 }, { "epoch": 1.0, "learning_rate": 1.2083702078302673e-09, "loss": 1.292, "step": 2488 }, { "epoch": 1.0, "learning_rate": 1.0153698992088069e-09, "loss": 1.4014, "step": 2489 }, { "epoch": 1.0, "learning_rate": 8.391511416816489e-10, "loss": 1.3359, "step": 2490 }, { "epoch": 1.0, "learning_rate": 6.797142310022154e-10, "loss": 1.3564, "step": 2491 }, { "epoch": 1.0, "learning_rate": 5.370594347575697e-10, "loss": 1.3301, "step": 2492 }, { "epoch": 1.0, "learning_rate": 4.1118699236841753e-10, "loss": 1.3672, "step": 2493 }, { "epoch": 1.0, "learning_rate": 3.0209711509132657e-10, "loss": 1.3496, "step": 2494 }, { "epoch": 1.0, "learning_rate": 2.0978998601206558e-10, "loss": 1.374, "step": 2495 }, { "epoch": 1.0, "learning_rate": 1.342657600544861e-10, "loss": 1.3877, "step": 2496 }, { "epoch": 1.0, "learning_rate": 7.552456397053042e-11, "loss": 1.3223, "step": 2497 }, { "epoch": 1.0, "learning_rate": 3.3566496349113355e-11, "loss": 1.3164, "step": 2498 }, { "epoch": 1.0, "learning_rate": 8.39162760835066e-12, "loss": 1.3066, "step": 2499 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.3516, "step": 2500 }, { "epoch": 1.0, "step": 2500, "total_flos": 8.923034575983084e+17, "train_loss": 1.4047638671875, "train_runtime": 5551.0228, "train_samples_per_second": 14.412, "train_steps_per_second": 0.45 } ], "max_steps": 2500, "num_train_epochs": 1, "total_flos": 8.923034575983084e+17, "trial_name": null, "trial_params": null }