{ "best_metric": 0.03707250580191612, "best_model_checkpoint": "./phishing-email-detection/checkpoint-2196", "epoch": 3.0, "global_step": 3294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.7022, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6837, "step": 2 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 0.6956, "step": 3 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.6936, "step": 4 }, { "epoch": 0.0, "learning_rate": 5e-06, "loss": 0.687, "step": 5 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 0.6838, "step": 6 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-06, "loss": 0.6982, "step": 7 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 0.6948, "step": 8 }, { "epoch": 0.01, "learning_rate": 9e-06, "loss": 0.6805, "step": 9 }, { "epoch": 0.01, "learning_rate": 1e-05, "loss": 0.6995, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.1000000000000001e-05, "loss": 0.7028, "step": 11 }, { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 0.6965, "step": 12 }, { "epoch": 0.01, "learning_rate": 1.3000000000000001e-05, "loss": 0.6835, "step": 13 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-05, "loss": 0.6879, "step": 14 }, { "epoch": 0.01, "learning_rate": 1.5e-05, "loss": 0.6769, "step": 15 }, { "epoch": 0.01, "learning_rate": 1.6000000000000003e-05, "loss": 0.7155, "step": 16 }, { "epoch": 0.02, "learning_rate": 1.7000000000000003e-05, "loss": 0.6811, "step": 17 }, { "epoch": 0.02, "learning_rate": 1.8e-05, "loss": 0.6727, "step": 18 }, { "epoch": 0.02, "learning_rate": 1.9e-05, "loss": 0.6834, "step": 19 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.6674, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.1e-05, "loss": 0.6672, "step": 21 }, { "epoch": 0.02, "learning_rate": 2.2000000000000003e-05, "loss": 0.6812, "step": 22 }, { "epoch": 0.02, "learning_rate": 2.3000000000000003e-05, "loss": 0.6874, "step": 23 }, { "epoch": 0.02, "learning_rate": 2.4e-05, "loss": 0.6365, "step": 24 }, { "epoch": 0.02, "learning_rate": 2.5e-05, "loss": 0.6393, "step": 25 }, { "epoch": 0.02, "learning_rate": 2.6000000000000002e-05, "loss": 0.6681, "step": 26 }, { "epoch": 0.02, "learning_rate": 2.7000000000000002e-05, "loss": 0.6282, "step": 27 }, { "epoch": 0.03, "learning_rate": 2.8000000000000003e-05, "loss": 0.6342, "step": 28 }, { "epoch": 0.03, "learning_rate": 2.9e-05, "loss": 0.6132, "step": 29 }, { "epoch": 0.03, "learning_rate": 3e-05, "loss": 0.5506, "step": 30 }, { "epoch": 0.03, "learning_rate": 3.1e-05, "loss": 0.5207, "step": 31 }, { "epoch": 0.03, "learning_rate": 3.2000000000000005e-05, "loss": 0.4944, "step": 32 }, { "epoch": 0.03, "learning_rate": 3.3e-05, "loss": 0.5201, "step": 33 }, { "epoch": 0.03, "learning_rate": 3.4000000000000007e-05, "loss": 0.3777, "step": 34 }, { "epoch": 0.03, "learning_rate": 3.5e-05, "loss": 0.5256, "step": 35 }, { "epoch": 0.03, "learning_rate": 3.6e-05, "loss": 0.4247, "step": 36 }, { "epoch": 0.03, "learning_rate": 3.7e-05, "loss": 0.4869, "step": 37 }, { "epoch": 0.03, "learning_rate": 3.8e-05, "loss": 0.3235, "step": 38 }, { "epoch": 0.04, "learning_rate": 3.9000000000000006e-05, "loss": 0.3241, "step": 39 }, { "epoch": 0.04, "learning_rate": 4e-05, "loss": 0.4529, "step": 40 }, { "epoch": 0.04, "learning_rate": 4.1e-05, "loss": 0.1684, "step": 41 }, { "epoch": 0.04, "learning_rate": 4.2e-05, "loss": 0.3919, "step": 42 }, { "epoch": 0.04, "learning_rate": 4.3e-05, "loss": 0.2534, "step": 43 }, { "epoch": 0.04, "learning_rate": 4.4000000000000006e-05, "loss": 0.3468, "step": 44 }, { "epoch": 0.04, "learning_rate": 4.5e-05, "loss": 0.1609, "step": 45 }, { "epoch": 0.04, "learning_rate": 4.600000000000001e-05, "loss": 0.2447, "step": 46 }, { "epoch": 0.04, "learning_rate": 4.7e-05, "loss": 0.3794, "step": 47 }, { "epoch": 0.04, "learning_rate": 4.8e-05, "loss": 0.2932, "step": 48 }, { "epoch": 0.04, "learning_rate": 4.9e-05, "loss": 0.2164, "step": 49 }, { "epoch": 0.05, "learning_rate": 5e-05, "loss": 0.3335, "step": 50 }, { "epoch": 0.05, "learning_rate": 4.9984586929716405e-05, "loss": 0.5085, "step": 51 }, { "epoch": 0.05, "learning_rate": 4.99691738594328e-05, "loss": 0.1875, "step": 52 }, { "epoch": 0.05, "learning_rate": 4.99537607891492e-05, "loss": 0.1964, "step": 53 }, { "epoch": 0.05, "learning_rate": 4.9938347718865605e-05, "loss": 0.2442, "step": 54 }, { "epoch": 0.05, "learning_rate": 4.9922934648582e-05, "loss": 0.2489, "step": 55 }, { "epoch": 0.05, "learning_rate": 4.99075215782984e-05, "loss": 0.3017, "step": 56 }, { "epoch": 0.05, "learning_rate": 4.98921085080148e-05, "loss": 0.3468, "step": 57 }, { "epoch": 0.05, "learning_rate": 4.9876695437731194e-05, "loss": 0.6424, "step": 58 }, { "epoch": 0.05, "learning_rate": 4.9861282367447596e-05, "loss": 0.0898, "step": 59 }, { "epoch": 0.05, "learning_rate": 4.9845869297164e-05, "loss": 0.1585, "step": 60 }, { "epoch": 0.06, "learning_rate": 4.9830456226880394e-05, "loss": 0.185, "step": 61 }, { "epoch": 0.06, "learning_rate": 4.9815043156596796e-05, "loss": 0.2936, "step": 62 }, { "epoch": 0.06, "learning_rate": 4.97996300863132e-05, "loss": 0.6241, "step": 63 }, { "epoch": 0.06, "learning_rate": 4.9784217016029594e-05, "loss": 0.2769, "step": 64 }, { "epoch": 0.06, "learning_rate": 4.9768803945745996e-05, "loss": 0.1915, "step": 65 }, { "epoch": 0.06, "learning_rate": 4.97533908754624e-05, "loss": 0.1106, "step": 66 }, { "epoch": 0.06, "learning_rate": 4.9737977805178794e-05, "loss": 0.4, "step": 67 }, { "epoch": 0.06, "learning_rate": 4.9722564734895196e-05, "loss": 0.2603, "step": 68 }, { "epoch": 0.06, "learning_rate": 4.97071516646116e-05, "loss": 0.2849, "step": 69 }, { "epoch": 0.06, "learning_rate": 4.9691738594327994e-05, "loss": 0.0758, "step": 70 }, { "epoch": 0.06, "learning_rate": 4.967632552404439e-05, "loss": 0.0657, "step": 71 }, { "epoch": 0.07, "learning_rate": 4.966091245376079e-05, "loss": 0.1914, "step": 72 }, { "epoch": 0.07, "learning_rate": 4.964549938347719e-05, "loss": 0.3203, "step": 73 }, { "epoch": 0.07, "learning_rate": 4.963008631319359e-05, "loss": 0.0725, "step": 74 }, { "epoch": 0.07, "learning_rate": 4.961467324290999e-05, "loss": 0.4044, "step": 75 }, { "epoch": 0.07, "learning_rate": 4.959926017262639e-05, "loss": 0.1029, "step": 76 }, { "epoch": 0.07, "learning_rate": 4.958384710234279e-05, "loss": 0.2754, "step": 77 }, { "epoch": 0.07, "learning_rate": 4.956843403205919e-05, "loss": 0.0372, "step": 78 }, { "epoch": 0.07, "learning_rate": 4.955302096177559e-05, "loss": 0.1509, "step": 79 }, { "epoch": 0.07, "learning_rate": 4.953760789149199e-05, "loss": 0.0387, "step": 80 }, { "epoch": 0.07, "learning_rate": 4.952219482120839e-05, "loss": 0.0669, "step": 81 }, { "epoch": 0.07, "learning_rate": 4.950678175092479e-05, "loss": 0.0175, "step": 82 }, { "epoch": 0.08, "learning_rate": 4.949136868064119e-05, "loss": 0.1144, "step": 83 }, { "epoch": 0.08, "learning_rate": 4.9475955610357585e-05, "loss": 0.0177, "step": 84 }, { "epoch": 0.08, "learning_rate": 4.946054254007399e-05, "loss": 0.065, "step": 85 }, { "epoch": 0.08, "learning_rate": 4.944512946979038e-05, "loss": 0.1989, "step": 86 }, { "epoch": 0.08, "learning_rate": 4.942971639950678e-05, "loss": 0.2918, "step": 87 }, { "epoch": 0.08, "learning_rate": 4.941430332922318e-05, "loss": 0.4348, "step": 88 }, { "epoch": 0.08, "learning_rate": 4.939889025893958e-05, "loss": 0.4145, "step": 89 }, { "epoch": 0.08, "learning_rate": 4.938347718865598e-05, "loss": 0.0472, "step": 90 }, { "epoch": 0.08, "learning_rate": 4.936806411837238e-05, "loss": 0.2278, "step": 91 }, { "epoch": 0.08, "learning_rate": 4.935265104808878e-05, "loss": 0.1183, "step": 92 }, { "epoch": 0.08, "learning_rate": 4.933723797780518e-05, "loss": 0.0903, "step": 93 }, { "epoch": 0.09, "learning_rate": 4.932182490752158e-05, "loss": 0.0395, "step": 94 }, { "epoch": 0.09, "learning_rate": 4.930641183723798e-05, "loss": 0.089, "step": 95 }, { "epoch": 0.09, "learning_rate": 4.929099876695438e-05, "loss": 0.041, "step": 96 }, { "epoch": 0.09, "learning_rate": 4.927558569667078e-05, "loss": 0.0672, "step": 97 }, { "epoch": 0.09, "learning_rate": 4.926017262638718e-05, "loss": 0.2325, "step": 98 }, { "epoch": 0.09, "learning_rate": 4.924475955610358e-05, "loss": 0.1052, "step": 99 }, { "epoch": 0.09, "learning_rate": 4.9229346485819974e-05, "loss": 0.087, "step": 100 }, { "epoch": 0.09, "learning_rate": 4.9213933415536376e-05, "loss": 0.0363, "step": 101 }, { "epoch": 0.09, "learning_rate": 4.919852034525277e-05, "loss": 0.0185, "step": 102 }, { "epoch": 0.09, "learning_rate": 4.9183107274969174e-05, "loss": 0.0082, "step": 103 }, { "epoch": 0.09, "learning_rate": 4.9167694204685577e-05, "loss": 0.056, "step": 104 }, { "epoch": 0.1, "learning_rate": 4.915228113440197e-05, "loss": 0.0162, "step": 105 }, { "epoch": 0.1, "learning_rate": 4.9136868064118374e-05, "loss": 0.0162, "step": 106 }, { "epoch": 0.1, "learning_rate": 4.912145499383478e-05, "loss": 0.0134, "step": 107 }, { "epoch": 0.1, "learning_rate": 4.910604192355117e-05, "loss": 0.2928, "step": 108 }, { "epoch": 0.1, "learning_rate": 4.9090628853267574e-05, "loss": 0.2841, "step": 109 }, { "epoch": 0.1, "learning_rate": 4.907521578298398e-05, "loss": 0.4202, "step": 110 }, { "epoch": 0.1, "learning_rate": 4.905980271270037e-05, "loss": 0.0033, "step": 111 }, { "epoch": 0.1, "learning_rate": 4.9044389642416774e-05, "loss": 0.5551, "step": 112 }, { "epoch": 0.1, "learning_rate": 4.902897657213317e-05, "loss": 0.1007, "step": 113 }, { "epoch": 0.1, "learning_rate": 4.9013563501849565e-05, "loss": 0.0045, "step": 114 }, { "epoch": 0.1, "learning_rate": 4.899815043156597e-05, "loss": 0.0088, "step": 115 }, { "epoch": 0.11, "learning_rate": 4.898273736128237e-05, "loss": 0.1765, "step": 116 }, { "epoch": 0.11, "learning_rate": 4.8967324290998766e-05, "loss": 0.0855, "step": 117 }, { "epoch": 0.11, "learning_rate": 4.895191122071517e-05, "loss": 0.0027, "step": 118 }, { "epoch": 0.11, "learning_rate": 4.893649815043157e-05, "loss": 0.315, "step": 119 }, { "epoch": 0.11, "learning_rate": 4.8921085080147966e-05, "loss": 0.3212, "step": 120 }, { "epoch": 0.11, "learning_rate": 4.890567200986437e-05, "loss": 0.2402, "step": 121 }, { "epoch": 0.11, "learning_rate": 4.889025893958077e-05, "loss": 0.0099, "step": 122 }, { "epoch": 0.11, "learning_rate": 4.8874845869297166e-05, "loss": 0.9157, "step": 123 }, { "epoch": 0.11, "learning_rate": 4.885943279901357e-05, "loss": 0.1004, "step": 124 }, { "epoch": 0.11, "learning_rate": 4.884401972872997e-05, "loss": 0.0088, "step": 125 }, { "epoch": 0.11, "learning_rate": 4.8828606658446366e-05, "loss": 0.0038, "step": 126 }, { "epoch": 0.12, "learning_rate": 4.881319358816276e-05, "loss": 0.4306, "step": 127 }, { "epoch": 0.12, "learning_rate": 4.8797780517879164e-05, "loss": 0.0142, "step": 128 }, { "epoch": 0.12, "learning_rate": 4.878236744759556e-05, "loss": 0.4888, "step": 129 }, { "epoch": 0.12, "learning_rate": 4.876695437731196e-05, "loss": 0.0365, "step": 130 }, { "epoch": 0.12, "learning_rate": 4.8751541307028364e-05, "loss": 0.0066, "step": 131 }, { "epoch": 0.12, "learning_rate": 4.873612823674476e-05, "loss": 0.5105, "step": 132 }, { "epoch": 0.12, "learning_rate": 4.872071516646116e-05, "loss": 0.0654, "step": 133 }, { "epoch": 0.12, "learning_rate": 4.8705302096177564e-05, "loss": 0.0038, "step": 134 }, { "epoch": 0.12, "learning_rate": 4.868988902589396e-05, "loss": 0.0984, "step": 135 }, { "epoch": 0.12, "learning_rate": 4.867447595561036e-05, "loss": 0.0202, "step": 136 }, { "epoch": 0.12, "learning_rate": 4.8659062885326764e-05, "loss": 0.1943, "step": 137 }, { "epoch": 0.13, "learning_rate": 4.864364981504316e-05, "loss": 0.7161, "step": 138 }, { "epoch": 0.13, "learning_rate": 4.862823674475956e-05, "loss": 0.2464, "step": 139 }, { "epoch": 0.13, "learning_rate": 4.861282367447596e-05, "loss": 0.2055, "step": 140 }, { "epoch": 0.13, "learning_rate": 4.859741060419236e-05, "loss": 0.0071, "step": 141 }, { "epoch": 0.13, "learning_rate": 4.8581997533908755e-05, "loss": 0.0043, "step": 142 }, { "epoch": 0.13, "learning_rate": 4.856658446362516e-05, "loss": 0.231, "step": 143 }, { "epoch": 0.13, "learning_rate": 4.855117139334155e-05, "loss": 0.0978, "step": 144 }, { "epoch": 0.13, "learning_rate": 4.8535758323057955e-05, "loss": 0.0053, "step": 145 }, { "epoch": 0.13, "learning_rate": 4.852034525277436e-05, "loss": 0.0053, "step": 146 }, { "epoch": 0.13, "learning_rate": 4.850493218249075e-05, "loss": 0.0063, "step": 147 }, { "epoch": 0.13, "learning_rate": 4.8489519112207155e-05, "loss": 0.0138, "step": 148 }, { "epoch": 0.14, "learning_rate": 4.847410604192356e-05, "loss": 0.2408, "step": 149 }, { "epoch": 0.14, "learning_rate": 4.845869297163995e-05, "loss": 0.1458, "step": 150 }, { "epoch": 0.14, "learning_rate": 4.8443279901356355e-05, "loss": 0.0037, "step": 151 }, { "epoch": 0.14, "learning_rate": 4.842786683107276e-05, "loss": 0.0182, "step": 152 }, { "epoch": 0.14, "learning_rate": 4.841245376078915e-05, "loss": 0.0178, "step": 153 }, { "epoch": 0.14, "learning_rate": 4.8397040690505555e-05, "loss": 0.3435, "step": 154 }, { "epoch": 0.14, "learning_rate": 4.838162762022195e-05, "loss": 0.3426, "step": 155 }, { "epoch": 0.14, "learning_rate": 4.8366214549938346e-05, "loss": 0.1803, "step": 156 }, { "epoch": 0.14, "learning_rate": 4.835080147965475e-05, "loss": 0.1841, "step": 157 }, { "epoch": 0.14, "learning_rate": 4.833538840937115e-05, "loss": 0.0268, "step": 158 }, { "epoch": 0.14, "learning_rate": 4.8319975339087546e-05, "loss": 0.0522, "step": 159 }, { "epoch": 0.15, "learning_rate": 4.830456226880395e-05, "loss": 0.9797, "step": 160 }, { "epoch": 0.15, "learning_rate": 4.8289149198520344e-05, "loss": 0.3329, "step": 161 }, { "epoch": 0.15, "learning_rate": 4.8273736128236746e-05, "loss": 0.4755, "step": 162 }, { "epoch": 0.15, "learning_rate": 4.825832305795315e-05, "loss": 0.4914, "step": 163 }, { "epoch": 0.15, "learning_rate": 4.8242909987669544e-05, "loss": 0.7619, "step": 164 }, { "epoch": 0.15, "learning_rate": 4.8227496917385946e-05, "loss": 0.1818, "step": 165 }, { "epoch": 0.15, "learning_rate": 4.821208384710235e-05, "loss": 0.0092, "step": 166 }, { "epoch": 0.15, "learning_rate": 4.8196670776818744e-05, "loss": 0.0152, "step": 167 }, { "epoch": 0.15, "learning_rate": 4.8181257706535146e-05, "loss": 0.2107, "step": 168 }, { "epoch": 0.15, "learning_rate": 4.816584463625154e-05, "loss": 0.1817, "step": 169 }, { "epoch": 0.15, "learning_rate": 4.815043156596794e-05, "loss": 0.0845, "step": 170 }, { "epoch": 0.16, "learning_rate": 4.813501849568434e-05, "loss": 0.0233, "step": 171 }, { "epoch": 0.16, "learning_rate": 4.811960542540074e-05, "loss": 0.0471, "step": 172 }, { "epoch": 0.16, "learning_rate": 4.810419235511714e-05, "loss": 0.0261, "step": 173 }, { "epoch": 0.16, "learning_rate": 4.808877928483354e-05, "loss": 0.0201, "step": 174 }, { "epoch": 0.16, "learning_rate": 4.807336621454994e-05, "loss": 0.3822, "step": 175 }, { "epoch": 0.16, "learning_rate": 4.805795314426634e-05, "loss": 0.0799, "step": 176 }, { "epoch": 0.16, "learning_rate": 4.804254007398274e-05, "loss": 0.0055, "step": 177 }, { "epoch": 0.16, "learning_rate": 4.802712700369914e-05, "loss": 0.0126, "step": 178 }, { "epoch": 0.16, "learning_rate": 4.801171393341554e-05, "loss": 0.3471, "step": 179 }, { "epoch": 0.16, "learning_rate": 4.799630086313194e-05, "loss": 0.0118, "step": 180 }, { "epoch": 0.16, "learning_rate": 4.798088779284834e-05, "loss": 0.0196, "step": 181 }, { "epoch": 0.17, "learning_rate": 4.796547472256474e-05, "loss": 0.1366, "step": 182 }, { "epoch": 0.17, "learning_rate": 4.795006165228113e-05, "loss": 0.3279, "step": 183 }, { "epoch": 0.17, "learning_rate": 4.7934648581997536e-05, "loss": 0.014, "step": 184 }, { "epoch": 0.17, "learning_rate": 4.791923551171393e-05, "loss": 0.1363, "step": 185 }, { "epoch": 0.17, "learning_rate": 4.790382244143033e-05, "loss": 0.4293, "step": 186 }, { "epoch": 0.17, "learning_rate": 4.7888409371146736e-05, "loss": 0.3484, "step": 187 }, { "epoch": 0.17, "learning_rate": 4.787299630086313e-05, "loss": 0.1952, "step": 188 }, { "epoch": 0.17, "learning_rate": 4.785758323057953e-05, "loss": 0.0179, "step": 189 }, { "epoch": 0.17, "learning_rate": 4.7842170160295936e-05, "loss": 0.2721, "step": 190 }, { "epoch": 0.17, "learning_rate": 4.782675709001233e-05, "loss": 0.0802, "step": 191 }, { "epoch": 0.17, "learning_rate": 4.7811344019728733e-05, "loss": 0.1416, "step": 192 }, { "epoch": 0.18, "learning_rate": 4.7795930949445136e-05, "loss": 0.8448, "step": 193 }, { "epoch": 0.18, "learning_rate": 4.778051787916153e-05, "loss": 0.3513, "step": 194 }, { "epoch": 0.18, "learning_rate": 4.7765104808877934e-05, "loss": 0.0042, "step": 195 }, { "epoch": 0.18, "learning_rate": 4.774969173859433e-05, "loss": 0.0436, "step": 196 }, { "epoch": 0.18, "learning_rate": 4.773427866831073e-05, "loss": 0.6608, "step": 197 }, { "epoch": 0.18, "learning_rate": 4.771886559802713e-05, "loss": 0.2199, "step": 198 }, { "epoch": 0.18, "learning_rate": 4.770345252774353e-05, "loss": 0.2829, "step": 199 }, { "epoch": 0.18, "learning_rate": 4.7688039457459925e-05, "loss": 0.498, "step": 200 }, { "epoch": 0.18, "learning_rate": 4.767262638717633e-05, "loss": 0.0246, "step": 201 }, { "epoch": 0.18, "learning_rate": 4.765721331689273e-05, "loss": 0.1832, "step": 202 }, { "epoch": 0.18, "learning_rate": 4.7641800246609125e-05, "loss": 0.2133, "step": 203 }, { "epoch": 0.19, "learning_rate": 4.762638717632553e-05, "loss": 0.3909, "step": 204 }, { "epoch": 0.19, "learning_rate": 4.761097410604193e-05, "loss": 0.0236, "step": 205 }, { "epoch": 0.19, "learning_rate": 4.7595561035758325e-05, "loss": 0.1417, "step": 206 }, { "epoch": 0.19, "learning_rate": 4.758014796547473e-05, "loss": 0.1494, "step": 207 }, { "epoch": 0.19, "learning_rate": 4.756473489519113e-05, "loss": 0.3789, "step": 208 }, { "epoch": 0.19, "learning_rate": 4.7549321824907525e-05, "loss": 0.1206, "step": 209 }, { "epoch": 0.19, "learning_rate": 4.753390875462392e-05, "loss": 0.0822, "step": 210 }, { "epoch": 0.19, "learning_rate": 4.751849568434032e-05, "loss": 0.2448, "step": 211 }, { "epoch": 0.19, "learning_rate": 4.750308261405672e-05, "loss": 0.1909, "step": 212 }, { "epoch": 0.19, "learning_rate": 4.748766954377312e-05, "loss": 0.0174, "step": 213 }, { "epoch": 0.19, "learning_rate": 4.747225647348952e-05, "loss": 0.1579, "step": 214 }, { "epoch": 0.2, "learning_rate": 4.745684340320592e-05, "loss": 0.1925, "step": 215 }, { "epoch": 0.2, "learning_rate": 4.744143033292232e-05, "loss": 0.1041, "step": 216 }, { "epoch": 0.2, "learning_rate": 4.742601726263872e-05, "loss": 0.0214, "step": 217 }, { "epoch": 0.2, "learning_rate": 4.741060419235512e-05, "loss": 0.0632, "step": 218 }, { "epoch": 0.2, "learning_rate": 4.739519112207152e-05, "loss": 0.1283, "step": 219 }, { "epoch": 0.2, "learning_rate": 4.737977805178792e-05, "loss": 0.0242, "step": 220 }, { "epoch": 0.2, "learning_rate": 4.736436498150432e-05, "loss": 0.1727, "step": 221 }, { "epoch": 0.2, "learning_rate": 4.734895191122072e-05, "loss": 0.1659, "step": 222 }, { "epoch": 0.2, "learning_rate": 4.7333538840937116e-05, "loss": 0.0115, "step": 223 }, { "epoch": 0.2, "learning_rate": 4.731812577065352e-05, "loss": 0.0066, "step": 224 }, { "epoch": 0.2, "learning_rate": 4.7302712700369914e-05, "loss": 0.0089, "step": 225 }, { "epoch": 0.21, "learning_rate": 4.7287299630086316e-05, "loss": 0.0138, "step": 226 }, { "epoch": 0.21, "learning_rate": 4.727188655980271e-05, "loss": 0.0949, "step": 227 }, { "epoch": 0.21, "learning_rate": 4.7256473489519114e-05, "loss": 0.3039, "step": 228 }, { "epoch": 0.21, "learning_rate": 4.7241060419235516e-05, "loss": 0.0098, "step": 229 }, { "epoch": 0.21, "learning_rate": 4.722564734895191e-05, "loss": 0.0063, "step": 230 }, { "epoch": 0.21, "learning_rate": 4.7210234278668314e-05, "loss": 0.1198, "step": 231 }, { "epoch": 0.21, "learning_rate": 4.7194821208384716e-05, "loss": 0.0718, "step": 232 }, { "epoch": 0.21, "learning_rate": 4.717940813810111e-05, "loss": 0.0059, "step": 233 }, { "epoch": 0.21, "learning_rate": 4.7163995067817514e-05, "loss": 0.2145, "step": 234 }, { "epoch": 0.21, "learning_rate": 4.714858199753391e-05, "loss": 0.0372, "step": 235 }, { "epoch": 0.21, "learning_rate": 4.713316892725031e-05, "loss": 0.1689, "step": 236 }, { "epoch": 0.22, "learning_rate": 4.7117755856966714e-05, "loss": 0.198, "step": 237 }, { "epoch": 0.22, "learning_rate": 4.710234278668311e-05, "loss": 0.3523, "step": 238 }, { "epoch": 0.22, "learning_rate": 4.7086929716399505e-05, "loss": 0.3983, "step": 239 }, { "epoch": 0.22, "learning_rate": 4.707151664611591e-05, "loss": 0.1888, "step": 240 }, { "epoch": 0.22, "learning_rate": 4.70561035758323e-05, "loss": 0.0172, "step": 241 }, { "epoch": 0.22, "learning_rate": 4.7040690505548705e-05, "loss": 0.0229, "step": 242 }, { "epoch": 0.22, "learning_rate": 4.702527743526511e-05, "loss": 0.0715, "step": 243 }, { "epoch": 0.22, "learning_rate": 4.70098643649815e-05, "loss": 0.1341, "step": 244 }, { "epoch": 0.22, "learning_rate": 4.6994451294697905e-05, "loss": 0.0272, "step": 245 }, { "epoch": 0.22, "learning_rate": 4.697903822441431e-05, "loss": 0.6443, "step": 246 }, { "epoch": 0.22, "learning_rate": 4.69636251541307e-05, "loss": 0.1703, "step": 247 }, { "epoch": 0.23, "learning_rate": 4.6948212083847105e-05, "loss": 0.2898, "step": 248 }, { "epoch": 0.23, "learning_rate": 4.693279901356351e-05, "loss": 0.0112, "step": 249 }, { "epoch": 0.23, "learning_rate": 4.69173859432799e-05, "loss": 0.032, "step": 250 }, { "epoch": 0.23, "learning_rate": 4.6901972872996305e-05, "loss": 0.0832, "step": 251 }, { "epoch": 0.23, "learning_rate": 4.68865598027127e-05, "loss": 0.0179, "step": 252 }, { "epoch": 0.23, "learning_rate": 4.6871146732429096e-05, "loss": 0.0313, "step": 253 }, { "epoch": 0.23, "learning_rate": 4.68557336621455e-05, "loss": 0.0056, "step": 254 }, { "epoch": 0.23, "learning_rate": 4.68403205918619e-05, "loss": 0.0614, "step": 255 }, { "epoch": 0.23, "learning_rate": 4.6824907521578297e-05, "loss": 0.03, "step": 256 }, { "epoch": 0.23, "learning_rate": 4.68094944512947e-05, "loss": 0.0227, "step": 257 }, { "epoch": 0.23, "learning_rate": 4.67940813810111e-05, "loss": 0.0078, "step": 258 }, { "epoch": 0.24, "learning_rate": 4.6778668310727497e-05, "loss": 0.0037, "step": 259 }, { "epoch": 0.24, "learning_rate": 4.67632552404439e-05, "loss": 0.1599, "step": 260 }, { "epoch": 0.24, "learning_rate": 4.67478421701603e-05, "loss": 0.0341, "step": 261 }, { "epoch": 0.24, "learning_rate": 4.67324290998767e-05, "loss": 0.0599, "step": 262 }, { "epoch": 0.24, "learning_rate": 4.67170160295931e-05, "loss": 0.0171, "step": 263 }, { "epoch": 0.24, "learning_rate": 4.67016029593095e-05, "loss": 0.0897, "step": 264 }, { "epoch": 0.24, "learning_rate": 4.66861898890259e-05, "loss": 0.3561, "step": 265 }, { "epoch": 0.24, "learning_rate": 4.667077681874229e-05, "loss": 0.0027, "step": 266 }, { "epoch": 0.24, "learning_rate": 4.6655363748458695e-05, "loss": 0.1664, "step": 267 }, { "epoch": 0.24, "learning_rate": 4.663995067817509e-05, "loss": 0.0046, "step": 268 }, { "epoch": 0.24, "learning_rate": 4.662453760789149e-05, "loss": 0.3711, "step": 269 }, { "epoch": 0.25, "learning_rate": 4.6609124537607895e-05, "loss": 0.0132, "step": 270 }, { "epoch": 0.25, "learning_rate": 4.659371146732429e-05, "loss": 0.0898, "step": 271 }, { "epoch": 0.25, "learning_rate": 4.657829839704069e-05, "loss": 0.0032, "step": 272 }, { "epoch": 0.25, "learning_rate": 4.6562885326757095e-05, "loss": 0.2465, "step": 273 }, { "epoch": 0.25, "learning_rate": 4.654747225647349e-05, "loss": 0.0031, "step": 274 }, { "epoch": 0.25, "learning_rate": 4.653205918618989e-05, "loss": 0.0055, "step": 275 }, { "epoch": 0.25, "learning_rate": 4.6516646115906295e-05, "loss": 0.0187, "step": 276 }, { "epoch": 0.25, "learning_rate": 4.650123304562269e-05, "loss": 0.1055, "step": 277 }, { "epoch": 0.25, "learning_rate": 4.648581997533909e-05, "loss": 0.3103, "step": 278 }, { "epoch": 0.25, "learning_rate": 4.647040690505549e-05, "loss": 0.2236, "step": 279 }, { "epoch": 0.26, "learning_rate": 4.645499383477189e-05, "loss": 0.0025, "step": 280 }, { "epoch": 0.26, "learning_rate": 4.6439580764488286e-05, "loss": 0.0195, "step": 281 }, { "epoch": 0.26, "learning_rate": 4.642416769420469e-05, "loss": 0.0365, "step": 282 }, { "epoch": 0.26, "learning_rate": 4.6408754623921084e-05, "loss": 0.2471, "step": 283 }, { "epoch": 0.26, "learning_rate": 4.6393341553637486e-05, "loss": 0.339, "step": 284 }, { "epoch": 0.26, "learning_rate": 4.637792848335389e-05, "loss": 0.1557, "step": 285 }, { "epoch": 0.26, "learning_rate": 4.6362515413070284e-05, "loss": 0.0088, "step": 286 }, { "epoch": 0.26, "learning_rate": 4.6347102342786686e-05, "loss": 0.0099, "step": 287 }, { "epoch": 0.26, "learning_rate": 4.633168927250309e-05, "loss": 0.0029, "step": 288 }, { "epoch": 0.26, "learning_rate": 4.6316276202219484e-05, "loss": 0.0047, "step": 289 }, { "epoch": 0.26, "learning_rate": 4.6300863131935886e-05, "loss": 0.2115, "step": 290 }, { "epoch": 0.27, "learning_rate": 4.628545006165229e-05, "loss": 0.004, "step": 291 }, { "epoch": 0.27, "learning_rate": 4.6270036991368684e-05, "loss": 0.0033, "step": 292 }, { "epoch": 0.27, "learning_rate": 4.6254623921085086e-05, "loss": 0.0025, "step": 293 }, { "epoch": 0.27, "learning_rate": 4.623921085080148e-05, "loss": 0.0025, "step": 294 }, { "epoch": 0.27, "learning_rate": 4.622379778051788e-05, "loss": 0.0028, "step": 295 }, { "epoch": 0.27, "learning_rate": 4.620838471023428e-05, "loss": 0.0059, "step": 296 }, { "epoch": 0.27, "learning_rate": 4.619297163995068e-05, "loss": 0.0029, "step": 297 }, { "epoch": 0.27, "learning_rate": 4.617755856966708e-05, "loss": 0.0445, "step": 298 }, { "epoch": 0.27, "learning_rate": 4.616214549938348e-05, "loss": 0.0432, "step": 299 }, { "epoch": 0.27, "learning_rate": 4.614673242909988e-05, "loss": 0.2325, "step": 300 }, { "epoch": 0.27, "learning_rate": 4.613131935881628e-05, "loss": 0.0195, "step": 301 }, { "epoch": 0.28, "learning_rate": 4.611590628853268e-05, "loss": 0.1756, "step": 302 }, { "epoch": 0.28, "learning_rate": 4.610049321824908e-05, "loss": 0.0044, "step": 303 }, { "epoch": 0.28, "learning_rate": 4.608508014796548e-05, "loss": 0.0018, "step": 304 }, { "epoch": 0.28, "learning_rate": 4.606966707768188e-05, "loss": 0.3524, "step": 305 }, { "epoch": 0.28, "learning_rate": 4.605425400739828e-05, "loss": 0.0033, "step": 306 }, { "epoch": 0.28, "learning_rate": 4.603884093711468e-05, "loss": 0.3018, "step": 307 }, { "epoch": 0.28, "learning_rate": 4.602342786683107e-05, "loss": 0.0471, "step": 308 }, { "epoch": 0.28, "learning_rate": 4.600801479654747e-05, "loss": 0.0902, "step": 309 }, { "epoch": 0.28, "learning_rate": 4.599260172626387e-05, "loss": 0.3098, "step": 310 }, { "epoch": 0.28, "learning_rate": 4.597718865598027e-05, "loss": 0.0041, "step": 311 }, { "epoch": 0.28, "learning_rate": 4.596177558569667e-05, "loss": 0.5342, "step": 312 }, { "epoch": 0.29, "learning_rate": 4.594636251541307e-05, "loss": 0.1162, "step": 313 }, { "epoch": 0.29, "learning_rate": 4.593094944512947e-05, "loss": 0.0098, "step": 314 }, { "epoch": 0.29, "learning_rate": 4.591553637484587e-05, "loss": 0.1333, "step": 315 }, { "epoch": 0.29, "learning_rate": 4.590012330456227e-05, "loss": 0.3174, "step": 316 }, { "epoch": 0.29, "learning_rate": 4.588471023427867e-05, "loss": 0.0164, "step": 317 }, { "epoch": 0.29, "learning_rate": 4.586929716399507e-05, "loss": 0.0033, "step": 318 }, { "epoch": 0.29, "learning_rate": 4.585388409371147e-05, "loss": 0.0046, "step": 319 }, { "epoch": 0.29, "learning_rate": 4.583847102342787e-05, "loss": 0.2797, "step": 320 }, { "epoch": 0.29, "learning_rate": 4.582305795314427e-05, "loss": 0.0084, "step": 321 }, { "epoch": 0.29, "learning_rate": 4.5807644882860664e-05, "loss": 0.0113, "step": 322 }, { "epoch": 0.29, "learning_rate": 4.5792231812577066e-05, "loss": 0.0334, "step": 323 }, { "epoch": 0.3, "learning_rate": 4.577681874229346e-05, "loss": 0.0412, "step": 324 }, { "epoch": 0.3, "learning_rate": 4.5761405672009864e-05, "loss": 0.0242, "step": 325 }, { "epoch": 0.3, "learning_rate": 4.5745992601726267e-05, "loss": 0.031, "step": 326 }, { "epoch": 0.3, "learning_rate": 4.573057953144266e-05, "loss": 0.0121, "step": 327 }, { "epoch": 0.3, "learning_rate": 4.5715166461159064e-05, "loss": 0.0037, "step": 328 }, { "epoch": 0.3, "learning_rate": 4.569975339087547e-05, "loss": 0.0361, "step": 329 }, { "epoch": 0.3, "learning_rate": 4.568434032059186e-05, "loss": 0.0137, "step": 330 }, { "epoch": 0.3, "learning_rate": 4.5668927250308264e-05, "loss": 0.0029, "step": 331 }, { "epoch": 0.3, "learning_rate": 4.565351418002467e-05, "loss": 0.5282, "step": 332 }, { "epoch": 0.3, "learning_rate": 4.563810110974106e-05, "loss": 0.0581, "step": 333 }, { "epoch": 0.3, "learning_rate": 4.5622688039457464e-05, "loss": 0.0033, "step": 334 }, { "epoch": 0.31, "learning_rate": 4.560727496917386e-05, "loss": 0.0397, "step": 335 }, { "epoch": 0.31, "learning_rate": 4.559186189889026e-05, "loss": 0.0731, "step": 336 }, { "epoch": 0.31, "learning_rate": 4.557644882860666e-05, "loss": 0.3565, "step": 337 }, { "epoch": 0.31, "learning_rate": 4.556103575832306e-05, "loss": 0.2693, "step": 338 }, { "epoch": 0.31, "learning_rate": 4.5545622688039456e-05, "loss": 0.0085, "step": 339 }, { "epoch": 0.31, "learning_rate": 4.553020961775586e-05, "loss": 0.0079, "step": 340 }, { "epoch": 0.31, "learning_rate": 4.551479654747226e-05, "loss": 0.1846, "step": 341 }, { "epoch": 0.31, "learning_rate": 4.5499383477188656e-05, "loss": 0.0116, "step": 342 }, { "epoch": 0.31, "learning_rate": 4.548397040690506e-05, "loss": 0.2786, "step": 343 }, { "epoch": 0.31, "learning_rate": 4.546855733662146e-05, "loss": 0.1938, "step": 344 }, { "epoch": 0.31, "learning_rate": 4.5453144266337856e-05, "loss": 0.0055, "step": 345 }, { "epoch": 0.32, "learning_rate": 4.543773119605426e-05, "loss": 0.0034, "step": 346 }, { "epoch": 0.32, "learning_rate": 4.542231812577066e-05, "loss": 0.0059, "step": 347 }, { "epoch": 0.32, "learning_rate": 4.5406905055487056e-05, "loss": 0.005, "step": 348 }, { "epoch": 0.32, "learning_rate": 4.539149198520346e-05, "loss": 0.0043, "step": 349 }, { "epoch": 0.32, "learning_rate": 4.5376078914919854e-05, "loss": 0.2746, "step": 350 }, { "epoch": 0.32, "learning_rate": 4.536066584463625e-05, "loss": 0.0065, "step": 351 }, { "epoch": 0.32, "learning_rate": 4.534525277435265e-05, "loss": 0.0865, "step": 352 }, { "epoch": 0.32, "learning_rate": 4.5329839704069054e-05, "loss": 0.0968, "step": 353 }, { "epoch": 0.32, "learning_rate": 4.531442663378545e-05, "loss": 0.1466, "step": 354 }, { "epoch": 0.32, "learning_rate": 4.529901356350185e-05, "loss": 0.0039, "step": 355 }, { "epoch": 0.32, "learning_rate": 4.5283600493218254e-05, "loss": 0.0368, "step": 356 }, { "epoch": 0.33, "learning_rate": 4.526818742293465e-05, "loss": 0.0161, "step": 357 }, { "epoch": 0.33, "learning_rate": 4.525277435265105e-05, "loss": 0.0031, "step": 358 }, { "epoch": 0.33, "learning_rate": 4.5237361282367454e-05, "loss": 0.0017, "step": 359 }, { "epoch": 0.33, "learning_rate": 4.522194821208385e-05, "loss": 0.0015, "step": 360 }, { "epoch": 0.33, "learning_rate": 4.520653514180025e-05, "loss": 0.368, "step": 361 }, { "epoch": 0.33, "learning_rate": 4.5191122071516654e-05, "loss": 0.5389, "step": 362 }, { "epoch": 0.33, "learning_rate": 4.517570900123305e-05, "loss": 0.0112, "step": 363 }, { "epoch": 0.33, "learning_rate": 4.5160295930949445e-05, "loss": 0.0014, "step": 364 }, { "epoch": 0.33, "learning_rate": 4.514488286066585e-05, "loss": 0.0042, "step": 365 }, { "epoch": 0.33, "learning_rate": 4.512946979038224e-05, "loss": 0.1527, "step": 366 }, { "epoch": 0.33, "learning_rate": 4.5114056720098645e-05, "loss": 0.0016, "step": 367 }, { "epoch": 0.34, "learning_rate": 4.509864364981505e-05, "loss": 0.0027, "step": 368 }, { "epoch": 0.34, "learning_rate": 4.508323057953144e-05, "loss": 0.002, "step": 369 }, { "epoch": 0.34, "learning_rate": 4.5067817509247845e-05, "loss": 0.2218, "step": 370 }, { "epoch": 0.34, "learning_rate": 4.505240443896425e-05, "loss": 0.3571, "step": 371 }, { "epoch": 0.34, "learning_rate": 4.503699136868064e-05, "loss": 0.24, "step": 372 }, { "epoch": 0.34, "learning_rate": 4.5021578298397045e-05, "loss": 0.0017, "step": 373 }, { "epoch": 0.34, "learning_rate": 4.500616522811345e-05, "loss": 0.0031, "step": 374 }, { "epoch": 0.34, "learning_rate": 4.499075215782984e-05, "loss": 0.0017, "step": 375 }, { "epoch": 0.34, "learning_rate": 4.4975339087546245e-05, "loss": 0.6421, "step": 376 }, { "epoch": 0.34, "learning_rate": 4.495992601726264e-05, "loss": 0.0031, "step": 377 }, { "epoch": 0.34, "learning_rate": 4.4944512946979036e-05, "loss": 0.0145, "step": 378 }, { "epoch": 0.35, "learning_rate": 4.492909987669544e-05, "loss": 0.1832, "step": 379 }, { "epoch": 0.35, "learning_rate": 4.491368680641184e-05, "loss": 0.0018, "step": 380 }, { "epoch": 0.35, "learning_rate": 4.4898273736128236e-05, "loss": 0.0018, "step": 381 }, { "epoch": 0.35, "learning_rate": 4.488286066584464e-05, "loss": 0.3842, "step": 382 }, { "epoch": 0.35, "learning_rate": 4.4867447595561034e-05, "loss": 0.0555, "step": 383 }, { "epoch": 0.35, "learning_rate": 4.4852034525277436e-05, "loss": 0.0066, "step": 384 }, { "epoch": 0.35, "learning_rate": 4.483662145499384e-05, "loss": 0.0361, "step": 385 }, { "epoch": 0.35, "learning_rate": 4.4821208384710234e-05, "loss": 0.0094, "step": 386 }, { "epoch": 0.35, "learning_rate": 4.4805795314426636e-05, "loss": 0.007, "step": 387 }, { "epoch": 0.35, "learning_rate": 4.479038224414304e-05, "loss": 0.0205, "step": 388 }, { "epoch": 0.35, "learning_rate": 4.4774969173859434e-05, "loss": 0.0018, "step": 389 }, { "epoch": 0.36, "learning_rate": 4.4759556103575836e-05, "loss": 0.1393, "step": 390 }, { "epoch": 0.36, "learning_rate": 4.474414303329223e-05, "loss": 0.3189, "step": 391 }, { "epoch": 0.36, "learning_rate": 4.4728729963008634e-05, "loss": 0.0034, "step": 392 }, { "epoch": 0.36, "learning_rate": 4.471331689272503e-05, "loss": 0.0074, "step": 393 }, { "epoch": 0.36, "learning_rate": 4.469790382244143e-05, "loss": 0.0236, "step": 394 }, { "epoch": 0.36, "learning_rate": 4.468249075215783e-05, "loss": 0.01, "step": 395 }, { "epoch": 0.36, "learning_rate": 4.466707768187423e-05, "loss": 0.015, "step": 396 }, { "epoch": 0.36, "learning_rate": 4.465166461159063e-05, "loss": 0.0033, "step": 397 }, { "epoch": 0.36, "learning_rate": 4.463625154130703e-05, "loss": 0.285, "step": 398 }, { "epoch": 0.36, "learning_rate": 4.462083847102343e-05, "loss": 0.0074, "step": 399 }, { "epoch": 0.36, "learning_rate": 4.460542540073983e-05, "loss": 0.001, "step": 400 }, { "epoch": 0.37, "learning_rate": 4.459001233045623e-05, "loss": 0.3007, "step": 401 }, { "epoch": 0.37, "learning_rate": 4.457459926017263e-05, "loss": 0.0019, "step": 402 }, { "epoch": 0.37, "learning_rate": 4.455918618988903e-05, "loss": 0.0029, "step": 403 }, { "epoch": 0.37, "learning_rate": 4.454377311960543e-05, "loss": 0.0125, "step": 404 }, { "epoch": 0.37, "learning_rate": 4.452836004932183e-05, "loss": 0.0415, "step": 405 }, { "epoch": 0.37, "learning_rate": 4.4512946979038226e-05, "loss": 0.0031, "step": 406 }, { "epoch": 0.37, "learning_rate": 4.449753390875462e-05, "loss": 0.0025, "step": 407 }, { "epoch": 0.37, "learning_rate": 4.448212083847102e-05, "loss": 0.0048, "step": 408 }, { "epoch": 0.37, "learning_rate": 4.4466707768187426e-05, "loss": 0.0755, "step": 409 }, { "epoch": 0.37, "learning_rate": 4.445129469790382e-05, "loss": 0.1365, "step": 410 }, { "epoch": 0.37, "learning_rate": 4.443588162762022e-05, "loss": 0.0036, "step": 411 }, { "epoch": 0.38, "learning_rate": 4.4420468557336626e-05, "loss": 0.0262, "step": 412 }, { "epoch": 0.38, "learning_rate": 4.440505548705302e-05, "loss": 0.0023, "step": 413 }, { "epoch": 0.38, "learning_rate": 4.4389642416769423e-05, "loss": 0.1611, "step": 414 }, { "epoch": 0.38, "learning_rate": 4.4374229346485826e-05, "loss": 0.1367, "step": 415 }, { "epoch": 0.38, "learning_rate": 4.435881627620222e-05, "loss": 0.0034, "step": 416 }, { "epoch": 0.38, "learning_rate": 4.4343403205918624e-05, "loss": 0.1921, "step": 417 }, { "epoch": 0.38, "learning_rate": 4.4327990135635026e-05, "loss": 0.0025, "step": 418 }, { "epoch": 0.38, "learning_rate": 4.431257706535142e-05, "loss": 0.0452, "step": 419 }, { "epoch": 0.38, "learning_rate": 4.429716399506782e-05, "loss": 0.0165, "step": 420 }, { "epoch": 0.38, "learning_rate": 4.428175092478422e-05, "loss": 0.0011, "step": 421 }, { "epoch": 0.38, "learning_rate": 4.4266337854500615e-05, "loss": 0.001, "step": 422 }, { "epoch": 0.39, "learning_rate": 4.425092478421702e-05, "loss": 0.0041, "step": 423 }, { "epoch": 0.39, "learning_rate": 4.423551171393342e-05, "loss": 0.0158, "step": 424 }, { "epoch": 0.39, "learning_rate": 4.4220098643649815e-05, "loss": 0.4164, "step": 425 }, { "epoch": 0.39, "learning_rate": 4.420468557336622e-05, "loss": 0.0099, "step": 426 }, { "epoch": 0.39, "learning_rate": 4.418927250308262e-05, "loss": 0.0015, "step": 427 }, { "epoch": 0.39, "learning_rate": 4.4173859432799015e-05, "loss": 0.0012, "step": 428 }, { "epoch": 0.39, "learning_rate": 4.415844636251542e-05, "loss": 0.377, "step": 429 }, { "epoch": 0.39, "learning_rate": 4.414303329223182e-05, "loss": 0.0014, "step": 430 }, { "epoch": 0.39, "learning_rate": 4.4127620221948215e-05, "loss": 0.0296, "step": 431 }, { "epoch": 0.39, "learning_rate": 4.411220715166462e-05, "loss": 0.0012, "step": 432 }, { "epoch": 0.39, "learning_rate": 4.409679408138101e-05, "loss": 0.0028, "step": 433 }, { "epoch": 0.4, "learning_rate": 4.408138101109741e-05, "loss": 0.003, "step": 434 }, { "epoch": 0.4, "learning_rate": 4.406596794081381e-05, "loss": 0.0013, "step": 435 }, { "epoch": 0.4, "learning_rate": 4.405055487053021e-05, "loss": 0.0015, "step": 436 }, { "epoch": 0.4, "learning_rate": 4.403514180024661e-05, "loss": 0.003, "step": 437 }, { "epoch": 0.4, "learning_rate": 4.401972872996301e-05, "loss": 0.0028, "step": 438 }, { "epoch": 0.4, "learning_rate": 4.400431565967941e-05, "loss": 0.0009, "step": 439 }, { "epoch": 0.4, "learning_rate": 4.398890258939581e-05, "loss": 0.3179, "step": 440 }, { "epoch": 0.4, "learning_rate": 4.397348951911221e-05, "loss": 0.0871, "step": 441 }, { "epoch": 0.4, "learning_rate": 4.395807644882861e-05, "loss": 0.0103, "step": 442 }, { "epoch": 0.4, "learning_rate": 4.394266337854501e-05, "loss": 0.0018, "step": 443 }, { "epoch": 0.4, "learning_rate": 4.392725030826141e-05, "loss": 0.5116, "step": 444 }, { "epoch": 0.41, "learning_rate": 4.391183723797781e-05, "loss": 0.0027, "step": 445 }, { "epoch": 0.41, "learning_rate": 4.389642416769421e-05, "loss": 0.0624, "step": 446 }, { "epoch": 0.41, "learning_rate": 4.3881011097410604e-05, "loss": 0.0013, "step": 447 }, { "epoch": 0.41, "learning_rate": 4.3865598027127006e-05, "loss": 0.2554, "step": 448 }, { "epoch": 0.41, "learning_rate": 4.38501849568434e-05, "loss": 0.3956, "step": 449 }, { "epoch": 0.41, "learning_rate": 4.3834771886559804e-05, "loss": 0.0018, "step": 450 }, { "epoch": 0.41, "learning_rate": 4.3819358816276206e-05, "loss": 0.0011, "step": 451 }, { "epoch": 0.41, "learning_rate": 4.38039457459926e-05, "loss": 0.0025, "step": 452 }, { "epoch": 0.41, "learning_rate": 4.3788532675709004e-05, "loss": 0.0023, "step": 453 }, { "epoch": 0.41, "learning_rate": 4.3773119605425406e-05, "loss": 0.0072, "step": 454 }, { "epoch": 0.41, "learning_rate": 4.37577065351418e-05, "loss": 0.0182, "step": 455 }, { "epoch": 0.42, "learning_rate": 4.3742293464858204e-05, "loss": 0.0015, "step": 456 }, { "epoch": 0.42, "learning_rate": 4.37268803945746e-05, "loss": 0.0016, "step": 457 }, { "epoch": 0.42, "learning_rate": 4.3711467324291e-05, "loss": 0.0012, "step": 458 }, { "epoch": 0.42, "learning_rate": 4.3696054254007404e-05, "loss": 0.0419, "step": 459 }, { "epoch": 0.42, "learning_rate": 4.36806411837238e-05, "loss": 0.0132, "step": 460 }, { "epoch": 0.42, "learning_rate": 4.36652281134402e-05, "loss": 0.6488, "step": 461 }, { "epoch": 0.42, "learning_rate": 4.36498150431566e-05, "loss": 0.0024, "step": 462 }, { "epoch": 0.42, "learning_rate": 4.363440197287299e-05, "loss": 0.0036, "step": 463 }, { "epoch": 0.42, "learning_rate": 4.3618988902589395e-05, "loss": 0.0013, "step": 464 }, { "epoch": 0.42, "learning_rate": 4.36035758323058e-05, "loss": 0.0018, "step": 465 }, { "epoch": 0.42, "learning_rate": 4.358816276202219e-05, "loss": 0.0015, "step": 466 }, { "epoch": 0.43, "learning_rate": 4.3572749691738595e-05, "loss": 0.3114, "step": 467 }, { "epoch": 0.43, "learning_rate": 4.3557336621455e-05, "loss": 0.0012, "step": 468 }, { "epoch": 0.43, "learning_rate": 4.354192355117139e-05, "loss": 0.0012, "step": 469 }, { "epoch": 0.43, "learning_rate": 4.3526510480887795e-05, "loss": 0.0016, "step": 470 }, { "epoch": 0.43, "learning_rate": 4.35110974106042e-05, "loss": 0.0063, "step": 471 }, { "epoch": 0.43, "learning_rate": 4.349568434032059e-05, "loss": 0.0113, "step": 472 }, { "epoch": 0.43, "learning_rate": 4.3480271270036995e-05, "loss": 0.0013, "step": 473 }, { "epoch": 0.43, "learning_rate": 4.34648581997534e-05, "loss": 0.0202, "step": 474 }, { "epoch": 0.43, "learning_rate": 4.344944512946979e-05, "loss": 0.8024, "step": 475 }, { "epoch": 0.43, "learning_rate": 4.343403205918619e-05, "loss": 0.1431, "step": 476 }, { "epoch": 0.43, "learning_rate": 4.341861898890259e-05, "loss": 0.0395, "step": 477 }, { "epoch": 0.44, "learning_rate": 4.3403205918618987e-05, "loss": 0.0492, "step": 478 }, { "epoch": 0.44, "learning_rate": 4.338779284833539e-05, "loss": 0.0033, "step": 479 }, { "epoch": 0.44, "learning_rate": 4.337237977805179e-05, "loss": 0.3942, "step": 480 }, { "epoch": 0.44, "learning_rate": 4.3356966707768187e-05, "loss": 0.0011, "step": 481 }, { "epoch": 0.44, "learning_rate": 4.334155363748459e-05, "loss": 0.0992, "step": 482 }, { "epoch": 0.44, "learning_rate": 4.332614056720099e-05, "loss": 0.0046, "step": 483 }, { "epoch": 0.44, "learning_rate": 4.331072749691739e-05, "loss": 0.0047, "step": 484 }, { "epoch": 0.44, "learning_rate": 4.329531442663379e-05, "loss": 0.388, "step": 485 }, { "epoch": 0.44, "learning_rate": 4.327990135635019e-05, "loss": 0.4356, "step": 486 }, { "epoch": 0.44, "learning_rate": 4.326448828606659e-05, "loss": 0.1234, "step": 487 }, { "epoch": 0.44, "learning_rate": 4.324907521578299e-05, "loss": 0.0022, "step": 488 }, { "epoch": 0.45, "learning_rate": 4.3233662145499385e-05, "loss": 0.2838, "step": 489 }, { "epoch": 0.45, "learning_rate": 4.321824907521578e-05, "loss": 0.0012, "step": 490 }, { "epoch": 0.45, "learning_rate": 4.320283600493218e-05, "loss": 0.3868, "step": 491 }, { "epoch": 0.45, "learning_rate": 4.3187422934648585e-05, "loss": 0.0014, "step": 492 }, { "epoch": 0.45, "learning_rate": 4.317200986436498e-05, "loss": 0.0931, "step": 493 }, { "epoch": 0.45, "learning_rate": 4.315659679408138e-05, "loss": 0.0479, "step": 494 }, { "epoch": 0.45, "learning_rate": 4.3141183723797785e-05, "loss": 0.3443, "step": 495 }, { "epoch": 0.45, "learning_rate": 4.312577065351418e-05, "loss": 0.0019, "step": 496 }, { "epoch": 0.45, "learning_rate": 4.311035758323058e-05, "loss": 0.0217, "step": 497 }, { "epoch": 0.45, "learning_rate": 4.3094944512946985e-05, "loss": 0.0034, "step": 498 }, { "epoch": 0.45, "learning_rate": 4.307953144266338e-05, "loss": 0.0029, "step": 499 }, { "epoch": 0.46, "learning_rate": 4.306411837237978e-05, "loss": 0.0033, "step": 500 }, { "epoch": 0.46, "learning_rate": 4.3048705302096185e-05, "loss": 0.0286, "step": 501 }, { "epoch": 0.46, "learning_rate": 4.303329223181258e-05, "loss": 0.3277, "step": 502 }, { "epoch": 0.46, "learning_rate": 4.3017879161528976e-05, "loss": 0.0323, "step": 503 }, { "epoch": 0.46, "learning_rate": 4.300246609124538e-05, "loss": 0.0044, "step": 504 }, { "epoch": 0.46, "learning_rate": 4.2987053020961774e-05, "loss": 0.0076, "step": 505 }, { "epoch": 0.46, "learning_rate": 4.2971639950678176e-05, "loss": 0.0179, "step": 506 }, { "epoch": 0.46, "learning_rate": 4.295622688039458e-05, "loss": 0.0271, "step": 507 }, { "epoch": 0.46, "learning_rate": 4.2940813810110974e-05, "loss": 0.0072, "step": 508 }, { "epoch": 0.46, "learning_rate": 4.2925400739827376e-05, "loss": 0.0058, "step": 509 }, { "epoch": 0.46, "learning_rate": 4.290998766954378e-05, "loss": 0.0026, "step": 510 }, { "epoch": 0.47, "learning_rate": 4.2894574599260174e-05, "loss": 0.0565, "step": 511 }, { "epoch": 0.47, "learning_rate": 4.2879161528976576e-05, "loss": 0.0288, "step": 512 }, { "epoch": 0.47, "learning_rate": 4.286374845869298e-05, "loss": 0.0157, "step": 513 }, { "epoch": 0.47, "learning_rate": 4.2848335388409374e-05, "loss": 0.0716, "step": 514 }, { "epoch": 0.47, "learning_rate": 4.2832922318125776e-05, "loss": 0.0282, "step": 515 }, { "epoch": 0.47, "learning_rate": 4.281750924784217e-05, "loss": 0.1884, "step": 516 }, { "epoch": 0.47, "learning_rate": 4.280209617755857e-05, "loss": 0.0066, "step": 517 }, { "epoch": 0.47, "learning_rate": 4.278668310727497e-05, "loss": 0.004, "step": 518 }, { "epoch": 0.47, "learning_rate": 4.277127003699137e-05, "loss": 0.1473, "step": 519 }, { "epoch": 0.47, "learning_rate": 4.275585696670777e-05, "loss": 0.0018, "step": 520 }, { "epoch": 0.47, "learning_rate": 4.274044389642417e-05, "loss": 0.0013, "step": 521 }, { "epoch": 0.48, "learning_rate": 4.272503082614057e-05, "loss": 0.0166, "step": 522 }, { "epoch": 0.48, "learning_rate": 4.270961775585697e-05, "loss": 0.3395, "step": 523 }, { "epoch": 0.48, "learning_rate": 4.269420468557337e-05, "loss": 0.3455, "step": 524 }, { "epoch": 0.48, "learning_rate": 4.267879161528977e-05, "loss": 0.1434, "step": 525 }, { "epoch": 0.48, "learning_rate": 4.266337854500617e-05, "loss": 0.011, "step": 526 }, { "epoch": 0.48, "learning_rate": 4.264796547472257e-05, "loss": 0.2195, "step": 527 }, { "epoch": 0.48, "learning_rate": 4.263255240443897e-05, "loss": 0.0471, "step": 528 }, { "epoch": 0.48, "learning_rate": 4.261713933415537e-05, "loss": 0.0145, "step": 529 }, { "epoch": 0.48, "learning_rate": 4.260172626387176e-05, "loss": 0.0846, "step": 530 }, { "epoch": 0.48, "learning_rate": 4.2586313193588165e-05, "loss": 0.0244, "step": 531 }, { "epoch": 0.48, "learning_rate": 4.257090012330456e-05, "loss": 0.0035, "step": 532 }, { "epoch": 0.49, "learning_rate": 4.255548705302096e-05, "loss": 0.3924, "step": 533 }, { "epoch": 0.49, "learning_rate": 4.254007398273736e-05, "loss": 0.0021, "step": 534 }, { "epoch": 0.49, "learning_rate": 4.252466091245376e-05, "loss": 0.4137, "step": 535 }, { "epoch": 0.49, "learning_rate": 4.250924784217016e-05, "loss": 0.2125, "step": 536 }, { "epoch": 0.49, "learning_rate": 4.249383477188656e-05, "loss": 0.2046, "step": 537 }, { "epoch": 0.49, "learning_rate": 4.247842170160296e-05, "loss": 0.2159, "step": 538 }, { "epoch": 0.49, "learning_rate": 4.246300863131936e-05, "loss": 0.0051, "step": 539 }, { "epoch": 0.49, "learning_rate": 4.244759556103576e-05, "loss": 0.0145, "step": 540 }, { "epoch": 0.49, "learning_rate": 4.243218249075216e-05, "loss": 0.0304, "step": 541 }, { "epoch": 0.49, "learning_rate": 4.241676942046856e-05, "loss": 0.0055, "step": 542 }, { "epoch": 0.49, "learning_rate": 4.240135635018496e-05, "loss": 0.0047, "step": 543 }, { "epoch": 0.5, "learning_rate": 4.238594327990136e-05, "loss": 0.0092, "step": 544 }, { "epoch": 0.5, "learning_rate": 4.2370530209617756e-05, "loss": 0.1466, "step": 545 }, { "epoch": 0.5, "learning_rate": 4.235511713933415e-05, "loss": 0.43, "step": 546 }, { "epoch": 0.5, "learning_rate": 4.2339704069050554e-05, "loss": 0.0028, "step": 547 }, { "epoch": 0.5, "learning_rate": 4.2324290998766957e-05, "loss": 0.0354, "step": 548 }, { "epoch": 0.5, "learning_rate": 4.230887792848335e-05, "loss": 0.1144, "step": 549 }, { "epoch": 0.5, "learning_rate": 4.2293464858199754e-05, "loss": 0.0106, "step": 550 }, { "epoch": 0.5, "learning_rate": 4.227805178791616e-05, "loss": 0.0047, "step": 551 }, { "epoch": 0.5, "learning_rate": 4.226263871763255e-05, "loss": 0.0167, "step": 552 }, { "epoch": 0.5, "learning_rate": 4.2247225647348954e-05, "loss": 0.2297, "step": 553 }, { "epoch": 0.5, "learning_rate": 4.223181257706536e-05, "loss": 0.0049, "step": 554 }, { "epoch": 0.51, "learning_rate": 4.221639950678175e-05, "loss": 0.009, "step": 555 }, { "epoch": 0.51, "learning_rate": 4.2200986436498154e-05, "loss": 0.0541, "step": 556 }, { "epoch": 0.51, "learning_rate": 4.218557336621456e-05, "loss": 0.0051, "step": 557 }, { "epoch": 0.51, "learning_rate": 4.217016029593095e-05, "loss": 0.0477, "step": 558 }, { "epoch": 0.51, "learning_rate": 4.215474722564735e-05, "loss": 0.0592, "step": 559 }, { "epoch": 0.51, "learning_rate": 4.213933415536375e-05, "loss": 0.0048, "step": 560 }, { "epoch": 0.51, "learning_rate": 4.2123921085080146e-05, "loss": 0.008, "step": 561 }, { "epoch": 0.51, "learning_rate": 4.210850801479655e-05, "loss": 0.0052, "step": 562 }, { "epoch": 0.51, "learning_rate": 4.209309494451295e-05, "loss": 0.0039, "step": 563 }, { "epoch": 0.51, "learning_rate": 4.2077681874229346e-05, "loss": 0.006, "step": 564 }, { "epoch": 0.51, "learning_rate": 4.206226880394575e-05, "loss": 0.0022, "step": 565 }, { "epoch": 0.52, "learning_rate": 4.204685573366215e-05, "loss": 0.007, "step": 566 }, { "epoch": 0.52, "learning_rate": 4.2031442663378546e-05, "loss": 0.0026, "step": 567 }, { "epoch": 0.52, "learning_rate": 4.201602959309495e-05, "loss": 0.0023, "step": 568 }, { "epoch": 0.52, "learning_rate": 4.200061652281135e-05, "loss": 0.0022, "step": 569 }, { "epoch": 0.52, "learning_rate": 4.1985203452527746e-05, "loss": 0.219, "step": 570 }, { "epoch": 0.52, "learning_rate": 4.196979038224415e-05, "loss": 0.0018, "step": 571 }, { "epoch": 0.52, "learning_rate": 4.1954377311960544e-05, "loss": 0.004, "step": 572 }, { "epoch": 0.52, "learning_rate": 4.193896424167694e-05, "loss": 0.3496, "step": 573 }, { "epoch": 0.52, "learning_rate": 4.192355117139334e-05, "loss": 0.0058, "step": 574 }, { "epoch": 0.52, "learning_rate": 4.1908138101109744e-05, "loss": 0.0141, "step": 575 }, { "epoch": 0.52, "learning_rate": 4.189272503082614e-05, "loss": 0.3665, "step": 576 }, { "epoch": 0.53, "learning_rate": 4.187731196054254e-05, "loss": 0.0655, "step": 577 }, { "epoch": 0.53, "learning_rate": 4.1861898890258944e-05, "loss": 0.0012, "step": 578 }, { "epoch": 0.53, "learning_rate": 4.184648581997534e-05, "loss": 0.0067, "step": 579 }, { "epoch": 0.53, "learning_rate": 4.183107274969174e-05, "loss": 0.0015, "step": 580 }, { "epoch": 0.53, "learning_rate": 4.1815659679408144e-05, "loss": 0.0015, "step": 581 }, { "epoch": 0.53, "learning_rate": 4.180024660912454e-05, "loss": 0.0019, "step": 582 }, { "epoch": 0.53, "learning_rate": 4.178483353884094e-05, "loss": 0.0356, "step": 583 }, { "epoch": 0.53, "learning_rate": 4.1769420468557344e-05, "loss": 0.0012, "step": 584 }, { "epoch": 0.53, "learning_rate": 4.175400739827374e-05, "loss": 0.0147, "step": 585 }, { "epoch": 0.53, "learning_rate": 4.1738594327990135e-05, "loss": 0.0632, "step": 586 }, { "epoch": 0.53, "learning_rate": 4.172318125770654e-05, "loss": 0.0017, "step": 587 }, { "epoch": 0.54, "learning_rate": 4.170776818742293e-05, "loss": 0.0017, "step": 588 }, { "epoch": 0.54, "learning_rate": 4.1692355117139335e-05, "loss": 0.0049, "step": 589 }, { "epoch": 0.54, "learning_rate": 4.167694204685574e-05, "loss": 0.0012, "step": 590 }, { "epoch": 0.54, "learning_rate": 4.166152897657213e-05, "loss": 0.0012, "step": 591 }, { "epoch": 0.54, "learning_rate": 4.1646115906288535e-05, "loss": 0.3058, "step": 592 }, { "epoch": 0.54, "learning_rate": 4.163070283600494e-05, "loss": 0.0026, "step": 593 }, { "epoch": 0.54, "learning_rate": 4.161528976572133e-05, "loss": 0.3416, "step": 594 }, { "epoch": 0.54, "learning_rate": 4.1599876695437735e-05, "loss": 0.0092, "step": 595 }, { "epoch": 0.54, "learning_rate": 4.158446362515414e-05, "loss": 0.0016, "step": 596 }, { "epoch": 0.54, "learning_rate": 4.156905055487053e-05, "loss": 0.002, "step": 597 }, { "epoch": 0.54, "learning_rate": 4.1553637484586935e-05, "loss": 0.0014, "step": 598 }, { "epoch": 0.55, "learning_rate": 4.153822441430333e-05, "loss": 0.6758, "step": 599 }, { "epoch": 0.55, "learning_rate": 4.152281134401973e-05, "loss": 0.0049, "step": 600 }, { "epoch": 0.55, "learning_rate": 4.150739827373613e-05, "loss": 0.0047, "step": 601 }, { "epoch": 0.55, "learning_rate": 4.149198520345253e-05, "loss": 0.003, "step": 602 }, { "epoch": 0.55, "learning_rate": 4.1476572133168926e-05, "loss": 0.0064, "step": 603 }, { "epoch": 0.55, "learning_rate": 4.146115906288533e-05, "loss": 0.0634, "step": 604 }, { "epoch": 0.55, "learning_rate": 4.1445745992601724e-05, "loss": 0.0036, "step": 605 }, { "epoch": 0.55, "learning_rate": 4.1430332922318126e-05, "loss": 0.0051, "step": 606 }, { "epoch": 0.55, "learning_rate": 4.141491985203453e-05, "loss": 0.238, "step": 607 }, { "epoch": 0.55, "learning_rate": 4.1399506781750924e-05, "loss": 0.003, "step": 608 }, { "epoch": 0.55, "learning_rate": 4.1384093711467326e-05, "loss": 0.3641, "step": 609 }, { "epoch": 0.56, "learning_rate": 4.136868064118373e-05, "loss": 0.005, "step": 610 }, { "epoch": 0.56, "learning_rate": 4.1353267570900124e-05, "loss": 0.0023, "step": 611 }, { "epoch": 0.56, "learning_rate": 4.1337854500616526e-05, "loss": 0.0011, "step": 612 }, { "epoch": 0.56, "learning_rate": 4.132244143033293e-05, "loss": 0.3954, "step": 613 }, { "epoch": 0.56, "learning_rate": 4.1307028360049324e-05, "loss": 0.5307, "step": 614 }, { "epoch": 0.56, "learning_rate": 4.129161528976572e-05, "loss": 0.0015, "step": 615 }, { "epoch": 0.56, "learning_rate": 4.127620221948212e-05, "loss": 0.0011, "step": 616 }, { "epoch": 0.56, "learning_rate": 4.126078914919852e-05, "loss": 0.0016, "step": 617 }, { "epoch": 0.56, "learning_rate": 4.124537607891492e-05, "loss": 0.0022, "step": 618 }, { "epoch": 0.56, "learning_rate": 4.122996300863132e-05, "loss": 0.0029, "step": 619 }, { "epoch": 0.56, "learning_rate": 4.121454993834772e-05, "loss": 0.0032, "step": 620 }, { "epoch": 0.57, "learning_rate": 4.119913686806412e-05, "loss": 0.0022, "step": 621 }, { "epoch": 0.57, "learning_rate": 4.118372379778052e-05, "loss": 0.0015, "step": 622 }, { "epoch": 0.57, "learning_rate": 4.116831072749692e-05, "loss": 0.0012, "step": 623 }, { "epoch": 0.57, "learning_rate": 4.115289765721332e-05, "loss": 0.0029, "step": 624 }, { "epoch": 0.57, "learning_rate": 4.113748458692972e-05, "loss": 0.0022, "step": 625 }, { "epoch": 0.57, "learning_rate": 4.112207151664612e-05, "loss": 0.0015, "step": 626 }, { "epoch": 0.57, "learning_rate": 4.110665844636252e-05, "loss": 0.4074, "step": 627 }, { "epoch": 0.57, "learning_rate": 4.1091245376078915e-05, "loss": 0.0013, "step": 628 }, { "epoch": 0.57, "learning_rate": 4.107583230579531e-05, "loss": 0.0129, "step": 629 }, { "epoch": 0.57, "learning_rate": 4.106041923551171e-05, "loss": 0.0019, "step": 630 }, { "epoch": 0.57, "learning_rate": 4.1045006165228116e-05, "loss": 0.0011, "step": 631 }, { "epoch": 0.58, "learning_rate": 4.102959309494451e-05, "loss": 0.0033, "step": 632 }, { "epoch": 0.58, "learning_rate": 4.101418002466091e-05, "loss": 0.0014, "step": 633 }, { "epoch": 0.58, "learning_rate": 4.0998766954377316e-05, "loss": 0.0027, "step": 634 }, { "epoch": 0.58, "learning_rate": 4.098335388409371e-05, "loss": 0.0016, "step": 635 }, { "epoch": 0.58, "learning_rate": 4.0967940813810113e-05, "loss": 0.0015, "step": 636 }, { "epoch": 0.58, "learning_rate": 4.0952527743526516e-05, "loss": 0.0067, "step": 637 }, { "epoch": 0.58, "learning_rate": 4.093711467324291e-05, "loss": 0.3461, "step": 638 }, { "epoch": 0.58, "learning_rate": 4.0921701602959313e-05, "loss": 0.001, "step": 639 }, { "epoch": 0.58, "learning_rate": 4.0906288532675716e-05, "loss": 0.0378, "step": 640 }, { "epoch": 0.58, "learning_rate": 4.089087546239211e-05, "loss": 0.0035, "step": 641 }, { "epoch": 0.58, "learning_rate": 4.087546239210851e-05, "loss": 0.0021, "step": 642 }, { "epoch": 0.59, "learning_rate": 4.086004932182491e-05, "loss": 0.0138, "step": 643 }, { "epoch": 0.59, "learning_rate": 4.0844636251541305e-05, "loss": 0.2242, "step": 644 }, { "epoch": 0.59, "learning_rate": 4.082922318125771e-05, "loss": 0.1115, "step": 645 }, { "epoch": 0.59, "learning_rate": 4.081381011097411e-05, "loss": 0.0015, "step": 646 }, { "epoch": 0.59, "learning_rate": 4.0798397040690505e-05, "loss": 0.0011, "step": 647 }, { "epoch": 0.59, "learning_rate": 4.078298397040691e-05, "loss": 0.0009, "step": 648 }, { "epoch": 0.59, "learning_rate": 4.076757090012331e-05, "loss": 0.2452, "step": 649 }, { "epoch": 0.59, "learning_rate": 4.0752157829839705e-05, "loss": 0.0534, "step": 650 }, { "epoch": 0.59, "learning_rate": 4.073674475955611e-05, "loss": 0.2856, "step": 651 }, { "epoch": 0.59, "learning_rate": 4.072133168927251e-05, "loss": 0.1185, "step": 652 }, { "epoch": 0.59, "learning_rate": 4.0705918618988905e-05, "loss": 0.0034, "step": 653 }, { "epoch": 0.6, "learning_rate": 4.069050554870531e-05, "loss": 0.0039, "step": 654 }, { "epoch": 0.6, "learning_rate": 4.06750924784217e-05, "loss": 0.0022, "step": 655 }, { "epoch": 0.6, "learning_rate": 4.0659679408138105e-05, "loss": 0.001, "step": 656 }, { "epoch": 0.6, "learning_rate": 4.06442663378545e-05, "loss": 0.0013, "step": 657 }, { "epoch": 0.6, "learning_rate": 4.06288532675709e-05, "loss": 0.0306, "step": 658 }, { "epoch": 0.6, "learning_rate": 4.06134401972873e-05, "loss": 0.0477, "step": 659 }, { "epoch": 0.6, "learning_rate": 4.05980271270037e-05, "loss": 0.2355, "step": 660 }, { "epoch": 0.6, "learning_rate": 4.05826140567201e-05, "loss": 0.0856, "step": 661 }, { "epoch": 0.6, "learning_rate": 4.05672009864365e-05, "loss": 0.2559, "step": 662 }, { "epoch": 0.6, "learning_rate": 4.05517879161529e-05, "loss": 0.1194, "step": 663 }, { "epoch": 0.6, "learning_rate": 4.05363748458693e-05, "loss": 0.0016, "step": 664 }, { "epoch": 0.61, "learning_rate": 4.05209617755857e-05, "loss": 0.0014, "step": 665 }, { "epoch": 0.61, "learning_rate": 4.05055487053021e-05, "loss": 0.2945, "step": 666 }, { "epoch": 0.61, "learning_rate": 4.04901356350185e-05, "loss": 0.2178, "step": 667 }, { "epoch": 0.61, "learning_rate": 4.04747225647349e-05, "loss": 0.3807, "step": 668 }, { "epoch": 0.61, "learning_rate": 4.04593094944513e-05, "loss": 0.0016, "step": 669 }, { "epoch": 0.61, "learning_rate": 4.0443896424167696e-05, "loss": 0.2671, "step": 670 }, { "epoch": 0.61, "learning_rate": 4.042848335388409e-05, "loss": 0.0023, "step": 671 }, { "epoch": 0.61, "learning_rate": 4.0413070283600494e-05, "loss": 0.0036, "step": 672 }, { "epoch": 0.61, "learning_rate": 4.0397657213316896e-05, "loss": 0.0031, "step": 673 }, { "epoch": 0.61, "learning_rate": 4.038224414303329e-05, "loss": 0.0132, "step": 674 }, { "epoch": 0.61, "learning_rate": 4.0366831072749694e-05, "loss": 0.0129, "step": 675 }, { "epoch": 0.62, "learning_rate": 4.0351418002466096e-05, "loss": 0.0027, "step": 676 }, { "epoch": 0.62, "learning_rate": 4.033600493218249e-05, "loss": 0.0015, "step": 677 }, { "epoch": 0.62, "learning_rate": 4.0320591861898894e-05, "loss": 0.0826, "step": 678 }, { "epoch": 0.62, "learning_rate": 4.030517879161529e-05, "loss": 0.0075, "step": 679 }, { "epoch": 0.62, "learning_rate": 4.028976572133169e-05, "loss": 0.0381, "step": 680 }, { "epoch": 0.62, "learning_rate": 4.0274352651048094e-05, "loss": 0.0065, "step": 681 }, { "epoch": 0.62, "learning_rate": 4.025893958076449e-05, "loss": 0.0099, "step": 682 }, { "epoch": 0.62, "learning_rate": 4.024352651048089e-05, "loss": 0.0035, "step": 683 }, { "epoch": 0.62, "learning_rate": 4.022811344019729e-05, "loss": 0.0036, "step": 684 }, { "epoch": 0.62, "learning_rate": 4.021270036991368e-05, "loss": 0.0048, "step": 685 }, { "epoch": 0.62, "learning_rate": 4.0197287299630085e-05, "loss": 0.1165, "step": 686 }, { "epoch": 0.63, "learning_rate": 4.018187422934649e-05, "loss": 0.0055, "step": 687 }, { "epoch": 0.63, "learning_rate": 4.016646115906288e-05, "loss": 0.0151, "step": 688 }, { "epoch": 0.63, "learning_rate": 4.0151048088779285e-05, "loss": 0.0049, "step": 689 }, { "epoch": 0.63, "learning_rate": 4.013563501849569e-05, "loss": 0.0022, "step": 690 }, { "epoch": 0.63, "learning_rate": 4.012022194821208e-05, "loss": 0.0017, "step": 691 }, { "epoch": 0.63, "learning_rate": 4.0104808877928485e-05, "loss": 0.0088, "step": 692 }, { "epoch": 0.63, "learning_rate": 4.008939580764489e-05, "loss": 0.0021, "step": 693 }, { "epoch": 0.63, "learning_rate": 4.007398273736128e-05, "loss": 0.0025, "step": 694 }, { "epoch": 0.63, "learning_rate": 4.0058569667077685e-05, "loss": 0.1682, "step": 695 }, { "epoch": 0.63, "learning_rate": 4.004315659679409e-05, "loss": 0.001, "step": 696 }, { "epoch": 0.63, "learning_rate": 4.002774352651048e-05, "loss": 0.0016, "step": 697 }, { "epoch": 0.64, "learning_rate": 4.001233045622688e-05, "loss": 0.0027, "step": 698 }, { "epoch": 0.64, "learning_rate": 3.999691738594328e-05, "loss": 0.3733, "step": 699 }, { "epoch": 0.64, "learning_rate": 3.9981504315659677e-05, "loss": 0.001, "step": 700 }, { "epoch": 0.64, "learning_rate": 3.996609124537608e-05, "loss": 0.0011, "step": 701 }, { "epoch": 0.64, "learning_rate": 3.995067817509248e-05, "loss": 0.0626, "step": 702 }, { "epoch": 0.64, "learning_rate": 3.9935265104808877e-05, "loss": 0.002, "step": 703 }, { "epoch": 0.64, "learning_rate": 3.991985203452528e-05, "loss": 0.1558, "step": 704 }, { "epoch": 0.64, "learning_rate": 3.990443896424168e-05, "loss": 0.0077, "step": 705 }, { "epoch": 0.64, "learning_rate": 3.988902589395808e-05, "loss": 0.0016, "step": 706 }, { "epoch": 0.64, "learning_rate": 3.987361282367448e-05, "loss": 0.0012, "step": 707 }, { "epoch": 0.64, "learning_rate": 3.985819975339088e-05, "loss": 0.001, "step": 708 }, { "epoch": 0.65, "learning_rate": 3.984278668310728e-05, "loss": 0.0009, "step": 709 }, { "epoch": 0.65, "learning_rate": 3.982737361282368e-05, "loss": 0.0011, "step": 710 }, { "epoch": 0.65, "learning_rate": 3.9811960542540075e-05, "loss": 0.0892, "step": 711 }, { "epoch": 0.65, "learning_rate": 3.979654747225648e-05, "loss": 0.1312, "step": 712 }, { "epoch": 0.65, "learning_rate": 3.978113440197287e-05, "loss": 0.0013, "step": 713 }, { "epoch": 0.65, "learning_rate": 3.9765721331689275e-05, "loss": 0.0219, "step": 714 }, { "epoch": 0.65, "learning_rate": 3.975030826140567e-05, "loss": 0.0009, "step": 715 }, { "epoch": 0.65, "learning_rate": 3.973489519112207e-05, "loss": 0.0377, "step": 716 }, { "epoch": 0.65, "learning_rate": 3.9719482120838475e-05, "loss": 0.0623, "step": 717 }, { "epoch": 0.65, "learning_rate": 3.970406905055487e-05, "loss": 0.001, "step": 718 }, { "epoch": 0.65, "learning_rate": 3.968865598027127e-05, "loss": 0.2698, "step": 719 }, { "epoch": 0.66, "learning_rate": 3.9673242909987675e-05, "loss": 0.1464, "step": 720 }, { "epoch": 0.66, "learning_rate": 3.965782983970407e-05, "loss": 0.0027, "step": 721 }, { "epoch": 0.66, "learning_rate": 3.964241676942047e-05, "loss": 0.0016, "step": 722 }, { "epoch": 0.66, "learning_rate": 3.9627003699136875e-05, "loss": 0.001, "step": 723 }, { "epoch": 0.66, "learning_rate": 3.961159062885327e-05, "loss": 0.0009, "step": 724 }, { "epoch": 0.66, "learning_rate": 3.959617755856967e-05, "loss": 0.0256, "step": 725 }, { "epoch": 0.66, "learning_rate": 3.958076448828607e-05, "loss": 0.0073, "step": 726 }, { "epoch": 0.66, "learning_rate": 3.9565351418002464e-05, "loss": 0.0014, "step": 727 }, { "epoch": 0.66, "learning_rate": 3.9549938347718866e-05, "loss": 0.001, "step": 728 }, { "epoch": 0.66, "learning_rate": 3.953452527743527e-05, "loss": 0.1047, "step": 729 }, { "epoch": 0.66, "learning_rate": 3.9519112207151664e-05, "loss": 0.714, "step": 730 }, { "epoch": 0.67, "learning_rate": 3.9503699136868066e-05, "loss": 0.0011, "step": 731 }, { "epoch": 0.67, "learning_rate": 3.948828606658447e-05, "loss": 0.0008, "step": 732 }, { "epoch": 0.67, "learning_rate": 3.9472872996300864e-05, "loss": 0.2677, "step": 733 }, { "epoch": 0.67, "learning_rate": 3.9457459926017266e-05, "loss": 0.0008, "step": 734 }, { "epoch": 0.67, "learning_rate": 3.944204685573367e-05, "loss": 0.4921, "step": 735 }, { "epoch": 0.67, "learning_rate": 3.9426633785450064e-05, "loss": 0.0467, "step": 736 }, { "epoch": 0.67, "learning_rate": 3.9411220715166466e-05, "loss": 0.0624, "step": 737 }, { "epoch": 0.67, "learning_rate": 3.939580764488287e-05, "loss": 0.0007, "step": 738 }, { "epoch": 0.67, "learning_rate": 3.9380394574599264e-05, "loss": 0.0022, "step": 739 }, { "epoch": 0.67, "learning_rate": 3.936498150431566e-05, "loss": 0.0178, "step": 740 }, { "epoch": 0.67, "learning_rate": 3.934956843403206e-05, "loss": 0.0025, "step": 741 }, { "epoch": 0.68, "learning_rate": 3.933415536374846e-05, "loss": 0.1864, "step": 742 }, { "epoch": 0.68, "learning_rate": 3.931874229346486e-05, "loss": 0.0007, "step": 743 }, { "epoch": 0.68, "learning_rate": 3.930332922318126e-05, "loss": 0.0027, "step": 744 }, { "epoch": 0.68, "learning_rate": 3.928791615289766e-05, "loss": 0.0123, "step": 745 }, { "epoch": 0.68, "learning_rate": 3.927250308261406e-05, "loss": 0.0016, "step": 746 }, { "epoch": 0.68, "learning_rate": 3.925709001233046e-05, "loss": 0.0013, "step": 747 }, { "epoch": 0.68, "learning_rate": 3.924167694204686e-05, "loss": 0.0012, "step": 748 }, { "epoch": 0.68, "learning_rate": 3.922626387176326e-05, "loss": 0.0061, "step": 749 }, { "epoch": 0.68, "learning_rate": 3.921085080147966e-05, "loss": 0.0011, "step": 750 }, { "epoch": 0.68, "learning_rate": 3.919543773119606e-05, "loss": 0.0061, "step": 751 }, { "epoch": 0.68, "learning_rate": 3.918002466091246e-05, "loss": 0.001, "step": 752 }, { "epoch": 0.69, "learning_rate": 3.9164611590628855e-05, "loss": 0.0018, "step": 753 }, { "epoch": 0.69, "learning_rate": 3.914919852034525e-05, "loss": 0.0008, "step": 754 }, { "epoch": 0.69, "learning_rate": 3.913378545006165e-05, "loss": 0.0007, "step": 755 }, { "epoch": 0.69, "learning_rate": 3.911837237977805e-05, "loss": 0.0008, "step": 756 }, { "epoch": 0.69, "learning_rate": 3.910295930949445e-05, "loss": 0.0008, "step": 757 }, { "epoch": 0.69, "learning_rate": 3.908754623921085e-05, "loss": 0.1326, "step": 758 }, { "epoch": 0.69, "learning_rate": 3.907213316892725e-05, "loss": 0.0006, "step": 759 }, { "epoch": 0.69, "learning_rate": 3.905672009864365e-05, "loss": 0.0012, "step": 760 }, { "epoch": 0.69, "learning_rate": 3.904130702836005e-05, "loss": 0.1199, "step": 761 }, { "epoch": 0.69, "learning_rate": 3.902589395807645e-05, "loss": 0.0006, "step": 762 }, { "epoch": 0.69, "learning_rate": 3.901048088779285e-05, "loss": 0.0006, "step": 763 }, { "epoch": 0.7, "learning_rate": 3.899506781750925e-05, "loss": 0.2864, "step": 764 }, { "epoch": 0.7, "learning_rate": 3.897965474722565e-05, "loss": 0.0006, "step": 765 }, { "epoch": 0.7, "learning_rate": 3.896424167694205e-05, "loss": 0.0014, "step": 766 }, { "epoch": 0.7, "learning_rate": 3.8948828606658446e-05, "loss": 0.0564, "step": 767 }, { "epoch": 0.7, "learning_rate": 3.893341553637485e-05, "loss": 0.0008, "step": 768 }, { "epoch": 0.7, "learning_rate": 3.8918002466091244e-05, "loss": 0.0014, "step": 769 }, { "epoch": 0.7, "learning_rate": 3.8902589395807647e-05, "loss": 0.0022, "step": 770 }, { "epoch": 0.7, "learning_rate": 3.888717632552404e-05, "loss": 0.0093, "step": 771 }, { "epoch": 0.7, "learning_rate": 3.8871763255240444e-05, "loss": 0.0008, "step": 772 }, { "epoch": 0.7, "learning_rate": 3.8856350184956847e-05, "loss": 0.4581, "step": 773 }, { "epoch": 0.7, "learning_rate": 3.884093711467324e-05, "loss": 0.007, "step": 774 }, { "epoch": 0.71, "learning_rate": 3.8825524044389644e-05, "loss": 0.2129, "step": 775 }, { "epoch": 0.71, "learning_rate": 3.881011097410605e-05, "loss": 0.0026, "step": 776 }, { "epoch": 0.71, "learning_rate": 3.879469790382244e-05, "loss": 0.0014, "step": 777 }, { "epoch": 0.71, "learning_rate": 3.8779284833538844e-05, "loss": 0.0021, "step": 778 }, { "epoch": 0.71, "learning_rate": 3.876387176325525e-05, "loss": 0.0013, "step": 779 }, { "epoch": 0.71, "learning_rate": 3.874845869297164e-05, "loss": 0.0018, "step": 780 }, { "epoch": 0.71, "learning_rate": 3.8733045622688045e-05, "loss": 0.0096, "step": 781 }, { "epoch": 0.71, "learning_rate": 3.871763255240444e-05, "loss": 0.0028, "step": 782 }, { "epoch": 0.71, "learning_rate": 3.8702219482120836e-05, "loss": 0.0742, "step": 783 }, { "epoch": 0.71, "learning_rate": 3.868680641183724e-05, "loss": 0.3255, "step": 784 }, { "epoch": 0.71, "learning_rate": 3.867139334155364e-05, "loss": 0.4515, "step": 785 }, { "epoch": 0.72, "learning_rate": 3.8655980271270036e-05, "loss": 0.0019, "step": 786 }, { "epoch": 0.72, "learning_rate": 3.864056720098644e-05, "loss": 0.0023, "step": 787 }, { "epoch": 0.72, "learning_rate": 3.862515413070284e-05, "loss": 0.0096, "step": 788 }, { "epoch": 0.72, "learning_rate": 3.8609741060419236e-05, "loss": 0.1568, "step": 789 }, { "epoch": 0.72, "learning_rate": 3.859432799013564e-05, "loss": 0.1501, "step": 790 }, { "epoch": 0.72, "learning_rate": 3.857891491985204e-05, "loss": 0.278, "step": 791 }, { "epoch": 0.72, "learning_rate": 3.8563501849568436e-05, "loss": 0.2169, "step": 792 }, { "epoch": 0.72, "learning_rate": 3.854808877928484e-05, "loss": 0.2421, "step": 793 }, { "epoch": 0.72, "learning_rate": 3.8532675709001234e-05, "loss": 0.1288, "step": 794 }, { "epoch": 0.72, "learning_rate": 3.8517262638717636e-05, "loss": 0.0946, "step": 795 }, { "epoch": 0.72, "learning_rate": 3.850184956843403e-05, "loss": 0.0706, "step": 796 }, { "epoch": 0.73, "learning_rate": 3.8486436498150434e-05, "loss": 0.0273, "step": 797 }, { "epoch": 0.73, "learning_rate": 3.847102342786683e-05, "loss": 0.0427, "step": 798 }, { "epoch": 0.73, "learning_rate": 3.845561035758323e-05, "loss": 0.0752, "step": 799 }, { "epoch": 0.73, "learning_rate": 3.8440197287299634e-05, "loss": 0.6192, "step": 800 }, { "epoch": 0.73, "learning_rate": 3.842478421701603e-05, "loss": 0.0923, "step": 801 }, { "epoch": 0.73, "learning_rate": 3.840937114673243e-05, "loss": 0.0593, "step": 802 }, { "epoch": 0.73, "learning_rate": 3.8393958076448834e-05, "loss": 0.0135, "step": 803 }, { "epoch": 0.73, "learning_rate": 3.837854500616523e-05, "loss": 0.0103, "step": 804 }, { "epoch": 0.73, "learning_rate": 3.836313193588163e-05, "loss": 0.0453, "step": 805 }, { "epoch": 0.73, "learning_rate": 3.8347718865598034e-05, "loss": 0.0046, "step": 806 }, { "epoch": 0.73, "learning_rate": 3.833230579531443e-05, "loss": 0.0033, "step": 807 }, { "epoch": 0.74, "learning_rate": 3.831689272503083e-05, "loss": 0.0636, "step": 808 }, { "epoch": 0.74, "learning_rate": 3.830147965474723e-05, "loss": 0.0041, "step": 809 }, { "epoch": 0.74, "learning_rate": 3.828606658446362e-05, "loss": 0.0028, "step": 810 }, { "epoch": 0.74, "learning_rate": 3.8270653514180025e-05, "loss": 0.0011, "step": 811 }, { "epoch": 0.74, "learning_rate": 3.825524044389643e-05, "loss": 0.0013, "step": 812 }, { "epoch": 0.74, "learning_rate": 3.823982737361282e-05, "loss": 0.0022, "step": 813 }, { "epoch": 0.74, "learning_rate": 3.8224414303329225e-05, "loss": 0.0692, "step": 814 }, { "epoch": 0.74, "learning_rate": 3.820900123304563e-05, "loss": 0.0011, "step": 815 }, { "epoch": 0.74, "learning_rate": 3.819358816276202e-05, "loss": 0.0015, "step": 816 }, { "epoch": 0.74, "learning_rate": 3.8178175092478425e-05, "loss": 0.0013, "step": 817 }, { "epoch": 0.74, "learning_rate": 3.816276202219483e-05, "loss": 0.0008, "step": 818 }, { "epoch": 0.75, "learning_rate": 3.814734895191122e-05, "loss": 0.0598, "step": 819 }, { "epoch": 0.75, "learning_rate": 3.8131935881627625e-05, "loss": 0.0122, "step": 820 }, { "epoch": 0.75, "learning_rate": 3.811652281134403e-05, "loss": 0.0009, "step": 821 }, { "epoch": 0.75, "learning_rate": 3.810110974106042e-05, "loss": 0.0014, "step": 822 }, { "epoch": 0.75, "learning_rate": 3.808569667077682e-05, "loss": 0.0012, "step": 823 }, { "epoch": 0.75, "learning_rate": 3.807028360049322e-05, "loss": 0.0015, "step": 824 }, { "epoch": 0.75, "learning_rate": 3.8054870530209616e-05, "loss": 0.2001, "step": 825 }, { "epoch": 0.75, "learning_rate": 3.803945745992602e-05, "loss": 0.0011, "step": 826 }, { "epoch": 0.75, "learning_rate": 3.8024044389642414e-05, "loss": 0.4526, "step": 827 }, { "epoch": 0.75, "learning_rate": 3.8008631319358816e-05, "loss": 0.0006, "step": 828 }, { "epoch": 0.76, "learning_rate": 3.799321824907522e-05, "loss": 0.2262, "step": 829 }, { "epoch": 0.76, "learning_rate": 3.7977805178791614e-05, "loss": 0.0106, "step": 830 }, { "epoch": 0.76, "learning_rate": 3.7962392108508016e-05, "loss": 0.0349, "step": 831 }, { "epoch": 0.76, "learning_rate": 3.794697903822442e-05, "loss": 0.0037, "step": 832 }, { "epoch": 0.76, "learning_rate": 3.7931565967940814e-05, "loss": 0.2165, "step": 833 }, { "epoch": 0.76, "learning_rate": 3.7916152897657216e-05, "loss": 0.0013, "step": 834 }, { "epoch": 0.76, "learning_rate": 3.790073982737362e-05, "loss": 0.1627, "step": 835 }, { "epoch": 0.76, "learning_rate": 3.7885326757090014e-05, "loss": 0.3021, "step": 836 }, { "epoch": 0.76, "learning_rate": 3.786991368680641e-05, "loss": 0.0008, "step": 837 }, { "epoch": 0.76, "learning_rate": 3.785450061652281e-05, "loss": 0.0006, "step": 838 }, { "epoch": 0.76, "learning_rate": 3.783908754623921e-05, "loss": 0.1849, "step": 839 }, { "epoch": 0.77, "learning_rate": 3.782367447595561e-05, "loss": 0.0009, "step": 840 }, { "epoch": 0.77, "learning_rate": 3.780826140567201e-05, "loss": 0.0008, "step": 841 }, { "epoch": 0.77, "learning_rate": 3.779284833538841e-05, "loss": 0.0006, "step": 842 }, { "epoch": 0.77, "learning_rate": 3.777743526510481e-05, "loss": 0.0008, "step": 843 }, { "epoch": 0.77, "learning_rate": 3.776202219482121e-05, "loss": 0.0011, "step": 844 }, { "epoch": 0.77, "learning_rate": 3.774660912453761e-05, "loss": 0.0068, "step": 845 }, { "epoch": 0.77, "learning_rate": 3.773119605425401e-05, "loss": 0.001, "step": 846 }, { "epoch": 0.77, "learning_rate": 3.771578298397041e-05, "loss": 0.0007, "step": 847 }, { "epoch": 0.77, "learning_rate": 3.770036991368681e-05, "loss": 0.0128, "step": 848 }, { "epoch": 0.77, "learning_rate": 3.768495684340321e-05, "loss": 0.0011, "step": 849 }, { "epoch": 0.77, "learning_rate": 3.7669543773119605e-05, "loss": 0.0006, "step": 850 }, { "epoch": 0.78, "learning_rate": 3.765413070283601e-05, "loss": 0.0006, "step": 851 }, { "epoch": 0.78, "learning_rate": 3.76387176325524e-05, "loss": 0.0108, "step": 852 }, { "epoch": 0.78, "learning_rate": 3.7623304562268806e-05, "loss": 0.0005, "step": 853 }, { "epoch": 0.78, "learning_rate": 3.76078914919852e-05, "loss": 0.0023, "step": 854 }, { "epoch": 0.78, "learning_rate": 3.75924784217016e-05, "loss": 0.0008, "step": 855 }, { "epoch": 0.78, "learning_rate": 3.7577065351418006e-05, "loss": 0.001, "step": 856 }, { "epoch": 0.78, "learning_rate": 3.75616522811344e-05, "loss": 0.0043, "step": 857 }, { "epoch": 0.78, "learning_rate": 3.7546239210850803e-05, "loss": 0.0004, "step": 858 }, { "epoch": 0.78, "learning_rate": 3.7530826140567206e-05, "loss": 0.003, "step": 859 }, { "epoch": 0.78, "learning_rate": 3.75154130702836e-05, "loss": 0.0089, "step": 860 }, { "epoch": 0.78, "learning_rate": 3.7500000000000003e-05, "loss": 0.0262, "step": 861 }, { "epoch": 0.79, "learning_rate": 3.7484586929716406e-05, "loss": 0.0048, "step": 862 }, { "epoch": 0.79, "learning_rate": 3.74691738594328e-05, "loss": 0.0007, "step": 863 }, { "epoch": 0.79, "learning_rate": 3.7453760789149204e-05, "loss": 0.0008, "step": 864 }, { "epoch": 0.79, "learning_rate": 3.74383477188656e-05, "loss": 0.0005, "step": 865 }, { "epoch": 0.79, "learning_rate": 3.7422934648581995e-05, "loss": 0.0005, "step": 866 }, { "epoch": 0.79, "learning_rate": 3.74075215782984e-05, "loss": 0.0008, "step": 867 }, { "epoch": 0.79, "learning_rate": 3.73921085080148e-05, "loss": 0.1344, "step": 868 }, { "epoch": 0.79, "learning_rate": 3.7376695437731195e-05, "loss": 0.001, "step": 869 }, { "epoch": 0.79, "learning_rate": 3.73612823674476e-05, "loss": 0.0005, "step": 870 }, { "epoch": 0.79, "learning_rate": 3.7345869297164e-05, "loss": 0.001, "step": 871 }, { "epoch": 0.79, "learning_rate": 3.7330456226880395e-05, "loss": 0.0254, "step": 872 }, { "epoch": 0.8, "learning_rate": 3.73150431565968e-05, "loss": 0.0006, "step": 873 }, { "epoch": 0.8, "learning_rate": 3.72996300863132e-05, "loss": 0.0004, "step": 874 }, { "epoch": 0.8, "learning_rate": 3.7284217016029595e-05, "loss": 0.0172, "step": 875 }, { "epoch": 0.8, "learning_rate": 3.7268803945746e-05, "loss": 0.0008, "step": 876 }, { "epoch": 0.8, "learning_rate": 3.72533908754624e-05, "loss": 0.0004, "step": 877 }, { "epoch": 0.8, "learning_rate": 3.7237977805178795e-05, "loss": 0.0031, "step": 878 }, { "epoch": 0.8, "learning_rate": 3.722256473489519e-05, "loss": 0.0017, "step": 879 }, { "epoch": 0.8, "learning_rate": 3.720715166461159e-05, "loss": 0.0003, "step": 880 }, { "epoch": 0.8, "learning_rate": 3.719173859432799e-05, "loss": 0.0004, "step": 881 }, { "epoch": 0.8, "learning_rate": 3.717632552404439e-05, "loss": 0.0004, "step": 882 }, { "epoch": 0.8, "learning_rate": 3.716091245376079e-05, "loss": 0.0003, "step": 883 }, { "epoch": 0.81, "learning_rate": 3.714549938347719e-05, "loss": 0.0003, "step": 884 }, { "epoch": 0.81, "learning_rate": 3.713008631319359e-05, "loss": 0.0006, "step": 885 }, { "epoch": 0.81, "learning_rate": 3.711467324290999e-05, "loss": 0.0004, "step": 886 }, { "epoch": 0.81, "learning_rate": 3.709926017262639e-05, "loss": 0.002, "step": 887 }, { "epoch": 0.81, "learning_rate": 3.708384710234279e-05, "loss": 0.0004, "step": 888 }, { "epoch": 0.81, "learning_rate": 3.706843403205919e-05, "loss": 0.0002, "step": 889 }, { "epoch": 0.81, "learning_rate": 3.705302096177559e-05, "loss": 0.0005, "step": 890 }, { "epoch": 0.81, "learning_rate": 3.703760789149199e-05, "loss": 0.0003, "step": 891 }, { "epoch": 0.81, "learning_rate": 3.7022194821208386e-05, "loss": 0.0006, "step": 892 }, { "epoch": 0.81, "learning_rate": 3.700678175092478e-05, "loss": 0.0005, "step": 893 }, { "epoch": 0.81, "learning_rate": 3.6991368680641184e-05, "loss": 0.0006, "step": 894 }, { "epoch": 0.82, "learning_rate": 3.6975955610357586e-05, "loss": 0.3184, "step": 895 }, { "epoch": 0.82, "learning_rate": 3.696054254007398e-05, "loss": 0.0006, "step": 896 }, { "epoch": 0.82, "learning_rate": 3.6945129469790384e-05, "loss": 0.0004, "step": 897 }, { "epoch": 0.82, "learning_rate": 3.692971639950678e-05, "loss": 0.963, "step": 898 }, { "epoch": 0.82, "learning_rate": 3.691430332922318e-05, "loss": 0.366, "step": 899 }, { "epoch": 0.82, "learning_rate": 3.6898890258939584e-05, "loss": 0.0003, "step": 900 }, { "epoch": 0.82, "learning_rate": 3.688347718865598e-05, "loss": 0.0002, "step": 901 }, { "epoch": 0.82, "learning_rate": 3.686806411837238e-05, "loss": 0.0004, "step": 902 }, { "epoch": 0.82, "learning_rate": 3.6852651048088784e-05, "loss": 0.0003, "step": 903 }, { "epoch": 0.82, "learning_rate": 3.683723797780518e-05, "loss": 0.0004, "step": 904 }, { "epoch": 0.82, "learning_rate": 3.682182490752158e-05, "loss": 0.0003, "step": 905 }, { "epoch": 0.83, "learning_rate": 3.680641183723798e-05, "loss": 0.0004, "step": 906 }, { "epoch": 0.83, "learning_rate": 3.679099876695438e-05, "loss": 0.0009, "step": 907 }, { "epoch": 0.83, "learning_rate": 3.6775585696670775e-05, "loss": 0.4215, "step": 908 }, { "epoch": 0.83, "learning_rate": 3.676017262638718e-05, "loss": 0.0005, "step": 909 }, { "epoch": 0.83, "learning_rate": 3.674475955610357e-05, "loss": 0.0004, "step": 910 }, { "epoch": 0.83, "learning_rate": 3.6729346485819975e-05, "loss": 0.0003, "step": 911 }, { "epoch": 0.83, "learning_rate": 3.671393341553638e-05, "loss": 0.0004, "step": 912 }, { "epoch": 0.83, "learning_rate": 3.669852034525277e-05, "loss": 0.3228, "step": 913 }, { "epoch": 0.83, "learning_rate": 3.6683107274969175e-05, "loss": 0.0496, "step": 914 }, { "epoch": 0.83, "learning_rate": 3.666769420468558e-05, "loss": 0.0004, "step": 915 }, { "epoch": 0.83, "learning_rate": 3.665228113440197e-05, "loss": 0.0003, "step": 916 }, { "epoch": 0.84, "learning_rate": 3.6636868064118375e-05, "loss": 0.0004, "step": 917 }, { "epoch": 0.84, "learning_rate": 3.662145499383478e-05, "loss": 0.0004, "step": 918 }, { "epoch": 0.84, "learning_rate": 3.660604192355117e-05, "loss": 0.0004, "step": 919 }, { "epoch": 0.84, "learning_rate": 3.6590628853267576e-05, "loss": 0.0005, "step": 920 }, { "epoch": 0.84, "learning_rate": 3.657521578298397e-05, "loss": 0.0004, "step": 921 }, { "epoch": 0.84, "learning_rate": 3.6559802712700367e-05, "loss": 0.0005, "step": 922 }, { "epoch": 0.84, "learning_rate": 3.654438964241677e-05, "loss": 0.0003, "step": 923 }, { "epoch": 0.84, "learning_rate": 3.652897657213317e-05, "loss": 0.1785, "step": 924 }, { "epoch": 0.84, "learning_rate": 3.6513563501849567e-05, "loss": 0.0004, "step": 925 }, { "epoch": 0.84, "learning_rate": 3.649815043156597e-05, "loss": 0.0004, "step": 926 }, { "epoch": 0.84, "learning_rate": 3.648273736128237e-05, "loss": 0.0005, "step": 927 }, { "epoch": 0.85, "learning_rate": 3.646732429099877e-05, "loss": 0.0049, "step": 928 }, { "epoch": 0.85, "learning_rate": 3.645191122071517e-05, "loss": 0.0004, "step": 929 }, { "epoch": 0.85, "learning_rate": 3.643649815043157e-05, "loss": 0.0008, "step": 930 }, { "epoch": 0.85, "learning_rate": 3.642108508014797e-05, "loss": 0.0003, "step": 931 }, { "epoch": 0.85, "learning_rate": 3.640567200986437e-05, "loss": 0.0004, "step": 932 }, { "epoch": 0.85, "learning_rate": 3.639025893958077e-05, "loss": 0.0005, "step": 933 }, { "epoch": 0.85, "learning_rate": 3.637484586929717e-05, "loss": 0.0005, "step": 934 }, { "epoch": 0.85, "learning_rate": 3.635943279901356e-05, "loss": 0.0006, "step": 935 }, { "epoch": 0.85, "learning_rate": 3.6344019728729965e-05, "loss": 0.0007, "step": 936 }, { "epoch": 0.85, "learning_rate": 3.632860665844636e-05, "loss": 0.0794, "step": 937 }, { "epoch": 0.85, "learning_rate": 3.631319358816276e-05, "loss": 0.004, "step": 938 }, { "epoch": 0.86, "learning_rate": 3.6297780517879165e-05, "loss": 0.0006, "step": 939 }, { "epoch": 0.86, "learning_rate": 3.628236744759556e-05, "loss": 0.0003, "step": 940 }, { "epoch": 0.86, "learning_rate": 3.626695437731196e-05, "loss": 0.0005, "step": 941 }, { "epoch": 0.86, "learning_rate": 3.6251541307028365e-05, "loss": 0.0003, "step": 942 }, { "epoch": 0.86, "learning_rate": 3.623612823674476e-05, "loss": 0.0003, "step": 943 }, { "epoch": 0.86, "learning_rate": 3.622071516646116e-05, "loss": 0.5048, "step": 944 }, { "epoch": 0.86, "learning_rate": 3.6205302096177565e-05, "loss": 0.4882, "step": 945 }, { "epoch": 0.86, "learning_rate": 3.618988902589396e-05, "loss": 0.0007, "step": 946 }, { "epoch": 0.86, "learning_rate": 3.617447595561036e-05, "loss": 0.0006, "step": 947 }, { "epoch": 0.86, "learning_rate": 3.615906288532676e-05, "loss": 0.0005, "step": 948 }, { "epoch": 0.86, "learning_rate": 3.6143649815043154e-05, "loss": 0.001, "step": 949 }, { "epoch": 0.87, "learning_rate": 3.6128236744759556e-05, "loss": 0.0004, "step": 950 }, { "epoch": 0.87, "learning_rate": 3.611282367447596e-05, "loss": 0.0008, "step": 951 }, { "epoch": 0.87, "learning_rate": 3.6097410604192354e-05, "loss": 0.0147, "step": 952 }, { "epoch": 0.87, "learning_rate": 3.6081997533908756e-05, "loss": 0.0006, "step": 953 }, { "epoch": 0.87, "learning_rate": 3.606658446362516e-05, "loss": 0.0008, "step": 954 }, { "epoch": 0.87, "learning_rate": 3.6051171393341554e-05, "loss": 0.0009, "step": 955 }, { "epoch": 0.87, "learning_rate": 3.6035758323057956e-05, "loss": 0.2269, "step": 956 }, { "epoch": 0.87, "learning_rate": 3.602034525277436e-05, "loss": 0.001, "step": 957 }, { "epoch": 0.87, "learning_rate": 3.6004932182490754e-05, "loss": 0.012, "step": 958 }, { "epoch": 0.87, "learning_rate": 3.5989519112207156e-05, "loss": 0.0018, "step": 959 }, { "epoch": 0.87, "learning_rate": 3.597410604192356e-05, "loss": 0.0012, "step": 960 }, { "epoch": 0.88, "learning_rate": 3.5958692971639954e-05, "loss": 0.001, "step": 961 }, { "epoch": 0.88, "learning_rate": 3.594327990135635e-05, "loss": 0.0021, "step": 962 }, { "epoch": 0.88, "learning_rate": 3.592786683107275e-05, "loss": 0.0015, "step": 963 }, { "epoch": 0.88, "learning_rate": 3.591245376078915e-05, "loss": 0.0012, "step": 964 }, { "epoch": 0.88, "learning_rate": 3.589704069050555e-05, "loss": 0.0017, "step": 965 }, { "epoch": 0.88, "learning_rate": 3.588162762022195e-05, "loss": 0.0372, "step": 966 }, { "epoch": 0.88, "learning_rate": 3.586621454993835e-05, "loss": 0.0024, "step": 967 }, { "epoch": 0.88, "learning_rate": 3.585080147965475e-05, "loss": 0.0018, "step": 968 }, { "epoch": 0.88, "learning_rate": 3.583538840937115e-05, "loss": 0.0013, "step": 969 }, { "epoch": 0.88, "learning_rate": 3.581997533908755e-05, "loss": 0.3861, "step": 970 }, { "epoch": 0.88, "learning_rate": 3.580456226880395e-05, "loss": 0.281, "step": 971 }, { "epoch": 0.89, "learning_rate": 3.5789149198520345e-05, "loss": 0.0025, "step": 972 }, { "epoch": 0.89, "learning_rate": 3.577373612823675e-05, "loss": 0.0006, "step": 973 }, { "epoch": 0.89, "learning_rate": 3.575832305795315e-05, "loss": 0.001, "step": 974 }, { "epoch": 0.89, "learning_rate": 3.5742909987669545e-05, "loss": 0.3288, "step": 975 }, { "epoch": 0.89, "learning_rate": 3.572749691738595e-05, "loss": 0.0015, "step": 976 }, { "epoch": 0.89, "learning_rate": 3.571208384710234e-05, "loss": 0.0148, "step": 977 }, { "epoch": 0.89, "learning_rate": 3.569667077681874e-05, "loss": 0.0049, "step": 978 }, { "epoch": 0.89, "learning_rate": 3.568125770653514e-05, "loss": 0.001, "step": 979 }, { "epoch": 0.89, "learning_rate": 3.566584463625154e-05, "loss": 0.2691, "step": 980 }, { "epoch": 0.89, "learning_rate": 3.565043156596794e-05, "loss": 0.0017, "step": 981 }, { "epoch": 0.89, "learning_rate": 3.563501849568434e-05, "loss": 0.004, "step": 982 }, { "epoch": 0.9, "learning_rate": 3.561960542540074e-05, "loss": 0.0037, "step": 983 }, { "epoch": 0.9, "learning_rate": 3.560419235511714e-05, "loss": 0.1971, "step": 984 }, { "epoch": 0.9, "learning_rate": 3.558877928483354e-05, "loss": 0.002, "step": 985 }, { "epoch": 0.9, "learning_rate": 3.557336621454994e-05, "loss": 0.1229, "step": 986 }, { "epoch": 0.9, "learning_rate": 3.555795314426634e-05, "loss": 0.0672, "step": 987 }, { "epoch": 0.9, "learning_rate": 3.554254007398274e-05, "loss": 0.0057, "step": 988 }, { "epoch": 0.9, "learning_rate": 3.552712700369914e-05, "loss": 0.0055, "step": 989 }, { "epoch": 0.9, "learning_rate": 3.551171393341554e-05, "loss": 0.0078, "step": 990 }, { "epoch": 0.9, "learning_rate": 3.5496300863131934e-05, "loss": 0.0752, "step": 991 }, { "epoch": 0.9, "learning_rate": 3.5480887792848337e-05, "loss": 0.0034, "step": 992 }, { "epoch": 0.9, "learning_rate": 3.546547472256473e-05, "loss": 0.0117, "step": 993 }, { "epoch": 0.91, "learning_rate": 3.5450061652281134e-05, "loss": 0.0052, "step": 994 }, { "epoch": 0.91, "learning_rate": 3.5434648581997537e-05, "loss": 0.0074, "step": 995 }, { "epoch": 0.91, "learning_rate": 3.541923551171393e-05, "loss": 0.0018, "step": 996 }, { "epoch": 0.91, "learning_rate": 3.5403822441430334e-05, "loss": 0.001, "step": 997 }, { "epoch": 0.91, "learning_rate": 3.538840937114674e-05, "loss": 0.075, "step": 998 }, { "epoch": 0.91, "learning_rate": 3.537299630086313e-05, "loss": 0.2192, "step": 999 }, { "epoch": 0.91, "learning_rate": 3.5357583230579534e-05, "loss": 0.0014, "step": 1000 }, { "epoch": 0.91, "learning_rate": 3.534217016029594e-05, "loss": 0.0011, "step": 1001 }, { "epoch": 0.91, "learning_rate": 3.532675709001233e-05, "loss": 0.0063, "step": 1002 }, { "epoch": 0.91, "learning_rate": 3.5311344019728735e-05, "loss": 0.0039, "step": 1003 }, { "epoch": 0.91, "learning_rate": 3.529593094944513e-05, "loss": 0.0007, "step": 1004 }, { "epoch": 0.92, "learning_rate": 3.5280517879161526e-05, "loss": 0.0015, "step": 1005 }, { "epoch": 0.92, "learning_rate": 3.526510480887793e-05, "loss": 0.0015, "step": 1006 }, { "epoch": 0.92, "learning_rate": 3.524969173859433e-05, "loss": 0.004, "step": 1007 }, { "epoch": 0.92, "learning_rate": 3.5234278668310726e-05, "loss": 0.2604, "step": 1008 }, { "epoch": 0.92, "learning_rate": 3.521886559802713e-05, "loss": 0.0016, "step": 1009 }, { "epoch": 0.92, "learning_rate": 3.520345252774353e-05, "loss": 0.1464, "step": 1010 }, { "epoch": 0.92, "learning_rate": 3.5188039457459926e-05, "loss": 0.0013, "step": 1011 }, { "epoch": 0.92, "learning_rate": 3.517262638717633e-05, "loss": 0.0009, "step": 1012 }, { "epoch": 0.92, "learning_rate": 3.515721331689273e-05, "loss": 0.0005, "step": 1013 }, { "epoch": 0.92, "learning_rate": 3.5141800246609126e-05, "loss": 0.0008, "step": 1014 }, { "epoch": 0.92, "learning_rate": 3.512638717632553e-05, "loss": 0.0011, "step": 1015 }, { "epoch": 0.93, "learning_rate": 3.511097410604193e-05, "loss": 0.0007, "step": 1016 }, { "epoch": 0.93, "learning_rate": 3.5095561035758326e-05, "loss": 0.0006, "step": 1017 }, { "epoch": 0.93, "learning_rate": 3.508014796547472e-05, "loss": 0.0007, "step": 1018 }, { "epoch": 0.93, "learning_rate": 3.5064734895191124e-05, "loss": 0.005, "step": 1019 }, { "epoch": 0.93, "learning_rate": 3.504932182490752e-05, "loss": 0.0126, "step": 1020 }, { "epoch": 0.93, "learning_rate": 3.503390875462392e-05, "loss": 0.0005, "step": 1021 }, { "epoch": 0.93, "learning_rate": 3.5018495684340324e-05, "loss": 0.001, "step": 1022 }, { "epoch": 0.93, "learning_rate": 3.500308261405672e-05, "loss": 0.0008, "step": 1023 }, { "epoch": 0.93, "learning_rate": 3.498766954377312e-05, "loss": 0.0005, "step": 1024 }, { "epoch": 0.93, "learning_rate": 3.4972256473489524e-05, "loss": 0.0005, "step": 1025 }, { "epoch": 0.93, "learning_rate": 3.495684340320592e-05, "loss": 0.0061, "step": 1026 }, { "epoch": 0.94, "learning_rate": 3.494143033292232e-05, "loss": 0.001, "step": 1027 }, { "epoch": 0.94, "learning_rate": 3.4926017262638724e-05, "loss": 0.0004, "step": 1028 }, { "epoch": 0.94, "learning_rate": 3.491060419235512e-05, "loss": 0.0006, "step": 1029 }, { "epoch": 0.94, "learning_rate": 3.489519112207152e-05, "loss": 0.0008, "step": 1030 }, { "epoch": 0.94, "learning_rate": 3.487977805178792e-05, "loss": 0.4525, "step": 1031 }, { "epoch": 0.94, "learning_rate": 3.486436498150432e-05, "loss": 0.4309, "step": 1032 }, { "epoch": 0.94, "learning_rate": 3.4848951911220715e-05, "loss": 0.0904, "step": 1033 }, { "epoch": 0.94, "learning_rate": 3.483353884093712e-05, "loss": 0.3958, "step": 1034 }, { "epoch": 0.94, "learning_rate": 3.481812577065351e-05, "loss": 0.0362, "step": 1035 }, { "epoch": 0.94, "learning_rate": 3.4802712700369915e-05, "loss": 0.001, "step": 1036 }, { "epoch": 0.94, "learning_rate": 3.478729963008632e-05, "loss": 0.253, "step": 1037 }, { "epoch": 0.95, "learning_rate": 3.477188655980271e-05, "loss": 0.0017, "step": 1038 }, { "epoch": 0.95, "learning_rate": 3.4756473489519115e-05, "loss": 0.0467, "step": 1039 }, { "epoch": 0.95, "learning_rate": 3.474106041923552e-05, "loss": 0.2376, "step": 1040 }, { "epoch": 0.95, "learning_rate": 3.472564734895191e-05, "loss": 0.0014, "step": 1041 }, { "epoch": 0.95, "learning_rate": 3.4710234278668315e-05, "loss": 0.0014, "step": 1042 }, { "epoch": 0.95, "learning_rate": 3.469482120838472e-05, "loss": 0.0013, "step": 1043 }, { "epoch": 0.95, "learning_rate": 3.467940813810111e-05, "loss": 0.3713, "step": 1044 }, { "epoch": 0.95, "learning_rate": 3.4663995067817515e-05, "loss": 0.0062, "step": 1045 }, { "epoch": 0.95, "learning_rate": 3.464858199753391e-05, "loss": 0.0093, "step": 1046 }, { "epoch": 0.95, "learning_rate": 3.4633168927250306e-05, "loss": 0.0013, "step": 1047 }, { "epoch": 0.95, "learning_rate": 3.461775585696671e-05, "loss": 0.0628, "step": 1048 }, { "epoch": 0.96, "learning_rate": 3.4602342786683104e-05, "loss": 0.2886, "step": 1049 }, { "epoch": 0.96, "learning_rate": 3.4586929716399506e-05, "loss": 0.0012, "step": 1050 }, { "epoch": 0.96, "learning_rate": 3.457151664611591e-05, "loss": 0.0017, "step": 1051 }, { "epoch": 0.96, "learning_rate": 3.4556103575832304e-05, "loss": 0.2664, "step": 1052 }, { "epoch": 0.96, "learning_rate": 3.4540690505548706e-05, "loss": 0.002, "step": 1053 }, { "epoch": 0.96, "learning_rate": 3.452527743526511e-05, "loss": 0.3553, "step": 1054 }, { "epoch": 0.96, "learning_rate": 3.4509864364981504e-05, "loss": 0.1123, "step": 1055 }, { "epoch": 0.96, "learning_rate": 3.4494451294697906e-05, "loss": 0.3231, "step": 1056 }, { "epoch": 0.96, "learning_rate": 3.447903822441431e-05, "loss": 0.0034, "step": 1057 }, { "epoch": 0.96, "learning_rate": 3.4463625154130704e-05, "loss": 0.1834, "step": 1058 }, { "epoch": 0.96, "learning_rate": 3.4448212083847106e-05, "loss": 0.0121, "step": 1059 }, { "epoch": 0.97, "learning_rate": 3.44327990135635e-05, "loss": 0.1922, "step": 1060 }, { "epoch": 0.97, "learning_rate": 3.44173859432799e-05, "loss": 0.0315, "step": 1061 }, { "epoch": 0.97, "learning_rate": 3.44019728729963e-05, "loss": 0.0335, "step": 1062 }, { "epoch": 0.97, "learning_rate": 3.43865598027127e-05, "loss": 0.0226, "step": 1063 }, { "epoch": 0.97, "learning_rate": 3.43711467324291e-05, "loss": 0.0236, "step": 1064 }, { "epoch": 0.97, "learning_rate": 3.43557336621455e-05, "loss": 0.005, "step": 1065 }, { "epoch": 0.97, "learning_rate": 3.43403205918619e-05, "loss": 0.0091, "step": 1066 }, { "epoch": 0.97, "learning_rate": 3.43249075215783e-05, "loss": 0.0053, "step": 1067 }, { "epoch": 0.97, "learning_rate": 3.43094944512947e-05, "loss": 0.0068, "step": 1068 }, { "epoch": 0.97, "learning_rate": 3.42940813810111e-05, "loss": 0.0026, "step": 1069 }, { "epoch": 0.97, "learning_rate": 3.42786683107275e-05, "loss": 0.0027, "step": 1070 }, { "epoch": 0.98, "learning_rate": 3.42632552404439e-05, "loss": 0.0541, "step": 1071 }, { "epoch": 0.98, "learning_rate": 3.42478421701603e-05, "loss": 0.0021, "step": 1072 }, { "epoch": 0.98, "learning_rate": 3.42324290998767e-05, "loss": 0.0019, "step": 1073 }, { "epoch": 0.98, "learning_rate": 3.421701602959309e-05, "loss": 0.0014, "step": 1074 }, { "epoch": 0.98, "learning_rate": 3.4201602959309496e-05, "loss": 0.0218, "step": 1075 }, { "epoch": 0.98, "learning_rate": 3.418618988902589e-05, "loss": 0.0016, "step": 1076 }, { "epoch": 0.98, "learning_rate": 3.417077681874229e-05, "loss": 0.001, "step": 1077 }, { "epoch": 0.98, "learning_rate": 3.4155363748458696e-05, "loss": 0.0012, "step": 1078 }, { "epoch": 0.98, "learning_rate": 3.413995067817509e-05, "loss": 0.2197, "step": 1079 }, { "epoch": 0.98, "learning_rate": 3.4124537607891493e-05, "loss": 0.0009, "step": 1080 }, { "epoch": 0.98, "learning_rate": 3.4109124537607896e-05, "loss": 0.4255, "step": 1081 }, { "epoch": 0.99, "learning_rate": 3.409371146732429e-05, "loss": 0.0007, "step": 1082 }, { "epoch": 0.99, "learning_rate": 3.4078298397040693e-05, "loss": 0.0018, "step": 1083 }, { "epoch": 0.99, "learning_rate": 3.4062885326757096e-05, "loss": 0.0013, "step": 1084 }, { "epoch": 0.99, "learning_rate": 3.404747225647349e-05, "loss": 0.0015, "step": 1085 }, { "epoch": 0.99, "learning_rate": 3.4032059186189894e-05, "loss": 0.0027, "step": 1086 }, { "epoch": 0.99, "learning_rate": 3.401664611590629e-05, "loss": 0.0017, "step": 1087 }, { "epoch": 0.99, "learning_rate": 3.400123304562269e-05, "loss": 0.3513, "step": 1088 }, { "epoch": 0.99, "learning_rate": 3.398581997533909e-05, "loss": 0.0033, "step": 1089 }, { "epoch": 0.99, "learning_rate": 3.397040690505549e-05, "loss": 0.0328, "step": 1090 }, { "epoch": 0.99, "learning_rate": 3.3954993834771885e-05, "loss": 0.0497, "step": 1091 }, { "epoch": 0.99, "learning_rate": 3.393958076448829e-05, "loss": 0.0048, "step": 1092 }, { "epoch": 1.0, "learning_rate": 3.392416769420469e-05, "loss": 0.003, "step": 1093 }, { "epoch": 1.0, "learning_rate": 3.3908754623921085e-05, "loss": 0.0018, "step": 1094 }, { "epoch": 1.0, "learning_rate": 3.389334155363749e-05, "loss": 0.0925, "step": 1095 }, { "epoch": 1.0, "learning_rate": 3.387792848335389e-05, "loss": 0.0055, "step": 1096 }, { "epoch": 1.0, "learning_rate": 3.3862515413070285e-05, "loss": 0.0048, "step": 1097 }, { "epoch": 1.0, "learning_rate": 3.384710234278669e-05, "loss": 0.002, "step": 1098 }, { "epoch": 1.0, "eval_accuracy": 0.9888433515482696, "eval_loss": 0.046589694917201996, "eval_runtime": 41.2835, "eval_samples_per_second": 106.386, "eval_steps_per_second": 6.661, "step": 1098 }, { "epoch": 1.0, "learning_rate": 3.383168927250309e-05, "loss": 0.001, "step": 1099 }, { "epoch": 1.0, "learning_rate": 3.3816276202219485e-05, "loss": 0.0033, "step": 1100 }, { "epoch": 1.0, "learning_rate": 3.380086313193588e-05, "loss": 0.0009, "step": 1101 }, { "epoch": 1.0, "learning_rate": 3.378545006165228e-05, "loss": 0.0018, "step": 1102 }, { "epoch": 1.0, "learning_rate": 3.377003699136868e-05, "loss": 0.0011, "step": 1103 }, { "epoch": 1.01, "learning_rate": 3.375462392108508e-05, "loss": 0.0013, "step": 1104 }, { "epoch": 1.01, "learning_rate": 3.373921085080148e-05, "loss": 0.0012, "step": 1105 }, { "epoch": 1.01, "learning_rate": 3.372379778051788e-05, "loss": 0.0084, "step": 1106 }, { "epoch": 1.01, "learning_rate": 3.370838471023428e-05, "loss": 0.0011, "step": 1107 }, { "epoch": 1.01, "learning_rate": 3.369297163995068e-05, "loss": 0.0018, "step": 1108 }, { "epoch": 1.01, "learning_rate": 3.367755856966708e-05, "loss": 0.001, "step": 1109 }, { "epoch": 1.01, "learning_rate": 3.366214549938348e-05, "loss": 0.0019, "step": 1110 }, { "epoch": 1.01, "learning_rate": 3.364673242909988e-05, "loss": 0.0014, "step": 1111 }, { "epoch": 1.01, "learning_rate": 3.363131935881628e-05, "loss": 0.001, "step": 1112 }, { "epoch": 1.01, "learning_rate": 3.361590628853268e-05, "loss": 0.4436, "step": 1113 }, { "epoch": 1.01, "learning_rate": 3.3600493218249076e-05, "loss": 0.0011, "step": 1114 }, { "epoch": 1.02, "learning_rate": 3.358508014796548e-05, "loss": 0.0023, "step": 1115 }, { "epoch": 1.02, "learning_rate": 3.3569667077681874e-05, "loss": 0.0025, "step": 1116 }, { "epoch": 1.02, "learning_rate": 3.3554254007398276e-05, "loss": 0.0006, "step": 1117 }, { "epoch": 1.02, "learning_rate": 3.353884093711467e-05, "loss": 0.0007, "step": 1118 }, { "epoch": 1.02, "learning_rate": 3.3523427866831074e-05, "loss": 0.0012, "step": 1119 }, { "epoch": 1.02, "learning_rate": 3.350801479654747e-05, "loss": 0.001, "step": 1120 }, { "epoch": 1.02, "learning_rate": 3.349260172626387e-05, "loss": 0.0012, "step": 1121 }, { "epoch": 1.02, "learning_rate": 3.3477188655980274e-05, "loss": 0.0007, "step": 1122 }, { "epoch": 1.02, "learning_rate": 3.346177558569667e-05, "loss": 0.0008, "step": 1123 }, { "epoch": 1.02, "learning_rate": 3.344636251541307e-05, "loss": 0.0013, "step": 1124 }, { "epoch": 1.02, "learning_rate": 3.3430949445129474e-05, "loss": 0.0009, "step": 1125 }, { "epoch": 1.03, "learning_rate": 3.341553637484587e-05, "loss": 0.001, "step": 1126 }, { "epoch": 1.03, "learning_rate": 3.340012330456227e-05, "loss": 0.0015, "step": 1127 }, { "epoch": 1.03, "learning_rate": 3.3384710234278674e-05, "loss": 0.0014, "step": 1128 }, { "epoch": 1.03, "learning_rate": 3.336929716399507e-05, "loss": 0.0013, "step": 1129 }, { "epoch": 1.03, "learning_rate": 3.3353884093711465e-05, "loss": 0.0025, "step": 1130 }, { "epoch": 1.03, "learning_rate": 3.333847102342787e-05, "loss": 0.0018, "step": 1131 }, { "epoch": 1.03, "learning_rate": 3.332305795314426e-05, "loss": 0.001, "step": 1132 }, { "epoch": 1.03, "learning_rate": 3.3307644882860665e-05, "loss": 0.0012, "step": 1133 }, { "epoch": 1.03, "learning_rate": 3.329223181257707e-05, "loss": 0.0005, "step": 1134 }, { "epoch": 1.03, "learning_rate": 3.327681874229346e-05, "loss": 0.0016, "step": 1135 }, { "epoch": 1.03, "learning_rate": 3.3261405672009865e-05, "loss": 0.0007, "step": 1136 }, { "epoch": 1.04, "learning_rate": 3.324599260172627e-05, "loss": 0.0005, "step": 1137 }, { "epoch": 1.04, "learning_rate": 3.323057953144266e-05, "loss": 0.0006, "step": 1138 }, { "epoch": 1.04, "learning_rate": 3.3215166461159065e-05, "loss": 0.0005, "step": 1139 }, { "epoch": 1.04, "learning_rate": 3.319975339087547e-05, "loss": 0.2421, "step": 1140 }, { "epoch": 1.04, "learning_rate": 3.318434032059186e-05, "loss": 0.0007, "step": 1141 }, { "epoch": 1.04, "learning_rate": 3.3168927250308266e-05, "loss": 0.0006, "step": 1142 }, { "epoch": 1.04, "learning_rate": 3.315351418002466e-05, "loss": 0.0006, "step": 1143 }, { "epoch": 1.04, "learning_rate": 3.3138101109741057e-05, "loss": 0.0005, "step": 1144 }, { "epoch": 1.04, "learning_rate": 3.312268803945746e-05, "loss": 0.0005, "step": 1145 }, { "epoch": 1.04, "learning_rate": 3.310727496917386e-05, "loss": 0.0008, "step": 1146 }, { "epoch": 1.04, "learning_rate": 3.3091861898890257e-05, "loss": 0.0005, "step": 1147 }, { "epoch": 1.05, "learning_rate": 3.307644882860666e-05, "loss": 0.0987, "step": 1148 }, { "epoch": 1.05, "learning_rate": 3.306103575832306e-05, "loss": 0.0085, "step": 1149 }, { "epoch": 1.05, "learning_rate": 3.304562268803946e-05, "loss": 0.0024, "step": 1150 }, { "epoch": 1.05, "learning_rate": 3.303020961775586e-05, "loss": 0.0003, "step": 1151 }, { "epoch": 1.05, "learning_rate": 3.301479654747226e-05, "loss": 0.0017, "step": 1152 }, { "epoch": 1.05, "learning_rate": 3.299938347718866e-05, "loss": 0.0036, "step": 1153 }, { "epoch": 1.05, "learning_rate": 3.298397040690506e-05, "loss": 0.0009, "step": 1154 }, { "epoch": 1.05, "learning_rate": 3.296855733662146e-05, "loss": 0.0021, "step": 1155 }, { "epoch": 1.05, "learning_rate": 3.295314426633786e-05, "loss": 0.0005, "step": 1156 }, { "epoch": 1.05, "learning_rate": 3.293773119605425e-05, "loss": 0.0003, "step": 1157 }, { "epoch": 1.05, "learning_rate": 3.2922318125770655e-05, "loss": 0.0008, "step": 1158 }, { "epoch": 1.06, "learning_rate": 3.290690505548705e-05, "loss": 0.0008, "step": 1159 }, { "epoch": 1.06, "learning_rate": 3.289149198520345e-05, "loss": 0.0012, "step": 1160 }, { "epoch": 1.06, "learning_rate": 3.2876078914919855e-05, "loss": 0.0005, "step": 1161 }, { "epoch": 1.06, "learning_rate": 3.286066584463625e-05, "loss": 0.0005, "step": 1162 }, { "epoch": 1.06, "learning_rate": 3.284525277435265e-05, "loss": 0.0005, "step": 1163 }, { "epoch": 1.06, "learning_rate": 3.2829839704069055e-05, "loss": 0.0005, "step": 1164 }, { "epoch": 1.06, "learning_rate": 3.281442663378545e-05, "loss": 0.0224, "step": 1165 }, { "epoch": 1.06, "learning_rate": 3.279901356350185e-05, "loss": 0.0006, "step": 1166 }, { "epoch": 1.06, "learning_rate": 3.2783600493218255e-05, "loss": 0.0004, "step": 1167 }, { "epoch": 1.06, "learning_rate": 3.276818742293465e-05, "loss": 0.0005, "step": 1168 }, { "epoch": 1.06, "learning_rate": 3.275277435265105e-05, "loss": 0.0004, "step": 1169 }, { "epoch": 1.07, "learning_rate": 3.273736128236745e-05, "loss": 0.0003, "step": 1170 }, { "epoch": 1.07, "learning_rate": 3.272194821208385e-05, "loss": 0.2164, "step": 1171 }, { "epoch": 1.07, "learning_rate": 3.2706535141800246e-05, "loss": 0.0007, "step": 1172 }, { "epoch": 1.07, "learning_rate": 3.269112207151665e-05, "loss": 0.0004, "step": 1173 }, { "epoch": 1.07, "learning_rate": 3.2675709001233044e-05, "loss": 0.0002, "step": 1174 }, { "epoch": 1.07, "learning_rate": 3.2660295930949446e-05, "loss": 0.426, "step": 1175 }, { "epoch": 1.07, "learning_rate": 3.264488286066585e-05, "loss": 0.0003, "step": 1176 }, { "epoch": 1.07, "learning_rate": 3.2629469790382244e-05, "loss": 0.0002, "step": 1177 }, { "epoch": 1.07, "learning_rate": 3.2614056720098646e-05, "loss": 0.0004, "step": 1178 }, { "epoch": 1.07, "learning_rate": 3.259864364981505e-05, "loss": 0.0004, "step": 1179 }, { "epoch": 1.07, "learning_rate": 3.2583230579531444e-05, "loss": 0.0009, "step": 1180 }, { "epoch": 1.08, "learning_rate": 3.2567817509247846e-05, "loss": 0.0004, "step": 1181 }, { "epoch": 1.08, "learning_rate": 3.255240443896425e-05, "loss": 0.4271, "step": 1182 }, { "epoch": 1.08, "learning_rate": 3.2536991368680644e-05, "loss": 0.0004, "step": 1183 }, { "epoch": 1.08, "learning_rate": 3.2521578298397046e-05, "loss": 0.0004, "step": 1184 }, { "epoch": 1.08, "learning_rate": 3.250616522811344e-05, "loss": 0.0005, "step": 1185 }, { "epoch": 1.08, "learning_rate": 3.249075215782984e-05, "loss": 0.0003, "step": 1186 }, { "epoch": 1.08, "learning_rate": 3.247533908754624e-05, "loss": 0.0006, "step": 1187 }, { "epoch": 1.08, "learning_rate": 3.245992601726264e-05, "loss": 0.0007, "step": 1188 }, { "epoch": 1.08, "learning_rate": 3.244451294697904e-05, "loss": 0.0006, "step": 1189 }, { "epoch": 1.08, "learning_rate": 3.242909987669544e-05, "loss": 0.0006, "step": 1190 }, { "epoch": 1.08, "learning_rate": 3.241368680641184e-05, "loss": 0.0004, "step": 1191 }, { "epoch": 1.09, "learning_rate": 3.239827373612824e-05, "loss": 0.0005, "step": 1192 }, { "epoch": 1.09, "learning_rate": 3.238286066584464e-05, "loss": 0.0005, "step": 1193 }, { "epoch": 1.09, "learning_rate": 3.2367447595561035e-05, "loss": 0.0006, "step": 1194 }, { "epoch": 1.09, "learning_rate": 3.235203452527744e-05, "loss": 0.0008, "step": 1195 }, { "epoch": 1.09, "learning_rate": 3.233662145499384e-05, "loss": 0.0011, "step": 1196 }, { "epoch": 1.09, "learning_rate": 3.2321208384710235e-05, "loss": 0.0037, "step": 1197 }, { "epoch": 1.09, "learning_rate": 3.230579531442664e-05, "loss": 0.0007, "step": 1198 }, { "epoch": 1.09, "learning_rate": 3.229038224414303e-05, "loss": 0.001, "step": 1199 }, { "epoch": 1.09, "learning_rate": 3.227496917385943e-05, "loss": 0.0007, "step": 1200 }, { "epoch": 1.09, "learning_rate": 3.225955610357583e-05, "loss": 0.0024, "step": 1201 }, { "epoch": 1.09, "learning_rate": 3.224414303329223e-05, "loss": 0.0005, "step": 1202 }, { "epoch": 1.1, "learning_rate": 3.222872996300863e-05, "loss": 0.0008, "step": 1203 }, { "epoch": 1.1, "learning_rate": 3.221331689272503e-05, "loss": 0.0008, "step": 1204 }, { "epoch": 1.1, "learning_rate": 3.219790382244143e-05, "loss": 0.0008, "step": 1205 }, { "epoch": 1.1, "learning_rate": 3.218249075215783e-05, "loss": 0.0029, "step": 1206 }, { "epoch": 1.1, "learning_rate": 3.216707768187423e-05, "loss": 0.0005, "step": 1207 }, { "epoch": 1.1, "learning_rate": 3.215166461159063e-05, "loss": 0.0006, "step": 1208 }, { "epoch": 1.1, "learning_rate": 3.213625154130703e-05, "loss": 0.0005, "step": 1209 }, { "epoch": 1.1, "learning_rate": 3.212083847102343e-05, "loss": 0.0007, "step": 1210 }, { "epoch": 1.1, "learning_rate": 3.210542540073983e-05, "loss": 0.0004, "step": 1211 }, { "epoch": 1.1, "learning_rate": 3.209001233045623e-05, "loss": 0.0008, "step": 1212 }, { "epoch": 1.1, "learning_rate": 3.2074599260172624e-05, "loss": 0.001, "step": 1213 }, { "epoch": 1.11, "learning_rate": 3.2059186189889027e-05, "loss": 0.0007, "step": 1214 }, { "epoch": 1.11, "learning_rate": 3.204377311960542e-05, "loss": 0.0004, "step": 1215 }, { "epoch": 1.11, "learning_rate": 3.2028360049321824e-05, "loss": 0.0007, "step": 1216 }, { "epoch": 1.11, "learning_rate": 3.2012946979038227e-05, "loss": 0.0141, "step": 1217 }, { "epoch": 1.11, "learning_rate": 3.199753390875462e-05, "loss": 0.0023, "step": 1218 }, { "epoch": 1.11, "learning_rate": 3.1982120838471024e-05, "loss": 0.0005, "step": 1219 }, { "epoch": 1.11, "learning_rate": 3.196670776818743e-05, "loss": 0.0004, "step": 1220 }, { "epoch": 1.11, "learning_rate": 3.195129469790382e-05, "loss": 0.0002, "step": 1221 }, { "epoch": 1.11, "learning_rate": 3.1935881627620224e-05, "loss": 0.0003, "step": 1222 }, { "epoch": 1.11, "learning_rate": 3.192046855733663e-05, "loss": 0.0005, "step": 1223 }, { "epoch": 1.11, "learning_rate": 3.190505548705302e-05, "loss": 0.0004, "step": 1224 }, { "epoch": 1.12, "learning_rate": 3.1889642416769425e-05, "loss": 0.0002, "step": 1225 }, { "epoch": 1.12, "learning_rate": 3.187422934648582e-05, "loss": 0.0003, "step": 1226 }, { "epoch": 1.12, "learning_rate": 3.185881627620222e-05, "loss": 0.0008, "step": 1227 }, { "epoch": 1.12, "learning_rate": 3.184340320591862e-05, "loss": 0.0007, "step": 1228 }, { "epoch": 1.12, "learning_rate": 3.182799013563502e-05, "loss": 0.0006, "step": 1229 }, { "epoch": 1.12, "learning_rate": 3.1812577065351416e-05, "loss": 0.0004, "step": 1230 }, { "epoch": 1.12, "learning_rate": 3.179716399506782e-05, "loss": 0.0004, "step": 1231 }, { "epoch": 1.12, "learning_rate": 3.178175092478422e-05, "loss": 0.0012, "step": 1232 }, { "epoch": 1.12, "learning_rate": 3.1766337854500616e-05, "loss": 0.0005, "step": 1233 }, { "epoch": 1.12, "learning_rate": 3.175092478421702e-05, "loss": 0.0005, "step": 1234 }, { "epoch": 1.12, "learning_rate": 3.173551171393342e-05, "loss": 0.0027, "step": 1235 }, { "epoch": 1.13, "learning_rate": 3.1720098643649816e-05, "loss": 0.0005, "step": 1236 }, { "epoch": 1.13, "learning_rate": 3.170468557336622e-05, "loss": 0.0006, "step": 1237 }, { "epoch": 1.13, "learning_rate": 3.168927250308262e-05, "loss": 0.0003, "step": 1238 }, { "epoch": 1.13, "learning_rate": 3.1673859432799016e-05, "loss": 0.0005, "step": 1239 }, { "epoch": 1.13, "learning_rate": 3.165844636251542e-05, "loss": 0.0003, "step": 1240 }, { "epoch": 1.13, "learning_rate": 3.1643033292231814e-05, "loss": 0.0004, "step": 1241 }, { "epoch": 1.13, "learning_rate": 3.162762022194821e-05, "loss": 0.0004, "step": 1242 }, { "epoch": 1.13, "learning_rate": 3.161220715166461e-05, "loss": 0.0002, "step": 1243 }, { "epoch": 1.13, "learning_rate": 3.1596794081381014e-05, "loss": 0.0004, "step": 1244 }, { "epoch": 1.13, "learning_rate": 3.158138101109741e-05, "loss": 0.0003, "step": 1245 }, { "epoch": 1.13, "learning_rate": 3.156596794081381e-05, "loss": 0.0003, "step": 1246 }, { "epoch": 1.14, "learning_rate": 3.1550554870530214e-05, "loss": 0.0005, "step": 1247 }, { "epoch": 1.14, "learning_rate": 3.153514180024661e-05, "loss": 0.0005, "step": 1248 }, { "epoch": 1.14, "learning_rate": 3.151972872996301e-05, "loss": 0.0006, "step": 1249 }, { "epoch": 1.14, "learning_rate": 3.1504315659679414e-05, "loss": 0.0004, "step": 1250 }, { "epoch": 1.14, "learning_rate": 3.148890258939581e-05, "loss": 0.0004, "step": 1251 }, { "epoch": 1.14, "learning_rate": 3.147348951911221e-05, "loss": 0.0007, "step": 1252 }, { "epoch": 1.14, "learning_rate": 3.1458076448828614e-05, "loss": 0.0006, "step": 1253 }, { "epoch": 1.14, "learning_rate": 3.144266337854501e-05, "loss": 0.0006, "step": 1254 }, { "epoch": 1.14, "learning_rate": 3.1427250308261405e-05, "loss": 0.0004, "step": 1255 }, { "epoch": 1.14, "learning_rate": 3.141183723797781e-05, "loss": 0.0006, "step": 1256 }, { "epoch": 1.14, "learning_rate": 3.13964241676942e-05, "loss": 0.0005, "step": 1257 }, { "epoch": 1.15, "learning_rate": 3.1381011097410605e-05, "loss": 0.0002, "step": 1258 }, { "epoch": 1.15, "learning_rate": 3.136559802712701e-05, "loss": 0.0003, "step": 1259 }, { "epoch": 1.15, "learning_rate": 3.13501849568434e-05, "loss": 0.0005, "step": 1260 }, { "epoch": 1.15, "learning_rate": 3.1334771886559805e-05, "loss": 0.4839, "step": 1261 }, { "epoch": 1.15, "learning_rate": 3.131935881627621e-05, "loss": 0.028, "step": 1262 }, { "epoch": 1.15, "learning_rate": 3.13039457459926e-05, "loss": 0.0004, "step": 1263 }, { "epoch": 1.15, "learning_rate": 3.1288532675709005e-05, "loss": 0.0003, "step": 1264 }, { "epoch": 1.15, "learning_rate": 3.127311960542541e-05, "loss": 0.2656, "step": 1265 }, { "epoch": 1.15, "learning_rate": 3.12577065351418e-05, "loss": 0.0003, "step": 1266 }, { "epoch": 1.15, "learning_rate": 3.1242293464858205e-05, "loss": 0.0004, "step": 1267 }, { "epoch": 1.15, "learning_rate": 3.12268803945746e-05, "loss": 0.0005, "step": 1268 }, { "epoch": 1.16, "learning_rate": 3.1211467324290996e-05, "loss": 0.0003, "step": 1269 }, { "epoch": 1.16, "learning_rate": 3.11960542540074e-05, "loss": 0.0003, "step": 1270 }, { "epoch": 1.16, "learning_rate": 3.1180641183723794e-05, "loss": 0.0002, "step": 1271 }, { "epoch": 1.16, "learning_rate": 3.1165228113440196e-05, "loss": 0.0004, "step": 1272 }, { "epoch": 1.16, "learning_rate": 3.11498150431566e-05, "loss": 0.0003, "step": 1273 }, { "epoch": 1.16, "learning_rate": 3.1134401972872994e-05, "loss": 0.0057, "step": 1274 }, { "epoch": 1.16, "learning_rate": 3.1118988902589396e-05, "loss": 0.0004, "step": 1275 }, { "epoch": 1.16, "learning_rate": 3.11035758323058e-05, "loss": 0.0004, "step": 1276 }, { "epoch": 1.16, "learning_rate": 3.1088162762022194e-05, "loss": 0.0003, "step": 1277 }, { "epoch": 1.16, "learning_rate": 3.1072749691738596e-05, "loss": 0.0003, "step": 1278 }, { "epoch": 1.16, "learning_rate": 3.1057336621455e-05, "loss": 0.3232, "step": 1279 }, { "epoch": 1.17, "learning_rate": 3.1041923551171394e-05, "loss": 0.0003, "step": 1280 }, { "epoch": 1.17, "learning_rate": 3.1026510480887796e-05, "loss": 0.0004, "step": 1281 }, { "epoch": 1.17, "learning_rate": 3.101109741060419e-05, "loss": 0.0004, "step": 1282 }, { "epoch": 1.17, "learning_rate": 3.0995684340320594e-05, "loss": 0.0059, "step": 1283 }, { "epoch": 1.17, "learning_rate": 3.098027127003699e-05, "loss": 0.0005, "step": 1284 }, { "epoch": 1.17, "learning_rate": 3.096485819975339e-05, "loss": 0.0003, "step": 1285 }, { "epoch": 1.17, "learning_rate": 3.094944512946979e-05, "loss": 0.0004, "step": 1286 }, { "epoch": 1.17, "learning_rate": 3.093403205918619e-05, "loss": 0.0004, "step": 1287 }, { "epoch": 1.17, "learning_rate": 3.091861898890259e-05, "loss": 0.0002, "step": 1288 }, { "epoch": 1.17, "learning_rate": 3.090320591861899e-05, "loss": 0.0003, "step": 1289 }, { "epoch": 1.17, "learning_rate": 3.088779284833539e-05, "loss": 0.0003, "step": 1290 }, { "epoch": 1.18, "learning_rate": 3.087237977805179e-05, "loss": 0.0136, "step": 1291 }, { "epoch": 1.18, "learning_rate": 3.085696670776819e-05, "loss": 0.0004, "step": 1292 }, { "epoch": 1.18, "learning_rate": 3.084155363748459e-05, "loss": 0.1448, "step": 1293 }, { "epoch": 1.18, "learning_rate": 3.082614056720099e-05, "loss": 0.5205, "step": 1294 }, { "epoch": 1.18, "learning_rate": 3.081072749691739e-05, "loss": 0.0004, "step": 1295 }, { "epoch": 1.18, "learning_rate": 3.079531442663379e-05, "loss": 0.0002, "step": 1296 }, { "epoch": 1.18, "learning_rate": 3.0779901356350186e-05, "loss": 0.0015, "step": 1297 }, { "epoch": 1.18, "learning_rate": 3.076448828606658e-05, "loss": 0.0009, "step": 1298 }, { "epoch": 1.18, "learning_rate": 3.074907521578298e-05, "loss": 0.0165, "step": 1299 }, { "epoch": 1.18, "learning_rate": 3.0733662145499386e-05, "loss": 0.3434, "step": 1300 }, { "epoch": 1.18, "learning_rate": 3.071824907521578e-05, "loss": 0.0003, "step": 1301 }, { "epoch": 1.19, "learning_rate": 3.0702836004932183e-05, "loss": 0.0005, "step": 1302 }, { "epoch": 1.19, "learning_rate": 3.0687422934648586e-05, "loss": 0.0003, "step": 1303 }, { "epoch": 1.19, "learning_rate": 3.067200986436498e-05, "loss": 0.0008, "step": 1304 }, { "epoch": 1.19, "learning_rate": 3.0656596794081383e-05, "loss": 0.0003, "step": 1305 }, { "epoch": 1.19, "learning_rate": 3.0641183723797786e-05, "loss": 0.0003, "step": 1306 }, { "epoch": 1.19, "learning_rate": 3.062577065351418e-05, "loss": 0.0004, "step": 1307 }, { "epoch": 1.19, "learning_rate": 3.0610357583230584e-05, "loss": 0.0006, "step": 1308 }, { "epoch": 1.19, "learning_rate": 3.0594944512946986e-05, "loss": 0.3079, "step": 1309 }, { "epoch": 1.19, "learning_rate": 3.057953144266338e-05, "loss": 0.0004, "step": 1310 }, { "epoch": 1.19, "learning_rate": 3.056411837237978e-05, "loss": 0.0004, "step": 1311 }, { "epoch": 1.19, "learning_rate": 3.054870530209618e-05, "loss": 0.0023, "step": 1312 }, { "epoch": 1.2, "learning_rate": 3.0533292231812575e-05, "loss": 0.0005, "step": 1313 }, { "epoch": 1.2, "learning_rate": 3.051787916152898e-05, "loss": 0.0003, "step": 1314 }, { "epoch": 1.2, "learning_rate": 3.050246609124538e-05, "loss": 0.0006, "step": 1315 }, { "epoch": 1.2, "learning_rate": 3.0487053020961775e-05, "loss": 0.0003, "step": 1316 }, { "epoch": 1.2, "learning_rate": 3.0471639950678177e-05, "loss": 0.0005, "step": 1317 }, { "epoch": 1.2, "learning_rate": 3.045622688039458e-05, "loss": 0.0009, "step": 1318 }, { "epoch": 1.2, "learning_rate": 3.0440813810110975e-05, "loss": 0.0004, "step": 1319 }, { "epoch": 1.2, "learning_rate": 3.0425400739827377e-05, "loss": 0.0017, "step": 1320 }, { "epoch": 1.2, "learning_rate": 3.0409987669543776e-05, "loss": 0.0004, "step": 1321 }, { "epoch": 1.2, "learning_rate": 3.039457459926017e-05, "loss": 0.0047, "step": 1322 }, { "epoch": 1.2, "learning_rate": 3.0379161528976574e-05, "loss": 0.7652, "step": 1323 }, { "epoch": 1.21, "learning_rate": 3.0363748458692976e-05, "loss": 0.0005, "step": 1324 }, { "epoch": 1.21, "learning_rate": 3.034833538840937e-05, "loss": 0.0005, "step": 1325 }, { "epoch": 1.21, "learning_rate": 3.0332922318125774e-05, "loss": 0.001, "step": 1326 }, { "epoch": 1.21, "learning_rate": 3.0317509247842173e-05, "loss": 0.0005, "step": 1327 }, { "epoch": 1.21, "learning_rate": 3.0302096177558568e-05, "loss": 0.0021, "step": 1328 }, { "epoch": 1.21, "learning_rate": 3.028668310727497e-05, "loss": 0.0004, "step": 1329 }, { "epoch": 1.21, "learning_rate": 3.0271270036991373e-05, "loss": 0.0004, "step": 1330 }, { "epoch": 1.21, "learning_rate": 3.0255856966707768e-05, "loss": 0.0006, "step": 1331 }, { "epoch": 1.21, "learning_rate": 3.024044389642417e-05, "loss": 0.0008, "step": 1332 }, { "epoch": 1.21, "learning_rate": 3.0225030826140573e-05, "loss": 0.0004, "step": 1333 }, { "epoch": 1.21, "learning_rate": 3.020961775585697e-05, "loss": 0.0003, "step": 1334 }, { "epoch": 1.22, "learning_rate": 3.0194204685573367e-05, "loss": 0.0139, "step": 1335 }, { "epoch": 1.22, "learning_rate": 3.017879161528977e-05, "loss": 0.0006, "step": 1336 }, { "epoch": 1.22, "learning_rate": 3.0163378545006165e-05, "loss": 0.0007, "step": 1337 }, { "epoch": 1.22, "learning_rate": 3.0147965474722567e-05, "loss": 0.0005, "step": 1338 }, { "epoch": 1.22, "learning_rate": 3.013255240443897e-05, "loss": 0.0007, "step": 1339 }, { "epoch": 1.22, "learning_rate": 3.0117139334155365e-05, "loss": 0.0004, "step": 1340 }, { "epoch": 1.22, "learning_rate": 3.0101726263871764e-05, "loss": 0.0004, "step": 1341 }, { "epoch": 1.22, "learning_rate": 3.0086313193588163e-05, "loss": 0.3314, "step": 1342 }, { "epoch": 1.22, "learning_rate": 3.0070900123304562e-05, "loss": 0.0003, "step": 1343 }, { "epoch": 1.22, "learning_rate": 3.0055487053020964e-05, "loss": 0.0004, "step": 1344 }, { "epoch": 1.22, "learning_rate": 3.004007398273736e-05, "loss": 0.0003, "step": 1345 }, { "epoch": 1.23, "learning_rate": 3.0024660912453762e-05, "loss": 0.0004, "step": 1346 }, { "epoch": 1.23, "learning_rate": 3.0009247842170164e-05, "loss": 0.0004, "step": 1347 }, { "epoch": 1.23, "learning_rate": 2.999383477188656e-05, "loss": 0.006, "step": 1348 }, { "epoch": 1.23, "learning_rate": 2.997842170160296e-05, "loss": 0.0008, "step": 1349 }, { "epoch": 1.23, "learning_rate": 2.996300863131936e-05, "loss": 0.001, "step": 1350 }, { "epoch": 1.23, "learning_rate": 2.9947595561035756e-05, "loss": 0.0006, "step": 1351 }, { "epoch": 1.23, "learning_rate": 2.993218249075216e-05, "loss": 0.0005, "step": 1352 }, { "epoch": 1.23, "learning_rate": 2.991676942046856e-05, "loss": 0.0473, "step": 1353 }, { "epoch": 1.23, "learning_rate": 2.9901356350184956e-05, "loss": 0.0027, "step": 1354 }, { "epoch": 1.23, "learning_rate": 2.988594327990136e-05, "loss": 0.0006, "step": 1355 }, { "epoch": 1.23, "learning_rate": 2.9870530209617758e-05, "loss": 0.0006, "step": 1356 }, { "epoch": 1.24, "learning_rate": 2.9855117139334153e-05, "loss": 0.0003, "step": 1357 }, { "epoch": 1.24, "learning_rate": 2.9839704069050555e-05, "loss": 0.0005, "step": 1358 }, { "epoch": 1.24, "learning_rate": 2.9824290998766958e-05, "loss": 0.0007, "step": 1359 }, { "epoch": 1.24, "learning_rate": 2.9808877928483353e-05, "loss": 0.0006, "step": 1360 }, { "epoch": 1.24, "learning_rate": 2.9793464858199755e-05, "loss": 0.0005, "step": 1361 }, { "epoch": 1.24, "learning_rate": 2.9778051787916154e-05, "loss": 0.0003, "step": 1362 }, { "epoch": 1.24, "learning_rate": 2.9762638717632553e-05, "loss": 0.0003, "step": 1363 }, { "epoch": 1.24, "learning_rate": 2.9747225647348952e-05, "loss": 0.0004, "step": 1364 }, { "epoch": 1.24, "learning_rate": 2.9731812577065354e-05, "loss": 0.0004, "step": 1365 }, { "epoch": 1.24, "learning_rate": 2.971639950678175e-05, "loss": 0.0004, "step": 1366 }, { "epoch": 1.24, "learning_rate": 2.9700986436498152e-05, "loss": 0.0004, "step": 1367 }, { "epoch": 1.25, "learning_rate": 2.9685573366214554e-05, "loss": 0.0003, "step": 1368 }, { "epoch": 1.25, "learning_rate": 2.967016029593095e-05, "loss": 0.0006, "step": 1369 }, { "epoch": 1.25, "learning_rate": 2.965474722564735e-05, "loss": 0.0003, "step": 1370 }, { "epoch": 1.25, "learning_rate": 2.963933415536375e-05, "loss": 0.0005, "step": 1371 }, { "epoch": 1.25, "learning_rate": 2.9623921085080147e-05, "loss": 0.0004, "step": 1372 }, { "epoch": 1.25, "learning_rate": 2.960850801479655e-05, "loss": 0.0007, "step": 1373 }, { "epoch": 1.25, "learning_rate": 2.959309494451295e-05, "loss": 0.0005, "step": 1374 }, { "epoch": 1.25, "learning_rate": 2.9577681874229347e-05, "loss": 0.0003, "step": 1375 }, { "epoch": 1.25, "learning_rate": 2.956226880394575e-05, "loss": 0.0002, "step": 1376 }, { "epoch": 1.25, "learning_rate": 2.9546855733662148e-05, "loss": 0.0007, "step": 1377 }, { "epoch": 1.26, "learning_rate": 2.9531442663378543e-05, "loss": 0.0005, "step": 1378 }, { "epoch": 1.26, "learning_rate": 2.9516029593094946e-05, "loss": 0.0002, "step": 1379 }, { "epoch": 1.26, "learning_rate": 2.9500616522811348e-05, "loss": 0.1311, "step": 1380 }, { "epoch": 1.26, "learning_rate": 2.9485203452527743e-05, "loss": 0.0003, "step": 1381 }, { "epoch": 1.26, "learning_rate": 2.9469790382244146e-05, "loss": 0.0004, "step": 1382 }, { "epoch": 1.26, "learning_rate": 2.9454377311960545e-05, "loss": 0.0003, "step": 1383 }, { "epoch": 1.26, "learning_rate": 2.943896424167694e-05, "loss": 0.0003, "step": 1384 }, { "epoch": 1.26, "learning_rate": 2.9423551171393342e-05, "loss": 0.0004, "step": 1385 }, { "epoch": 1.26, "learning_rate": 2.9408138101109745e-05, "loss": 0.0003, "step": 1386 }, { "epoch": 1.26, "learning_rate": 2.939272503082614e-05, "loss": 0.0005, "step": 1387 }, { "epoch": 1.26, "learning_rate": 2.9377311960542543e-05, "loss": 0.0003, "step": 1388 }, { "epoch": 1.27, "learning_rate": 2.9361898890258945e-05, "loss": 0.0006, "step": 1389 }, { "epoch": 1.27, "learning_rate": 2.934648581997534e-05, "loss": 0.019, "step": 1390 }, { "epoch": 1.27, "learning_rate": 2.933107274969174e-05, "loss": 0.0004, "step": 1391 }, { "epoch": 1.27, "learning_rate": 2.931565967940814e-05, "loss": 0.0002, "step": 1392 }, { "epoch": 1.27, "learning_rate": 2.9300246609124537e-05, "loss": 0.0012, "step": 1393 }, { "epoch": 1.27, "learning_rate": 2.928483353884094e-05, "loss": 0.0003, "step": 1394 }, { "epoch": 1.27, "learning_rate": 2.926942046855734e-05, "loss": 0.0003, "step": 1395 }, { "epoch": 1.27, "learning_rate": 2.9254007398273737e-05, "loss": 0.3911, "step": 1396 }, { "epoch": 1.27, "learning_rate": 2.9238594327990136e-05, "loss": 0.0003, "step": 1397 }, { "epoch": 1.27, "learning_rate": 2.9223181257706538e-05, "loss": 0.0002, "step": 1398 }, { "epoch": 1.27, "learning_rate": 2.9207768187422934e-05, "loss": 0.0003, "step": 1399 }, { "epoch": 1.28, "learning_rate": 2.9192355117139336e-05, "loss": 0.0002, "step": 1400 }, { "epoch": 1.28, "learning_rate": 2.9176942046855738e-05, "loss": 0.0004, "step": 1401 }, { "epoch": 1.28, "learning_rate": 2.9161528976572134e-05, "loss": 0.2476, "step": 1402 }, { "epoch": 1.28, "learning_rate": 2.9146115906288536e-05, "loss": 0.0003, "step": 1403 }, { "epoch": 1.28, "learning_rate": 2.9130702836004935e-05, "loss": 0.0004, "step": 1404 }, { "epoch": 1.28, "learning_rate": 2.911528976572133e-05, "loss": 0.0003, "step": 1405 }, { "epoch": 1.28, "learning_rate": 2.9099876695437733e-05, "loss": 0.0009, "step": 1406 }, { "epoch": 1.28, "learning_rate": 2.9084463625154135e-05, "loss": 0.0006, "step": 1407 }, { "epoch": 1.28, "learning_rate": 2.906905055487053e-05, "loss": 0.0005, "step": 1408 }, { "epoch": 1.28, "learning_rate": 2.9053637484586933e-05, "loss": 0.0004, "step": 1409 }, { "epoch": 1.28, "learning_rate": 2.9038224414303332e-05, "loss": 0.0003, "step": 1410 }, { "epoch": 1.29, "learning_rate": 2.902281134401973e-05, "loss": 0.001, "step": 1411 }, { "epoch": 1.29, "learning_rate": 2.900739827373613e-05, "loss": 0.0013, "step": 1412 }, { "epoch": 1.29, "learning_rate": 2.8991985203452532e-05, "loss": 0.0004, "step": 1413 }, { "epoch": 1.29, "learning_rate": 2.8976572133168927e-05, "loss": 0.0005, "step": 1414 }, { "epoch": 1.29, "learning_rate": 2.896115906288533e-05, "loss": 0.0002, "step": 1415 }, { "epoch": 1.29, "learning_rate": 2.8945745992601725e-05, "loss": 0.0004, "step": 1416 }, { "epoch": 1.29, "learning_rate": 2.8930332922318127e-05, "loss": 0.0003, "step": 1417 }, { "epoch": 1.29, "learning_rate": 2.8914919852034526e-05, "loss": 0.006, "step": 1418 }, { "epoch": 1.29, "learning_rate": 2.8899506781750925e-05, "loss": 0.0021, "step": 1419 }, { "epoch": 1.29, "learning_rate": 2.8884093711467324e-05, "loss": 0.001, "step": 1420 }, { "epoch": 1.29, "learning_rate": 2.8868680641183726e-05, "loss": 0.0005, "step": 1421 }, { "epoch": 1.3, "learning_rate": 2.8853267570900122e-05, "loss": 0.0008, "step": 1422 }, { "epoch": 1.3, "learning_rate": 2.8837854500616524e-05, "loss": 0.0003, "step": 1423 }, { "epoch": 1.3, "learning_rate": 2.8822441430332926e-05, "loss": 0.0005, "step": 1424 }, { "epoch": 1.3, "learning_rate": 2.8807028360049322e-05, "loss": 0.0066, "step": 1425 }, { "epoch": 1.3, "learning_rate": 2.879161528976572e-05, "loss": 0.0005, "step": 1426 }, { "epoch": 1.3, "learning_rate": 2.8776202219482123e-05, "loss": 0.0003, "step": 1427 }, { "epoch": 1.3, "learning_rate": 2.876078914919852e-05, "loss": 0.0003, "step": 1428 }, { "epoch": 1.3, "learning_rate": 2.874537607891492e-05, "loss": 0.0015, "step": 1429 }, { "epoch": 1.3, "learning_rate": 2.8729963008631323e-05, "loss": 0.0003, "step": 1430 }, { "epoch": 1.3, "learning_rate": 2.871454993834772e-05, "loss": 0.0008, "step": 1431 }, { "epoch": 1.3, "learning_rate": 2.869913686806412e-05, "loss": 0.0004, "step": 1432 }, { "epoch": 1.31, "learning_rate": 2.868372379778052e-05, "loss": 0.0002, "step": 1433 }, { "epoch": 1.31, "learning_rate": 2.8668310727496915e-05, "loss": 0.0002, "step": 1434 }, { "epoch": 1.31, "learning_rate": 2.8652897657213318e-05, "loss": 0.0002, "step": 1435 }, { "epoch": 1.31, "learning_rate": 2.863748458692972e-05, "loss": 0.0002, "step": 1436 }, { "epoch": 1.31, "learning_rate": 2.8622071516646115e-05, "loss": 0.0004, "step": 1437 }, { "epoch": 1.31, "learning_rate": 2.8606658446362518e-05, "loss": 0.0002, "step": 1438 }, { "epoch": 1.31, "learning_rate": 2.8591245376078917e-05, "loss": 0.0003, "step": 1439 }, { "epoch": 1.31, "learning_rate": 2.8575832305795312e-05, "loss": 0.0002, "step": 1440 }, { "epoch": 1.31, "learning_rate": 2.8560419235511714e-05, "loss": 0.0003, "step": 1441 }, { "epoch": 1.31, "learning_rate": 2.8545006165228117e-05, "loss": 0.0617, "step": 1442 }, { "epoch": 1.31, "learning_rate": 2.8529593094944512e-05, "loss": 0.0003, "step": 1443 }, { "epoch": 1.32, "learning_rate": 2.8514180024660914e-05, "loss": 0.0003, "step": 1444 }, { "epoch": 1.32, "learning_rate": 2.8498766954377313e-05, "loss": 0.0003, "step": 1445 }, { "epoch": 1.32, "learning_rate": 2.8483353884093712e-05, "loss": 0.0002, "step": 1446 }, { "epoch": 1.32, "learning_rate": 2.846794081381011e-05, "loss": 0.0001, "step": 1447 }, { "epoch": 1.32, "learning_rate": 2.8452527743526513e-05, "loss": 0.0002, "step": 1448 }, { "epoch": 1.32, "learning_rate": 2.843711467324291e-05, "loss": 0.0002, "step": 1449 }, { "epoch": 1.32, "learning_rate": 2.842170160295931e-05, "loss": 0.0003, "step": 1450 }, { "epoch": 1.32, "learning_rate": 2.8406288532675713e-05, "loss": 0.2294, "step": 1451 }, { "epoch": 1.32, "learning_rate": 2.839087546239211e-05, "loss": 0.0004, "step": 1452 }, { "epoch": 1.32, "learning_rate": 2.8375462392108508e-05, "loss": 0.0003, "step": 1453 }, { "epoch": 1.32, "learning_rate": 2.836004932182491e-05, "loss": 0.0002, "step": 1454 }, { "epoch": 1.33, "learning_rate": 2.8344636251541306e-05, "loss": 0.0002, "step": 1455 }, { "epoch": 1.33, "learning_rate": 2.8329223181257708e-05, "loss": 0.0003, "step": 1456 }, { "epoch": 1.33, "learning_rate": 2.831381011097411e-05, "loss": 0.0005, "step": 1457 }, { "epoch": 1.33, "learning_rate": 2.8298397040690506e-05, "loss": 0.3788, "step": 1458 }, { "epoch": 1.33, "learning_rate": 2.8282983970406908e-05, "loss": 0.0003, "step": 1459 }, { "epoch": 1.33, "learning_rate": 2.8267570900123307e-05, "loss": 0.0003, "step": 1460 }, { "epoch": 1.33, "learning_rate": 2.8252157829839702e-05, "loss": 0.0001, "step": 1461 }, { "epoch": 1.33, "learning_rate": 2.8236744759556105e-05, "loss": 0.0003, "step": 1462 }, { "epoch": 1.33, "learning_rate": 2.8221331689272507e-05, "loss": 0.3587, "step": 1463 }, { "epoch": 1.33, "learning_rate": 2.8205918618988902e-05, "loss": 0.0002, "step": 1464 }, { "epoch": 1.33, "learning_rate": 2.8190505548705305e-05, "loss": 0.0002, "step": 1465 }, { "epoch": 1.34, "learning_rate": 2.8175092478421704e-05, "loss": 0.4382, "step": 1466 }, { "epoch": 1.34, "learning_rate": 2.8159679408138103e-05, "loss": 0.0003, "step": 1467 }, { "epoch": 1.34, "learning_rate": 2.81442663378545e-05, "loss": 0.0004, "step": 1468 }, { "epoch": 1.34, "learning_rate": 2.8128853267570904e-05, "loss": 0.0002, "step": 1469 }, { "epoch": 1.34, "learning_rate": 2.81134401972873e-05, "loss": 0.0011, "step": 1470 }, { "epoch": 1.34, "learning_rate": 2.80980271270037e-05, "loss": 0.0003, "step": 1471 }, { "epoch": 1.34, "learning_rate": 2.8082614056720104e-05, "loss": 0.4456, "step": 1472 }, { "epoch": 1.34, "learning_rate": 2.80672009864365e-05, "loss": 0.0003, "step": 1473 }, { "epoch": 1.34, "learning_rate": 2.8051787916152898e-05, "loss": 0.0004, "step": 1474 }, { "epoch": 1.34, "learning_rate": 2.80363748458693e-05, "loss": 0.0002, "step": 1475 }, { "epoch": 1.34, "learning_rate": 2.8020961775585696e-05, "loss": 0.0336, "step": 1476 }, { "epoch": 1.35, "learning_rate": 2.8005548705302098e-05, "loss": 0.0004, "step": 1477 }, { "epoch": 1.35, "learning_rate": 2.79901356350185e-05, "loss": 0.0026, "step": 1478 }, { "epoch": 1.35, "learning_rate": 2.7974722564734896e-05, "loss": 0.0002, "step": 1479 }, { "epoch": 1.35, "learning_rate": 2.79593094944513e-05, "loss": 0.0003, "step": 1480 }, { "epoch": 1.35, "learning_rate": 2.7943896424167697e-05, "loss": 0.0003, "step": 1481 }, { "epoch": 1.35, "learning_rate": 2.7928483353884093e-05, "loss": 0.0003, "step": 1482 }, { "epoch": 1.35, "learning_rate": 2.7913070283600495e-05, "loss": 0.0003, "step": 1483 }, { "epoch": 1.35, "learning_rate": 2.7897657213316897e-05, "loss": 0.0008, "step": 1484 }, { "epoch": 1.35, "learning_rate": 2.7882244143033293e-05, "loss": 0.0006, "step": 1485 }, { "epoch": 1.35, "learning_rate": 2.7866831072749695e-05, "loss": 0.0004, "step": 1486 }, { "epoch": 1.35, "learning_rate": 2.7851418002466094e-05, "loss": 0.0191, "step": 1487 }, { "epoch": 1.36, "learning_rate": 2.783600493218249e-05, "loss": 0.0061, "step": 1488 }, { "epoch": 1.36, "learning_rate": 2.7820591861898892e-05, "loss": 0.0006, "step": 1489 }, { "epoch": 1.36, "learning_rate": 2.7805178791615287e-05, "loss": 0.0004, "step": 1490 }, { "epoch": 1.36, "learning_rate": 2.778976572133169e-05, "loss": 0.0003, "step": 1491 }, { "epoch": 1.36, "learning_rate": 2.7774352651048092e-05, "loss": 0.0148, "step": 1492 }, { "epoch": 1.36, "learning_rate": 2.7758939580764487e-05, "loss": 0.0003, "step": 1493 }, { "epoch": 1.36, "learning_rate": 2.774352651048089e-05, "loss": 0.0005, "step": 1494 }, { "epoch": 1.36, "learning_rate": 2.772811344019729e-05, "loss": 0.0133, "step": 1495 }, { "epoch": 1.36, "learning_rate": 2.7712700369913684e-05, "loss": 0.0008, "step": 1496 }, { "epoch": 1.36, "learning_rate": 2.7697287299630086e-05, "loss": 0.0002, "step": 1497 }, { "epoch": 1.36, "learning_rate": 2.768187422934649e-05, "loss": 0.0003, "step": 1498 }, { "epoch": 1.37, "learning_rate": 2.7666461159062884e-05, "loss": 0.0066, "step": 1499 }, { "epoch": 1.37, "learning_rate": 2.7651048088779286e-05, "loss": 0.0005, "step": 1500 }, { "epoch": 1.37, "learning_rate": 2.7635635018495685e-05, "loss": 0.0002, "step": 1501 }, { "epoch": 1.37, "learning_rate": 2.7620221948212084e-05, "loss": 0.0008, "step": 1502 }, { "epoch": 1.37, "learning_rate": 2.7604808877928483e-05, "loss": 0.0003, "step": 1503 }, { "epoch": 1.37, "learning_rate": 2.7589395807644885e-05, "loss": 0.0018, "step": 1504 }, { "epoch": 1.37, "learning_rate": 2.757398273736128e-05, "loss": 0.0005, "step": 1505 }, { "epoch": 1.37, "learning_rate": 2.7558569667077683e-05, "loss": 0.0237, "step": 1506 }, { "epoch": 1.37, "learning_rate": 2.7543156596794085e-05, "loss": 0.0003, "step": 1507 }, { "epoch": 1.37, "learning_rate": 2.752774352651048e-05, "loss": 0.0003, "step": 1508 }, { "epoch": 1.37, "learning_rate": 2.751233045622688e-05, "loss": 0.0002, "step": 1509 }, { "epoch": 1.38, "learning_rate": 2.7496917385943282e-05, "loss": 0.0003, "step": 1510 }, { "epoch": 1.38, "learning_rate": 2.7481504315659678e-05, "loss": 0.2048, "step": 1511 }, { "epoch": 1.38, "learning_rate": 2.746609124537608e-05, "loss": 0.0002, "step": 1512 }, { "epoch": 1.38, "learning_rate": 2.7450678175092482e-05, "loss": 0.0004, "step": 1513 }, { "epoch": 1.38, "learning_rate": 2.7435265104808878e-05, "loss": 0.7059, "step": 1514 }, { "epoch": 1.38, "learning_rate": 2.741985203452528e-05, "loss": 0.0002, "step": 1515 }, { "epoch": 1.38, "learning_rate": 2.740443896424168e-05, "loss": 0.0004, "step": 1516 }, { "epoch": 1.38, "learning_rate": 2.7389025893958074e-05, "loss": 0.0002, "step": 1517 }, { "epoch": 1.38, "learning_rate": 2.7373612823674477e-05, "loss": 0.0002, "step": 1518 }, { "epoch": 1.38, "learning_rate": 2.735819975339088e-05, "loss": 0.0023, "step": 1519 }, { "epoch": 1.38, "learning_rate": 2.7342786683107274e-05, "loss": 0.0002, "step": 1520 }, { "epoch": 1.39, "learning_rate": 2.7327373612823677e-05, "loss": 0.0002, "step": 1521 }, { "epoch": 1.39, "learning_rate": 2.7311960542540076e-05, "loss": 0.0006, "step": 1522 }, { "epoch": 1.39, "learning_rate": 2.7296547472256475e-05, "loss": 0.0003, "step": 1523 }, { "epoch": 1.39, "learning_rate": 2.7281134401972873e-05, "loss": 0.4828, "step": 1524 }, { "epoch": 1.39, "learning_rate": 2.7265721331689276e-05, "loss": 0.0003, "step": 1525 }, { "epoch": 1.39, "learning_rate": 2.725030826140567e-05, "loss": 0.0005, "step": 1526 }, { "epoch": 1.39, "learning_rate": 2.7234895191122073e-05, "loss": 0.0002, "step": 1527 }, { "epoch": 1.39, "learning_rate": 2.7219482120838476e-05, "loss": 0.0005, "step": 1528 }, { "epoch": 1.39, "learning_rate": 2.720406905055487e-05, "loss": 0.0002, "step": 1529 }, { "epoch": 1.39, "learning_rate": 2.718865598027127e-05, "loss": 0.0002, "step": 1530 }, { "epoch": 1.39, "learning_rate": 2.7173242909987672e-05, "loss": 0.0009, "step": 1531 }, { "epoch": 1.4, "learning_rate": 2.7157829839704068e-05, "loss": 0.0007, "step": 1532 }, { "epoch": 1.4, "learning_rate": 2.714241676942047e-05, "loss": 0.0003, "step": 1533 }, { "epoch": 1.4, "learning_rate": 2.7127003699136873e-05, "loss": 0.0003, "step": 1534 }, { "epoch": 1.4, "learning_rate": 2.7111590628853268e-05, "loss": 0.0008, "step": 1535 }, { "epoch": 1.4, "learning_rate": 2.709617755856967e-05, "loss": 0.1094, "step": 1536 }, { "epoch": 1.4, "learning_rate": 2.708076448828607e-05, "loss": 0.0003, "step": 1537 }, { "epoch": 1.4, "learning_rate": 2.7065351418002465e-05, "loss": 0.0005, "step": 1538 }, { "epoch": 1.4, "learning_rate": 2.7049938347718867e-05, "loss": 0.0019, "step": 1539 }, { "epoch": 1.4, "learning_rate": 2.703452527743527e-05, "loss": 0.0002, "step": 1540 }, { "epoch": 1.4, "learning_rate": 2.7019112207151665e-05, "loss": 0.0061, "step": 1541 }, { "epoch": 1.4, "learning_rate": 2.7003699136868067e-05, "loss": 0.0003, "step": 1542 }, { "epoch": 1.41, "learning_rate": 2.6988286066584466e-05, "loss": 0.0003, "step": 1543 }, { "epoch": 1.41, "learning_rate": 2.697287299630086e-05, "loss": 0.0002, "step": 1544 }, { "epoch": 1.41, "learning_rate": 2.6957459926017264e-05, "loss": 0.0011, "step": 1545 }, { "epoch": 1.41, "learning_rate": 2.6942046855733666e-05, "loss": 0.0003, "step": 1546 }, { "epoch": 1.41, "learning_rate": 2.692663378545006e-05, "loss": 0.0004, "step": 1547 }, { "epoch": 1.41, "learning_rate": 2.6911220715166464e-05, "loss": 0.0003, "step": 1548 }, { "epoch": 1.41, "learning_rate": 2.6895807644882866e-05, "loss": 0.0005, "step": 1549 }, { "epoch": 1.41, "learning_rate": 2.688039457459926e-05, "loss": 0.0002, "step": 1550 }, { "epoch": 1.41, "learning_rate": 2.686498150431566e-05, "loss": 0.0002, "step": 1551 }, { "epoch": 1.41, "learning_rate": 2.6849568434032063e-05, "loss": 0.0002, "step": 1552 }, { "epoch": 1.41, "learning_rate": 2.6834155363748458e-05, "loss": 0.0002, "step": 1553 }, { "epoch": 1.42, "learning_rate": 2.681874229346486e-05, "loss": 0.0022, "step": 1554 }, { "epoch": 1.42, "learning_rate": 2.6803329223181263e-05, "loss": 0.0002, "step": 1555 }, { "epoch": 1.42, "learning_rate": 2.678791615289766e-05, "loss": 0.0002, "step": 1556 }, { "epoch": 1.42, "learning_rate": 2.6772503082614057e-05, "loss": 0.0002, "step": 1557 }, { "epoch": 1.42, "learning_rate": 2.675709001233046e-05, "loss": 0.0014, "step": 1558 }, { "epoch": 1.42, "learning_rate": 2.6741676942046855e-05, "loss": 0.0003, "step": 1559 }, { "epoch": 1.42, "learning_rate": 2.6726263871763257e-05, "loss": 0.0003, "step": 1560 }, { "epoch": 1.42, "learning_rate": 2.671085080147966e-05, "loss": 0.0021, "step": 1561 }, { "epoch": 1.42, "learning_rate": 2.6695437731196055e-05, "loss": 0.0003, "step": 1562 }, { "epoch": 1.42, "learning_rate": 2.6680024660912457e-05, "loss": 0.0002, "step": 1563 }, { "epoch": 1.42, "learning_rate": 2.6664611590628853e-05, "loss": 0.0002, "step": 1564 }, { "epoch": 1.43, "learning_rate": 2.6649198520345252e-05, "loss": 0.0003, "step": 1565 }, { "epoch": 1.43, "learning_rate": 2.6633785450061654e-05, "loss": 0.0003, "step": 1566 }, { "epoch": 1.43, "learning_rate": 2.661837237977805e-05, "loss": 0.0002, "step": 1567 }, { "epoch": 1.43, "learning_rate": 2.6602959309494452e-05, "loss": 0.3296, "step": 1568 }, { "epoch": 1.43, "learning_rate": 2.6587546239210854e-05, "loss": 0.0002, "step": 1569 }, { "epoch": 1.43, "learning_rate": 2.657213316892725e-05, "loss": 0.0002, "step": 1570 }, { "epoch": 1.43, "learning_rate": 2.6556720098643652e-05, "loss": 0.0002, "step": 1571 }, { "epoch": 1.43, "learning_rate": 2.654130702836005e-05, "loss": 0.0009, "step": 1572 }, { "epoch": 1.43, "learning_rate": 2.6525893958076446e-05, "loss": 0.0057, "step": 1573 }, { "epoch": 1.43, "learning_rate": 2.651048088779285e-05, "loss": 0.0002, "step": 1574 }, { "epoch": 1.43, "learning_rate": 2.649506781750925e-05, "loss": 0.0002, "step": 1575 }, { "epoch": 1.44, "learning_rate": 2.6479654747225646e-05, "loss": 0.0003, "step": 1576 }, { "epoch": 1.44, "learning_rate": 2.646424167694205e-05, "loss": 0.0002, "step": 1577 }, { "epoch": 1.44, "learning_rate": 2.6448828606658448e-05, "loss": 0.0003, "step": 1578 }, { "epoch": 1.44, "learning_rate": 2.6433415536374846e-05, "loss": 0.0002, "step": 1579 }, { "epoch": 1.44, "learning_rate": 2.6418002466091245e-05, "loss": 0.0004, "step": 1580 }, { "epoch": 1.44, "learning_rate": 2.6402589395807648e-05, "loss": 0.0002, "step": 1581 }, { "epoch": 1.44, "learning_rate": 2.6387176325524043e-05, "loss": 0.0002, "step": 1582 }, { "epoch": 1.44, "learning_rate": 2.6371763255240445e-05, "loss": 0.0713, "step": 1583 }, { "epoch": 1.44, "learning_rate": 2.6356350184956848e-05, "loss": 0.0002, "step": 1584 }, { "epoch": 1.44, "learning_rate": 2.6340937114673243e-05, "loss": 0.0002, "step": 1585 }, { "epoch": 1.44, "learning_rate": 2.6325524044389642e-05, "loss": 0.0003, "step": 1586 }, { "epoch": 1.45, "learning_rate": 2.6310110974106044e-05, "loss": 0.0002, "step": 1587 }, { "epoch": 1.45, "learning_rate": 2.629469790382244e-05, "loss": 0.1369, "step": 1588 }, { "epoch": 1.45, "learning_rate": 2.6279284833538842e-05, "loss": 0.0002, "step": 1589 }, { "epoch": 1.45, "learning_rate": 2.6263871763255244e-05, "loss": 0.0003, "step": 1590 }, { "epoch": 1.45, "learning_rate": 2.624845869297164e-05, "loss": 0.0004, "step": 1591 }, { "epoch": 1.45, "learning_rate": 2.6233045622688042e-05, "loss": 0.0036, "step": 1592 }, { "epoch": 1.45, "learning_rate": 2.621763255240444e-05, "loss": 0.0002, "step": 1593 }, { "epoch": 1.45, "learning_rate": 2.6202219482120837e-05, "loss": 0.0002, "step": 1594 }, { "epoch": 1.45, "learning_rate": 2.618680641183724e-05, "loss": 0.0002, "step": 1595 }, { "epoch": 1.45, "learning_rate": 2.617139334155364e-05, "loss": 0.0001, "step": 1596 }, { "epoch": 1.45, "learning_rate": 2.6155980271270037e-05, "loss": 0.0002, "step": 1597 }, { "epoch": 1.46, "learning_rate": 2.614056720098644e-05, "loss": 0.0002, "step": 1598 }, { "epoch": 1.46, "learning_rate": 2.6125154130702838e-05, "loss": 0.0002, "step": 1599 }, { "epoch": 1.46, "learning_rate": 2.6109741060419233e-05, "loss": 0.0687, "step": 1600 }, { "epoch": 1.46, "learning_rate": 2.6094327990135636e-05, "loss": 0.0002, "step": 1601 }, { "epoch": 1.46, "learning_rate": 2.6078914919852038e-05, "loss": 0.0323, "step": 1602 }, { "epoch": 1.46, "learning_rate": 2.6063501849568433e-05, "loss": 0.0002, "step": 1603 }, { "epoch": 1.46, "learning_rate": 2.6048088779284836e-05, "loss": 0.0002, "step": 1604 }, { "epoch": 1.46, "learning_rate": 2.6032675709001235e-05, "loss": 0.0005, "step": 1605 }, { "epoch": 1.46, "learning_rate": 2.6017262638717634e-05, "loss": 0.0006, "step": 1606 }, { "epoch": 1.46, "learning_rate": 2.6001849568434032e-05, "loss": 0.0002, "step": 1607 }, { "epoch": 1.46, "learning_rate": 2.5986436498150435e-05, "loss": 0.0003, "step": 1608 }, { "epoch": 1.47, "learning_rate": 2.597102342786683e-05, "loss": 0.0002, "step": 1609 }, { "epoch": 1.47, "learning_rate": 2.5955610357583233e-05, "loss": 0.0023, "step": 1610 }, { "epoch": 1.47, "learning_rate": 2.5940197287299635e-05, "loss": 0.0044, "step": 1611 }, { "epoch": 1.47, "learning_rate": 2.592478421701603e-05, "loss": 0.0002, "step": 1612 }, { "epoch": 1.47, "learning_rate": 2.590937114673243e-05, "loss": 0.0007, "step": 1613 }, { "epoch": 1.47, "learning_rate": 2.589395807644883e-05, "loss": 0.0002, "step": 1614 }, { "epoch": 1.47, "learning_rate": 2.5878545006165227e-05, "loss": 0.0002, "step": 1615 }, { "epoch": 1.47, "learning_rate": 2.586313193588163e-05, "loss": 0.0001, "step": 1616 }, { "epoch": 1.47, "learning_rate": 2.584771886559803e-05, "loss": 0.0003, "step": 1617 }, { "epoch": 1.47, "learning_rate": 2.5832305795314427e-05, "loss": 0.0003, "step": 1618 }, { "epoch": 1.47, "learning_rate": 2.581689272503083e-05, "loss": 0.0002, "step": 1619 }, { "epoch": 1.48, "learning_rate": 2.5801479654747228e-05, "loss": 0.0001, "step": 1620 }, { "epoch": 1.48, "learning_rate": 2.5786066584463624e-05, "loss": 0.0002, "step": 1621 }, { "epoch": 1.48, "learning_rate": 2.5770653514180026e-05, "loss": 0.0002, "step": 1622 }, { "epoch": 1.48, "learning_rate": 2.5755240443896428e-05, "loss": 0.0002, "step": 1623 }, { "epoch": 1.48, "learning_rate": 2.5739827373612824e-05, "loss": 0.0002, "step": 1624 }, { "epoch": 1.48, "learning_rate": 2.5724414303329226e-05, "loss": 0.4581, "step": 1625 }, { "epoch": 1.48, "learning_rate": 2.5709001233045625e-05, "loss": 0.0005, "step": 1626 }, { "epoch": 1.48, "learning_rate": 2.5693588162762024e-05, "loss": 0.0004, "step": 1627 }, { "epoch": 1.48, "learning_rate": 2.5678175092478423e-05, "loss": 0.0002, "step": 1628 }, { "epoch": 1.48, "learning_rate": 2.5662762022194825e-05, "loss": 0.0001, "step": 1629 }, { "epoch": 1.48, "learning_rate": 2.564734895191122e-05, "loss": 0.0002, "step": 1630 }, { "epoch": 1.49, "learning_rate": 2.5631935881627623e-05, "loss": 0.0003, "step": 1631 }, { "epoch": 1.49, "learning_rate": 2.5616522811344025e-05, "loss": 0.0002, "step": 1632 }, { "epoch": 1.49, "learning_rate": 2.560110974106042e-05, "loss": 0.0002, "step": 1633 }, { "epoch": 1.49, "learning_rate": 2.558569667077682e-05, "loss": 0.0002, "step": 1634 }, { "epoch": 1.49, "learning_rate": 2.5570283600493222e-05, "loss": 0.0002, "step": 1635 }, { "epoch": 1.49, "learning_rate": 2.5554870530209617e-05, "loss": 0.0002, "step": 1636 }, { "epoch": 1.49, "learning_rate": 2.553945745992602e-05, "loss": 0.0003, "step": 1637 }, { "epoch": 1.49, "learning_rate": 2.5524044389642415e-05, "loss": 0.3661, "step": 1638 }, { "epoch": 1.49, "learning_rate": 2.5508631319358817e-05, "loss": 0.0009, "step": 1639 }, { "epoch": 1.49, "learning_rate": 2.549321824907522e-05, "loss": 0.008, "step": 1640 }, { "epoch": 1.49, "learning_rate": 2.5477805178791615e-05, "loss": 0.0002, "step": 1641 }, { "epoch": 1.5, "learning_rate": 2.5462392108508014e-05, "loss": 0.0002, "step": 1642 }, { "epoch": 1.5, "learning_rate": 2.5446979038224416e-05, "loss": 0.0002, "step": 1643 }, { "epoch": 1.5, "learning_rate": 2.5431565967940812e-05, "loss": 0.0002, "step": 1644 }, { "epoch": 1.5, "learning_rate": 2.5416152897657214e-05, "loss": 0.0002, "step": 1645 }, { "epoch": 1.5, "learning_rate": 2.5400739827373616e-05, "loss": 0.0002, "step": 1646 }, { "epoch": 1.5, "learning_rate": 2.5385326757090012e-05, "loss": 0.0003, "step": 1647 }, { "epoch": 1.5, "learning_rate": 2.536991368680641e-05, "loss": 0.0003, "step": 1648 }, { "epoch": 1.5, "learning_rate": 2.5354500616522813e-05, "loss": 0.0002, "step": 1649 }, { "epoch": 1.5, "learning_rate": 2.533908754623921e-05, "loss": 0.0004, "step": 1650 }, { "epoch": 1.5, "learning_rate": 2.532367447595561e-05, "loss": 0.0002, "step": 1651 }, { "epoch": 1.5, "learning_rate": 2.5308261405672013e-05, "loss": 0.0002, "step": 1652 }, { "epoch": 1.51, "learning_rate": 2.529284833538841e-05, "loss": 0.0002, "step": 1653 }, { "epoch": 1.51, "learning_rate": 2.527743526510481e-05, "loss": 0.0002, "step": 1654 }, { "epoch": 1.51, "learning_rate": 2.526202219482121e-05, "loss": 0.0424, "step": 1655 }, { "epoch": 1.51, "learning_rate": 2.5246609124537605e-05, "loss": 0.0003, "step": 1656 }, { "epoch": 1.51, "learning_rate": 2.5231196054254008e-05, "loss": 0.0003, "step": 1657 }, { "epoch": 1.51, "learning_rate": 2.521578298397041e-05, "loss": 0.0013, "step": 1658 }, { "epoch": 1.51, "learning_rate": 2.5200369913686805e-05, "loss": 0.0004, "step": 1659 }, { "epoch": 1.51, "learning_rate": 2.5184956843403208e-05, "loss": 0.0094, "step": 1660 }, { "epoch": 1.51, "learning_rate": 2.5169543773119607e-05, "loss": 0.0002, "step": 1661 }, { "epoch": 1.51, "learning_rate": 2.5154130702836005e-05, "loss": 0.0001, "step": 1662 }, { "epoch": 1.51, "learning_rate": 2.5138717632552404e-05, "loss": 0.0002, "step": 1663 }, { "epoch": 1.52, "learning_rate": 2.5123304562268807e-05, "loss": 0.0002, "step": 1664 }, { "epoch": 1.52, "learning_rate": 2.5107891491985202e-05, "loss": 0.0003, "step": 1665 }, { "epoch": 1.52, "learning_rate": 2.5092478421701604e-05, "loss": 0.0002, "step": 1666 }, { "epoch": 1.52, "learning_rate": 2.5077065351418007e-05, "loss": 0.0001, "step": 1667 }, { "epoch": 1.52, "learning_rate": 2.5061652281134402e-05, "loss": 0.0003, "step": 1668 }, { "epoch": 1.52, "learning_rate": 2.50462392108508e-05, "loss": 0.0006, "step": 1669 }, { "epoch": 1.52, "learning_rate": 2.5030826140567203e-05, "loss": 0.0003, "step": 1670 }, { "epoch": 1.52, "learning_rate": 2.50154130702836e-05, "loss": 0.0001, "step": 1671 }, { "epoch": 1.52, "learning_rate": 2.5e-05, "loss": 0.0012, "step": 1672 }, { "epoch": 1.52, "learning_rate": 2.49845869297164e-05, "loss": 0.0009, "step": 1673 }, { "epoch": 1.52, "learning_rate": 2.4969173859432802e-05, "loss": 0.0002, "step": 1674 }, { "epoch": 1.53, "learning_rate": 2.49537607891492e-05, "loss": 0.0002, "step": 1675 }, { "epoch": 1.53, "learning_rate": 2.4938347718865597e-05, "loss": 0.0002, "step": 1676 }, { "epoch": 1.53, "learning_rate": 2.4922934648582e-05, "loss": 0.0001, "step": 1677 }, { "epoch": 1.53, "learning_rate": 2.4907521578298398e-05, "loss": 0.0008, "step": 1678 }, { "epoch": 1.53, "learning_rate": 2.4892108508014797e-05, "loss": 0.0002, "step": 1679 }, { "epoch": 1.53, "learning_rate": 2.48766954377312e-05, "loss": 0.0002, "step": 1680 }, { "epoch": 1.53, "learning_rate": 2.4861282367447598e-05, "loss": 0.0003, "step": 1681 }, { "epoch": 1.53, "learning_rate": 2.4845869297163997e-05, "loss": 0.0003, "step": 1682 }, { "epoch": 1.53, "learning_rate": 2.4830456226880396e-05, "loss": 0.0004, "step": 1683 }, { "epoch": 1.53, "learning_rate": 2.4815043156596795e-05, "loss": 0.0004, "step": 1684 }, { "epoch": 1.53, "learning_rate": 2.4799630086313194e-05, "loss": 0.0002, "step": 1685 }, { "epoch": 1.54, "learning_rate": 2.4784217016029596e-05, "loss": 0.0003, "step": 1686 }, { "epoch": 1.54, "learning_rate": 2.4768803945745995e-05, "loss": 0.0154, "step": 1687 }, { "epoch": 1.54, "learning_rate": 2.4753390875462394e-05, "loss": 0.0001, "step": 1688 }, { "epoch": 1.54, "learning_rate": 2.4737977805178793e-05, "loss": 0.0002, "step": 1689 }, { "epoch": 1.54, "learning_rate": 2.472256473489519e-05, "loss": 0.0002, "step": 1690 }, { "epoch": 1.54, "learning_rate": 2.470715166461159e-05, "loss": 0.0004, "step": 1691 }, { "epoch": 1.54, "learning_rate": 2.469173859432799e-05, "loss": 0.0002, "step": 1692 }, { "epoch": 1.54, "learning_rate": 2.467632552404439e-05, "loss": 0.0001, "step": 1693 }, { "epoch": 1.54, "learning_rate": 2.466091245376079e-05, "loss": 0.0002, "step": 1694 }, { "epoch": 1.54, "learning_rate": 2.464549938347719e-05, "loss": 0.0002, "step": 1695 }, { "epoch": 1.54, "learning_rate": 2.463008631319359e-05, "loss": 0.0001, "step": 1696 }, { "epoch": 1.55, "learning_rate": 2.4614673242909987e-05, "loss": 0.0001, "step": 1697 }, { "epoch": 1.55, "learning_rate": 2.4599260172626386e-05, "loss": 0.0002, "step": 1698 }, { "epoch": 1.55, "learning_rate": 2.4583847102342788e-05, "loss": 0.0004, "step": 1699 }, { "epoch": 1.55, "learning_rate": 2.4568434032059187e-05, "loss": 0.0002, "step": 1700 }, { "epoch": 1.55, "learning_rate": 2.4553020961775586e-05, "loss": 0.0005, "step": 1701 }, { "epoch": 1.55, "learning_rate": 2.453760789149199e-05, "loss": 0.0001, "step": 1702 }, { "epoch": 1.55, "learning_rate": 2.4522194821208387e-05, "loss": 0.0001, "step": 1703 }, { "epoch": 1.55, "learning_rate": 2.4506781750924783e-05, "loss": 0.0002, "step": 1704 }, { "epoch": 1.55, "learning_rate": 2.4491368680641185e-05, "loss": 0.0109, "step": 1705 }, { "epoch": 1.55, "learning_rate": 2.4475955610357584e-05, "loss": 0.0001, "step": 1706 }, { "epoch": 1.55, "learning_rate": 2.4460542540073983e-05, "loss": 0.0045, "step": 1707 }, { "epoch": 1.56, "learning_rate": 2.4445129469790385e-05, "loss": 0.0002, "step": 1708 }, { "epoch": 1.56, "learning_rate": 2.4429716399506784e-05, "loss": 0.0002, "step": 1709 }, { "epoch": 1.56, "learning_rate": 2.4414303329223183e-05, "loss": 0.0014, "step": 1710 }, { "epoch": 1.56, "learning_rate": 2.4398890258939582e-05, "loss": 0.0002, "step": 1711 }, { "epoch": 1.56, "learning_rate": 2.438347718865598e-05, "loss": 0.0001, "step": 1712 }, { "epoch": 1.56, "learning_rate": 2.436806411837238e-05, "loss": 0.0005, "step": 1713 }, { "epoch": 1.56, "learning_rate": 2.4352651048088782e-05, "loss": 0.0002, "step": 1714 }, { "epoch": 1.56, "learning_rate": 2.433723797780518e-05, "loss": 0.0002, "step": 1715 }, { "epoch": 1.56, "learning_rate": 2.432182490752158e-05, "loss": 0.0001, "step": 1716 }, { "epoch": 1.56, "learning_rate": 2.430641183723798e-05, "loss": 0.0001, "step": 1717 }, { "epoch": 1.56, "learning_rate": 2.4290998766954377e-05, "loss": 0.0001, "step": 1718 }, { "epoch": 1.57, "learning_rate": 2.4275585696670776e-05, "loss": 0.0003, "step": 1719 }, { "epoch": 1.57, "learning_rate": 2.426017262638718e-05, "loss": 0.0001, "step": 1720 }, { "epoch": 1.57, "learning_rate": 2.4244759556103577e-05, "loss": 0.0573, "step": 1721 }, { "epoch": 1.57, "learning_rate": 2.4229346485819976e-05, "loss": 0.0001, "step": 1722 }, { "epoch": 1.57, "learning_rate": 2.421393341553638e-05, "loss": 0.0031, "step": 1723 }, { "epoch": 1.57, "learning_rate": 2.4198520345252778e-05, "loss": 0.0001, "step": 1724 }, { "epoch": 1.57, "learning_rate": 2.4183107274969173e-05, "loss": 0.0001, "step": 1725 }, { "epoch": 1.57, "learning_rate": 2.4167694204685575e-05, "loss": 0.4348, "step": 1726 }, { "epoch": 1.57, "learning_rate": 2.4152281134401974e-05, "loss": 0.0001, "step": 1727 }, { "epoch": 1.57, "learning_rate": 2.4136868064118373e-05, "loss": 0.0001, "step": 1728 }, { "epoch": 1.57, "learning_rate": 2.4121454993834772e-05, "loss": 0.0001, "step": 1729 }, { "epoch": 1.58, "learning_rate": 2.4106041923551174e-05, "loss": 0.0001, "step": 1730 }, { "epoch": 1.58, "learning_rate": 2.4090628853267573e-05, "loss": 0.0001, "step": 1731 }, { "epoch": 1.58, "learning_rate": 2.407521578298397e-05, "loss": 0.2769, "step": 1732 }, { "epoch": 1.58, "learning_rate": 2.405980271270037e-05, "loss": 0.0002, "step": 1733 }, { "epoch": 1.58, "learning_rate": 2.404438964241677e-05, "loss": 0.0002, "step": 1734 }, { "epoch": 1.58, "learning_rate": 2.402897657213317e-05, "loss": 0.325, "step": 1735 }, { "epoch": 1.58, "learning_rate": 2.401356350184957e-05, "loss": 0.0002, "step": 1736 }, { "epoch": 1.58, "learning_rate": 2.399815043156597e-05, "loss": 0.0001, "step": 1737 }, { "epoch": 1.58, "learning_rate": 2.398273736128237e-05, "loss": 0.0002, "step": 1738 }, { "epoch": 1.58, "learning_rate": 2.3967324290998768e-05, "loss": 0.0002, "step": 1739 }, { "epoch": 1.58, "learning_rate": 2.3951911220715167e-05, "loss": 0.0004, "step": 1740 }, { "epoch": 1.59, "learning_rate": 2.3936498150431566e-05, "loss": 0.0002, "step": 1741 }, { "epoch": 1.59, "learning_rate": 2.3921085080147968e-05, "loss": 0.1449, "step": 1742 }, { "epoch": 1.59, "learning_rate": 2.3905672009864367e-05, "loss": 0.0003, "step": 1743 }, { "epoch": 1.59, "learning_rate": 2.3890258939580766e-05, "loss": 0.0002, "step": 1744 }, { "epoch": 1.59, "learning_rate": 2.3874845869297165e-05, "loss": 0.0002, "step": 1745 }, { "epoch": 1.59, "learning_rate": 2.3859432799013563e-05, "loss": 0.0002, "step": 1746 }, { "epoch": 1.59, "learning_rate": 2.3844019728729962e-05, "loss": 0.0002, "step": 1747 }, { "epoch": 1.59, "learning_rate": 2.3828606658446365e-05, "loss": 0.4122, "step": 1748 }, { "epoch": 1.59, "learning_rate": 2.3813193588162763e-05, "loss": 0.0007, "step": 1749 }, { "epoch": 1.59, "learning_rate": 2.3797780517879162e-05, "loss": 0.0004, "step": 1750 }, { "epoch": 1.59, "learning_rate": 2.3782367447595565e-05, "loss": 0.0002, "step": 1751 }, { "epoch": 1.6, "learning_rate": 2.376695437731196e-05, "loss": 0.0003, "step": 1752 }, { "epoch": 1.6, "learning_rate": 2.375154130702836e-05, "loss": 0.0002, "step": 1753 }, { "epoch": 1.6, "learning_rate": 2.373612823674476e-05, "loss": 0.025, "step": 1754 }, { "epoch": 1.6, "learning_rate": 2.372071516646116e-05, "loss": 0.0003, "step": 1755 }, { "epoch": 1.6, "learning_rate": 2.370530209617756e-05, "loss": 0.0003, "step": 1756 }, { "epoch": 1.6, "learning_rate": 2.368988902589396e-05, "loss": 0.0001, "step": 1757 }, { "epoch": 1.6, "learning_rate": 2.367447595561036e-05, "loss": 0.0003, "step": 1758 }, { "epoch": 1.6, "learning_rate": 2.365906288532676e-05, "loss": 0.0008, "step": 1759 }, { "epoch": 1.6, "learning_rate": 2.3643649815043158e-05, "loss": 0.0002, "step": 1760 }, { "epoch": 1.6, "learning_rate": 2.3628236744759557e-05, "loss": 0.0002, "step": 1761 }, { "epoch": 1.6, "learning_rate": 2.3612823674475956e-05, "loss": 0.0002, "step": 1762 }, { "epoch": 1.61, "learning_rate": 2.3597410604192358e-05, "loss": 0.0002, "step": 1763 }, { "epoch": 1.61, "learning_rate": 2.3581997533908757e-05, "loss": 0.0714, "step": 1764 }, { "epoch": 1.61, "learning_rate": 2.3566584463625156e-05, "loss": 0.0009, "step": 1765 }, { "epoch": 1.61, "learning_rate": 2.3551171393341555e-05, "loss": 0.0002, "step": 1766 }, { "epoch": 1.61, "learning_rate": 2.3535758323057954e-05, "loss": 0.0002, "step": 1767 }, { "epoch": 1.61, "learning_rate": 2.3520345252774353e-05, "loss": 0.0002, "step": 1768 }, { "epoch": 1.61, "learning_rate": 2.350493218249075e-05, "loss": 0.0002, "step": 1769 }, { "epoch": 1.61, "learning_rate": 2.3489519112207154e-05, "loss": 0.0002, "step": 1770 }, { "epoch": 1.61, "learning_rate": 2.3474106041923553e-05, "loss": 0.0005, "step": 1771 }, { "epoch": 1.61, "learning_rate": 2.345869297163995e-05, "loss": 0.0002, "step": 1772 }, { "epoch": 1.61, "learning_rate": 2.344327990135635e-05, "loss": 0.0009, "step": 1773 }, { "epoch": 1.62, "learning_rate": 2.342786683107275e-05, "loss": 0.0001, "step": 1774 }, { "epoch": 1.62, "learning_rate": 2.3412453760789148e-05, "loss": 0.0002, "step": 1775 }, { "epoch": 1.62, "learning_rate": 2.339704069050555e-05, "loss": 0.0002, "step": 1776 }, { "epoch": 1.62, "learning_rate": 2.338162762022195e-05, "loss": 0.0002, "step": 1777 }, { "epoch": 1.62, "learning_rate": 2.336621454993835e-05, "loss": 0.0003, "step": 1778 }, { "epoch": 1.62, "learning_rate": 2.335080147965475e-05, "loss": 0.0007, "step": 1779 }, { "epoch": 1.62, "learning_rate": 2.3335388409371146e-05, "loss": 0.0762, "step": 1780 }, { "epoch": 1.62, "learning_rate": 2.3319975339087545e-05, "loss": 0.0002, "step": 1781 }, { "epoch": 1.62, "learning_rate": 2.3304562268803947e-05, "loss": 0.0002, "step": 1782 }, { "epoch": 1.62, "learning_rate": 2.3289149198520346e-05, "loss": 0.1104, "step": 1783 }, { "epoch": 1.62, "learning_rate": 2.3273736128236745e-05, "loss": 0.0001, "step": 1784 }, { "epoch": 1.63, "learning_rate": 2.3258323057953147e-05, "loss": 0.0002, "step": 1785 }, { "epoch": 1.63, "learning_rate": 2.3242909987669546e-05, "loss": 0.0002, "step": 1786 }, { "epoch": 1.63, "learning_rate": 2.3227496917385945e-05, "loss": 0.0001, "step": 1787 }, { "epoch": 1.63, "learning_rate": 2.3212083847102344e-05, "loss": 0.0002, "step": 1788 }, { "epoch": 1.63, "learning_rate": 2.3196670776818743e-05, "loss": 0.0003, "step": 1789 }, { "epoch": 1.63, "learning_rate": 2.3181257706535142e-05, "loss": 0.1291, "step": 1790 }, { "epoch": 1.63, "learning_rate": 2.3165844636251544e-05, "loss": 0.0004, "step": 1791 }, { "epoch": 1.63, "learning_rate": 2.3150431565967943e-05, "loss": 0.0012, "step": 1792 }, { "epoch": 1.63, "learning_rate": 2.3135018495684342e-05, "loss": 0.0001, "step": 1793 }, { "epoch": 1.63, "learning_rate": 2.311960542540074e-05, "loss": 0.0002, "step": 1794 }, { "epoch": 1.63, "learning_rate": 2.310419235511714e-05, "loss": 0.0003, "step": 1795 }, { "epoch": 1.64, "learning_rate": 2.308877928483354e-05, "loss": 0.0002, "step": 1796 }, { "epoch": 1.64, "learning_rate": 2.307336621454994e-05, "loss": 0.0002, "step": 1797 }, { "epoch": 1.64, "learning_rate": 2.305795314426634e-05, "loss": 0.0002, "step": 1798 }, { "epoch": 1.64, "learning_rate": 2.304254007398274e-05, "loss": 0.0002, "step": 1799 }, { "epoch": 1.64, "learning_rate": 2.302712700369914e-05, "loss": 0.0005, "step": 1800 }, { "epoch": 1.64, "learning_rate": 2.3011713933415536e-05, "loss": 0.0002, "step": 1801 }, { "epoch": 1.64, "learning_rate": 2.2996300863131935e-05, "loss": 0.0002, "step": 1802 }, { "epoch": 1.64, "learning_rate": 2.2980887792848334e-05, "loss": 0.0004, "step": 1803 }, { "epoch": 1.64, "learning_rate": 2.2965474722564737e-05, "loss": 0.0004, "step": 1804 }, { "epoch": 1.64, "learning_rate": 2.2950061652281135e-05, "loss": 0.0001, "step": 1805 }, { "epoch": 1.64, "learning_rate": 2.2934648581997534e-05, "loss": 0.0003, "step": 1806 }, { "epoch": 1.65, "learning_rate": 2.2919235511713937e-05, "loss": 0.2283, "step": 1807 }, { "epoch": 1.65, "learning_rate": 2.2903822441430332e-05, "loss": 0.0011, "step": 1808 }, { "epoch": 1.65, "learning_rate": 2.288840937114673e-05, "loss": 0.0002, "step": 1809 }, { "epoch": 1.65, "learning_rate": 2.2872996300863133e-05, "loss": 0.3757, "step": 1810 }, { "epoch": 1.65, "learning_rate": 2.2857583230579532e-05, "loss": 0.0002, "step": 1811 }, { "epoch": 1.65, "learning_rate": 2.284217016029593e-05, "loss": 0.0003, "step": 1812 }, { "epoch": 1.65, "learning_rate": 2.2826757090012333e-05, "loss": 0.0002, "step": 1813 }, { "epoch": 1.65, "learning_rate": 2.2811344019728732e-05, "loss": 0.0002, "step": 1814 }, { "epoch": 1.65, "learning_rate": 2.279593094944513e-05, "loss": 0.0002, "step": 1815 }, { "epoch": 1.65, "learning_rate": 2.278051787916153e-05, "loss": 0.0004, "step": 1816 }, { "epoch": 1.65, "learning_rate": 2.276510480887793e-05, "loss": 0.0002, "step": 1817 }, { "epoch": 1.66, "learning_rate": 2.2749691738594328e-05, "loss": 0.0001, "step": 1818 }, { "epoch": 1.66, "learning_rate": 2.273427866831073e-05, "loss": 0.0005, "step": 1819 }, { "epoch": 1.66, "learning_rate": 2.271886559802713e-05, "loss": 0.0002, "step": 1820 }, { "epoch": 1.66, "learning_rate": 2.2703452527743528e-05, "loss": 0.3631, "step": 1821 }, { "epoch": 1.66, "learning_rate": 2.2688039457459927e-05, "loss": 0.0001, "step": 1822 }, { "epoch": 1.66, "learning_rate": 2.2672626387176326e-05, "loss": 0.0123, "step": 1823 }, { "epoch": 1.66, "learning_rate": 2.2657213316892725e-05, "loss": 0.0056, "step": 1824 }, { "epoch": 1.66, "learning_rate": 2.2641800246609127e-05, "loss": 0.0002, "step": 1825 }, { "epoch": 1.66, "learning_rate": 2.2626387176325526e-05, "loss": 0.297, "step": 1826 }, { "epoch": 1.66, "learning_rate": 2.2610974106041925e-05, "loss": 0.001, "step": 1827 }, { "epoch": 1.66, "learning_rate": 2.2595561035758327e-05, "loss": 0.0018, "step": 1828 }, { "epoch": 1.67, "learning_rate": 2.2580147965474722e-05, "loss": 0.0002, "step": 1829 }, { "epoch": 1.67, "learning_rate": 2.256473489519112e-05, "loss": 0.0002, "step": 1830 }, { "epoch": 1.67, "learning_rate": 2.2549321824907524e-05, "loss": 0.0095, "step": 1831 }, { "epoch": 1.67, "learning_rate": 2.2533908754623922e-05, "loss": 0.0005, "step": 1832 }, { "epoch": 1.67, "learning_rate": 2.251849568434032e-05, "loss": 0.0003, "step": 1833 }, { "epoch": 1.67, "learning_rate": 2.2503082614056724e-05, "loss": 0.0002, "step": 1834 }, { "epoch": 1.67, "learning_rate": 2.2487669543773123e-05, "loss": 0.0003, "step": 1835 }, { "epoch": 1.67, "learning_rate": 2.2472256473489518e-05, "loss": 0.0004, "step": 1836 }, { "epoch": 1.67, "learning_rate": 2.245684340320592e-05, "loss": 0.0018, "step": 1837 }, { "epoch": 1.67, "learning_rate": 2.244143033292232e-05, "loss": 0.0008, "step": 1838 }, { "epoch": 1.67, "learning_rate": 2.2426017262638718e-05, "loss": 0.2804, "step": 1839 }, { "epoch": 1.68, "learning_rate": 2.2410604192355117e-05, "loss": 0.0005, "step": 1840 }, { "epoch": 1.68, "learning_rate": 2.239519112207152e-05, "loss": 0.0002, "step": 1841 }, { "epoch": 1.68, "learning_rate": 2.2379778051787918e-05, "loss": 0.0004, "step": 1842 }, { "epoch": 1.68, "learning_rate": 2.2364364981504317e-05, "loss": 0.001, "step": 1843 }, { "epoch": 1.68, "learning_rate": 2.2348951911220716e-05, "loss": 0.0003, "step": 1844 }, { "epoch": 1.68, "learning_rate": 2.2333538840937115e-05, "loss": 0.0011, "step": 1845 }, { "epoch": 1.68, "learning_rate": 2.2318125770653514e-05, "loss": 0.0002, "step": 1846 }, { "epoch": 1.68, "learning_rate": 2.2302712700369916e-05, "loss": 0.0003, "step": 1847 }, { "epoch": 1.68, "learning_rate": 2.2287299630086315e-05, "loss": 0.0005, "step": 1848 }, { "epoch": 1.68, "learning_rate": 2.2271886559802714e-05, "loss": 0.0009, "step": 1849 }, { "epoch": 1.68, "learning_rate": 2.2256473489519113e-05, "loss": 0.0002, "step": 1850 }, { "epoch": 1.69, "learning_rate": 2.224106041923551e-05, "loss": 0.0002, "step": 1851 }, { "epoch": 1.69, "learning_rate": 2.222564734895191e-05, "loss": 0.208, "step": 1852 }, { "epoch": 1.69, "learning_rate": 2.2210234278668313e-05, "loss": 0.0003, "step": 1853 }, { "epoch": 1.69, "learning_rate": 2.2194821208384712e-05, "loss": 0.0003, "step": 1854 }, { "epoch": 1.69, "learning_rate": 2.217940813810111e-05, "loss": 0.0069, "step": 1855 }, { "epoch": 1.69, "learning_rate": 2.2163995067817513e-05, "loss": 0.0011, "step": 1856 }, { "epoch": 1.69, "learning_rate": 2.214858199753391e-05, "loss": 0.0002, "step": 1857 }, { "epoch": 1.69, "learning_rate": 2.2133168927250307e-05, "loss": 0.0002, "step": 1858 }, { "epoch": 1.69, "learning_rate": 2.211775585696671e-05, "loss": 0.0003, "step": 1859 }, { "epoch": 1.69, "learning_rate": 2.210234278668311e-05, "loss": 0.0002, "step": 1860 }, { "epoch": 1.69, "learning_rate": 2.2086929716399507e-05, "loss": 0.0004, "step": 1861 }, { "epoch": 1.7, "learning_rate": 2.207151664611591e-05, "loss": 0.1334, "step": 1862 }, { "epoch": 1.7, "learning_rate": 2.205610357583231e-05, "loss": 0.0004, "step": 1863 }, { "epoch": 1.7, "learning_rate": 2.2040690505548704e-05, "loss": 0.008, "step": 1864 }, { "epoch": 1.7, "learning_rate": 2.2025277435265106e-05, "loss": 0.0004, "step": 1865 }, { "epoch": 1.7, "learning_rate": 2.2009864364981505e-05, "loss": 0.0002, "step": 1866 }, { "epoch": 1.7, "learning_rate": 2.1994451294697904e-05, "loss": 0.0006, "step": 1867 }, { "epoch": 1.7, "learning_rate": 2.1979038224414306e-05, "loss": 0.001, "step": 1868 }, { "epoch": 1.7, "learning_rate": 2.1963625154130705e-05, "loss": 0.0003, "step": 1869 }, { "epoch": 1.7, "learning_rate": 2.1948212083847104e-05, "loss": 0.0003, "step": 1870 }, { "epoch": 1.7, "learning_rate": 2.1932799013563503e-05, "loss": 0.0002, "step": 1871 }, { "epoch": 1.7, "learning_rate": 2.1917385943279902e-05, "loss": 0.0002, "step": 1872 }, { "epoch": 1.71, "learning_rate": 2.19019728729963e-05, "loss": 0.0002, "step": 1873 }, { "epoch": 1.71, "learning_rate": 2.1886559802712703e-05, "loss": 0.0003, "step": 1874 }, { "epoch": 1.71, "learning_rate": 2.1871146732429102e-05, "loss": 0.0003, "step": 1875 }, { "epoch": 1.71, "learning_rate": 2.18557336621455e-05, "loss": 0.3993, "step": 1876 }, { "epoch": 1.71, "learning_rate": 2.18403205918619e-05, "loss": 0.0003, "step": 1877 }, { "epoch": 1.71, "learning_rate": 2.18249075215783e-05, "loss": 0.0002, "step": 1878 }, { "epoch": 1.71, "learning_rate": 2.1809494451294698e-05, "loss": 0.0005, "step": 1879 }, { "epoch": 1.71, "learning_rate": 2.1794081381011097e-05, "loss": 0.0012, "step": 1880 }, { "epoch": 1.71, "learning_rate": 2.17786683107275e-05, "loss": 0.001, "step": 1881 }, { "epoch": 1.71, "learning_rate": 2.1763255240443898e-05, "loss": 0.0002, "step": 1882 }, { "epoch": 1.71, "learning_rate": 2.1747842170160297e-05, "loss": 0.0001, "step": 1883 }, { "epoch": 1.72, "learning_rate": 2.17324290998767e-05, "loss": 0.0002, "step": 1884 }, { "epoch": 1.72, "learning_rate": 2.1717016029593094e-05, "loss": 0.0002, "step": 1885 }, { "epoch": 1.72, "learning_rate": 2.1701602959309493e-05, "loss": 0.0003, "step": 1886 }, { "epoch": 1.72, "learning_rate": 2.1686189889025896e-05, "loss": 0.0003, "step": 1887 }, { "epoch": 1.72, "learning_rate": 2.1670776818742294e-05, "loss": 0.0002, "step": 1888 }, { "epoch": 1.72, "learning_rate": 2.1655363748458693e-05, "loss": 0.0002, "step": 1889 }, { "epoch": 1.72, "learning_rate": 2.1639950678175096e-05, "loss": 0.0002, "step": 1890 }, { "epoch": 1.72, "learning_rate": 2.1624537607891495e-05, "loss": 0.0002, "step": 1891 }, { "epoch": 1.72, "learning_rate": 2.160912453760789e-05, "loss": 0.0003, "step": 1892 }, { "epoch": 1.72, "learning_rate": 2.1593711467324292e-05, "loss": 0.0003, "step": 1893 }, { "epoch": 1.72, "learning_rate": 2.157829839704069e-05, "loss": 0.0001, "step": 1894 }, { "epoch": 1.73, "learning_rate": 2.156288532675709e-05, "loss": 0.0002, "step": 1895 }, { "epoch": 1.73, "learning_rate": 2.1547472256473492e-05, "loss": 0.0002, "step": 1896 }, { "epoch": 1.73, "learning_rate": 2.153205918618989e-05, "loss": 0.0002, "step": 1897 }, { "epoch": 1.73, "learning_rate": 2.151664611590629e-05, "loss": 0.0001, "step": 1898 }, { "epoch": 1.73, "learning_rate": 2.150123304562269e-05, "loss": 0.0002, "step": 1899 }, { "epoch": 1.73, "learning_rate": 2.1485819975339088e-05, "loss": 0.0002, "step": 1900 }, { "epoch": 1.73, "learning_rate": 2.1470406905055487e-05, "loss": 0.0003, "step": 1901 }, { "epoch": 1.73, "learning_rate": 2.145499383477189e-05, "loss": 0.0002, "step": 1902 }, { "epoch": 1.73, "learning_rate": 2.1439580764488288e-05, "loss": 0.0003, "step": 1903 }, { "epoch": 1.73, "learning_rate": 2.1424167694204687e-05, "loss": 0.0002, "step": 1904 }, { "epoch": 1.73, "learning_rate": 2.1408754623921086e-05, "loss": 0.0001, "step": 1905 }, { "epoch": 1.74, "learning_rate": 2.1393341553637485e-05, "loss": 0.0002, "step": 1906 }, { "epoch": 1.74, "learning_rate": 2.1377928483353884e-05, "loss": 0.0002, "step": 1907 }, { "epoch": 1.74, "learning_rate": 2.1362515413070286e-05, "loss": 0.0022, "step": 1908 }, { "epoch": 1.74, "learning_rate": 2.1347102342786685e-05, "loss": 0.0002, "step": 1909 }, { "epoch": 1.74, "learning_rate": 2.1331689272503084e-05, "loss": 0.0002, "step": 1910 }, { "epoch": 1.74, "learning_rate": 2.1316276202219486e-05, "loss": 0.0015, "step": 1911 }, { "epoch": 1.74, "learning_rate": 2.130086313193588e-05, "loss": 0.0002, "step": 1912 }, { "epoch": 1.74, "learning_rate": 2.128545006165228e-05, "loss": 0.0002, "step": 1913 }, { "epoch": 1.74, "learning_rate": 2.127003699136868e-05, "loss": 0.0002, "step": 1914 }, { "epoch": 1.74, "learning_rate": 2.125462392108508e-05, "loss": 0.0001, "step": 1915 }, { "epoch": 1.74, "learning_rate": 2.123921085080148e-05, "loss": 0.0002, "step": 1916 }, { "epoch": 1.75, "learning_rate": 2.122379778051788e-05, "loss": 0.0002, "step": 1917 }, { "epoch": 1.75, "learning_rate": 2.120838471023428e-05, "loss": 0.0002, "step": 1918 }, { "epoch": 1.75, "learning_rate": 2.119297163995068e-05, "loss": 0.3573, "step": 1919 }, { "epoch": 1.75, "learning_rate": 2.1177558569667076e-05, "loss": 0.0063, "step": 1920 }, { "epoch": 1.75, "learning_rate": 2.1162145499383478e-05, "loss": 0.0005, "step": 1921 }, { "epoch": 1.75, "learning_rate": 2.1146732429099877e-05, "loss": 0.0002, "step": 1922 }, { "epoch": 1.75, "learning_rate": 2.1131319358816276e-05, "loss": 0.0118, "step": 1923 }, { "epoch": 1.75, "learning_rate": 2.111590628853268e-05, "loss": 0.0002, "step": 1924 }, { "epoch": 1.75, "learning_rate": 2.1100493218249077e-05, "loss": 0.0002, "step": 1925 }, { "epoch": 1.75, "learning_rate": 2.1085080147965476e-05, "loss": 0.0013, "step": 1926 }, { "epoch": 1.76, "learning_rate": 2.1069667077681875e-05, "loss": 0.0006, "step": 1927 }, { "epoch": 1.76, "learning_rate": 2.1054254007398274e-05, "loss": 0.0003, "step": 1928 }, { "epoch": 1.76, "learning_rate": 2.1038840937114673e-05, "loss": 0.0002, "step": 1929 }, { "epoch": 1.76, "learning_rate": 2.1023427866831075e-05, "loss": 0.0003, "step": 1930 }, { "epoch": 1.76, "learning_rate": 2.1008014796547474e-05, "loss": 0.0777, "step": 1931 }, { "epoch": 1.76, "learning_rate": 2.0992601726263873e-05, "loss": 0.0005, "step": 1932 }, { "epoch": 1.76, "learning_rate": 2.0977188655980272e-05, "loss": 0.0093, "step": 1933 }, { "epoch": 1.76, "learning_rate": 2.096177558569667e-05, "loss": 0.0006, "step": 1934 }, { "epoch": 1.76, "learning_rate": 2.094636251541307e-05, "loss": 0.0001, "step": 1935 }, { "epoch": 1.76, "learning_rate": 2.0930949445129472e-05, "loss": 0.0002, "step": 1936 }, { "epoch": 1.76, "learning_rate": 2.091553637484587e-05, "loss": 0.0005, "step": 1937 }, { "epoch": 1.77, "learning_rate": 2.090012330456227e-05, "loss": 0.0001, "step": 1938 }, { "epoch": 1.77, "learning_rate": 2.0884710234278672e-05, "loss": 0.2467, "step": 1939 }, { "epoch": 1.77, "learning_rate": 2.0869297163995067e-05, "loss": 0.0001, "step": 1940 }, { "epoch": 1.77, "learning_rate": 2.0853884093711466e-05, "loss": 0.0002, "step": 1941 }, { "epoch": 1.77, "learning_rate": 2.083847102342787e-05, "loss": 0.0003, "step": 1942 }, { "epoch": 1.77, "learning_rate": 2.0823057953144267e-05, "loss": 0.0002, "step": 1943 }, { "epoch": 1.77, "learning_rate": 2.0807644882860666e-05, "loss": 0.0004, "step": 1944 }, { "epoch": 1.77, "learning_rate": 2.079223181257707e-05, "loss": 0.0004, "step": 1945 }, { "epoch": 1.77, "learning_rate": 2.0776818742293468e-05, "loss": 0.0001, "step": 1946 }, { "epoch": 1.77, "learning_rate": 2.0761405672009866e-05, "loss": 0.0006, "step": 1947 }, { "epoch": 1.77, "learning_rate": 2.0745992601726265e-05, "loss": 0.0001, "step": 1948 }, { "epoch": 1.78, "learning_rate": 2.0730579531442664e-05, "loss": 0.0002, "step": 1949 }, { "epoch": 1.78, "learning_rate": 2.0715166461159063e-05, "loss": 0.0008, "step": 1950 }, { "epoch": 1.78, "learning_rate": 2.0699753390875462e-05, "loss": 0.0002, "step": 1951 }, { "epoch": 1.78, "learning_rate": 2.0684340320591864e-05, "loss": 0.0002, "step": 1952 }, { "epoch": 1.78, "learning_rate": 2.0668927250308263e-05, "loss": 0.0001, "step": 1953 }, { "epoch": 1.78, "learning_rate": 2.0653514180024662e-05, "loss": 0.0005, "step": 1954 }, { "epoch": 1.78, "learning_rate": 2.063810110974106e-05, "loss": 0.0002, "step": 1955 }, { "epoch": 1.78, "learning_rate": 2.062268803945746e-05, "loss": 0.0001, "step": 1956 }, { "epoch": 1.78, "learning_rate": 2.060727496917386e-05, "loss": 0.0001, "step": 1957 }, { "epoch": 1.78, "learning_rate": 2.059186189889026e-05, "loss": 0.0008, "step": 1958 }, { "epoch": 1.78, "learning_rate": 2.057644882860666e-05, "loss": 0.0001, "step": 1959 }, { "epoch": 1.79, "learning_rate": 2.056103575832306e-05, "loss": 0.0002, "step": 1960 }, { "epoch": 1.79, "learning_rate": 2.0545622688039458e-05, "loss": 0.0001, "step": 1961 }, { "epoch": 1.79, "learning_rate": 2.0530209617755857e-05, "loss": 0.0001, "step": 1962 }, { "epoch": 1.79, "learning_rate": 2.0514796547472256e-05, "loss": 0.0001, "step": 1963 }, { "epoch": 1.79, "learning_rate": 2.0499383477188658e-05, "loss": 0.0001, "step": 1964 }, { "epoch": 1.79, "learning_rate": 2.0483970406905057e-05, "loss": 0.0003, "step": 1965 }, { "epoch": 1.79, "learning_rate": 2.0468557336621456e-05, "loss": 0.0003, "step": 1966 }, { "epoch": 1.79, "learning_rate": 2.0453144266337858e-05, "loss": 0.0002, "step": 1967 }, { "epoch": 1.79, "learning_rate": 2.0437731196054253e-05, "loss": 0.4616, "step": 1968 }, { "epoch": 1.79, "learning_rate": 2.0422318125770652e-05, "loss": 0.0002, "step": 1969 }, { "epoch": 1.79, "learning_rate": 2.0406905055487055e-05, "loss": 0.0001, "step": 1970 }, { "epoch": 1.8, "learning_rate": 2.0391491985203453e-05, "loss": 0.0005, "step": 1971 }, { "epoch": 1.8, "learning_rate": 2.0376078914919852e-05, "loss": 0.0002, "step": 1972 }, { "epoch": 1.8, "learning_rate": 2.0360665844636255e-05, "loss": 0.0002, "step": 1973 }, { "epoch": 1.8, "learning_rate": 2.0345252774352654e-05, "loss": 0.1046, "step": 1974 }, { "epoch": 1.8, "learning_rate": 2.0329839704069052e-05, "loss": 0.0076, "step": 1975 }, { "epoch": 1.8, "learning_rate": 2.031442663378545e-05, "loss": 0.0015, "step": 1976 }, { "epoch": 1.8, "learning_rate": 2.029901356350185e-05, "loss": 0.0002, "step": 1977 }, { "epoch": 1.8, "learning_rate": 2.028360049321825e-05, "loss": 0.0064, "step": 1978 }, { "epoch": 1.8, "learning_rate": 2.026818742293465e-05, "loss": 0.0003, "step": 1979 }, { "epoch": 1.8, "learning_rate": 2.025277435265105e-05, "loss": 0.0001, "step": 1980 }, { "epoch": 1.8, "learning_rate": 2.023736128236745e-05, "loss": 0.0004, "step": 1981 }, { "epoch": 1.81, "learning_rate": 2.0221948212083848e-05, "loss": 0.0002, "step": 1982 }, { "epoch": 1.81, "learning_rate": 2.0206535141800247e-05, "loss": 0.0003, "step": 1983 }, { "epoch": 1.81, "learning_rate": 2.0191122071516646e-05, "loss": 0.3008, "step": 1984 }, { "epoch": 1.81, "learning_rate": 2.0175709001233048e-05, "loss": 0.0003, "step": 1985 }, { "epoch": 1.81, "learning_rate": 2.0160295930949447e-05, "loss": 0.0002, "step": 1986 }, { "epoch": 1.81, "learning_rate": 2.0144882860665846e-05, "loss": 0.0003, "step": 1987 }, { "epoch": 1.81, "learning_rate": 2.0129469790382245e-05, "loss": 0.0002, "step": 1988 }, { "epoch": 1.81, "learning_rate": 2.0114056720098644e-05, "loss": 0.0008, "step": 1989 }, { "epoch": 1.81, "learning_rate": 2.0098643649815043e-05, "loss": 0.0002, "step": 1990 }, { "epoch": 1.81, "learning_rate": 2.008323057953144e-05, "loss": 0.0003, "step": 1991 }, { "epoch": 1.81, "learning_rate": 2.0067817509247844e-05, "loss": 0.0004, "step": 1992 }, { "epoch": 1.82, "learning_rate": 2.0052404438964243e-05, "loss": 0.0002, "step": 1993 }, { "epoch": 1.82, "learning_rate": 2.003699136868064e-05, "loss": 0.0002, "step": 1994 }, { "epoch": 1.82, "learning_rate": 2.0021578298397044e-05, "loss": 0.1638, "step": 1995 }, { "epoch": 1.82, "learning_rate": 2.000616522811344e-05, "loss": 0.0002, "step": 1996 }, { "epoch": 1.82, "learning_rate": 1.9990752157829838e-05, "loss": 0.5267, "step": 1997 }, { "epoch": 1.82, "learning_rate": 1.997533908754624e-05, "loss": 0.0004, "step": 1998 }, { "epoch": 1.82, "learning_rate": 1.995992601726264e-05, "loss": 0.4323, "step": 1999 }, { "epoch": 1.82, "learning_rate": 1.994451294697904e-05, "loss": 0.5001, "step": 2000 }, { "epoch": 1.82, "learning_rate": 1.992909987669544e-05, "loss": 0.0003, "step": 2001 }, { "epoch": 1.82, "learning_rate": 1.991368680641184e-05, "loss": 0.0002, "step": 2002 }, { "epoch": 1.82, "learning_rate": 1.989827373612824e-05, "loss": 0.0005, "step": 2003 }, { "epoch": 1.83, "learning_rate": 1.9882860665844637e-05, "loss": 0.0023, "step": 2004 }, { "epoch": 1.83, "learning_rate": 1.9867447595561036e-05, "loss": 0.0004, "step": 2005 }, { "epoch": 1.83, "learning_rate": 1.9852034525277435e-05, "loss": 0.0003, "step": 2006 }, { "epoch": 1.83, "learning_rate": 1.9836621454993837e-05, "loss": 0.0026, "step": 2007 }, { "epoch": 1.83, "learning_rate": 1.9821208384710236e-05, "loss": 0.0006, "step": 2008 }, { "epoch": 1.83, "learning_rate": 1.9805795314426635e-05, "loss": 0.0008, "step": 2009 }, { "epoch": 1.83, "learning_rate": 1.9790382244143034e-05, "loss": 0.0018, "step": 2010 }, { "epoch": 1.83, "learning_rate": 1.9774969173859433e-05, "loss": 0.0003, "step": 2011 }, { "epoch": 1.83, "learning_rate": 1.9759556103575832e-05, "loss": 0.0008, "step": 2012 }, { "epoch": 1.83, "learning_rate": 1.9744143033292234e-05, "loss": 0.0005, "step": 2013 }, { "epoch": 1.83, "learning_rate": 1.9728729963008633e-05, "loss": 0.0006, "step": 2014 }, { "epoch": 1.84, "learning_rate": 1.9713316892725032e-05, "loss": 0.0005, "step": 2015 }, { "epoch": 1.84, "learning_rate": 1.9697903822441434e-05, "loss": 0.0023, "step": 2016 }, { "epoch": 1.84, "learning_rate": 1.968249075215783e-05, "loss": 0.0244, "step": 2017 }, { "epoch": 1.84, "learning_rate": 1.966707768187423e-05, "loss": 0.0005, "step": 2018 }, { "epoch": 1.84, "learning_rate": 1.965166461159063e-05, "loss": 0.0011, "step": 2019 }, { "epoch": 1.84, "learning_rate": 1.963625154130703e-05, "loss": 0.0003, "step": 2020 }, { "epoch": 1.84, "learning_rate": 1.962083847102343e-05, "loss": 0.0005, "step": 2021 }, { "epoch": 1.84, "learning_rate": 1.960542540073983e-05, "loss": 0.0854, "step": 2022 }, { "epoch": 1.84, "learning_rate": 1.959001233045623e-05, "loss": 0.0004, "step": 2023 }, { "epoch": 1.84, "learning_rate": 1.9574599260172625e-05, "loss": 0.0005, "step": 2024 }, { "epoch": 1.84, "learning_rate": 1.9559186189889024e-05, "loss": 0.2955, "step": 2025 }, { "epoch": 1.85, "learning_rate": 1.9543773119605427e-05, "loss": 0.0004, "step": 2026 }, { "epoch": 1.85, "learning_rate": 1.9528360049321825e-05, "loss": 0.122, "step": 2027 }, { "epoch": 1.85, "learning_rate": 1.9512946979038224e-05, "loss": 0.0004, "step": 2028 }, { "epoch": 1.85, "learning_rate": 1.9497533908754627e-05, "loss": 0.0006, "step": 2029 }, { "epoch": 1.85, "learning_rate": 1.9482120838471025e-05, "loss": 0.0008, "step": 2030 }, { "epoch": 1.85, "learning_rate": 1.9466707768187424e-05, "loss": 0.0003, "step": 2031 }, { "epoch": 1.85, "learning_rate": 1.9451294697903823e-05, "loss": 0.0004, "step": 2032 }, { "epoch": 1.85, "learning_rate": 1.9435881627620222e-05, "loss": 0.0011, "step": 2033 }, { "epoch": 1.85, "learning_rate": 1.942046855733662e-05, "loss": 0.0013, "step": 2034 }, { "epoch": 1.85, "learning_rate": 1.9405055487053023e-05, "loss": 0.0004, "step": 2035 }, { "epoch": 1.85, "learning_rate": 1.9389642416769422e-05, "loss": 0.0007, "step": 2036 }, { "epoch": 1.86, "learning_rate": 1.937422934648582e-05, "loss": 0.0004, "step": 2037 }, { "epoch": 1.86, "learning_rate": 1.935881627620222e-05, "loss": 0.0003, "step": 2038 }, { "epoch": 1.86, "learning_rate": 1.934340320591862e-05, "loss": 0.0003, "step": 2039 }, { "epoch": 1.86, "learning_rate": 1.9327990135635018e-05, "loss": 0.0008, "step": 2040 }, { "epoch": 1.86, "learning_rate": 1.931257706535142e-05, "loss": 0.0005, "step": 2041 }, { "epoch": 1.86, "learning_rate": 1.929716399506782e-05, "loss": 0.0005, "step": 2042 }, { "epoch": 1.86, "learning_rate": 1.9281750924784218e-05, "loss": 0.0007, "step": 2043 }, { "epoch": 1.86, "learning_rate": 1.9266337854500617e-05, "loss": 0.0005, "step": 2044 }, { "epoch": 1.86, "learning_rate": 1.9250924784217016e-05, "loss": 0.0006, "step": 2045 }, { "epoch": 1.86, "learning_rate": 1.9235511713933415e-05, "loss": 0.0007, "step": 2046 }, { "epoch": 1.86, "learning_rate": 1.9220098643649817e-05, "loss": 0.0004, "step": 2047 }, { "epoch": 1.87, "learning_rate": 1.9204685573366216e-05, "loss": 0.0074, "step": 2048 }, { "epoch": 1.87, "learning_rate": 1.9189272503082615e-05, "loss": 0.0003, "step": 2049 }, { "epoch": 1.87, "learning_rate": 1.9173859432799017e-05, "loss": 0.0166, "step": 2050 }, { "epoch": 1.87, "learning_rate": 1.9158446362515416e-05, "loss": 0.0006, "step": 2051 }, { "epoch": 1.87, "learning_rate": 1.914303329223181e-05, "loss": 0.0003, "step": 2052 }, { "epoch": 1.87, "learning_rate": 1.9127620221948214e-05, "loss": 0.0003, "step": 2053 }, { "epoch": 1.87, "learning_rate": 1.9112207151664612e-05, "loss": 0.0007, "step": 2054 }, { "epoch": 1.87, "learning_rate": 1.909679408138101e-05, "loss": 0.0006, "step": 2055 }, { "epoch": 1.87, "learning_rate": 1.9081381011097414e-05, "loss": 0.0005, "step": 2056 }, { "epoch": 1.87, "learning_rate": 1.9065967940813813e-05, "loss": 0.0005, "step": 2057 }, { "epoch": 1.87, "learning_rate": 1.905055487053021e-05, "loss": 0.0003, "step": 2058 }, { "epoch": 1.88, "learning_rate": 1.903514180024661e-05, "loss": 0.0004, "step": 2059 }, { "epoch": 1.88, "learning_rate": 1.901972872996301e-05, "loss": 0.0006, "step": 2060 }, { "epoch": 1.88, "learning_rate": 1.9004315659679408e-05, "loss": 0.0014, "step": 2061 }, { "epoch": 1.88, "learning_rate": 1.8988902589395807e-05, "loss": 0.0003, "step": 2062 }, { "epoch": 1.88, "learning_rate": 1.897348951911221e-05, "loss": 0.0008, "step": 2063 }, { "epoch": 1.88, "learning_rate": 1.8958076448828608e-05, "loss": 0.0008, "step": 2064 }, { "epoch": 1.88, "learning_rate": 1.8942663378545007e-05, "loss": 0.0002, "step": 2065 }, { "epoch": 1.88, "learning_rate": 1.8927250308261406e-05, "loss": 0.0037, "step": 2066 }, { "epoch": 1.88, "learning_rate": 1.8911837237977805e-05, "loss": 0.0002, "step": 2067 }, { "epoch": 1.88, "learning_rate": 1.8896424167694204e-05, "loss": 0.0664, "step": 2068 }, { "epoch": 1.88, "learning_rate": 1.8881011097410606e-05, "loss": 0.0002, "step": 2069 }, { "epoch": 1.89, "learning_rate": 1.8865598027127005e-05, "loss": 0.0002, "step": 2070 }, { "epoch": 1.89, "learning_rate": 1.8850184956843404e-05, "loss": 0.1942, "step": 2071 }, { "epoch": 1.89, "learning_rate": 1.8834771886559803e-05, "loss": 0.0003, "step": 2072 }, { "epoch": 1.89, "learning_rate": 1.88193588162762e-05, "loss": 0.0003, "step": 2073 }, { "epoch": 1.89, "learning_rate": 1.88039457459926e-05, "loss": 0.0005, "step": 2074 }, { "epoch": 1.89, "learning_rate": 1.8788532675709003e-05, "loss": 0.0003, "step": 2075 }, { "epoch": 1.89, "learning_rate": 1.8773119605425402e-05, "loss": 0.0001, "step": 2076 }, { "epoch": 1.89, "learning_rate": 1.87577065351418e-05, "loss": 0.0004, "step": 2077 }, { "epoch": 1.89, "learning_rate": 1.8742293464858203e-05, "loss": 0.0002, "step": 2078 }, { "epoch": 1.89, "learning_rate": 1.8726880394574602e-05, "loss": 0.0002, "step": 2079 }, { "epoch": 1.89, "learning_rate": 1.8711467324290997e-05, "loss": 0.0003, "step": 2080 }, { "epoch": 1.9, "learning_rate": 1.86960542540074e-05, "loss": 0.0003, "step": 2081 }, { "epoch": 1.9, "learning_rate": 1.86806411837238e-05, "loss": 0.0002, "step": 2082 }, { "epoch": 1.9, "learning_rate": 1.8665228113440197e-05, "loss": 0.0002, "step": 2083 }, { "epoch": 1.9, "learning_rate": 1.86498150431566e-05, "loss": 0.0002, "step": 2084 }, { "epoch": 1.9, "learning_rate": 1.8634401972873e-05, "loss": 0.0002, "step": 2085 }, { "epoch": 1.9, "learning_rate": 1.8618988902589397e-05, "loss": 0.0005, "step": 2086 }, { "epoch": 1.9, "learning_rate": 1.8603575832305796e-05, "loss": 0.0002, "step": 2087 }, { "epoch": 1.9, "learning_rate": 1.8588162762022195e-05, "loss": 0.0002, "step": 2088 }, { "epoch": 1.9, "learning_rate": 1.8572749691738594e-05, "loss": 0.0003, "step": 2089 }, { "epoch": 1.9, "learning_rate": 1.8557336621454996e-05, "loss": 0.0496, "step": 2090 }, { "epoch": 1.9, "learning_rate": 1.8541923551171395e-05, "loss": 0.0003, "step": 2091 }, { "epoch": 1.91, "learning_rate": 1.8526510480887794e-05, "loss": 0.0001, "step": 2092 }, { "epoch": 1.91, "learning_rate": 1.8511097410604193e-05, "loss": 0.0118, "step": 2093 }, { "epoch": 1.91, "learning_rate": 1.8495684340320592e-05, "loss": 0.0003, "step": 2094 }, { "epoch": 1.91, "learning_rate": 1.848027127003699e-05, "loss": 0.0001, "step": 2095 }, { "epoch": 1.91, "learning_rate": 1.846485819975339e-05, "loss": 0.0004, "step": 2096 }, { "epoch": 1.91, "learning_rate": 1.8449445129469792e-05, "loss": 0.4546, "step": 2097 }, { "epoch": 1.91, "learning_rate": 1.843403205918619e-05, "loss": 0.0001, "step": 2098 }, { "epoch": 1.91, "learning_rate": 1.841861898890259e-05, "loss": 0.0002, "step": 2099 }, { "epoch": 1.91, "learning_rate": 1.840320591861899e-05, "loss": 0.0002, "step": 2100 }, { "epoch": 1.91, "learning_rate": 1.8387792848335388e-05, "loss": 0.0002, "step": 2101 }, { "epoch": 1.91, "learning_rate": 1.8372379778051787e-05, "loss": 0.0008, "step": 2102 }, { "epoch": 1.92, "learning_rate": 1.835696670776819e-05, "loss": 0.0004, "step": 2103 }, { "epoch": 1.92, "learning_rate": 1.8341553637484588e-05, "loss": 0.0002, "step": 2104 }, { "epoch": 1.92, "learning_rate": 1.8326140567200987e-05, "loss": 0.0002, "step": 2105 }, { "epoch": 1.92, "learning_rate": 1.831072749691739e-05, "loss": 0.0002, "step": 2106 }, { "epoch": 1.92, "learning_rate": 1.8295314426633788e-05, "loss": 0.0002, "step": 2107 }, { "epoch": 1.92, "learning_rate": 1.8279901356350183e-05, "loss": 0.0002, "step": 2108 }, { "epoch": 1.92, "learning_rate": 1.8264488286066586e-05, "loss": 0.0004, "step": 2109 }, { "epoch": 1.92, "learning_rate": 1.8249075215782984e-05, "loss": 0.0002, "step": 2110 }, { "epoch": 1.92, "learning_rate": 1.8233662145499383e-05, "loss": 0.0002, "step": 2111 }, { "epoch": 1.92, "learning_rate": 1.8218249075215786e-05, "loss": 0.0002, "step": 2112 }, { "epoch": 1.92, "learning_rate": 1.8202836004932185e-05, "loss": 0.0004, "step": 2113 }, { "epoch": 1.93, "learning_rate": 1.8187422934648583e-05, "loss": 0.0003, "step": 2114 }, { "epoch": 1.93, "learning_rate": 1.8172009864364982e-05, "loss": 0.0002, "step": 2115 }, { "epoch": 1.93, "learning_rate": 1.815659679408138e-05, "loss": 0.0003, "step": 2116 }, { "epoch": 1.93, "learning_rate": 1.814118372379778e-05, "loss": 0.0012, "step": 2117 }, { "epoch": 1.93, "learning_rate": 1.8125770653514182e-05, "loss": 0.0002, "step": 2118 }, { "epoch": 1.93, "learning_rate": 1.811035758323058e-05, "loss": 0.0002, "step": 2119 }, { "epoch": 1.93, "learning_rate": 1.809494451294698e-05, "loss": 0.0001, "step": 2120 }, { "epoch": 1.93, "learning_rate": 1.807953144266338e-05, "loss": 0.0007, "step": 2121 }, { "epoch": 1.93, "learning_rate": 1.8064118372379778e-05, "loss": 0.0002, "step": 2122 }, { "epoch": 1.93, "learning_rate": 1.8048705302096177e-05, "loss": 0.0002, "step": 2123 }, { "epoch": 1.93, "learning_rate": 1.803329223181258e-05, "loss": 0.0001, "step": 2124 }, { "epoch": 1.94, "learning_rate": 1.8017879161528978e-05, "loss": 0.0003, "step": 2125 }, { "epoch": 1.94, "learning_rate": 1.8002466091245377e-05, "loss": 0.0002, "step": 2126 }, { "epoch": 1.94, "learning_rate": 1.798705302096178e-05, "loss": 0.0002, "step": 2127 }, { "epoch": 1.94, "learning_rate": 1.7971639950678175e-05, "loss": 0.0002, "step": 2128 }, { "epoch": 1.94, "learning_rate": 1.7956226880394574e-05, "loss": 0.0003, "step": 2129 }, { "epoch": 1.94, "learning_rate": 1.7940813810110976e-05, "loss": 0.002, "step": 2130 }, { "epoch": 1.94, "learning_rate": 1.7925400739827375e-05, "loss": 0.0027, "step": 2131 }, { "epoch": 1.94, "learning_rate": 1.7909987669543774e-05, "loss": 0.1586, "step": 2132 }, { "epoch": 1.94, "learning_rate": 1.7894574599260173e-05, "loss": 0.0003, "step": 2133 }, { "epoch": 1.94, "learning_rate": 1.7879161528976575e-05, "loss": 0.0002, "step": 2134 }, { "epoch": 1.94, "learning_rate": 1.7863748458692974e-05, "loss": 0.0001, "step": 2135 }, { "epoch": 1.95, "learning_rate": 1.784833538840937e-05, "loss": 0.0009, "step": 2136 }, { "epoch": 1.95, "learning_rate": 1.783292231812577e-05, "loss": 0.0006, "step": 2137 }, { "epoch": 1.95, "learning_rate": 1.781750924784217e-05, "loss": 0.0002, "step": 2138 }, { "epoch": 1.95, "learning_rate": 1.780209617755857e-05, "loss": 0.0004, "step": 2139 }, { "epoch": 1.95, "learning_rate": 1.778668310727497e-05, "loss": 0.0005, "step": 2140 }, { "epoch": 1.95, "learning_rate": 1.777127003699137e-05, "loss": 0.0002, "step": 2141 }, { "epoch": 1.95, "learning_rate": 1.775585696670777e-05, "loss": 0.0002, "step": 2142 }, { "epoch": 1.95, "learning_rate": 1.7740443896424168e-05, "loss": 0.0001, "step": 2143 }, { "epoch": 1.95, "learning_rate": 1.7725030826140567e-05, "loss": 0.0014, "step": 2144 }, { "epoch": 1.95, "learning_rate": 1.7709617755856966e-05, "loss": 0.0009, "step": 2145 }, { "epoch": 1.95, "learning_rate": 1.769420468557337e-05, "loss": 0.0807, "step": 2146 }, { "epoch": 1.96, "learning_rate": 1.7678791615289767e-05, "loss": 0.0005, "step": 2147 }, { "epoch": 1.96, "learning_rate": 1.7663378545006166e-05, "loss": 0.0003, "step": 2148 }, { "epoch": 1.96, "learning_rate": 1.7647965474722565e-05, "loss": 0.0002, "step": 2149 }, { "epoch": 1.96, "learning_rate": 1.7632552404438964e-05, "loss": 0.0011, "step": 2150 }, { "epoch": 1.96, "learning_rate": 1.7617139334155363e-05, "loss": 0.0001, "step": 2151 }, { "epoch": 1.96, "learning_rate": 1.7601726263871765e-05, "loss": 0.0001, "step": 2152 }, { "epoch": 1.96, "learning_rate": 1.7586313193588164e-05, "loss": 0.0002, "step": 2153 }, { "epoch": 1.96, "learning_rate": 1.7570900123304563e-05, "loss": 0.0014, "step": 2154 }, { "epoch": 1.96, "learning_rate": 1.7555487053020965e-05, "loss": 0.0001, "step": 2155 }, { "epoch": 1.96, "learning_rate": 1.754007398273736e-05, "loss": 0.037, "step": 2156 }, { "epoch": 1.96, "learning_rate": 1.752466091245376e-05, "loss": 0.0001, "step": 2157 }, { "epoch": 1.97, "learning_rate": 1.7509247842170162e-05, "loss": 0.0003, "step": 2158 }, { "epoch": 1.97, "learning_rate": 1.749383477188656e-05, "loss": 0.0002, "step": 2159 }, { "epoch": 1.97, "learning_rate": 1.747842170160296e-05, "loss": 0.0002, "step": 2160 }, { "epoch": 1.97, "learning_rate": 1.7463008631319362e-05, "loss": 0.0004, "step": 2161 }, { "epoch": 1.97, "learning_rate": 1.744759556103576e-05, "loss": 0.0001, "step": 2162 }, { "epoch": 1.97, "learning_rate": 1.743218249075216e-05, "loss": 0.0003, "step": 2163 }, { "epoch": 1.97, "learning_rate": 1.741676942046856e-05, "loss": 0.0002, "step": 2164 }, { "epoch": 1.97, "learning_rate": 1.7401356350184957e-05, "loss": 0.0003, "step": 2165 }, { "epoch": 1.97, "learning_rate": 1.7385943279901356e-05, "loss": 0.0002, "step": 2166 }, { "epoch": 1.97, "learning_rate": 1.737053020961776e-05, "loss": 0.1369, "step": 2167 }, { "epoch": 1.97, "learning_rate": 1.7355117139334158e-05, "loss": 0.0003, "step": 2168 }, { "epoch": 1.98, "learning_rate": 1.7339704069050556e-05, "loss": 0.0002, "step": 2169 }, { "epoch": 1.98, "learning_rate": 1.7324290998766955e-05, "loss": 0.0003, "step": 2170 }, { "epoch": 1.98, "learning_rate": 1.7308877928483354e-05, "loss": 0.0004, "step": 2171 }, { "epoch": 1.98, "learning_rate": 1.7293464858199753e-05, "loss": 0.0001, "step": 2172 }, { "epoch": 1.98, "learning_rate": 1.7278051787916152e-05, "loss": 0.0002, "step": 2173 }, { "epoch": 1.98, "learning_rate": 1.7262638717632554e-05, "loss": 0.0001, "step": 2174 }, { "epoch": 1.98, "learning_rate": 1.7247225647348953e-05, "loss": 0.0002, "step": 2175 }, { "epoch": 1.98, "learning_rate": 1.7231812577065352e-05, "loss": 0.0001, "step": 2176 }, { "epoch": 1.98, "learning_rate": 1.721639950678175e-05, "loss": 0.0001, "step": 2177 }, { "epoch": 1.98, "learning_rate": 1.720098643649815e-05, "loss": 0.0001, "step": 2178 }, { "epoch": 1.98, "learning_rate": 1.718557336621455e-05, "loss": 0.0002, "step": 2179 }, { "epoch": 1.99, "learning_rate": 1.717016029593095e-05, "loss": 0.0002, "step": 2180 }, { "epoch": 1.99, "learning_rate": 1.715474722564735e-05, "loss": 0.0002, "step": 2181 }, { "epoch": 1.99, "learning_rate": 1.713933415536375e-05, "loss": 0.0002, "step": 2182 }, { "epoch": 1.99, "learning_rate": 1.712392108508015e-05, "loss": 0.0002, "step": 2183 }, { "epoch": 1.99, "learning_rate": 1.7108508014796547e-05, "loss": 0.0005, "step": 2184 }, { "epoch": 1.99, "learning_rate": 1.7093094944512946e-05, "loss": 0.0002, "step": 2185 }, { "epoch": 1.99, "learning_rate": 1.7077681874229348e-05, "loss": 0.0002, "step": 2186 }, { "epoch": 1.99, "learning_rate": 1.7062268803945747e-05, "loss": 0.0002, "step": 2187 }, { "epoch": 1.99, "learning_rate": 1.7046855733662146e-05, "loss": 0.0004, "step": 2188 }, { "epoch": 1.99, "learning_rate": 1.7031442663378548e-05, "loss": 0.0002, "step": 2189 }, { "epoch": 1.99, "learning_rate": 1.7016029593094947e-05, "loss": 0.0001, "step": 2190 }, { "epoch": 2.0, "learning_rate": 1.7000616522811346e-05, "loss": 0.0005, "step": 2191 }, { "epoch": 2.0, "learning_rate": 1.6985203452527745e-05, "loss": 0.0072, "step": 2192 }, { "epoch": 2.0, "learning_rate": 1.6969790382244143e-05, "loss": 0.0001, "step": 2193 }, { "epoch": 2.0, "learning_rate": 1.6954377311960542e-05, "loss": 0.0046, "step": 2194 }, { "epoch": 2.0, "learning_rate": 1.6938964241676945e-05, "loss": 0.0002, "step": 2195 }, { "epoch": 2.0, "learning_rate": 1.6923551171393344e-05, "loss": 0.0002, "step": 2196 }, { "epoch": 2.0, "eval_accuracy": 0.9936247723132969, "eval_loss": 0.03707250580191612, "eval_runtime": 41.1925, "eval_samples_per_second": 106.621, "eval_steps_per_second": 6.676, "step": 2196 }, { "epoch": 2.0, "learning_rate": 1.6908138101109742e-05, "loss": 0.0001, "step": 2197 }, { "epoch": 2.0, "learning_rate": 1.689272503082614e-05, "loss": 0.0002, "step": 2198 }, { "epoch": 2.0, "learning_rate": 1.687731196054254e-05, "loss": 0.0001, "step": 2199 }, { "epoch": 2.0, "learning_rate": 1.686189889025894e-05, "loss": 0.0001, "step": 2200 }, { "epoch": 2.0, "learning_rate": 1.684648581997534e-05, "loss": 0.2896, "step": 2201 }, { "epoch": 2.01, "learning_rate": 1.683107274969174e-05, "loss": 0.0002, "step": 2202 }, { "epoch": 2.01, "learning_rate": 1.681565967940814e-05, "loss": 0.0001, "step": 2203 }, { "epoch": 2.01, "learning_rate": 1.6800246609124538e-05, "loss": 0.0002, "step": 2204 }, { "epoch": 2.01, "learning_rate": 1.6784833538840937e-05, "loss": 0.0002, "step": 2205 }, { "epoch": 2.01, "learning_rate": 1.6769420468557336e-05, "loss": 0.0001, "step": 2206 }, { "epoch": 2.01, "learning_rate": 1.6754007398273735e-05, "loss": 0.0001, "step": 2207 }, { "epoch": 2.01, "learning_rate": 1.6738594327990137e-05, "loss": 0.0001, "step": 2208 }, { "epoch": 2.01, "learning_rate": 1.6723181257706536e-05, "loss": 0.0001, "step": 2209 }, { "epoch": 2.01, "learning_rate": 1.6707768187422935e-05, "loss": 0.0001, "step": 2210 }, { "epoch": 2.01, "learning_rate": 1.6692355117139337e-05, "loss": 0.0002, "step": 2211 }, { "epoch": 2.01, "learning_rate": 1.6676942046855733e-05, "loss": 0.0001, "step": 2212 }, { "epoch": 2.02, "learning_rate": 1.666152897657213e-05, "loss": 0.0001, "step": 2213 }, { "epoch": 2.02, "learning_rate": 1.6646115906288534e-05, "loss": 0.0001, "step": 2214 }, { "epoch": 2.02, "learning_rate": 1.6630702836004933e-05, "loss": 0.0001, "step": 2215 }, { "epoch": 2.02, "learning_rate": 1.661528976572133e-05, "loss": 0.0002, "step": 2216 }, { "epoch": 2.02, "learning_rate": 1.6599876695437734e-05, "loss": 0.0001, "step": 2217 }, { "epoch": 2.02, "learning_rate": 1.6584463625154133e-05, "loss": 0.0001, "step": 2218 }, { "epoch": 2.02, "learning_rate": 1.6569050554870528e-05, "loss": 0.0001, "step": 2219 }, { "epoch": 2.02, "learning_rate": 1.655363748458693e-05, "loss": 0.0001, "step": 2220 }, { "epoch": 2.02, "learning_rate": 1.653822441430333e-05, "loss": 0.0009, "step": 2221 }, { "epoch": 2.02, "learning_rate": 1.652281134401973e-05, "loss": 0.0002, "step": 2222 }, { "epoch": 2.02, "learning_rate": 1.650739827373613e-05, "loss": 0.0001, "step": 2223 }, { "epoch": 2.03, "learning_rate": 1.649198520345253e-05, "loss": 0.0002, "step": 2224 }, { "epoch": 2.03, "learning_rate": 1.647657213316893e-05, "loss": 0.0001, "step": 2225 }, { "epoch": 2.03, "learning_rate": 1.6461159062885327e-05, "loss": 0.0001, "step": 2226 }, { "epoch": 2.03, "learning_rate": 1.6445745992601726e-05, "loss": 0.0001, "step": 2227 }, { "epoch": 2.03, "learning_rate": 1.6430332922318125e-05, "loss": 0.0002, "step": 2228 }, { "epoch": 2.03, "learning_rate": 1.6414919852034527e-05, "loss": 0.0001, "step": 2229 }, { "epoch": 2.03, "learning_rate": 1.6399506781750926e-05, "loss": 0.0001, "step": 2230 }, { "epoch": 2.03, "learning_rate": 1.6384093711467325e-05, "loss": 0.0002, "step": 2231 }, { "epoch": 2.03, "learning_rate": 1.6368680641183724e-05, "loss": 0.0002, "step": 2232 }, { "epoch": 2.03, "learning_rate": 1.6353267570900123e-05, "loss": 0.0006, "step": 2233 }, { "epoch": 2.03, "learning_rate": 1.6337854500616522e-05, "loss": 0.0001, "step": 2234 }, { "epoch": 2.04, "learning_rate": 1.6322441430332924e-05, "loss": 0.0002, "step": 2235 }, { "epoch": 2.04, "learning_rate": 1.6307028360049323e-05, "loss": 0.0002, "step": 2236 }, { "epoch": 2.04, "learning_rate": 1.6291615289765722e-05, "loss": 0.0002, "step": 2237 }, { "epoch": 2.04, "learning_rate": 1.6276202219482124e-05, "loss": 0.0001, "step": 2238 }, { "epoch": 2.04, "learning_rate": 1.6260789149198523e-05, "loss": 0.4639, "step": 2239 }, { "epoch": 2.04, "learning_rate": 1.624537607891492e-05, "loss": 0.0002, "step": 2240 }, { "epoch": 2.04, "learning_rate": 1.622996300863132e-05, "loss": 0.0008, "step": 2241 }, { "epoch": 2.04, "learning_rate": 1.621454993834772e-05, "loss": 0.0001, "step": 2242 }, { "epoch": 2.04, "learning_rate": 1.619913686806412e-05, "loss": 0.0003, "step": 2243 }, { "epoch": 2.04, "learning_rate": 1.6183723797780518e-05, "loss": 0.0006, "step": 2244 }, { "epoch": 2.04, "learning_rate": 1.616831072749692e-05, "loss": 0.0002, "step": 2245 }, { "epoch": 2.05, "learning_rate": 1.615289765721332e-05, "loss": 0.0001, "step": 2246 }, { "epoch": 2.05, "learning_rate": 1.6137484586929714e-05, "loss": 0.0002, "step": 2247 }, { "epoch": 2.05, "learning_rate": 1.6122071516646117e-05, "loss": 0.0001, "step": 2248 }, { "epoch": 2.05, "learning_rate": 1.6106658446362515e-05, "loss": 0.0002, "step": 2249 }, { "epoch": 2.05, "learning_rate": 1.6091245376078914e-05, "loss": 0.0157, "step": 2250 }, { "epoch": 2.05, "learning_rate": 1.6075832305795317e-05, "loss": 0.0002, "step": 2251 }, { "epoch": 2.05, "learning_rate": 1.6060419235511715e-05, "loss": 0.0008, "step": 2252 }, { "epoch": 2.05, "learning_rate": 1.6045006165228114e-05, "loss": 0.0002, "step": 2253 }, { "epoch": 2.05, "learning_rate": 1.6029593094944513e-05, "loss": 0.0004, "step": 2254 }, { "epoch": 2.05, "learning_rate": 1.6014180024660912e-05, "loss": 0.0002, "step": 2255 }, { "epoch": 2.05, "learning_rate": 1.599876695437731e-05, "loss": 0.0001, "step": 2256 }, { "epoch": 2.06, "learning_rate": 1.5983353884093713e-05, "loss": 0.0004, "step": 2257 }, { "epoch": 2.06, "learning_rate": 1.5967940813810112e-05, "loss": 0.0002, "step": 2258 }, { "epoch": 2.06, "learning_rate": 1.595252774352651e-05, "loss": 0.0004, "step": 2259 }, { "epoch": 2.06, "learning_rate": 1.593711467324291e-05, "loss": 0.0004, "step": 2260 }, { "epoch": 2.06, "learning_rate": 1.592170160295931e-05, "loss": 0.0002, "step": 2261 }, { "epoch": 2.06, "learning_rate": 1.5906288532675708e-05, "loss": 0.0005, "step": 2262 }, { "epoch": 2.06, "learning_rate": 1.589087546239211e-05, "loss": 0.0002, "step": 2263 }, { "epoch": 2.06, "learning_rate": 1.587546239210851e-05, "loss": 0.0002, "step": 2264 }, { "epoch": 2.06, "learning_rate": 1.5860049321824908e-05, "loss": 0.0001, "step": 2265 }, { "epoch": 2.06, "learning_rate": 1.584463625154131e-05, "loss": 0.0002, "step": 2266 }, { "epoch": 2.06, "learning_rate": 1.582922318125771e-05, "loss": 0.0001, "step": 2267 }, { "epoch": 2.07, "learning_rate": 1.5813810110974105e-05, "loss": 0.0003, "step": 2268 }, { "epoch": 2.07, "learning_rate": 1.5798397040690507e-05, "loss": 0.0028, "step": 2269 }, { "epoch": 2.07, "learning_rate": 1.5782983970406906e-05, "loss": 0.0051, "step": 2270 }, { "epoch": 2.07, "learning_rate": 1.5767570900123305e-05, "loss": 0.0002, "step": 2271 }, { "epoch": 2.07, "learning_rate": 1.5752157829839707e-05, "loss": 0.0002, "step": 2272 }, { "epoch": 2.07, "learning_rate": 1.5736744759556106e-05, "loss": 0.0001, "step": 2273 }, { "epoch": 2.07, "learning_rate": 1.5721331689272505e-05, "loss": 0.0002, "step": 2274 }, { "epoch": 2.07, "learning_rate": 1.5705918618988904e-05, "loss": 0.0002, "step": 2275 }, { "epoch": 2.07, "learning_rate": 1.5690505548705302e-05, "loss": 0.0002, "step": 2276 }, { "epoch": 2.07, "learning_rate": 1.56750924784217e-05, "loss": 0.0001, "step": 2277 }, { "epoch": 2.07, "learning_rate": 1.5659679408138104e-05, "loss": 0.0002, "step": 2278 }, { "epoch": 2.08, "learning_rate": 1.5644266337854503e-05, "loss": 0.0002, "step": 2279 }, { "epoch": 2.08, "learning_rate": 1.56288532675709e-05, "loss": 0.0001, "step": 2280 }, { "epoch": 2.08, "learning_rate": 1.56134401972873e-05, "loss": 0.0003, "step": 2281 }, { "epoch": 2.08, "learning_rate": 1.55980271270037e-05, "loss": 0.0002, "step": 2282 }, { "epoch": 2.08, "learning_rate": 1.5582614056720098e-05, "loss": 0.4426, "step": 2283 }, { "epoch": 2.08, "learning_rate": 1.5567200986436497e-05, "loss": 0.0001, "step": 2284 }, { "epoch": 2.08, "learning_rate": 1.55517879161529e-05, "loss": 0.0001, "step": 2285 }, { "epoch": 2.08, "learning_rate": 1.5536374845869298e-05, "loss": 0.0001, "step": 2286 }, { "epoch": 2.08, "learning_rate": 1.5520961775585697e-05, "loss": 0.0001, "step": 2287 }, { "epoch": 2.08, "learning_rate": 1.5505548705302096e-05, "loss": 0.0001, "step": 2288 }, { "epoch": 2.08, "learning_rate": 1.5490135635018495e-05, "loss": 0.0002, "step": 2289 }, { "epoch": 2.09, "learning_rate": 1.5474722564734894e-05, "loss": 0.0001, "step": 2290 }, { "epoch": 2.09, "learning_rate": 1.5459309494451296e-05, "loss": 0.0002, "step": 2291 }, { "epoch": 2.09, "learning_rate": 1.5443896424167695e-05, "loss": 0.0002, "step": 2292 }, { "epoch": 2.09, "learning_rate": 1.5428483353884094e-05, "loss": 0.0001, "step": 2293 }, { "epoch": 2.09, "learning_rate": 1.5413070283600496e-05, "loss": 0.0002, "step": 2294 }, { "epoch": 2.09, "learning_rate": 1.5397657213316895e-05, "loss": 0.0002, "step": 2295 }, { "epoch": 2.09, "learning_rate": 1.538224414303329e-05, "loss": 0.0003, "step": 2296 }, { "epoch": 2.09, "learning_rate": 1.5366831072749693e-05, "loss": 0.0001, "step": 2297 }, { "epoch": 2.09, "learning_rate": 1.5351418002466092e-05, "loss": 0.0002, "step": 2298 }, { "epoch": 2.09, "learning_rate": 1.533600493218249e-05, "loss": 0.0002, "step": 2299 }, { "epoch": 2.09, "learning_rate": 1.5320591861898893e-05, "loss": 0.0002, "step": 2300 }, { "epoch": 2.1, "learning_rate": 1.5305178791615292e-05, "loss": 0.1936, "step": 2301 }, { "epoch": 2.1, "learning_rate": 1.528976572133169e-05, "loss": 0.0002, "step": 2302 }, { "epoch": 2.1, "learning_rate": 1.527435265104809e-05, "loss": 0.0001, "step": 2303 }, { "epoch": 2.1, "learning_rate": 1.525893958076449e-05, "loss": 0.0002, "step": 2304 }, { "epoch": 2.1, "learning_rate": 1.5243526510480887e-05, "loss": 0.0001, "step": 2305 }, { "epoch": 2.1, "learning_rate": 1.522811344019729e-05, "loss": 0.0002, "step": 2306 }, { "epoch": 2.1, "learning_rate": 1.5212700369913689e-05, "loss": 0.0001, "step": 2307 }, { "epoch": 2.1, "learning_rate": 1.5197287299630086e-05, "loss": 0.0001, "step": 2308 }, { "epoch": 2.1, "learning_rate": 1.5181874229346488e-05, "loss": 0.0003, "step": 2309 }, { "epoch": 2.1, "learning_rate": 1.5166461159062887e-05, "loss": 0.0001, "step": 2310 }, { "epoch": 2.1, "learning_rate": 1.5151048088779284e-05, "loss": 0.0002, "step": 2311 }, { "epoch": 2.11, "learning_rate": 1.5135635018495686e-05, "loss": 0.0002, "step": 2312 }, { "epoch": 2.11, "learning_rate": 1.5120221948212085e-05, "loss": 0.0001, "step": 2313 }, { "epoch": 2.11, "learning_rate": 1.5104808877928484e-05, "loss": 0.0002, "step": 2314 }, { "epoch": 2.11, "learning_rate": 1.5089395807644885e-05, "loss": 0.0001, "step": 2315 }, { "epoch": 2.11, "learning_rate": 1.5073982737361284e-05, "loss": 0.0002, "step": 2316 }, { "epoch": 2.11, "learning_rate": 1.5058569667077683e-05, "loss": 0.0001, "step": 2317 }, { "epoch": 2.11, "learning_rate": 1.5043156596794081e-05, "loss": 0.0001, "step": 2318 }, { "epoch": 2.11, "learning_rate": 1.5027743526510482e-05, "loss": 0.0001, "step": 2319 }, { "epoch": 2.11, "learning_rate": 1.5012330456226881e-05, "loss": 0.0015, "step": 2320 }, { "epoch": 2.11, "learning_rate": 1.499691738594328e-05, "loss": 0.0899, "step": 2321 }, { "epoch": 2.11, "learning_rate": 1.498150431565968e-05, "loss": 0.0002, "step": 2322 }, { "epoch": 2.12, "learning_rate": 1.496609124537608e-05, "loss": 0.0004, "step": 2323 }, { "epoch": 2.12, "learning_rate": 1.4950678175092478e-05, "loss": 0.0002, "step": 2324 }, { "epoch": 2.12, "learning_rate": 1.4935265104808879e-05, "loss": 0.0001, "step": 2325 }, { "epoch": 2.12, "learning_rate": 1.4919852034525278e-05, "loss": 0.0001, "step": 2326 }, { "epoch": 2.12, "learning_rate": 1.4904438964241677e-05, "loss": 0.0003, "step": 2327 }, { "epoch": 2.12, "learning_rate": 1.4889025893958077e-05, "loss": 0.0001, "step": 2328 }, { "epoch": 2.12, "learning_rate": 1.4873612823674476e-05, "loss": 0.0001, "step": 2329 }, { "epoch": 2.12, "learning_rate": 1.4858199753390875e-05, "loss": 0.0002, "step": 2330 }, { "epoch": 2.12, "learning_rate": 1.4842786683107277e-05, "loss": 0.0002, "step": 2331 }, { "epoch": 2.12, "learning_rate": 1.4827373612823674e-05, "loss": 0.0001, "step": 2332 }, { "epoch": 2.12, "learning_rate": 1.4811960542540073e-05, "loss": 0.0006, "step": 2333 }, { "epoch": 2.13, "learning_rate": 1.4796547472256476e-05, "loss": 0.0001, "step": 2334 }, { "epoch": 2.13, "learning_rate": 1.4781134401972875e-05, "loss": 0.0001, "step": 2335 }, { "epoch": 2.13, "learning_rate": 1.4765721331689272e-05, "loss": 0.0001, "step": 2336 }, { "epoch": 2.13, "learning_rate": 1.4750308261405674e-05, "loss": 0.0002, "step": 2337 }, { "epoch": 2.13, "learning_rate": 1.4734895191122073e-05, "loss": 0.0002, "step": 2338 }, { "epoch": 2.13, "learning_rate": 1.471948212083847e-05, "loss": 0.0001, "step": 2339 }, { "epoch": 2.13, "learning_rate": 1.4704069050554872e-05, "loss": 0.0001, "step": 2340 }, { "epoch": 2.13, "learning_rate": 1.4688655980271271e-05, "loss": 0.0002, "step": 2341 }, { "epoch": 2.13, "learning_rate": 1.467324290998767e-05, "loss": 0.0001, "step": 2342 }, { "epoch": 2.13, "learning_rate": 1.465782983970407e-05, "loss": 0.0002, "step": 2343 }, { "epoch": 2.13, "learning_rate": 1.464241676942047e-05, "loss": 0.0001, "step": 2344 }, { "epoch": 2.14, "learning_rate": 1.4627003699136869e-05, "loss": 0.0004, "step": 2345 }, { "epoch": 2.14, "learning_rate": 1.4611590628853269e-05, "loss": 0.0002, "step": 2346 }, { "epoch": 2.14, "learning_rate": 1.4596177558569668e-05, "loss": 0.0001, "step": 2347 }, { "epoch": 2.14, "learning_rate": 1.4580764488286067e-05, "loss": 0.0002, "step": 2348 }, { "epoch": 2.14, "learning_rate": 1.4565351418002467e-05, "loss": 0.0001, "step": 2349 }, { "epoch": 2.14, "learning_rate": 1.4549938347718866e-05, "loss": 0.0002, "step": 2350 }, { "epoch": 2.14, "learning_rate": 1.4534525277435265e-05, "loss": 0.0004, "step": 2351 }, { "epoch": 2.14, "learning_rate": 1.4519112207151666e-05, "loss": 0.0001, "step": 2352 }, { "epoch": 2.14, "learning_rate": 1.4503699136868065e-05, "loss": 0.0002, "step": 2353 }, { "epoch": 2.14, "learning_rate": 1.4488286066584464e-05, "loss": 0.0001, "step": 2354 }, { "epoch": 2.14, "learning_rate": 1.4472872996300863e-05, "loss": 0.0002, "step": 2355 }, { "epoch": 2.15, "learning_rate": 1.4457459926017263e-05, "loss": 0.0002, "step": 2356 }, { "epoch": 2.15, "learning_rate": 1.4442046855733662e-05, "loss": 0.0002, "step": 2357 }, { "epoch": 2.15, "learning_rate": 1.4426633785450061e-05, "loss": 0.0003, "step": 2358 }, { "epoch": 2.15, "learning_rate": 1.4411220715166463e-05, "loss": 0.0003, "step": 2359 }, { "epoch": 2.15, "learning_rate": 1.439580764488286e-05, "loss": 0.0002, "step": 2360 }, { "epoch": 2.15, "learning_rate": 1.438039457459926e-05, "loss": 0.0001, "step": 2361 }, { "epoch": 2.15, "learning_rate": 1.4364981504315662e-05, "loss": 0.0001, "step": 2362 }, { "epoch": 2.15, "learning_rate": 1.434956843403206e-05, "loss": 0.0001, "step": 2363 }, { "epoch": 2.15, "learning_rate": 1.4334155363748458e-05, "loss": 0.0001, "step": 2364 }, { "epoch": 2.15, "learning_rate": 1.431874229346486e-05, "loss": 0.0002, "step": 2365 }, { "epoch": 2.15, "learning_rate": 1.4303329223181259e-05, "loss": 0.0001, "step": 2366 }, { "epoch": 2.16, "learning_rate": 1.4287916152897656e-05, "loss": 0.0004, "step": 2367 }, { "epoch": 2.16, "learning_rate": 1.4272503082614058e-05, "loss": 0.0001, "step": 2368 }, { "epoch": 2.16, "learning_rate": 1.4257090012330457e-05, "loss": 0.0001, "step": 2369 }, { "epoch": 2.16, "learning_rate": 1.4241676942046856e-05, "loss": 0.0001, "step": 2370 }, { "epoch": 2.16, "learning_rate": 1.4226263871763257e-05, "loss": 0.0001, "step": 2371 }, { "epoch": 2.16, "learning_rate": 1.4210850801479656e-05, "loss": 0.0003, "step": 2372 }, { "epoch": 2.16, "learning_rate": 1.4195437731196054e-05, "loss": 0.0003, "step": 2373 }, { "epoch": 2.16, "learning_rate": 1.4180024660912455e-05, "loss": 0.0001, "step": 2374 }, { "epoch": 2.16, "learning_rate": 1.4164611590628854e-05, "loss": 0.0002, "step": 2375 }, { "epoch": 2.16, "learning_rate": 1.4149198520345253e-05, "loss": 0.0002, "step": 2376 }, { "epoch": 2.16, "learning_rate": 1.4133785450061653e-05, "loss": 0.0001, "step": 2377 }, { "epoch": 2.17, "learning_rate": 1.4118372379778052e-05, "loss": 0.0001, "step": 2378 }, { "epoch": 2.17, "learning_rate": 1.4102959309494451e-05, "loss": 0.0002, "step": 2379 }, { "epoch": 2.17, "learning_rate": 1.4087546239210852e-05, "loss": 0.0002, "step": 2380 }, { "epoch": 2.17, "learning_rate": 1.407213316892725e-05, "loss": 0.0002, "step": 2381 }, { "epoch": 2.17, "learning_rate": 1.405672009864365e-05, "loss": 0.0001, "step": 2382 }, { "epoch": 2.17, "learning_rate": 1.4041307028360052e-05, "loss": 0.0001, "step": 2383 }, { "epoch": 2.17, "learning_rate": 1.4025893958076449e-05, "loss": 0.0001, "step": 2384 }, { "epoch": 2.17, "learning_rate": 1.4010480887792848e-05, "loss": 0.0001, "step": 2385 }, { "epoch": 2.17, "learning_rate": 1.399506781750925e-05, "loss": 0.0001, "step": 2386 }, { "epoch": 2.17, "learning_rate": 1.397965474722565e-05, "loss": 0.0001, "step": 2387 }, { "epoch": 2.17, "learning_rate": 1.3964241676942046e-05, "loss": 0.0001, "step": 2388 }, { "epoch": 2.18, "learning_rate": 1.3948828606658449e-05, "loss": 0.0002, "step": 2389 }, { "epoch": 2.18, "learning_rate": 1.3933415536374848e-05, "loss": 0.0001, "step": 2390 }, { "epoch": 2.18, "learning_rate": 1.3918002466091245e-05, "loss": 0.0001, "step": 2391 }, { "epoch": 2.18, "learning_rate": 1.3902589395807644e-05, "loss": 0.0001, "step": 2392 }, { "epoch": 2.18, "learning_rate": 1.3887176325524046e-05, "loss": 0.0001, "step": 2393 }, { "epoch": 2.18, "learning_rate": 1.3871763255240445e-05, "loss": 0.0001, "step": 2394 }, { "epoch": 2.18, "learning_rate": 1.3856350184956842e-05, "loss": 0.0011, "step": 2395 }, { "epoch": 2.18, "learning_rate": 1.3840937114673244e-05, "loss": 0.0001, "step": 2396 }, { "epoch": 2.18, "learning_rate": 1.3825524044389643e-05, "loss": 0.0001, "step": 2397 }, { "epoch": 2.18, "learning_rate": 1.3810110974106042e-05, "loss": 0.0001, "step": 2398 }, { "epoch": 2.18, "learning_rate": 1.3794697903822443e-05, "loss": 0.0001, "step": 2399 }, { "epoch": 2.19, "learning_rate": 1.3779284833538842e-05, "loss": 0.0001, "step": 2400 }, { "epoch": 2.19, "learning_rate": 1.376387176325524e-05, "loss": 0.0001, "step": 2401 }, { "epoch": 2.19, "learning_rate": 1.3748458692971641e-05, "loss": 0.0001, "step": 2402 }, { "epoch": 2.19, "learning_rate": 1.373304562268804e-05, "loss": 0.0004, "step": 2403 }, { "epoch": 2.19, "learning_rate": 1.3717632552404439e-05, "loss": 0.0002, "step": 2404 }, { "epoch": 2.19, "learning_rate": 1.370221948212084e-05, "loss": 0.0002, "step": 2405 }, { "epoch": 2.19, "learning_rate": 1.3686806411837238e-05, "loss": 0.0003, "step": 2406 }, { "epoch": 2.19, "learning_rate": 1.3671393341553637e-05, "loss": 0.0001, "step": 2407 }, { "epoch": 2.19, "learning_rate": 1.3655980271270038e-05, "loss": 0.0002, "step": 2408 }, { "epoch": 2.19, "learning_rate": 1.3640567200986437e-05, "loss": 0.0002, "step": 2409 }, { "epoch": 2.19, "learning_rate": 1.3625154130702836e-05, "loss": 0.0001, "step": 2410 }, { "epoch": 2.2, "learning_rate": 1.3609741060419238e-05, "loss": 0.0001, "step": 2411 }, { "epoch": 2.2, "learning_rate": 1.3594327990135635e-05, "loss": 0.0001, "step": 2412 }, { "epoch": 2.2, "learning_rate": 1.3578914919852034e-05, "loss": 0.0002, "step": 2413 }, { "epoch": 2.2, "learning_rate": 1.3563501849568436e-05, "loss": 0.0003, "step": 2414 }, { "epoch": 2.2, "learning_rate": 1.3548088779284835e-05, "loss": 0.0001, "step": 2415 }, { "epoch": 2.2, "learning_rate": 1.3532675709001232e-05, "loss": 0.0001, "step": 2416 }, { "epoch": 2.2, "learning_rate": 1.3517262638717635e-05, "loss": 0.0001, "step": 2417 }, { "epoch": 2.2, "learning_rate": 1.3501849568434034e-05, "loss": 0.0001, "step": 2418 }, { "epoch": 2.2, "learning_rate": 1.348643649815043e-05, "loss": 0.0001, "step": 2419 }, { "epoch": 2.2, "learning_rate": 1.3471023427866833e-05, "loss": 0.0001, "step": 2420 }, { "epoch": 2.2, "learning_rate": 1.3455610357583232e-05, "loss": 0.0001, "step": 2421 }, { "epoch": 2.21, "learning_rate": 1.344019728729963e-05, "loss": 0.0002, "step": 2422 }, { "epoch": 2.21, "learning_rate": 1.3424784217016031e-05, "loss": 0.0002, "step": 2423 }, { "epoch": 2.21, "learning_rate": 1.340937114673243e-05, "loss": 0.0002, "step": 2424 }, { "epoch": 2.21, "learning_rate": 1.339395807644883e-05, "loss": 0.0001, "step": 2425 }, { "epoch": 2.21, "learning_rate": 1.337854500616523e-05, "loss": 0.0001, "step": 2426 }, { "epoch": 2.21, "learning_rate": 1.3363131935881629e-05, "loss": 0.0001, "step": 2427 }, { "epoch": 2.21, "learning_rate": 1.3347718865598028e-05, "loss": 0.0001, "step": 2428 }, { "epoch": 2.21, "learning_rate": 1.3332305795314426e-05, "loss": 0.0004, "step": 2429 }, { "epoch": 2.21, "learning_rate": 1.3316892725030827e-05, "loss": 0.0001, "step": 2430 }, { "epoch": 2.21, "learning_rate": 1.3301479654747226e-05, "loss": 0.0001, "step": 2431 }, { "epoch": 2.21, "learning_rate": 1.3286066584463625e-05, "loss": 0.0001, "step": 2432 }, { "epoch": 2.22, "learning_rate": 1.3270653514180025e-05, "loss": 0.0001, "step": 2433 }, { "epoch": 2.22, "learning_rate": 1.3255240443896424e-05, "loss": 0.0001, "step": 2434 }, { "epoch": 2.22, "learning_rate": 1.3239827373612823e-05, "loss": 0.0001, "step": 2435 }, { "epoch": 2.22, "learning_rate": 1.3224414303329224e-05, "loss": 0.0001, "step": 2436 }, { "epoch": 2.22, "learning_rate": 1.3209001233045623e-05, "loss": 0.0002, "step": 2437 }, { "epoch": 2.22, "learning_rate": 1.3193588162762022e-05, "loss": 0.0001, "step": 2438 }, { "epoch": 2.22, "learning_rate": 1.3178175092478424e-05, "loss": 0.0001, "step": 2439 }, { "epoch": 2.22, "learning_rate": 1.3162762022194821e-05, "loss": 0.0001, "step": 2440 }, { "epoch": 2.22, "learning_rate": 1.314734895191122e-05, "loss": 0.0001, "step": 2441 }, { "epoch": 2.22, "learning_rate": 1.3131935881627622e-05, "loss": 0.0001, "step": 2442 }, { "epoch": 2.22, "learning_rate": 1.3116522811344021e-05, "loss": 0.0001, "step": 2443 }, { "epoch": 2.23, "learning_rate": 1.3101109741060418e-05, "loss": 0.0002, "step": 2444 }, { "epoch": 2.23, "learning_rate": 1.308569667077682e-05, "loss": 0.0001, "step": 2445 }, { "epoch": 2.23, "learning_rate": 1.307028360049322e-05, "loss": 0.0001, "step": 2446 }, { "epoch": 2.23, "learning_rate": 1.3054870530209617e-05, "loss": 0.0001, "step": 2447 }, { "epoch": 2.23, "learning_rate": 1.3039457459926019e-05, "loss": 0.0005, "step": 2448 }, { "epoch": 2.23, "learning_rate": 1.3024044389642418e-05, "loss": 0.0001, "step": 2449 }, { "epoch": 2.23, "learning_rate": 1.3008631319358817e-05, "loss": 0.0001, "step": 2450 }, { "epoch": 2.23, "learning_rate": 1.2993218249075217e-05, "loss": 0.0005, "step": 2451 }, { "epoch": 2.23, "learning_rate": 1.2977805178791616e-05, "loss": 0.0001, "step": 2452 }, { "epoch": 2.23, "learning_rate": 1.2962392108508015e-05, "loss": 0.0001, "step": 2453 }, { "epoch": 2.23, "learning_rate": 1.2946979038224416e-05, "loss": 0.0001, "step": 2454 }, { "epoch": 2.24, "learning_rate": 1.2931565967940815e-05, "loss": 0.0001, "step": 2455 }, { "epoch": 2.24, "learning_rate": 1.2916152897657214e-05, "loss": 0.0001, "step": 2456 }, { "epoch": 2.24, "learning_rate": 1.2900739827373614e-05, "loss": 0.0005, "step": 2457 }, { "epoch": 2.24, "learning_rate": 1.2885326757090013e-05, "loss": 0.0004, "step": 2458 }, { "epoch": 2.24, "learning_rate": 1.2869913686806412e-05, "loss": 0.0001, "step": 2459 }, { "epoch": 2.24, "learning_rate": 1.2854500616522812e-05, "loss": 0.0001, "step": 2460 }, { "epoch": 2.24, "learning_rate": 1.2839087546239211e-05, "loss": 0.028, "step": 2461 }, { "epoch": 2.24, "learning_rate": 1.282367447595561e-05, "loss": 0.0001, "step": 2462 }, { "epoch": 2.24, "learning_rate": 1.2808261405672013e-05, "loss": 0.0001, "step": 2463 }, { "epoch": 2.24, "learning_rate": 1.279284833538841e-05, "loss": 0.0001, "step": 2464 }, { "epoch": 2.24, "learning_rate": 1.2777435265104809e-05, "loss": 0.0001, "step": 2465 }, { "epoch": 2.25, "learning_rate": 1.2762022194821208e-05, "loss": 0.0002, "step": 2466 }, { "epoch": 2.25, "learning_rate": 1.274660912453761e-05, "loss": 0.0001, "step": 2467 }, { "epoch": 2.25, "learning_rate": 1.2731196054254007e-05, "loss": 0.0004, "step": 2468 }, { "epoch": 2.25, "learning_rate": 1.2715782983970406e-05, "loss": 0.0001, "step": 2469 }, { "epoch": 2.25, "learning_rate": 1.2700369913686808e-05, "loss": 0.0001, "step": 2470 }, { "epoch": 2.25, "learning_rate": 1.2684956843403205e-05, "loss": 0.0001, "step": 2471 }, { "epoch": 2.25, "learning_rate": 1.2669543773119604e-05, "loss": 0.0001, "step": 2472 }, { "epoch": 2.25, "learning_rate": 1.2654130702836007e-05, "loss": 0.0001, "step": 2473 }, { "epoch": 2.25, "learning_rate": 1.2638717632552405e-05, "loss": 0.0002, "step": 2474 }, { "epoch": 2.25, "learning_rate": 1.2623304562268803e-05, "loss": 0.0001, "step": 2475 }, { "epoch": 2.26, "learning_rate": 1.2607891491985205e-05, "loss": 0.0002, "step": 2476 }, { "epoch": 2.26, "learning_rate": 1.2592478421701604e-05, "loss": 0.0001, "step": 2477 }, { "epoch": 2.26, "learning_rate": 1.2577065351418003e-05, "loss": 0.0001, "step": 2478 }, { "epoch": 2.26, "learning_rate": 1.2561652281134403e-05, "loss": 0.0, "step": 2479 }, { "epoch": 2.26, "learning_rate": 1.2546239210850802e-05, "loss": 0.0241, "step": 2480 }, { "epoch": 2.26, "learning_rate": 1.2530826140567201e-05, "loss": 0.0001, "step": 2481 }, { "epoch": 2.26, "learning_rate": 1.2515413070283602e-05, "loss": 0.0001, "step": 2482 }, { "epoch": 2.26, "learning_rate": 1.25e-05, "loss": 0.0001, "step": 2483 }, { "epoch": 2.26, "learning_rate": 1.2484586929716401e-05, "loss": 0.0001, "step": 2484 }, { "epoch": 2.26, "learning_rate": 1.2469173859432798e-05, "loss": 0.0001, "step": 2485 }, { "epoch": 2.26, "learning_rate": 1.2453760789149199e-05, "loss": 0.0002, "step": 2486 }, { "epoch": 2.27, "learning_rate": 1.24383477188656e-05, "loss": 0.0001, "step": 2487 }, { "epoch": 2.27, "learning_rate": 1.2422934648581998e-05, "loss": 0.0001, "step": 2488 }, { "epoch": 2.27, "learning_rate": 1.2407521578298397e-05, "loss": 0.0001, "step": 2489 }, { "epoch": 2.27, "learning_rate": 1.2392108508014798e-05, "loss": 0.0002, "step": 2490 }, { "epoch": 2.27, "learning_rate": 1.2376695437731197e-05, "loss": 0.0008, "step": 2491 }, { "epoch": 2.27, "learning_rate": 1.2361282367447596e-05, "loss": 0.0001, "step": 2492 }, { "epoch": 2.27, "learning_rate": 1.2345869297163995e-05, "loss": 0.0001, "step": 2493 }, { "epoch": 2.27, "learning_rate": 1.2330456226880395e-05, "loss": 0.0001, "step": 2494 }, { "epoch": 2.27, "learning_rate": 1.2315043156596796e-05, "loss": 0.0001, "step": 2495 }, { "epoch": 2.27, "learning_rate": 1.2299630086313193e-05, "loss": 0.0001, "step": 2496 }, { "epoch": 2.27, "learning_rate": 1.2284217016029594e-05, "loss": 0.0001, "step": 2497 }, { "epoch": 2.28, "learning_rate": 1.2268803945745994e-05, "loss": 0.0001, "step": 2498 }, { "epoch": 2.28, "learning_rate": 1.2253390875462391e-05, "loss": 0.0001, "step": 2499 }, { "epoch": 2.28, "learning_rate": 1.2237977805178792e-05, "loss": 0.0001, "step": 2500 }, { "epoch": 2.28, "learning_rate": 1.2222564734895193e-05, "loss": 0.0001, "step": 2501 }, { "epoch": 2.28, "learning_rate": 1.2207151664611591e-05, "loss": 0.0001, "step": 2502 }, { "epoch": 2.28, "learning_rate": 1.219173859432799e-05, "loss": 0.0001, "step": 2503 }, { "epoch": 2.28, "learning_rate": 1.2176325524044391e-05, "loss": 0.0001, "step": 2504 }, { "epoch": 2.28, "learning_rate": 1.216091245376079e-05, "loss": 0.0001, "step": 2505 }, { "epoch": 2.28, "learning_rate": 1.2145499383477189e-05, "loss": 0.0001, "step": 2506 }, { "epoch": 2.28, "learning_rate": 1.213008631319359e-05, "loss": 0.0001, "step": 2507 }, { "epoch": 2.28, "learning_rate": 1.2114673242909988e-05, "loss": 0.0001, "step": 2508 }, { "epoch": 2.29, "learning_rate": 1.2099260172626389e-05, "loss": 0.0001, "step": 2509 }, { "epoch": 2.29, "learning_rate": 1.2083847102342788e-05, "loss": 0.0001, "step": 2510 }, { "epoch": 2.29, "learning_rate": 1.2068434032059187e-05, "loss": 0.0001, "step": 2511 }, { "epoch": 2.29, "learning_rate": 1.2053020961775587e-05, "loss": 0.0001, "step": 2512 }, { "epoch": 2.29, "learning_rate": 1.2037607891491984e-05, "loss": 0.0001, "step": 2513 }, { "epoch": 2.29, "learning_rate": 1.2022194821208385e-05, "loss": 0.0003, "step": 2514 }, { "epoch": 2.29, "learning_rate": 1.2006781750924786e-05, "loss": 0.0001, "step": 2515 }, { "epoch": 2.29, "learning_rate": 1.1991368680641184e-05, "loss": 0.0001, "step": 2516 }, { "epoch": 2.29, "learning_rate": 1.1975955610357583e-05, "loss": 0.0001, "step": 2517 }, { "epoch": 2.29, "learning_rate": 1.1960542540073984e-05, "loss": 0.0001, "step": 2518 }, { "epoch": 2.29, "learning_rate": 1.1945129469790383e-05, "loss": 0.0002, "step": 2519 }, { "epoch": 2.3, "learning_rate": 1.1929716399506782e-05, "loss": 0.0001, "step": 2520 }, { "epoch": 2.3, "learning_rate": 1.1914303329223182e-05, "loss": 0.0001, "step": 2521 }, { "epoch": 2.3, "learning_rate": 1.1898890258939581e-05, "loss": 0.0001, "step": 2522 }, { "epoch": 2.3, "learning_rate": 1.188347718865598e-05, "loss": 0.0001, "step": 2523 }, { "epoch": 2.3, "learning_rate": 1.186806411837238e-05, "loss": 0.0001, "step": 2524 }, { "epoch": 2.3, "learning_rate": 1.185265104808878e-05, "loss": 0.0001, "step": 2525 }, { "epoch": 2.3, "learning_rate": 1.183723797780518e-05, "loss": 0.0001, "step": 2526 }, { "epoch": 2.3, "learning_rate": 1.1821824907521579e-05, "loss": 0.0001, "step": 2527 }, { "epoch": 2.3, "learning_rate": 1.1806411837237978e-05, "loss": 0.0001, "step": 2528 }, { "epoch": 2.3, "learning_rate": 1.1790998766954379e-05, "loss": 0.3977, "step": 2529 }, { "epoch": 2.3, "learning_rate": 1.1775585696670777e-05, "loss": 0.0001, "step": 2530 }, { "epoch": 2.31, "learning_rate": 1.1760172626387176e-05, "loss": 0.0001, "step": 2531 }, { "epoch": 2.31, "learning_rate": 1.1744759556103577e-05, "loss": 0.0001, "step": 2532 }, { "epoch": 2.31, "learning_rate": 1.1729346485819976e-05, "loss": 0.0001, "step": 2533 }, { "epoch": 2.31, "learning_rate": 1.1713933415536375e-05, "loss": 0.0001, "step": 2534 }, { "epoch": 2.31, "learning_rate": 1.1698520345252775e-05, "loss": 0.0001, "step": 2535 }, { "epoch": 2.31, "learning_rate": 1.1683107274969174e-05, "loss": 0.0001, "step": 2536 }, { "epoch": 2.31, "learning_rate": 1.1667694204685573e-05, "loss": 0.0001, "step": 2537 }, { "epoch": 2.31, "learning_rate": 1.1652281134401974e-05, "loss": 0.0001, "step": 2538 }, { "epoch": 2.31, "learning_rate": 1.1636868064118373e-05, "loss": 0.0001, "step": 2539 }, { "epoch": 2.31, "learning_rate": 1.1621454993834773e-05, "loss": 0.0001, "step": 2540 }, { "epoch": 2.31, "learning_rate": 1.1606041923551172e-05, "loss": 0.0001, "step": 2541 }, { "epoch": 2.32, "learning_rate": 1.1590628853267571e-05, "loss": 0.0001, "step": 2542 }, { "epoch": 2.32, "learning_rate": 1.1575215782983972e-05, "loss": 0.0001, "step": 2543 }, { "epoch": 2.32, "learning_rate": 1.155980271270037e-05, "loss": 0.0001, "step": 2544 }, { "epoch": 2.32, "learning_rate": 1.154438964241677e-05, "loss": 0.0001, "step": 2545 }, { "epoch": 2.32, "learning_rate": 1.152897657213317e-05, "loss": 0.0001, "step": 2546 }, { "epoch": 2.32, "learning_rate": 1.151356350184957e-05, "loss": 0.0001, "step": 2547 }, { "epoch": 2.32, "learning_rate": 1.1498150431565968e-05, "loss": 0.0001, "step": 2548 }, { "epoch": 2.32, "learning_rate": 1.1482737361282368e-05, "loss": 0.0001, "step": 2549 }, { "epoch": 2.32, "learning_rate": 1.1467324290998767e-05, "loss": 0.0001, "step": 2550 }, { "epoch": 2.32, "learning_rate": 1.1451911220715166e-05, "loss": 0.0, "step": 2551 }, { "epoch": 2.32, "learning_rate": 1.1436498150431567e-05, "loss": 0.0001, "step": 2552 }, { "epoch": 2.33, "learning_rate": 1.1421085080147966e-05, "loss": 0.0001, "step": 2553 }, { "epoch": 2.33, "learning_rate": 1.1405672009864366e-05, "loss": 0.0001, "step": 2554 }, { "epoch": 2.33, "learning_rate": 1.1390258939580765e-05, "loss": 0.0021, "step": 2555 }, { "epoch": 2.33, "learning_rate": 1.1374845869297164e-05, "loss": 0.0001, "step": 2556 }, { "epoch": 2.33, "learning_rate": 1.1359432799013564e-05, "loss": 0.0001, "step": 2557 }, { "epoch": 2.33, "learning_rate": 1.1344019728729963e-05, "loss": 0.0001, "step": 2558 }, { "epoch": 2.33, "learning_rate": 1.1328606658446362e-05, "loss": 0.0027, "step": 2559 }, { "epoch": 2.33, "learning_rate": 1.1313193588162763e-05, "loss": 0.0001, "step": 2560 }, { "epoch": 2.33, "learning_rate": 1.1297780517879163e-05, "loss": 0.0057, "step": 2561 }, { "epoch": 2.33, "learning_rate": 1.128236744759556e-05, "loss": 0.0001, "step": 2562 }, { "epoch": 2.33, "learning_rate": 1.1266954377311961e-05, "loss": 0.0001, "step": 2563 }, { "epoch": 2.34, "learning_rate": 1.1251541307028362e-05, "loss": 0.0001, "step": 2564 }, { "epoch": 2.34, "learning_rate": 1.1236128236744759e-05, "loss": 0.1637, "step": 2565 }, { "epoch": 2.34, "learning_rate": 1.122071516646116e-05, "loss": 0.0001, "step": 2566 }, { "epoch": 2.34, "learning_rate": 1.1205302096177559e-05, "loss": 0.0002, "step": 2567 }, { "epoch": 2.34, "learning_rate": 1.1189889025893959e-05, "loss": 0.0001, "step": 2568 }, { "epoch": 2.34, "learning_rate": 1.1174475955610358e-05, "loss": 0.0001, "step": 2569 }, { "epoch": 2.34, "learning_rate": 1.1159062885326757e-05, "loss": 0.0001, "step": 2570 }, { "epoch": 2.34, "learning_rate": 1.1143649815043157e-05, "loss": 0.0001, "step": 2571 }, { "epoch": 2.34, "learning_rate": 1.1128236744759556e-05, "loss": 0.0001, "step": 2572 }, { "epoch": 2.34, "learning_rate": 1.1112823674475955e-05, "loss": 0.0001, "step": 2573 }, { "epoch": 2.34, "learning_rate": 1.1097410604192356e-05, "loss": 0.0001, "step": 2574 }, { "epoch": 2.35, "learning_rate": 1.1081997533908756e-05, "loss": 0.0001, "step": 2575 }, { "epoch": 2.35, "learning_rate": 1.1066584463625154e-05, "loss": 0.0001, "step": 2576 }, { "epoch": 2.35, "learning_rate": 1.1051171393341554e-05, "loss": 0.0002, "step": 2577 }, { "epoch": 2.35, "learning_rate": 1.1035758323057955e-05, "loss": 0.0001, "step": 2578 }, { "epoch": 2.35, "learning_rate": 1.1020345252774352e-05, "loss": 0.0001, "step": 2579 }, { "epoch": 2.35, "learning_rate": 1.1004932182490753e-05, "loss": 0.0001, "step": 2580 }, { "epoch": 2.35, "learning_rate": 1.0989519112207153e-05, "loss": 0.0001, "step": 2581 }, { "epoch": 2.35, "learning_rate": 1.0974106041923552e-05, "loss": 0.0001, "step": 2582 }, { "epoch": 2.35, "learning_rate": 1.0958692971639951e-05, "loss": 0.0001, "step": 2583 }, { "epoch": 2.35, "learning_rate": 1.0943279901356352e-05, "loss": 0.0001, "step": 2584 }, { "epoch": 2.35, "learning_rate": 1.092786683107275e-05, "loss": 0.0001, "step": 2585 }, { "epoch": 2.36, "learning_rate": 1.091245376078915e-05, "loss": 0.0004, "step": 2586 }, { "epoch": 2.36, "learning_rate": 1.0897040690505548e-05, "loss": 0.0002, "step": 2587 }, { "epoch": 2.36, "learning_rate": 1.0881627620221949e-05, "loss": 0.0001, "step": 2588 }, { "epoch": 2.36, "learning_rate": 1.086621454993835e-05, "loss": 0.0001, "step": 2589 }, { "epoch": 2.36, "learning_rate": 1.0850801479654747e-05, "loss": 0.0001, "step": 2590 }, { "epoch": 2.36, "learning_rate": 1.0835388409371147e-05, "loss": 0.0001, "step": 2591 }, { "epoch": 2.36, "learning_rate": 1.0819975339087548e-05, "loss": 0.0001, "step": 2592 }, { "epoch": 2.36, "learning_rate": 1.0804562268803945e-05, "loss": 0.0001, "step": 2593 }, { "epoch": 2.36, "learning_rate": 1.0789149198520346e-05, "loss": 0.0001, "step": 2594 }, { "epoch": 2.36, "learning_rate": 1.0773736128236746e-05, "loss": 0.0023, "step": 2595 }, { "epoch": 2.36, "learning_rate": 1.0758323057953145e-05, "loss": 0.0001, "step": 2596 }, { "epoch": 2.37, "learning_rate": 1.0742909987669544e-05, "loss": 0.0006, "step": 2597 }, { "epoch": 2.37, "learning_rate": 1.0727496917385945e-05, "loss": 0.0001, "step": 2598 }, { "epoch": 2.37, "learning_rate": 1.0712083847102343e-05, "loss": 0.0001, "step": 2599 }, { "epoch": 2.37, "learning_rate": 1.0696670776818742e-05, "loss": 0.0002, "step": 2600 }, { "epoch": 2.37, "learning_rate": 1.0681257706535143e-05, "loss": 0.0001, "step": 2601 }, { "epoch": 2.37, "learning_rate": 1.0665844636251542e-05, "loss": 0.0001, "step": 2602 }, { "epoch": 2.37, "learning_rate": 1.065043156596794e-05, "loss": 0.0072, "step": 2603 }, { "epoch": 2.37, "learning_rate": 1.063501849568434e-05, "loss": 0.0001, "step": 2604 }, { "epoch": 2.37, "learning_rate": 1.061960542540074e-05, "loss": 0.0001, "step": 2605 }, { "epoch": 2.37, "learning_rate": 1.060419235511714e-05, "loss": 0.0002, "step": 2606 }, { "epoch": 2.37, "learning_rate": 1.0588779284833538e-05, "loss": 0.0002, "step": 2607 }, { "epoch": 2.38, "learning_rate": 1.0573366214549939e-05, "loss": 0.0001, "step": 2608 }, { "epoch": 2.38, "learning_rate": 1.055795314426634e-05, "loss": 0.0001, "step": 2609 }, { "epoch": 2.38, "learning_rate": 1.0542540073982738e-05, "loss": 0.0001, "step": 2610 }, { "epoch": 2.38, "learning_rate": 1.0527127003699137e-05, "loss": 0.0001, "step": 2611 }, { "epoch": 2.38, "learning_rate": 1.0511713933415538e-05, "loss": 0.0001, "step": 2612 }, { "epoch": 2.38, "learning_rate": 1.0496300863131936e-05, "loss": 0.0001, "step": 2613 }, { "epoch": 2.38, "learning_rate": 1.0480887792848335e-05, "loss": 0.0001, "step": 2614 }, { "epoch": 2.38, "learning_rate": 1.0465474722564736e-05, "loss": 0.0001, "step": 2615 }, { "epoch": 2.38, "learning_rate": 1.0450061652281135e-05, "loss": 0.0001, "step": 2616 }, { "epoch": 2.38, "learning_rate": 1.0434648581997534e-05, "loss": 0.0001, "step": 2617 }, { "epoch": 2.38, "learning_rate": 1.0419235511713934e-05, "loss": 0.0001, "step": 2618 }, { "epoch": 2.39, "learning_rate": 1.0403822441430333e-05, "loss": 0.5592, "step": 2619 }, { "epoch": 2.39, "learning_rate": 1.0388409371146734e-05, "loss": 0.0001, "step": 2620 }, { "epoch": 2.39, "learning_rate": 1.0372996300863133e-05, "loss": 0.0001, "step": 2621 }, { "epoch": 2.39, "learning_rate": 1.0357583230579532e-05, "loss": 0.0002, "step": 2622 }, { "epoch": 2.39, "learning_rate": 1.0342170160295932e-05, "loss": 0.0001, "step": 2623 }, { "epoch": 2.39, "learning_rate": 1.0326757090012331e-05, "loss": 0.0001, "step": 2624 }, { "epoch": 2.39, "learning_rate": 1.031134401972873e-05, "loss": 0.0001, "step": 2625 }, { "epoch": 2.39, "learning_rate": 1.029593094944513e-05, "loss": 0.0001, "step": 2626 }, { "epoch": 2.39, "learning_rate": 1.028051787916153e-05, "loss": 0.0003, "step": 2627 }, { "epoch": 2.39, "learning_rate": 1.0265104808877928e-05, "loss": 0.0001, "step": 2628 }, { "epoch": 2.39, "learning_rate": 1.0249691738594329e-05, "loss": 0.0001, "step": 2629 }, { "epoch": 2.4, "learning_rate": 1.0234278668310728e-05, "loss": 0.0001, "step": 2630 }, { "epoch": 2.4, "learning_rate": 1.0218865598027127e-05, "loss": 0.0001, "step": 2631 }, { "epoch": 2.4, "learning_rate": 1.0203452527743527e-05, "loss": 0.0001, "step": 2632 }, { "epoch": 2.4, "learning_rate": 1.0188039457459926e-05, "loss": 0.0001, "step": 2633 }, { "epoch": 2.4, "learning_rate": 1.0172626387176327e-05, "loss": 0.0002, "step": 2634 }, { "epoch": 2.4, "learning_rate": 1.0157213316892726e-05, "loss": 0.0001, "step": 2635 }, { "epoch": 2.4, "learning_rate": 1.0141800246609125e-05, "loss": 0.0001, "step": 2636 }, { "epoch": 2.4, "learning_rate": 1.0126387176325525e-05, "loss": 0.0001, "step": 2637 }, { "epoch": 2.4, "learning_rate": 1.0110974106041924e-05, "loss": 0.0001, "step": 2638 }, { "epoch": 2.4, "learning_rate": 1.0095561035758323e-05, "loss": 0.0001, "step": 2639 }, { "epoch": 2.4, "learning_rate": 1.0080147965474724e-05, "loss": 0.0002, "step": 2640 }, { "epoch": 2.41, "learning_rate": 1.0064734895191122e-05, "loss": 0.0001, "step": 2641 }, { "epoch": 2.41, "learning_rate": 1.0049321824907521e-05, "loss": 0.0003, "step": 2642 }, { "epoch": 2.41, "learning_rate": 1.0033908754623922e-05, "loss": 0.0001, "step": 2643 }, { "epoch": 2.41, "learning_rate": 1.001849568434032e-05, "loss": 0.1639, "step": 2644 }, { "epoch": 2.41, "learning_rate": 1.000308261405672e-05, "loss": 0.0001, "step": 2645 }, { "epoch": 2.41, "learning_rate": 9.98766954377312e-06, "loss": 0.0001, "step": 2646 }, { "epoch": 2.41, "learning_rate": 9.97225647348952e-06, "loss": 0.0002, "step": 2647 }, { "epoch": 2.41, "learning_rate": 9.95684340320592e-06, "loss": 0.0003, "step": 2648 }, { "epoch": 2.41, "learning_rate": 9.941430332922319e-06, "loss": 0.0001, "step": 2649 }, { "epoch": 2.41, "learning_rate": 9.926017262638718e-06, "loss": 0.0001, "step": 2650 }, { "epoch": 2.41, "learning_rate": 9.910604192355118e-06, "loss": 0.0001, "step": 2651 }, { "epoch": 2.42, "learning_rate": 9.895191122071517e-06, "loss": 0.0002, "step": 2652 }, { "epoch": 2.42, "learning_rate": 9.879778051787916e-06, "loss": 0.0001, "step": 2653 }, { "epoch": 2.42, "learning_rate": 9.864364981504317e-06, "loss": 0.0001, "step": 2654 }, { "epoch": 2.42, "learning_rate": 9.848951911220717e-06, "loss": 0.0001, "step": 2655 }, { "epoch": 2.42, "learning_rate": 9.833538840937114e-06, "loss": 0.0002, "step": 2656 }, { "epoch": 2.42, "learning_rate": 9.818125770653515e-06, "loss": 0.0001, "step": 2657 }, { "epoch": 2.42, "learning_rate": 9.802712700369915e-06, "loss": 0.0002, "step": 2658 }, { "epoch": 2.42, "learning_rate": 9.787299630086313e-06, "loss": 0.02, "step": 2659 }, { "epoch": 2.42, "learning_rate": 9.771886559802713e-06, "loss": 0.0003, "step": 2660 }, { "epoch": 2.42, "learning_rate": 9.756473489519112e-06, "loss": 0.0002, "step": 2661 }, { "epoch": 2.42, "learning_rate": 9.741060419235513e-06, "loss": 0.0003, "step": 2662 }, { "epoch": 2.43, "learning_rate": 9.725647348951912e-06, "loss": 0.0001, "step": 2663 }, { "epoch": 2.43, "learning_rate": 9.71023427866831e-06, "loss": 0.125, "step": 2664 }, { "epoch": 2.43, "learning_rate": 9.694821208384711e-06, "loss": 0.0001, "step": 2665 }, { "epoch": 2.43, "learning_rate": 9.67940813810111e-06, "loss": 0.0002, "step": 2666 }, { "epoch": 2.43, "learning_rate": 9.663995067817509e-06, "loss": 0.0001, "step": 2667 }, { "epoch": 2.43, "learning_rate": 9.64858199753391e-06, "loss": 0.0001, "step": 2668 }, { "epoch": 2.43, "learning_rate": 9.633168927250308e-06, "loss": 0.0002, "step": 2669 }, { "epoch": 2.43, "learning_rate": 9.617755856966707e-06, "loss": 0.0001, "step": 2670 }, { "epoch": 2.43, "learning_rate": 9.602342786683108e-06, "loss": 0.0001, "step": 2671 }, { "epoch": 2.43, "learning_rate": 9.586929716399508e-06, "loss": 0.0001, "step": 2672 }, { "epoch": 2.43, "learning_rate": 9.571516646115906e-06, "loss": 0.0006, "step": 2673 }, { "epoch": 2.44, "learning_rate": 9.556103575832306e-06, "loss": 0.0002, "step": 2674 }, { "epoch": 2.44, "learning_rate": 9.540690505548707e-06, "loss": 0.0001, "step": 2675 }, { "epoch": 2.44, "learning_rate": 9.525277435265106e-06, "loss": 0.0002, "step": 2676 }, { "epoch": 2.44, "learning_rate": 9.509864364981505e-06, "loss": 0.0001, "step": 2677 }, { "epoch": 2.44, "learning_rate": 9.494451294697904e-06, "loss": 0.0001, "step": 2678 }, { "epoch": 2.44, "learning_rate": 9.479038224414304e-06, "loss": 0.0002, "step": 2679 }, { "epoch": 2.44, "learning_rate": 9.463625154130703e-06, "loss": 0.0002, "step": 2680 }, { "epoch": 2.44, "learning_rate": 9.448212083847102e-06, "loss": 0.0001, "step": 2681 }, { "epoch": 2.44, "learning_rate": 9.432799013563502e-06, "loss": 0.0002, "step": 2682 }, { "epoch": 2.44, "learning_rate": 9.417385943279901e-06, "loss": 0.0001, "step": 2683 }, { "epoch": 2.44, "learning_rate": 9.4019728729963e-06, "loss": 0.0001, "step": 2684 }, { "epoch": 2.45, "learning_rate": 9.386559802712701e-06, "loss": 0.0001, "step": 2685 }, { "epoch": 2.45, "learning_rate": 9.371146732429101e-06, "loss": 0.0003, "step": 2686 }, { "epoch": 2.45, "learning_rate": 9.355733662145499e-06, "loss": 0.0001, "step": 2687 }, { "epoch": 2.45, "learning_rate": 9.3403205918619e-06, "loss": 0.0001, "step": 2688 }, { "epoch": 2.45, "learning_rate": 9.3249075215783e-06, "loss": 0.0009, "step": 2689 }, { "epoch": 2.45, "learning_rate": 9.309494451294699e-06, "loss": 0.0001, "step": 2690 }, { "epoch": 2.45, "learning_rate": 9.294081381011098e-06, "loss": 0.0001, "step": 2691 }, { "epoch": 2.45, "learning_rate": 9.278668310727498e-06, "loss": 0.0001, "step": 2692 }, { "epoch": 2.45, "learning_rate": 9.263255240443897e-06, "loss": 0.0001, "step": 2693 }, { "epoch": 2.45, "learning_rate": 9.247842170160296e-06, "loss": 0.0001, "step": 2694 }, { "epoch": 2.45, "learning_rate": 9.232429099876695e-06, "loss": 0.0001, "step": 2695 }, { "epoch": 2.46, "learning_rate": 9.217016029593095e-06, "loss": 0.0001, "step": 2696 }, { "epoch": 2.46, "learning_rate": 9.201602959309494e-06, "loss": 0.0006, "step": 2697 }, { "epoch": 2.46, "learning_rate": 9.186189889025893e-06, "loss": 0.0001, "step": 2698 }, { "epoch": 2.46, "learning_rate": 9.170776818742294e-06, "loss": 0.0001, "step": 2699 }, { "epoch": 2.46, "learning_rate": 9.155363748458694e-06, "loss": 0.0001, "step": 2700 }, { "epoch": 2.46, "learning_rate": 9.139950678175092e-06, "loss": 0.0001, "step": 2701 }, { "epoch": 2.46, "learning_rate": 9.124537607891492e-06, "loss": 0.0001, "step": 2702 }, { "epoch": 2.46, "learning_rate": 9.109124537607893e-06, "loss": 0.0001, "step": 2703 }, { "epoch": 2.46, "learning_rate": 9.093711467324292e-06, "loss": 0.0001, "step": 2704 }, { "epoch": 2.46, "learning_rate": 9.07829839704069e-06, "loss": 0.0001, "step": 2705 }, { "epoch": 2.46, "learning_rate": 9.062885326757091e-06, "loss": 0.0001, "step": 2706 }, { "epoch": 2.47, "learning_rate": 9.04747225647349e-06, "loss": 0.0001, "step": 2707 }, { "epoch": 2.47, "learning_rate": 9.032059186189889e-06, "loss": 0.0001, "step": 2708 }, { "epoch": 2.47, "learning_rate": 9.01664611590629e-06, "loss": 0.0001, "step": 2709 }, { "epoch": 2.47, "learning_rate": 9.001233045622688e-06, "loss": 0.0001, "step": 2710 }, { "epoch": 2.47, "learning_rate": 8.985819975339087e-06, "loss": 0.0, "step": 2711 }, { "epoch": 2.47, "learning_rate": 8.970406905055488e-06, "loss": 0.0001, "step": 2712 }, { "epoch": 2.47, "learning_rate": 8.954993834771887e-06, "loss": 0.0195, "step": 2713 }, { "epoch": 2.47, "learning_rate": 8.939580764488287e-06, "loss": 0.0001, "step": 2714 }, { "epoch": 2.47, "learning_rate": 8.924167694204685e-06, "loss": 0.0001, "step": 2715 }, { "epoch": 2.47, "learning_rate": 8.908754623921085e-06, "loss": 0.0001, "step": 2716 }, { "epoch": 2.47, "learning_rate": 8.893341553637486e-06, "loss": 0.0002, "step": 2717 }, { "epoch": 2.48, "learning_rate": 8.877928483353885e-06, "loss": 0.0095, "step": 2718 }, { "epoch": 2.48, "learning_rate": 8.862515413070284e-06, "loss": 0.0001, "step": 2719 }, { "epoch": 2.48, "learning_rate": 8.847102342786684e-06, "loss": 0.0001, "step": 2720 }, { "epoch": 2.48, "learning_rate": 8.831689272503083e-06, "loss": 0.0001, "step": 2721 }, { "epoch": 2.48, "learning_rate": 8.816276202219482e-06, "loss": 0.0001, "step": 2722 }, { "epoch": 2.48, "learning_rate": 8.800863131935883e-06, "loss": 0.0006, "step": 2723 }, { "epoch": 2.48, "learning_rate": 8.785450061652281e-06, "loss": 0.0002, "step": 2724 }, { "epoch": 2.48, "learning_rate": 8.77003699136868e-06, "loss": 0.0001, "step": 2725 }, { "epoch": 2.48, "learning_rate": 8.754623921085081e-06, "loss": 0.0001, "step": 2726 }, { "epoch": 2.48, "learning_rate": 8.73921085080148e-06, "loss": 0.0001, "step": 2727 }, { "epoch": 2.48, "learning_rate": 8.72379778051788e-06, "loss": 0.0003, "step": 2728 }, { "epoch": 2.49, "learning_rate": 8.70838471023428e-06, "loss": 0.0001, "step": 2729 }, { "epoch": 2.49, "learning_rate": 8.692971639950678e-06, "loss": 0.0007, "step": 2730 }, { "epoch": 2.49, "learning_rate": 8.677558569667079e-06, "loss": 0.0001, "step": 2731 }, { "epoch": 2.49, "learning_rate": 8.662145499383478e-06, "loss": 0.0001, "step": 2732 }, { "epoch": 2.49, "learning_rate": 8.646732429099877e-06, "loss": 0.0001, "step": 2733 }, { "epoch": 2.49, "learning_rate": 8.631319358816277e-06, "loss": 0.0002, "step": 2734 }, { "epoch": 2.49, "learning_rate": 8.615906288532676e-06, "loss": 0.0001, "step": 2735 }, { "epoch": 2.49, "learning_rate": 8.600493218249075e-06, "loss": 0.0001, "step": 2736 }, { "epoch": 2.49, "learning_rate": 8.585080147965476e-06, "loss": 0.0001, "step": 2737 }, { "epoch": 2.49, "learning_rate": 8.569667077681874e-06, "loss": 0.0001, "step": 2738 }, { "epoch": 2.49, "learning_rate": 8.554254007398273e-06, "loss": 0.0001, "step": 2739 }, { "epoch": 2.5, "learning_rate": 8.538840937114674e-06, "loss": 0.0001, "step": 2740 }, { "epoch": 2.5, "learning_rate": 8.523427866831073e-06, "loss": 0.0002, "step": 2741 }, { "epoch": 2.5, "learning_rate": 8.508014796547473e-06, "loss": 0.0006, "step": 2742 }, { "epoch": 2.5, "learning_rate": 8.492601726263872e-06, "loss": 0.0425, "step": 2743 }, { "epoch": 2.5, "learning_rate": 8.477188655980271e-06, "loss": 0.5047, "step": 2744 }, { "epoch": 2.5, "learning_rate": 8.461775585696672e-06, "loss": 0.0001, "step": 2745 }, { "epoch": 2.5, "learning_rate": 8.44636251541307e-06, "loss": 0.0001, "step": 2746 }, { "epoch": 2.5, "learning_rate": 8.43094944512947e-06, "loss": 0.0002, "step": 2747 }, { "epoch": 2.5, "learning_rate": 8.41553637484587e-06, "loss": 0.0002, "step": 2748 }, { "epoch": 2.5, "learning_rate": 8.400123304562269e-06, "loss": 0.0001, "step": 2749 }, { "epoch": 2.5, "learning_rate": 8.384710234278668e-06, "loss": 0.0002, "step": 2750 }, { "epoch": 2.51, "learning_rate": 8.369297163995069e-06, "loss": 0.0001, "step": 2751 }, { "epoch": 2.51, "learning_rate": 8.353884093711467e-06, "loss": 0.0001, "step": 2752 }, { "epoch": 2.51, "learning_rate": 8.338471023427866e-06, "loss": 0.0001, "step": 2753 }, { "epoch": 2.51, "learning_rate": 8.323057953144267e-06, "loss": 0.0001, "step": 2754 }, { "epoch": 2.51, "learning_rate": 8.307644882860666e-06, "loss": 0.0001, "step": 2755 }, { "epoch": 2.51, "learning_rate": 8.292231812577066e-06, "loss": 0.0001, "step": 2756 }, { "epoch": 2.51, "learning_rate": 8.276818742293465e-06, "loss": 0.0001, "step": 2757 }, { "epoch": 2.51, "learning_rate": 8.261405672009864e-06, "loss": 0.0001, "step": 2758 }, { "epoch": 2.51, "learning_rate": 8.245992601726265e-06, "loss": 0.0001, "step": 2759 }, { "epoch": 2.51, "learning_rate": 8.230579531442664e-06, "loss": 0.0001, "step": 2760 }, { "epoch": 2.51, "learning_rate": 8.215166461159063e-06, "loss": 0.0001, "step": 2761 }, { "epoch": 2.52, "learning_rate": 8.199753390875463e-06, "loss": 0.0001, "step": 2762 }, { "epoch": 2.52, "learning_rate": 8.184340320591862e-06, "loss": 0.0001, "step": 2763 }, { "epoch": 2.52, "learning_rate": 8.168927250308261e-06, "loss": 0.0001, "step": 2764 }, { "epoch": 2.52, "learning_rate": 8.153514180024662e-06, "loss": 0.0001, "step": 2765 }, { "epoch": 2.52, "learning_rate": 8.138101109741062e-06, "loss": 0.0001, "step": 2766 }, { "epoch": 2.52, "learning_rate": 8.12268803945746e-06, "loss": 0.0001, "step": 2767 }, { "epoch": 2.52, "learning_rate": 8.10727496917386e-06, "loss": 0.0001, "step": 2768 }, { "epoch": 2.52, "learning_rate": 8.091861898890259e-06, "loss": 0.0001, "step": 2769 }, { "epoch": 2.52, "learning_rate": 8.07644882860666e-06, "loss": 0.0001, "step": 2770 }, { "epoch": 2.52, "learning_rate": 8.061035758323058e-06, "loss": 0.0001, "step": 2771 }, { "epoch": 2.52, "learning_rate": 8.045622688039457e-06, "loss": 0.0001, "step": 2772 }, { "epoch": 2.53, "learning_rate": 8.030209617755858e-06, "loss": 0.0001, "step": 2773 }, { "epoch": 2.53, "learning_rate": 8.014796547472257e-06, "loss": 0.0001, "step": 2774 }, { "epoch": 2.53, "learning_rate": 7.999383477188656e-06, "loss": 0.0004, "step": 2775 }, { "epoch": 2.53, "learning_rate": 7.983970406905056e-06, "loss": 0.0001, "step": 2776 }, { "epoch": 2.53, "learning_rate": 7.968557336621455e-06, "loss": 0.0001, "step": 2777 }, { "epoch": 2.53, "learning_rate": 7.953144266337854e-06, "loss": 0.0001, "step": 2778 }, { "epoch": 2.53, "learning_rate": 7.937731196054254e-06, "loss": 0.0001, "step": 2779 }, { "epoch": 2.53, "learning_rate": 7.922318125770655e-06, "loss": 0.0001, "step": 2780 }, { "epoch": 2.53, "learning_rate": 7.906905055487052e-06, "loss": 0.0001, "step": 2781 }, { "epoch": 2.53, "learning_rate": 7.891491985203453e-06, "loss": 0.0001, "step": 2782 }, { "epoch": 2.53, "learning_rate": 7.876078914919853e-06, "loss": 0.0001, "step": 2783 }, { "epoch": 2.54, "learning_rate": 7.860665844636252e-06, "loss": 0.0001, "step": 2784 }, { "epoch": 2.54, "learning_rate": 7.845252774352651e-06, "loss": 0.0001, "step": 2785 }, { "epoch": 2.54, "learning_rate": 7.829839704069052e-06, "loss": 0.0001, "step": 2786 }, { "epoch": 2.54, "learning_rate": 7.81442663378545e-06, "loss": 0.0001, "step": 2787 }, { "epoch": 2.54, "learning_rate": 7.79901356350185e-06, "loss": 0.0001, "step": 2788 }, { "epoch": 2.54, "learning_rate": 7.783600493218249e-06, "loss": 0.0001, "step": 2789 }, { "epoch": 2.54, "learning_rate": 7.768187422934649e-06, "loss": 0.0001, "step": 2790 }, { "epoch": 2.54, "learning_rate": 7.752774352651048e-06, "loss": 0.0001, "step": 2791 }, { "epoch": 2.54, "learning_rate": 7.737361282367447e-06, "loss": 0.0001, "step": 2792 }, { "epoch": 2.54, "learning_rate": 7.721948212083847e-06, "loss": 0.0001, "step": 2793 }, { "epoch": 2.54, "learning_rate": 7.706535141800248e-06, "loss": 0.0001, "step": 2794 }, { "epoch": 2.55, "learning_rate": 7.691122071516645e-06, "loss": 0.0001, "step": 2795 }, { "epoch": 2.55, "learning_rate": 7.675709001233046e-06, "loss": 0.0002, "step": 2796 }, { "epoch": 2.55, "learning_rate": 7.660295930949446e-06, "loss": 0.0001, "step": 2797 }, { "epoch": 2.55, "learning_rate": 7.644882860665845e-06, "loss": 0.0001, "step": 2798 }, { "epoch": 2.55, "learning_rate": 7.629469790382244e-06, "loss": 0.0001, "step": 2799 }, { "epoch": 2.55, "learning_rate": 7.614056720098645e-06, "loss": 0.0002, "step": 2800 }, { "epoch": 2.55, "learning_rate": 7.598643649815043e-06, "loss": 0.0001, "step": 2801 }, { "epoch": 2.55, "learning_rate": 7.5832305795314435e-06, "loss": 0.0003, "step": 2802 }, { "epoch": 2.55, "learning_rate": 7.567817509247843e-06, "loss": 0.0001, "step": 2803 }, { "epoch": 2.55, "learning_rate": 7.552404438964242e-06, "loss": 0.0878, "step": 2804 }, { "epoch": 2.55, "learning_rate": 7.536991368680642e-06, "loss": 0.0001, "step": 2805 }, { "epoch": 2.56, "learning_rate": 7.521578298397041e-06, "loss": 0.0001, "step": 2806 }, { "epoch": 2.56, "learning_rate": 7.5061652281134405e-06, "loss": 0.0002, "step": 2807 }, { "epoch": 2.56, "learning_rate": 7.49075215782984e-06, "loss": 0.0001, "step": 2808 }, { "epoch": 2.56, "learning_rate": 7.475339087546239e-06, "loss": 0.0001, "step": 2809 }, { "epoch": 2.56, "learning_rate": 7.459926017262639e-06, "loss": 0.0001, "step": 2810 }, { "epoch": 2.56, "learning_rate": 7.444512946979039e-06, "loss": 0.0001, "step": 2811 }, { "epoch": 2.56, "learning_rate": 7.4290998766954375e-06, "loss": 0.0001, "step": 2812 }, { "epoch": 2.56, "learning_rate": 7.413686806411837e-06, "loss": 0.0002, "step": 2813 }, { "epoch": 2.56, "learning_rate": 7.398273736128238e-06, "loss": 0.0001, "step": 2814 }, { "epoch": 2.56, "learning_rate": 7.382860665844636e-06, "loss": 0.0001, "step": 2815 }, { "epoch": 2.56, "learning_rate": 7.3674475955610364e-06, "loss": 0.0001, "step": 2816 }, { "epoch": 2.57, "learning_rate": 7.352034525277436e-06, "loss": 0.0002, "step": 2817 }, { "epoch": 2.57, "learning_rate": 7.336621454993835e-06, "loss": 0.0001, "step": 2818 }, { "epoch": 2.57, "learning_rate": 7.321208384710235e-06, "loss": 0.0001, "step": 2819 }, { "epoch": 2.57, "learning_rate": 7.3057953144266346e-06, "loss": 0.0001, "step": 2820 }, { "epoch": 2.57, "learning_rate": 7.2903822441430334e-06, "loss": 0.0001, "step": 2821 }, { "epoch": 2.57, "learning_rate": 7.274969173859433e-06, "loss": 0.0001, "step": 2822 }, { "epoch": 2.57, "learning_rate": 7.259556103575833e-06, "loss": 0.0001, "step": 2823 }, { "epoch": 2.57, "learning_rate": 7.244143033292232e-06, "loss": 0.0002, "step": 2824 }, { "epoch": 2.57, "learning_rate": 7.2287299630086316e-06, "loss": 0.0003, "step": 2825 }, { "epoch": 2.57, "learning_rate": 7.2133168927250305e-06, "loss": 0.0001, "step": 2826 }, { "epoch": 2.57, "learning_rate": 7.19790382244143e-06, "loss": 0.0001, "step": 2827 }, { "epoch": 2.58, "learning_rate": 7.182490752157831e-06, "loss": 0.0003, "step": 2828 }, { "epoch": 2.58, "learning_rate": 7.167077681874229e-06, "loss": 0.0001, "step": 2829 }, { "epoch": 2.58, "learning_rate": 7.1516646115906294e-06, "loss": 0.0001, "step": 2830 }, { "epoch": 2.58, "learning_rate": 7.136251541307029e-06, "loss": 0.0001, "step": 2831 }, { "epoch": 2.58, "learning_rate": 7.120838471023428e-06, "loss": 0.0001, "step": 2832 }, { "epoch": 2.58, "learning_rate": 7.105425400739828e-06, "loss": 0.0001, "step": 2833 }, { "epoch": 2.58, "learning_rate": 7.0900123304562275e-06, "loss": 0.0001, "step": 2834 }, { "epoch": 2.58, "learning_rate": 7.0745992601726264e-06, "loss": 0.0001, "step": 2835 }, { "epoch": 2.58, "learning_rate": 7.059186189889026e-06, "loss": 0.0001, "step": 2836 }, { "epoch": 2.58, "learning_rate": 7.043773119605426e-06, "loss": 0.0001, "step": 2837 }, { "epoch": 2.58, "learning_rate": 7.028360049321825e-06, "loss": 0.0001, "step": 2838 }, { "epoch": 2.59, "learning_rate": 7.0129469790382246e-06, "loss": 0.0001, "step": 2839 }, { "epoch": 2.59, "learning_rate": 6.997533908754625e-06, "loss": 0.0001, "step": 2840 }, { "epoch": 2.59, "learning_rate": 6.982120838471023e-06, "loss": 0.0001, "step": 2841 }, { "epoch": 2.59, "learning_rate": 6.966707768187424e-06, "loss": 0.0001, "step": 2842 }, { "epoch": 2.59, "learning_rate": 6.951294697903822e-06, "loss": 0.0001, "step": 2843 }, { "epoch": 2.59, "learning_rate": 6.935881627620222e-06, "loss": 0.0001, "step": 2844 }, { "epoch": 2.59, "learning_rate": 6.920468557336622e-06, "loss": 0.0001, "step": 2845 }, { "epoch": 2.59, "learning_rate": 6.905055487053021e-06, "loss": 0.0001, "step": 2846 }, { "epoch": 2.59, "learning_rate": 6.889642416769421e-06, "loss": 0.0001, "step": 2847 }, { "epoch": 2.59, "learning_rate": 6.8742293464858205e-06, "loss": 0.0001, "step": 2848 }, { "epoch": 2.59, "learning_rate": 6.858816276202219e-06, "loss": 0.0034, "step": 2849 }, { "epoch": 2.6, "learning_rate": 6.843403205918619e-06, "loss": 0.0002, "step": 2850 }, { "epoch": 2.6, "learning_rate": 6.827990135635019e-06, "loss": 0.0001, "step": 2851 }, { "epoch": 2.6, "learning_rate": 6.812577065351418e-06, "loss": 0.0001, "step": 2852 }, { "epoch": 2.6, "learning_rate": 6.7971639950678175e-06, "loss": 0.0002, "step": 2853 }, { "epoch": 2.6, "learning_rate": 6.781750924784218e-06, "loss": 0.0001, "step": 2854 }, { "epoch": 2.6, "learning_rate": 6.766337854500616e-06, "loss": 0.0001, "step": 2855 }, { "epoch": 2.6, "learning_rate": 6.750924784217017e-06, "loss": 0.0001, "step": 2856 }, { "epoch": 2.6, "learning_rate": 6.7355117139334165e-06, "loss": 0.0001, "step": 2857 }, { "epoch": 2.6, "learning_rate": 6.720098643649815e-06, "loss": 0.0001, "step": 2858 }, { "epoch": 2.6, "learning_rate": 6.704685573366215e-06, "loss": 0.0001, "step": 2859 }, { "epoch": 2.6, "learning_rate": 6.689272503082615e-06, "loss": 0.0001, "step": 2860 }, { "epoch": 2.61, "learning_rate": 6.673859432799014e-06, "loss": 0.0002, "step": 2861 }, { "epoch": 2.61, "learning_rate": 6.6584463625154135e-06, "loss": 0.0002, "step": 2862 }, { "epoch": 2.61, "learning_rate": 6.643033292231812e-06, "loss": 0.0005, "step": 2863 }, { "epoch": 2.61, "learning_rate": 6.627620221948212e-06, "loss": 0.0002, "step": 2864 }, { "epoch": 2.61, "learning_rate": 6.612207151664612e-06, "loss": 0.0001, "step": 2865 }, { "epoch": 2.61, "learning_rate": 6.596794081381011e-06, "loss": 0.0003, "step": 2866 }, { "epoch": 2.61, "learning_rate": 6.5813810110974105e-06, "loss": 0.0002, "step": 2867 }, { "epoch": 2.61, "learning_rate": 6.565967940813811e-06, "loss": 0.0013, "step": 2868 }, { "epoch": 2.61, "learning_rate": 6.550554870530209e-06, "loss": 0.0001, "step": 2869 }, { "epoch": 2.61, "learning_rate": 6.53514180024661e-06, "loss": 0.0001, "step": 2870 }, { "epoch": 2.61, "learning_rate": 6.5197287299630095e-06, "loss": 0.0001, "step": 2871 }, { "epoch": 2.62, "learning_rate": 6.504315659679408e-06, "loss": 0.0001, "step": 2872 }, { "epoch": 2.62, "learning_rate": 6.488902589395808e-06, "loss": 0.0001, "step": 2873 }, { "epoch": 2.62, "learning_rate": 6.473489519112208e-06, "loss": 0.0001, "step": 2874 }, { "epoch": 2.62, "learning_rate": 6.458076448828607e-06, "loss": 0.0001, "step": 2875 }, { "epoch": 2.62, "learning_rate": 6.4426633785450065e-06, "loss": 0.3985, "step": 2876 }, { "epoch": 2.62, "learning_rate": 6.427250308261406e-06, "loss": 0.0002, "step": 2877 }, { "epoch": 2.62, "learning_rate": 6.411837237977805e-06, "loss": 0.0001, "step": 2878 }, { "epoch": 2.62, "learning_rate": 6.396424167694205e-06, "loss": 0.0001, "step": 2879 }, { "epoch": 2.62, "learning_rate": 6.381011097410604e-06, "loss": 0.0001, "step": 2880 }, { "epoch": 2.62, "learning_rate": 6.3655980271270035e-06, "loss": 0.0001, "step": 2881 }, { "epoch": 2.62, "learning_rate": 6.350184956843404e-06, "loss": 0.0005, "step": 2882 }, { "epoch": 2.63, "learning_rate": 6.334771886559802e-06, "loss": 0.0001, "step": 2883 }, { "epoch": 2.63, "learning_rate": 6.319358816276203e-06, "loss": 0.0001, "step": 2884 }, { "epoch": 2.63, "learning_rate": 6.3039457459926025e-06, "loss": 0.0006, "step": 2885 }, { "epoch": 2.63, "learning_rate": 6.288532675709001e-06, "loss": 0.0002, "step": 2886 }, { "epoch": 2.63, "learning_rate": 6.273119605425401e-06, "loss": 0.0001, "step": 2887 }, { "epoch": 2.63, "learning_rate": 6.257706535141801e-06, "loss": 0.0001, "step": 2888 }, { "epoch": 2.63, "learning_rate": 6.242293464858201e-06, "loss": 0.0001, "step": 2889 }, { "epoch": 2.63, "learning_rate": 6.2268803945745995e-06, "loss": 0.0001, "step": 2890 }, { "epoch": 2.63, "learning_rate": 6.211467324290999e-06, "loss": 0.0001, "step": 2891 }, { "epoch": 2.63, "learning_rate": 6.196054254007399e-06, "loss": 0.0002, "step": 2892 }, { "epoch": 2.63, "learning_rate": 6.180641183723798e-06, "loss": 0.0002, "step": 2893 }, { "epoch": 2.64, "learning_rate": 6.165228113440198e-06, "loss": 0.0001, "step": 2894 }, { "epoch": 2.64, "learning_rate": 6.1498150431565965e-06, "loss": 0.0001, "step": 2895 }, { "epoch": 2.64, "learning_rate": 6.134401972872997e-06, "loss": 0.0003, "step": 2896 }, { "epoch": 2.64, "learning_rate": 6.118988902589396e-06, "loss": 0.0001, "step": 2897 }, { "epoch": 2.64, "learning_rate": 6.103575832305796e-06, "loss": 0.0001, "step": 2898 }, { "epoch": 2.64, "learning_rate": 6.0881627620221955e-06, "loss": 0.0001, "step": 2899 }, { "epoch": 2.64, "learning_rate": 6.072749691738594e-06, "loss": 0.0001, "step": 2900 }, { "epoch": 2.64, "learning_rate": 6.057336621454994e-06, "loss": 0.0001, "step": 2901 }, { "epoch": 2.64, "learning_rate": 6.041923551171394e-06, "loss": 0.0001, "step": 2902 }, { "epoch": 2.64, "learning_rate": 6.026510480887794e-06, "loss": 0.0001, "step": 2903 }, { "epoch": 2.64, "learning_rate": 6.0110974106041925e-06, "loss": 0.0001, "step": 2904 }, { "epoch": 2.65, "learning_rate": 5.995684340320592e-06, "loss": 0.0001, "step": 2905 }, { "epoch": 2.65, "learning_rate": 5.980271270036992e-06, "loss": 0.0001, "step": 2906 }, { "epoch": 2.65, "learning_rate": 5.964858199753391e-06, "loss": 0.0001, "step": 2907 }, { "epoch": 2.65, "learning_rate": 5.949445129469791e-06, "loss": 0.0001, "step": 2908 }, { "epoch": 2.65, "learning_rate": 5.93403205918619e-06, "loss": 0.0001, "step": 2909 }, { "epoch": 2.65, "learning_rate": 5.91861898890259e-06, "loss": 0.0001, "step": 2910 }, { "epoch": 2.65, "learning_rate": 5.903205918618989e-06, "loss": 0.0001, "step": 2911 }, { "epoch": 2.65, "learning_rate": 5.887792848335389e-06, "loss": 0.0001, "step": 2912 }, { "epoch": 2.65, "learning_rate": 5.8723797780517884e-06, "loss": 0.0001, "step": 2913 }, { "epoch": 2.65, "learning_rate": 5.856966707768187e-06, "loss": 0.0002, "step": 2914 }, { "epoch": 2.65, "learning_rate": 5.841553637484587e-06, "loss": 0.0001, "step": 2915 }, { "epoch": 2.66, "learning_rate": 5.826140567200987e-06, "loss": 0.0001, "step": 2916 }, { "epoch": 2.66, "learning_rate": 5.8107274969173866e-06, "loss": 0.0001, "step": 2917 }, { "epoch": 2.66, "learning_rate": 5.7953144266337855e-06, "loss": 0.0001, "step": 2918 }, { "epoch": 2.66, "learning_rate": 5.779901356350185e-06, "loss": 0.0001, "step": 2919 }, { "epoch": 2.66, "learning_rate": 5.764488286066585e-06, "loss": 0.0033, "step": 2920 }, { "epoch": 2.66, "learning_rate": 5.749075215782984e-06, "loss": 0.0001, "step": 2921 }, { "epoch": 2.66, "learning_rate": 5.733662145499384e-06, "loss": 0.0001, "step": 2922 }, { "epoch": 2.66, "learning_rate": 5.718249075215783e-06, "loss": 0.0001, "step": 2923 }, { "epoch": 2.66, "learning_rate": 5.702836004932183e-06, "loss": 0.0001, "step": 2924 }, { "epoch": 2.66, "learning_rate": 5.687422934648582e-06, "loss": 0.0001, "step": 2925 }, { "epoch": 2.66, "learning_rate": 5.672009864364982e-06, "loss": 0.0001, "step": 2926 }, { "epoch": 2.67, "learning_rate": 5.6565967940813814e-06, "loss": 0.0001, "step": 2927 }, { "epoch": 2.67, "learning_rate": 5.64118372379778e-06, "loss": 0.0001, "step": 2928 }, { "epoch": 2.67, "learning_rate": 5.625770653514181e-06, "loss": 0.0001, "step": 2929 }, { "epoch": 2.67, "learning_rate": 5.61035758323058e-06, "loss": 0.0001, "step": 2930 }, { "epoch": 2.67, "learning_rate": 5.5949445129469796e-06, "loss": 0.0001, "step": 2931 }, { "epoch": 2.67, "learning_rate": 5.5795314426633784e-06, "loss": 0.0001, "step": 2932 }, { "epoch": 2.67, "learning_rate": 5.564118372379778e-06, "loss": 0.0001, "step": 2933 }, { "epoch": 2.67, "learning_rate": 5.548705302096178e-06, "loss": 0.0001, "step": 2934 }, { "epoch": 2.67, "learning_rate": 5.533292231812577e-06, "loss": 0.0004, "step": 2935 }, { "epoch": 2.67, "learning_rate": 5.517879161528977e-06, "loss": 0.0001, "step": 2936 }, { "epoch": 2.67, "learning_rate": 5.502466091245376e-06, "loss": 0.0001, "step": 2937 }, { "epoch": 2.68, "learning_rate": 5.487053020961776e-06, "loss": 0.0001, "step": 2938 }, { "epoch": 2.68, "learning_rate": 5.471639950678176e-06, "loss": 0.0001, "step": 2939 }, { "epoch": 2.68, "learning_rate": 5.456226880394575e-06, "loss": 0.0001, "step": 2940 }, { "epoch": 2.68, "learning_rate": 5.440813810110974e-06, "loss": 0.0001, "step": 2941 }, { "epoch": 2.68, "learning_rate": 5.425400739827373e-06, "loss": 0.0001, "step": 2942 }, { "epoch": 2.68, "learning_rate": 5.409987669543774e-06, "loss": 0.0001, "step": 2943 }, { "epoch": 2.68, "learning_rate": 5.394574599260173e-06, "loss": 0.181, "step": 2944 }, { "epoch": 2.68, "learning_rate": 5.3791615289765725e-06, "loss": 0.0001, "step": 2945 }, { "epoch": 2.68, "learning_rate": 5.363748458692972e-06, "loss": 0.0001, "step": 2946 }, { "epoch": 2.68, "learning_rate": 5.348335388409371e-06, "loss": 0.0001, "step": 2947 }, { "epoch": 2.68, "learning_rate": 5.332922318125771e-06, "loss": 0.0001, "step": 2948 }, { "epoch": 2.69, "learning_rate": 5.31750924784217e-06, "loss": 0.0001, "step": 2949 }, { "epoch": 2.69, "learning_rate": 5.30209617755857e-06, "loss": 0.0001, "step": 2950 }, { "epoch": 2.69, "learning_rate": 5.286683107274969e-06, "loss": 0.0002, "step": 2951 }, { "epoch": 2.69, "learning_rate": 5.271270036991369e-06, "loss": 0.0001, "step": 2952 }, { "epoch": 2.69, "learning_rate": 5.255856966707769e-06, "loss": 0.0002, "step": 2953 }, { "epoch": 2.69, "learning_rate": 5.240443896424168e-06, "loss": 0.0001, "step": 2954 }, { "epoch": 2.69, "learning_rate": 5.225030826140567e-06, "loss": 0.0001, "step": 2955 }, { "epoch": 2.69, "learning_rate": 5.209617755856967e-06, "loss": 0.0001, "step": 2956 }, { "epoch": 2.69, "learning_rate": 5.194204685573367e-06, "loss": 0.0001, "step": 2957 }, { "epoch": 2.69, "learning_rate": 5.178791615289766e-06, "loss": 0.0001, "step": 2958 }, { "epoch": 2.69, "learning_rate": 5.1633785450061655e-06, "loss": 0.0001, "step": 2959 }, { "epoch": 2.7, "learning_rate": 5.147965474722565e-06, "loss": 0.0001, "step": 2960 }, { "epoch": 2.7, "learning_rate": 5.132552404438964e-06, "loss": 0.0001, "step": 2961 }, { "epoch": 2.7, "learning_rate": 5.117139334155364e-06, "loss": 0.0001, "step": 2962 }, { "epoch": 2.7, "learning_rate": 5.101726263871764e-06, "loss": 0.0001, "step": 2963 }, { "epoch": 2.7, "learning_rate": 5.086313193588163e-06, "loss": 0.0001, "step": 2964 }, { "epoch": 2.7, "learning_rate": 5.070900123304562e-06, "loss": 0.0001, "step": 2965 }, { "epoch": 2.7, "learning_rate": 5.055487053020962e-06, "loss": 0.0001, "step": 2966 }, { "epoch": 2.7, "learning_rate": 5.040073982737362e-06, "loss": 0.0001, "step": 2967 }, { "epoch": 2.7, "learning_rate": 5.024660912453761e-06, "loss": 0.0001, "step": 2968 }, { "epoch": 2.7, "learning_rate": 5.00924784217016e-06, "loss": 0.0001, "step": 2969 }, { "epoch": 2.7, "learning_rate": 4.99383477188656e-06, "loss": 0.0003, "step": 2970 }, { "epoch": 2.71, "learning_rate": 4.97842170160296e-06, "loss": 0.0001, "step": 2971 }, { "epoch": 2.71, "learning_rate": 4.963008631319359e-06, "loss": 0.0003, "step": 2972 }, { "epoch": 2.71, "learning_rate": 4.9475955610357585e-06, "loss": 0.0001, "step": 2973 }, { "epoch": 2.71, "learning_rate": 4.932182490752158e-06, "loss": 0.0001, "step": 2974 }, { "epoch": 2.71, "learning_rate": 4.916769420468557e-06, "loss": 0.0001, "step": 2975 }, { "epoch": 2.71, "learning_rate": 4.901356350184958e-06, "loss": 0.0001, "step": 2976 }, { "epoch": 2.71, "learning_rate": 4.885943279901357e-06, "loss": 0.0001, "step": 2977 }, { "epoch": 2.71, "learning_rate": 4.870530209617756e-06, "loss": 0.0001, "step": 2978 }, { "epoch": 2.71, "learning_rate": 4.855117139334155e-06, "loss": 0.0019, "step": 2979 }, { "epoch": 2.71, "learning_rate": 4.839704069050555e-06, "loss": 0.0001, "step": 2980 }, { "epoch": 2.71, "learning_rate": 4.824290998766955e-06, "loss": 0.0003, "step": 2981 }, { "epoch": 2.72, "learning_rate": 4.808877928483354e-06, "loss": 0.0001, "step": 2982 }, { "epoch": 2.72, "learning_rate": 4.793464858199754e-06, "loss": 0.0001, "step": 2983 }, { "epoch": 2.72, "learning_rate": 4.778051787916153e-06, "loss": 0.0, "step": 2984 }, { "epoch": 2.72, "learning_rate": 4.762638717632553e-06, "loss": 0.0001, "step": 2985 }, { "epoch": 2.72, "learning_rate": 4.747225647348952e-06, "loss": 0.0001, "step": 2986 }, { "epoch": 2.72, "learning_rate": 4.7318125770653515e-06, "loss": 0.0001, "step": 2987 }, { "epoch": 2.72, "learning_rate": 4.716399506781751e-06, "loss": 0.0001, "step": 2988 }, { "epoch": 2.72, "learning_rate": 4.70098643649815e-06, "loss": 0.0002, "step": 2989 }, { "epoch": 2.72, "learning_rate": 4.685573366214551e-06, "loss": 0.0001, "step": 2990 }, { "epoch": 2.72, "learning_rate": 4.67016029593095e-06, "loss": 0.0001, "step": 2991 }, { "epoch": 2.72, "learning_rate": 4.654747225647349e-06, "loss": 0.0001, "step": 2992 }, { "epoch": 2.73, "learning_rate": 4.639334155363749e-06, "loss": 0.0001, "step": 2993 }, { "epoch": 2.73, "learning_rate": 4.623921085080148e-06, "loss": 0.0001, "step": 2994 }, { "epoch": 2.73, "learning_rate": 4.608508014796548e-06, "loss": 0.0001, "step": 2995 }, { "epoch": 2.73, "learning_rate": 4.593094944512947e-06, "loss": 0.0001, "step": 2996 }, { "epoch": 2.73, "learning_rate": 4.577681874229347e-06, "loss": 0.0001, "step": 2997 }, { "epoch": 2.73, "learning_rate": 4.562268803945746e-06, "loss": 0.0001, "step": 2998 }, { "epoch": 2.73, "learning_rate": 4.546855733662146e-06, "loss": 0.0001, "step": 2999 }, { "epoch": 2.73, "learning_rate": 4.531442663378546e-06, "loss": 0.0001, "step": 3000 }, { "epoch": 2.73, "learning_rate": 4.5160295930949445e-06, "loss": 0.0, "step": 3001 }, { "epoch": 2.73, "learning_rate": 4.500616522811344e-06, "loss": 0.0001, "step": 3002 }, { "epoch": 2.73, "learning_rate": 4.485203452527744e-06, "loss": 0.0001, "step": 3003 }, { "epoch": 2.74, "learning_rate": 4.469790382244144e-06, "loss": 0.0002, "step": 3004 }, { "epoch": 2.74, "learning_rate": 4.454377311960543e-06, "loss": 0.0001, "step": 3005 }, { "epoch": 2.74, "learning_rate": 4.438964241676942e-06, "loss": 0.0001, "step": 3006 }, { "epoch": 2.74, "learning_rate": 4.423551171393342e-06, "loss": 0.0, "step": 3007 }, { "epoch": 2.74, "learning_rate": 4.408138101109741e-06, "loss": 0.0001, "step": 3008 }, { "epoch": 2.74, "learning_rate": 4.392725030826141e-06, "loss": 0.0002, "step": 3009 }, { "epoch": 2.74, "learning_rate": 4.3773119605425405e-06, "loss": 0.0001, "step": 3010 }, { "epoch": 2.74, "learning_rate": 4.36189889025894e-06, "loss": 0.0001, "step": 3011 }, { "epoch": 2.74, "learning_rate": 4.346485819975339e-06, "loss": 0.0001, "step": 3012 }, { "epoch": 2.74, "learning_rate": 4.331072749691739e-06, "loss": 0.0001, "step": 3013 }, { "epoch": 2.74, "learning_rate": 4.315659679408139e-06, "loss": 0.0001, "step": 3014 }, { "epoch": 2.75, "learning_rate": 4.3002466091245375e-06, "loss": 0.0001, "step": 3015 }, { "epoch": 2.75, "learning_rate": 4.284833538840937e-06, "loss": 0.0001, "step": 3016 }, { "epoch": 2.75, "learning_rate": 4.269420468557337e-06, "loss": 0.0001, "step": 3017 }, { "epoch": 2.75, "learning_rate": 4.254007398273737e-06, "loss": 0.0001, "step": 3018 }, { "epoch": 2.75, "learning_rate": 4.238594327990136e-06, "loss": 0.0002, "step": 3019 }, { "epoch": 2.75, "learning_rate": 4.223181257706535e-06, "loss": 0.0001, "step": 3020 }, { "epoch": 2.75, "learning_rate": 4.207768187422935e-06, "loss": 0.0001, "step": 3021 }, { "epoch": 2.75, "learning_rate": 4.192355117139334e-06, "loss": 0.0001, "step": 3022 }, { "epoch": 2.75, "learning_rate": 4.176942046855734e-06, "loss": 0.0001, "step": 3023 }, { "epoch": 2.75, "learning_rate": 4.1615289765721334e-06, "loss": 0.0001, "step": 3024 }, { "epoch": 2.76, "learning_rate": 4.146115906288533e-06, "loss": 0.0001, "step": 3025 }, { "epoch": 2.76, "learning_rate": 4.130702836004932e-06, "loss": 0.0002, "step": 3026 }, { "epoch": 2.76, "learning_rate": 4.115289765721332e-06, "loss": 0.0001, "step": 3027 }, { "epoch": 2.76, "learning_rate": 4.0998766954377316e-06, "loss": 0.0001, "step": 3028 }, { "epoch": 2.76, "learning_rate": 4.0844636251541305e-06, "loss": 0.0001, "step": 3029 }, { "epoch": 2.76, "learning_rate": 4.069050554870531e-06, "loss": 0.0001, "step": 3030 }, { "epoch": 2.76, "learning_rate": 4.05363748458693e-06, "loss": 0.0, "step": 3031 }, { "epoch": 2.76, "learning_rate": 4.03822441430333e-06, "loss": 0.0001, "step": 3032 }, { "epoch": 2.76, "learning_rate": 4.0228113440197286e-06, "loss": 0.0001, "step": 3033 }, { "epoch": 2.76, "learning_rate": 4.007398273736128e-06, "loss": 0.0001, "step": 3034 }, { "epoch": 2.76, "learning_rate": 3.991985203452528e-06, "loss": 0.0001, "step": 3035 }, { "epoch": 2.77, "learning_rate": 3.976572133168927e-06, "loss": 0.0003, "step": 3036 }, { "epoch": 2.77, "learning_rate": 3.9611590628853275e-06, "loss": 0.0001, "step": 3037 }, { "epoch": 2.77, "learning_rate": 3.9457459926017264e-06, "loss": 0.0001, "step": 3038 }, { "epoch": 2.77, "learning_rate": 3.930332922318126e-06, "loss": 0.0001, "step": 3039 }, { "epoch": 2.77, "learning_rate": 3.914919852034526e-06, "loss": 0.0001, "step": 3040 }, { "epoch": 2.77, "learning_rate": 3.899506781750925e-06, "loss": 0.0001, "step": 3041 }, { "epoch": 2.77, "learning_rate": 3.8840937114673246e-06, "loss": 0.0001, "step": 3042 }, { "epoch": 2.77, "learning_rate": 3.8686806411837234e-06, "loss": 0.0001, "step": 3043 }, { "epoch": 2.77, "learning_rate": 3.853267570900124e-06, "loss": 0.0001, "step": 3044 }, { "epoch": 2.77, "learning_rate": 3.837854500616523e-06, "loss": 0.0001, "step": 3045 }, { "epoch": 2.77, "learning_rate": 3.822441430332923e-06, "loss": 0.0001, "step": 3046 }, { "epoch": 2.78, "learning_rate": 3.8070283600493224e-06, "loss": 0.0001, "step": 3047 }, { "epoch": 2.78, "learning_rate": 3.7916152897657217e-06, "loss": 0.0001, "step": 3048 }, { "epoch": 2.78, "learning_rate": 3.776202219482121e-06, "loss": 0.0001, "step": 3049 }, { "epoch": 2.78, "learning_rate": 3.7607891491985204e-06, "loss": 0.0001, "step": 3050 }, { "epoch": 2.78, "learning_rate": 3.74537607891492e-06, "loss": 0.0, "step": 3051 }, { "epoch": 2.78, "learning_rate": 3.7299630086313194e-06, "loss": 0.0001, "step": 3052 }, { "epoch": 2.78, "learning_rate": 3.7145499383477187e-06, "loss": 0.0001, "step": 3053 }, { "epoch": 2.78, "learning_rate": 3.699136868064119e-06, "loss": 0.0001, "step": 3054 }, { "epoch": 2.78, "learning_rate": 3.6837237977805182e-06, "loss": 0.01, "step": 3055 }, { "epoch": 2.78, "learning_rate": 3.6683107274969175e-06, "loss": 0.0001, "step": 3056 }, { "epoch": 2.78, "learning_rate": 3.6528976572133173e-06, "loss": 0.0001, "step": 3057 }, { "epoch": 2.79, "learning_rate": 3.6374845869297166e-06, "loss": 0.0006, "step": 3058 }, { "epoch": 2.79, "learning_rate": 3.622071516646116e-06, "loss": 0.0001, "step": 3059 }, { "epoch": 2.79, "learning_rate": 3.6066584463625152e-06, "loss": 0.0001, "step": 3060 }, { "epoch": 2.79, "learning_rate": 3.5912453760789154e-06, "loss": 0.0001, "step": 3061 }, { "epoch": 2.79, "learning_rate": 3.5758323057953147e-06, "loss": 0.0001, "step": 3062 }, { "epoch": 2.79, "learning_rate": 3.560419235511714e-06, "loss": 0.0001, "step": 3063 }, { "epoch": 2.79, "learning_rate": 3.5450061652281138e-06, "loss": 0.0001, "step": 3064 }, { "epoch": 2.79, "learning_rate": 3.529593094944513e-06, "loss": 0.0001, "step": 3065 }, { "epoch": 2.79, "learning_rate": 3.5141800246609124e-06, "loss": 0.0001, "step": 3066 }, { "epoch": 2.79, "learning_rate": 3.4987669543773126e-06, "loss": 0.0001, "step": 3067 }, { "epoch": 2.79, "learning_rate": 3.483353884093712e-06, "loss": 0.0001, "step": 3068 }, { "epoch": 2.8, "learning_rate": 3.467940813810111e-06, "loss": 0.0001, "step": 3069 }, { "epoch": 2.8, "learning_rate": 3.4525277435265105e-06, "loss": 0.0001, "step": 3070 }, { "epoch": 2.8, "learning_rate": 3.4371146732429103e-06, "loss": 0.0001, "step": 3071 }, { "epoch": 2.8, "learning_rate": 3.4217016029593096e-06, "loss": 0.0001, "step": 3072 }, { "epoch": 2.8, "learning_rate": 3.406288532675709e-06, "loss": 0.0001, "step": 3073 }, { "epoch": 2.8, "learning_rate": 3.390875462392109e-06, "loss": 0.0001, "step": 3074 }, { "epoch": 2.8, "learning_rate": 3.3754623921085084e-06, "loss": 0.0001, "step": 3075 }, { "epoch": 2.8, "learning_rate": 3.3600493218249077e-06, "loss": 0.0001, "step": 3076 }, { "epoch": 2.8, "learning_rate": 3.3446362515413074e-06, "loss": 0.0001, "step": 3077 }, { "epoch": 2.8, "learning_rate": 3.3292231812577068e-06, "loss": 0.0, "step": 3078 }, { "epoch": 2.8, "learning_rate": 3.313810110974106e-06, "loss": 0.0001, "step": 3079 }, { "epoch": 2.81, "learning_rate": 3.2983970406905054e-06, "loss": 0.0001, "step": 3080 }, { "epoch": 2.81, "learning_rate": 3.2829839704069056e-06, "loss": 0.0003, "step": 3081 }, { "epoch": 2.81, "learning_rate": 3.267570900123305e-06, "loss": 0.0001, "step": 3082 }, { "epoch": 2.81, "learning_rate": 3.252157829839704e-06, "loss": 0.0001, "step": 3083 }, { "epoch": 2.81, "learning_rate": 3.236744759556104e-06, "loss": 0.0001, "step": 3084 }, { "epoch": 2.81, "learning_rate": 3.2213316892725033e-06, "loss": 0.0004, "step": 3085 }, { "epoch": 2.81, "learning_rate": 3.2059186189889026e-06, "loss": 0.0001, "step": 3086 }, { "epoch": 2.81, "learning_rate": 3.190505548705302e-06, "loss": 0.0001, "step": 3087 }, { "epoch": 2.81, "learning_rate": 3.175092478421702e-06, "loss": 0.0, "step": 3088 }, { "epoch": 2.81, "learning_rate": 3.1596794081381014e-06, "loss": 0.0001, "step": 3089 }, { "epoch": 2.81, "learning_rate": 3.1442663378545007e-06, "loss": 0.0001, "step": 3090 }, { "epoch": 2.82, "learning_rate": 3.1288532675709004e-06, "loss": 0.0001, "step": 3091 }, { "epoch": 2.82, "learning_rate": 3.1134401972872997e-06, "loss": 0.0001, "step": 3092 }, { "epoch": 2.82, "learning_rate": 3.0980271270036995e-06, "loss": 0.0001, "step": 3093 }, { "epoch": 2.82, "learning_rate": 3.082614056720099e-06, "loss": 0.0001, "step": 3094 }, { "epoch": 2.82, "learning_rate": 3.0672009864364985e-06, "loss": 0.0001, "step": 3095 }, { "epoch": 2.82, "learning_rate": 3.051787916152898e-06, "loss": 0.0001, "step": 3096 }, { "epoch": 2.82, "learning_rate": 3.036374845869297e-06, "loss": 0.0001, "step": 3097 }, { "epoch": 2.82, "learning_rate": 3.020961775585697e-06, "loss": 0.0001, "step": 3098 }, { "epoch": 2.82, "learning_rate": 3.0055487053020962e-06, "loss": 0.0001, "step": 3099 }, { "epoch": 2.82, "learning_rate": 2.990135635018496e-06, "loss": 0.0001, "step": 3100 }, { "epoch": 2.82, "learning_rate": 2.9747225647348953e-06, "loss": 0.0001, "step": 3101 }, { "epoch": 2.83, "learning_rate": 2.959309494451295e-06, "loss": 0.0001, "step": 3102 }, { "epoch": 2.83, "learning_rate": 2.9438964241676944e-06, "loss": 0.0001, "step": 3103 }, { "epoch": 2.83, "learning_rate": 2.9284833538840937e-06, "loss": 0.0002, "step": 3104 }, { "epoch": 2.83, "learning_rate": 2.9130702836004934e-06, "loss": 0.0001, "step": 3105 }, { "epoch": 2.83, "learning_rate": 2.8976572133168927e-06, "loss": 0.0001, "step": 3106 }, { "epoch": 2.83, "learning_rate": 2.8822441430332925e-06, "loss": 0.0001, "step": 3107 }, { "epoch": 2.83, "learning_rate": 2.866831072749692e-06, "loss": 0.0001, "step": 3108 }, { "epoch": 2.83, "learning_rate": 2.8514180024660915e-06, "loss": 0.0001, "step": 3109 }, { "epoch": 2.83, "learning_rate": 2.836004932182491e-06, "loss": 0.0001, "step": 3110 }, { "epoch": 2.83, "learning_rate": 2.82059186189889e-06, "loss": 0.0001, "step": 3111 }, { "epoch": 2.83, "learning_rate": 2.80517879161529e-06, "loss": 0.0001, "step": 3112 }, { "epoch": 2.84, "learning_rate": 2.7897657213316892e-06, "loss": 0.0001, "step": 3113 }, { "epoch": 2.84, "learning_rate": 2.774352651048089e-06, "loss": 0.0001, "step": 3114 }, { "epoch": 2.84, "learning_rate": 2.7589395807644887e-06, "loss": 0.0001, "step": 3115 }, { "epoch": 2.84, "learning_rate": 2.743526510480888e-06, "loss": 0.0001, "step": 3116 }, { "epoch": 2.84, "learning_rate": 2.7281134401972873e-06, "loss": 0.0001, "step": 3117 }, { "epoch": 2.84, "learning_rate": 2.7127003699136867e-06, "loss": 0.0001, "step": 3118 }, { "epoch": 2.84, "learning_rate": 2.6972872996300864e-06, "loss": 0.0002, "step": 3119 }, { "epoch": 2.84, "learning_rate": 2.681874229346486e-06, "loss": 0.0001, "step": 3120 }, { "epoch": 2.84, "learning_rate": 2.6664611590628855e-06, "loss": 0.0001, "step": 3121 }, { "epoch": 2.84, "learning_rate": 2.651048088779285e-06, "loss": 0.0001, "step": 3122 }, { "epoch": 2.84, "learning_rate": 2.6356350184956845e-06, "loss": 0.0001, "step": 3123 }, { "epoch": 2.85, "learning_rate": 2.620221948212084e-06, "loss": 0.0002, "step": 3124 }, { "epoch": 2.85, "learning_rate": 2.6048088779284836e-06, "loss": 0.0001, "step": 3125 }, { "epoch": 2.85, "learning_rate": 2.589395807644883e-06, "loss": 0.0001, "step": 3126 }, { "epoch": 2.85, "learning_rate": 2.5739827373612826e-06, "loss": 0.0003, "step": 3127 }, { "epoch": 2.85, "learning_rate": 2.558569667077682e-06, "loss": 0.0, "step": 3128 }, { "epoch": 2.85, "learning_rate": 2.5431565967940817e-06, "loss": 0.0001, "step": 3129 }, { "epoch": 2.85, "learning_rate": 2.527743526510481e-06, "loss": 0.0001, "step": 3130 }, { "epoch": 2.85, "learning_rate": 2.5123304562268803e-06, "loss": 0.0001, "step": 3131 }, { "epoch": 2.85, "learning_rate": 2.49691738594328e-06, "loss": 0.0001, "step": 3132 }, { "epoch": 2.85, "learning_rate": 2.4815043156596794e-06, "loss": 0.0001, "step": 3133 }, { "epoch": 2.85, "learning_rate": 2.466091245376079e-06, "loss": 0.0001, "step": 3134 }, { "epoch": 2.86, "learning_rate": 2.450678175092479e-06, "loss": 0.3184, "step": 3135 }, { "epoch": 2.86, "learning_rate": 2.435265104808878e-06, "loss": 0.0001, "step": 3136 }, { "epoch": 2.86, "learning_rate": 2.4198520345252775e-06, "loss": 0.0001, "step": 3137 }, { "epoch": 2.86, "learning_rate": 2.404438964241677e-06, "loss": 0.0, "step": 3138 }, { "epoch": 2.86, "learning_rate": 2.3890258939580766e-06, "loss": 0.0001, "step": 3139 }, { "epoch": 2.86, "learning_rate": 2.373612823674476e-06, "loss": 0.0001, "step": 3140 }, { "epoch": 2.86, "learning_rate": 2.3581997533908756e-06, "loss": 0.0001, "step": 3141 }, { "epoch": 2.86, "learning_rate": 2.3427866831072754e-06, "loss": 0.0002, "step": 3142 }, { "epoch": 2.86, "learning_rate": 2.3273736128236747e-06, "loss": 0.0001, "step": 3143 }, { "epoch": 2.86, "learning_rate": 2.311960542540074e-06, "loss": 0.0001, "step": 3144 }, { "epoch": 2.86, "learning_rate": 2.2965474722564733e-06, "loss": 0.0001, "step": 3145 }, { "epoch": 2.87, "learning_rate": 2.281134401972873e-06, "loss": 0.0001, "step": 3146 }, { "epoch": 2.87, "learning_rate": 2.265721331689273e-06, "loss": 0.0, "step": 3147 }, { "epoch": 2.87, "learning_rate": 2.250308261405672e-06, "loss": 0.0001, "step": 3148 }, { "epoch": 2.87, "learning_rate": 2.234895191122072e-06, "loss": 0.0001, "step": 3149 }, { "epoch": 2.87, "learning_rate": 2.219482120838471e-06, "loss": 0.0, "step": 3150 }, { "epoch": 2.87, "learning_rate": 2.2040690505548705e-06, "loss": 0.0001, "step": 3151 }, { "epoch": 2.87, "learning_rate": 2.1886559802712702e-06, "loss": 0.0001, "step": 3152 }, { "epoch": 2.87, "learning_rate": 2.1732429099876695e-06, "loss": 0.0001, "step": 3153 }, { "epoch": 2.87, "learning_rate": 2.1578298397040693e-06, "loss": 0.0001, "step": 3154 }, { "epoch": 2.87, "learning_rate": 2.1424167694204686e-06, "loss": 0.0001, "step": 3155 }, { "epoch": 2.87, "learning_rate": 2.1270036991368683e-06, "loss": 0.0001, "step": 3156 }, { "epoch": 2.88, "learning_rate": 2.1115906288532677e-06, "loss": 0.0001, "step": 3157 }, { "epoch": 2.88, "learning_rate": 2.096177558569667e-06, "loss": 0.0027, "step": 3158 }, { "epoch": 2.88, "learning_rate": 2.0807644882860667e-06, "loss": 0.0001, "step": 3159 }, { "epoch": 2.88, "learning_rate": 2.065351418002466e-06, "loss": 0.0001, "step": 3160 }, { "epoch": 2.88, "learning_rate": 2.0499383477188658e-06, "loss": 0.0001, "step": 3161 }, { "epoch": 2.88, "learning_rate": 2.0345252774352655e-06, "loss": 0.0001, "step": 3162 }, { "epoch": 2.88, "learning_rate": 2.019112207151665e-06, "loss": 0.0001, "step": 3163 }, { "epoch": 2.88, "learning_rate": 2.003699136868064e-06, "loss": 0.0002, "step": 3164 }, { "epoch": 2.88, "learning_rate": 1.9882860665844635e-06, "loss": 0.0001, "step": 3165 }, { "epoch": 2.88, "learning_rate": 1.9728729963008632e-06, "loss": 0.0001, "step": 3166 }, { "epoch": 2.88, "learning_rate": 1.957459926017263e-06, "loss": 0.0001, "step": 3167 }, { "epoch": 2.89, "learning_rate": 1.9420468557336623e-06, "loss": 0.0001, "step": 3168 }, { "epoch": 2.89, "learning_rate": 1.926633785450062e-06, "loss": 0.0001, "step": 3169 }, { "epoch": 2.89, "learning_rate": 1.9112207151664613e-06, "loss": 0.0001, "step": 3170 }, { "epoch": 2.89, "learning_rate": 1.8958076448828609e-06, "loss": 0.0002, "step": 3171 }, { "epoch": 2.89, "learning_rate": 1.8803945745992602e-06, "loss": 0.0041, "step": 3172 }, { "epoch": 2.89, "learning_rate": 1.8649815043156597e-06, "loss": 0.0001, "step": 3173 }, { "epoch": 2.89, "learning_rate": 1.8495684340320595e-06, "loss": 0.0001, "step": 3174 }, { "epoch": 2.89, "learning_rate": 1.8341553637484588e-06, "loss": 0.0001, "step": 3175 }, { "epoch": 2.89, "learning_rate": 1.8187422934648583e-06, "loss": 0.0001, "step": 3176 }, { "epoch": 2.89, "learning_rate": 1.8033292231812576e-06, "loss": 0.0001, "step": 3177 }, { "epoch": 2.89, "learning_rate": 1.7879161528976574e-06, "loss": 0.0001, "step": 3178 }, { "epoch": 2.9, "learning_rate": 1.7725030826140569e-06, "loss": 0.0001, "step": 3179 }, { "epoch": 2.9, "learning_rate": 1.7570900123304562e-06, "loss": 0.0001, "step": 3180 }, { "epoch": 2.9, "learning_rate": 1.741676942046856e-06, "loss": 0.0001, "step": 3181 }, { "epoch": 2.9, "learning_rate": 1.7262638717632553e-06, "loss": 0.0001, "step": 3182 }, { "epoch": 2.9, "learning_rate": 1.7108508014796548e-06, "loss": 0.0001, "step": 3183 }, { "epoch": 2.9, "learning_rate": 1.6954377311960545e-06, "loss": 0.0001, "step": 3184 }, { "epoch": 2.9, "learning_rate": 1.6800246609124538e-06, "loss": 0.0001, "step": 3185 }, { "epoch": 2.9, "learning_rate": 1.6646115906288534e-06, "loss": 0.0001, "step": 3186 }, { "epoch": 2.9, "learning_rate": 1.6491985203452527e-06, "loss": 0.0002, "step": 3187 }, { "epoch": 2.9, "learning_rate": 1.6337854500616524e-06, "loss": 0.0001, "step": 3188 }, { "epoch": 2.9, "learning_rate": 1.618372379778052e-06, "loss": 0.0002, "step": 3189 }, { "epoch": 2.91, "learning_rate": 1.6029593094944513e-06, "loss": 0.0001, "step": 3190 }, { "epoch": 2.91, "learning_rate": 1.587546239210851e-06, "loss": 0.0001, "step": 3191 }, { "epoch": 2.91, "learning_rate": 1.5721331689272503e-06, "loss": 0.0001, "step": 3192 }, { "epoch": 2.91, "learning_rate": 1.5567200986436499e-06, "loss": 0.0, "step": 3193 }, { "epoch": 2.91, "learning_rate": 1.5413070283600494e-06, "loss": 0.0001, "step": 3194 }, { "epoch": 2.91, "learning_rate": 1.525893958076449e-06, "loss": 0.0001, "step": 3195 }, { "epoch": 2.91, "learning_rate": 1.5104808877928485e-06, "loss": 0.0001, "step": 3196 }, { "epoch": 2.91, "learning_rate": 1.495067817509248e-06, "loss": 0.0001, "step": 3197 }, { "epoch": 2.91, "learning_rate": 1.4796547472256475e-06, "loss": 0.0, "step": 3198 }, { "epoch": 2.91, "learning_rate": 1.4642416769420468e-06, "loss": 0.1597, "step": 3199 }, { "epoch": 2.91, "learning_rate": 1.4488286066584464e-06, "loss": 0.0001, "step": 3200 }, { "epoch": 2.92, "learning_rate": 1.433415536374846e-06, "loss": 0.0001, "step": 3201 }, { "epoch": 2.92, "learning_rate": 1.4180024660912454e-06, "loss": 0.0011, "step": 3202 }, { "epoch": 2.92, "learning_rate": 1.402589395807645e-06, "loss": 0.0001, "step": 3203 }, { "epoch": 2.92, "learning_rate": 1.3871763255240445e-06, "loss": 0.0002, "step": 3204 }, { "epoch": 2.92, "learning_rate": 1.371763255240444e-06, "loss": 0.0001, "step": 3205 }, { "epoch": 2.92, "learning_rate": 1.3563501849568433e-06, "loss": 0.0001, "step": 3206 }, { "epoch": 2.92, "learning_rate": 1.340937114673243e-06, "loss": 0.0001, "step": 3207 }, { "epoch": 2.92, "learning_rate": 1.3255240443896426e-06, "loss": 0.0001, "step": 3208 }, { "epoch": 2.92, "learning_rate": 1.310110974106042e-06, "loss": 0.0003, "step": 3209 }, { "epoch": 2.92, "learning_rate": 1.2946979038224414e-06, "loss": 0.0001, "step": 3210 }, { "epoch": 2.92, "learning_rate": 1.279284833538841e-06, "loss": 0.0001, "step": 3211 }, { "epoch": 2.93, "learning_rate": 1.2638717632552405e-06, "loss": 0.0001, "step": 3212 }, { "epoch": 2.93, "learning_rate": 1.24845869297164e-06, "loss": 0.0001, "step": 3213 }, { "epoch": 2.93, "learning_rate": 1.2330456226880396e-06, "loss": 0.0009, "step": 3214 }, { "epoch": 2.93, "learning_rate": 1.217632552404439e-06, "loss": 0.0, "step": 3215 }, { "epoch": 2.93, "learning_rate": 1.2022194821208384e-06, "loss": 0.0001, "step": 3216 }, { "epoch": 2.93, "learning_rate": 1.186806411837238e-06, "loss": 0.0001, "step": 3217 }, { "epoch": 2.93, "learning_rate": 1.1713933415536377e-06, "loss": 0.0001, "step": 3218 }, { "epoch": 2.93, "learning_rate": 1.155980271270037e-06, "loss": 0.0001, "step": 3219 }, { "epoch": 2.93, "learning_rate": 1.1405672009864365e-06, "loss": 0.0001, "step": 3220 }, { "epoch": 2.93, "learning_rate": 1.125154130702836e-06, "loss": 0.0008, "step": 3221 }, { "epoch": 2.93, "learning_rate": 1.1097410604192356e-06, "loss": 0.0001, "step": 3222 }, { "epoch": 2.94, "learning_rate": 1.0943279901356351e-06, "loss": 0.0001, "step": 3223 }, { "epoch": 2.94, "learning_rate": 1.0789149198520346e-06, "loss": 0.0001, "step": 3224 }, { "epoch": 2.94, "learning_rate": 1.0635018495684342e-06, "loss": 0.0001, "step": 3225 }, { "epoch": 2.94, "learning_rate": 1.0480887792848335e-06, "loss": 0.0001, "step": 3226 }, { "epoch": 2.94, "learning_rate": 1.032675709001233e-06, "loss": 0.0001, "step": 3227 }, { "epoch": 2.94, "learning_rate": 1.0172626387176328e-06, "loss": 0.0001, "step": 3228 }, { "epoch": 2.94, "learning_rate": 1.001849568434032e-06, "loss": 0.0001, "step": 3229 }, { "epoch": 2.94, "learning_rate": 9.864364981504316e-07, "loss": 0.0001, "step": 3230 }, { "epoch": 2.94, "learning_rate": 9.710234278668311e-07, "loss": 0.0001, "step": 3231 }, { "epoch": 2.94, "learning_rate": 9.556103575832307e-07, "loss": 0.0001, "step": 3232 }, { "epoch": 2.94, "learning_rate": 9.401972872996301e-07, "loss": 0.0002, "step": 3233 }, { "epoch": 2.95, "learning_rate": 9.247842170160297e-07, "loss": 0.0001, "step": 3234 }, { "epoch": 2.95, "learning_rate": 9.093711467324291e-07, "loss": 0.0001, "step": 3235 }, { "epoch": 2.95, "learning_rate": 8.939580764488287e-07, "loss": 0.0001, "step": 3236 }, { "epoch": 2.95, "learning_rate": 8.785450061652281e-07, "loss": 0.0001, "step": 3237 }, { "epoch": 2.95, "learning_rate": 8.631319358816276e-07, "loss": 0.0001, "step": 3238 }, { "epoch": 2.95, "learning_rate": 8.477188655980273e-07, "loss": 0.0001, "step": 3239 }, { "epoch": 2.95, "learning_rate": 8.323057953144267e-07, "loss": 0.0001, "step": 3240 }, { "epoch": 2.95, "learning_rate": 8.168927250308262e-07, "loss": 0.0001, "step": 3241 }, { "epoch": 2.95, "learning_rate": 8.014796547472256e-07, "loss": 0.1365, "step": 3242 }, { "epoch": 2.95, "learning_rate": 7.860665844636252e-07, "loss": 0.0001, "step": 3243 }, { "epoch": 2.95, "learning_rate": 7.706535141800247e-07, "loss": 0.0001, "step": 3244 }, { "epoch": 2.96, "learning_rate": 7.552404438964242e-07, "loss": 0.0001, "step": 3245 }, { "epoch": 2.96, "learning_rate": 7.398273736128238e-07, "loss": 0.0001, "step": 3246 }, { "epoch": 2.96, "learning_rate": 7.244143033292232e-07, "loss": 0.0001, "step": 3247 }, { "epoch": 2.96, "learning_rate": 7.090012330456227e-07, "loss": 0.0001, "step": 3248 }, { "epoch": 2.96, "learning_rate": 6.935881627620222e-07, "loss": 0.0001, "step": 3249 }, { "epoch": 2.96, "learning_rate": 6.781750924784217e-07, "loss": 0.0001, "step": 3250 }, { "epoch": 2.96, "learning_rate": 6.627620221948213e-07, "loss": 0.0001, "step": 3251 }, { "epoch": 2.96, "learning_rate": 6.473489519112207e-07, "loss": 0.0001, "step": 3252 }, { "epoch": 2.96, "learning_rate": 6.319358816276203e-07, "loss": 0.0001, "step": 3253 }, { "epoch": 2.96, "learning_rate": 6.165228113440198e-07, "loss": 0.0002, "step": 3254 }, { "epoch": 2.96, "learning_rate": 6.011097410604192e-07, "loss": 0.0001, "step": 3255 }, { "epoch": 2.97, "learning_rate": 5.856966707768188e-07, "loss": 0.0001, "step": 3256 }, { "epoch": 2.97, "learning_rate": 5.702836004932183e-07, "loss": 0.0001, "step": 3257 }, { "epoch": 2.97, "learning_rate": 5.548705302096178e-07, "loss": 0.0001, "step": 3258 }, { "epoch": 2.97, "learning_rate": 5.394574599260173e-07, "loss": 0.0001, "step": 3259 }, { "epoch": 2.97, "learning_rate": 5.240443896424167e-07, "loss": 0.0001, "step": 3260 }, { "epoch": 2.97, "learning_rate": 5.086313193588164e-07, "loss": 0.0001, "step": 3261 }, { "epoch": 2.97, "learning_rate": 4.932182490752158e-07, "loss": 0.0001, "step": 3262 }, { "epoch": 2.97, "learning_rate": 4.778051787916153e-07, "loss": 0.0001, "step": 3263 }, { "epoch": 2.97, "learning_rate": 4.6239210850801486e-07, "loss": 0.0001, "step": 3264 }, { "epoch": 2.97, "learning_rate": 4.4697903822441434e-07, "loss": 0.0001, "step": 3265 }, { "epoch": 2.97, "learning_rate": 4.315659679408138e-07, "loss": 0.0001, "step": 3266 }, { "epoch": 2.98, "learning_rate": 4.1615289765721334e-07, "loss": 0.0001, "step": 3267 }, { "epoch": 2.98, "learning_rate": 4.007398273736128e-07, "loss": 0.0001, "step": 3268 }, { "epoch": 2.98, "learning_rate": 3.8532675709001235e-07, "loss": 0.0001, "step": 3269 }, { "epoch": 2.98, "learning_rate": 3.699136868064119e-07, "loss": 0.0003, "step": 3270 }, { "epoch": 2.98, "learning_rate": 3.5450061652281136e-07, "loss": 0.0001, "step": 3271 }, { "epoch": 2.98, "learning_rate": 3.3908754623921083e-07, "loss": 0.0001, "step": 3272 }, { "epoch": 2.98, "learning_rate": 3.2367447595561036e-07, "loss": 0.0001, "step": 3273 }, { "epoch": 2.98, "learning_rate": 3.082614056720099e-07, "loss": 0.0001, "step": 3274 }, { "epoch": 2.98, "learning_rate": 2.928483353884094e-07, "loss": 0.0001, "step": 3275 }, { "epoch": 2.98, "learning_rate": 2.774352651048089e-07, "loss": 0.0001, "step": 3276 }, { "epoch": 2.98, "learning_rate": 2.6202219482120837e-07, "loss": 0.0001, "step": 3277 }, { "epoch": 2.99, "learning_rate": 2.466091245376079e-07, "loss": 0.0001, "step": 3278 }, { "epoch": 2.99, "learning_rate": 2.3119605425400743e-07, "loss": 0.0107, "step": 3279 }, { "epoch": 2.99, "learning_rate": 2.157829839704069e-07, "loss": 0.0001, "step": 3280 }, { "epoch": 2.99, "learning_rate": 2.003699136868064e-07, "loss": 0.0001, "step": 3281 }, { "epoch": 2.99, "learning_rate": 1.8495684340320594e-07, "loss": 0.0001, "step": 3282 }, { "epoch": 2.99, "learning_rate": 1.6954377311960542e-07, "loss": 0.0001, "step": 3283 }, { "epoch": 2.99, "learning_rate": 1.5413070283600495e-07, "loss": 0.0001, "step": 3284 }, { "epoch": 2.99, "learning_rate": 1.3871763255240445e-07, "loss": 0.0, "step": 3285 }, { "epoch": 2.99, "learning_rate": 1.2330456226880395e-07, "loss": 0.0001, "step": 3286 }, { "epoch": 2.99, "learning_rate": 1.0789149198520345e-07, "loss": 0.0001, "step": 3287 }, { "epoch": 2.99, "learning_rate": 9.247842170160297e-08, "loss": 0.0001, "step": 3288 }, { "epoch": 3.0, "learning_rate": 7.706535141800247e-08, "loss": 0.0001, "step": 3289 }, { "epoch": 3.0, "learning_rate": 6.165228113440198e-08, "loss": 0.0001, "step": 3290 }, { "epoch": 3.0, "learning_rate": 4.6239210850801485e-08, "loss": 0.0001, "step": 3291 }, { "epoch": 3.0, "learning_rate": 3.082614056720099e-08, "loss": 0.0001, "step": 3292 }, { "epoch": 3.0, "learning_rate": 1.5413070283600494e-08, "loss": 0.0001, "step": 3293 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.0001, "step": 3294 }, { "epoch": 3.0, "eval_accuracy": 0.9927140255009107, "eval_loss": 0.03864353522658348, "eval_runtime": 41.3032, "eval_samples_per_second": 106.336, "eval_steps_per_second": 6.658, "step": 3294 } ], "max_steps": 3294, "num_train_epochs": 3, "total_flos": 6976623228559680.0, "trial_name": null, "trial_params": null }