{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 9258, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.798561151079137e-08, "loss": 0.8164, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.597122302158274e-08, "loss": 0.8067, "step": 2 }, { "epoch": 0.0, "learning_rate": 5.395683453237411e-08, "loss": 0.5894, "step": 3 }, { "epoch": 0.0, "learning_rate": 7.194244604316547e-08, "loss": 0.5525, "step": 4 }, { "epoch": 0.0, "learning_rate": 8.992805755395684e-08, "loss": 0.5043, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.0791366906474822e-07, "loss": 0.485, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.2589928057553958e-07, "loss": 0.5067, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.4388489208633095e-07, "loss": 0.4637, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.618705035971223e-07, "loss": 0.445, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.7985611510791368e-07, "loss": 0.4298, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9784172661870504e-07, "loss": 0.4483, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.1582733812949643e-07, "loss": 0.4263, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.338129496402878e-07, "loss": 0.4298, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.5179856115107916e-07, "loss": 0.4126, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.697841726618705e-07, "loss": 0.3952, "step": 15 }, { "epoch": 0.01, "learning_rate": 2.877697841726619e-07, "loss": 0.4205, "step": 16 }, { "epoch": 0.01, "learning_rate": 3.057553956834533e-07, "loss": 0.4006, "step": 17 }, { "epoch": 0.01, "learning_rate": 3.237410071942446e-07, "loss": 0.3984, "step": 18 }, { "epoch": 0.01, "learning_rate": 3.41726618705036e-07, "loss": 0.4087, "step": 19 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "loss": 0.3978, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.7769784172661875e-07, "loss": 0.4132, "step": 21 }, { "epoch": 0.01, "learning_rate": 3.956834532374101e-07, "loss": 0.4262, "step": 22 }, { "epoch": 0.01, "learning_rate": 4.136690647482015e-07, "loss": 0.4022, "step": 23 }, { "epoch": 0.01, "learning_rate": 4.3165467625899287e-07, "loss": 0.399, "step": 24 }, { "epoch": 0.01, "learning_rate": 4.496402877697842e-07, "loss": 0.3991, "step": 25 }, { "epoch": 0.01, "learning_rate": 4.676258992805756e-07, "loss": 0.3692, "step": 26 }, { "epoch": 0.01, "learning_rate": 4.85611510791367e-07, "loss": 0.3883, "step": 27 }, { "epoch": 0.01, "learning_rate": 5.035971223021583e-07, "loss": 0.3916, "step": 28 }, { "epoch": 0.01, "learning_rate": 5.215827338129497e-07, "loss": 0.4122, "step": 29 }, { "epoch": 0.01, "learning_rate": 5.39568345323741e-07, "loss": 0.3708, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.575539568345325e-07, "loss": 0.3655, "step": 31 }, { "epoch": 0.01, "learning_rate": 5.755395683453238e-07, "loss": 0.3869, "step": 32 }, { "epoch": 0.01, "learning_rate": 5.935251798561151e-07, "loss": 0.3944, "step": 33 }, { "epoch": 0.01, "learning_rate": 6.115107913669066e-07, "loss": 0.3819, "step": 34 }, { "epoch": 0.01, "learning_rate": 6.294964028776979e-07, "loss": 0.386, "step": 35 }, { "epoch": 0.01, "learning_rate": 6.474820143884893e-07, "loss": 0.3984, "step": 36 }, { "epoch": 0.01, "learning_rate": 6.654676258992807e-07, "loss": 0.3627, "step": 37 }, { "epoch": 0.01, "learning_rate": 6.83453237410072e-07, "loss": 0.3654, "step": 38 }, { "epoch": 0.01, "learning_rate": 7.014388489208633e-07, "loss": 0.3536, "step": 39 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "loss": 0.3548, "step": 40 }, { "epoch": 0.01, "learning_rate": 7.37410071942446e-07, "loss": 0.3542, "step": 41 }, { "epoch": 0.01, "learning_rate": 7.553956834532375e-07, "loss": 0.3499, "step": 42 }, { "epoch": 0.01, "learning_rate": 7.733812949640289e-07, "loss": 0.3547, "step": 43 }, { "epoch": 0.01, "learning_rate": 7.913669064748202e-07, "loss": 0.3124, "step": 44 }, { "epoch": 0.01, "learning_rate": 8.093525179856115e-07, "loss": 0.3525, "step": 45 }, { "epoch": 0.01, "learning_rate": 8.27338129496403e-07, "loss": 0.3495, "step": 46 }, { "epoch": 0.02, "learning_rate": 8.453237410071943e-07, "loss": 0.3601, "step": 47 }, { "epoch": 0.02, "learning_rate": 8.633093525179857e-07, "loss": 0.3286, "step": 48 }, { "epoch": 0.02, "learning_rate": 8.81294964028777e-07, "loss": 0.327, "step": 49 }, { "epoch": 0.02, "learning_rate": 8.992805755395684e-07, "loss": 0.3369, "step": 50 }, { "epoch": 0.02, "learning_rate": 9.172661870503598e-07, "loss": 0.3437, "step": 51 }, { "epoch": 0.02, "learning_rate": 9.352517985611512e-07, "loss": 0.3163, "step": 52 }, { "epoch": 0.02, "learning_rate": 9.532374100719425e-07, "loss": 0.3391, "step": 53 }, { "epoch": 0.02, "learning_rate": 9.71223021582734e-07, "loss": 0.3456, "step": 54 }, { "epoch": 0.02, "learning_rate": 9.892086330935252e-07, "loss": 0.3235, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.0071942446043167e-06, "loss": 0.314, "step": 56 }, { "epoch": 0.02, "learning_rate": 1.025179856115108e-06, "loss": 0.3266, "step": 57 }, { "epoch": 0.02, "learning_rate": 1.0431654676258993e-06, "loss": 0.291, "step": 58 }, { "epoch": 0.02, "learning_rate": 1.0611510791366908e-06, "loss": 0.3248, "step": 59 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "loss": 0.3093, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.0971223021582735e-06, "loss": 0.2853, "step": 61 }, { "epoch": 0.02, "learning_rate": 1.115107913669065e-06, "loss": 0.3108, "step": 62 }, { "epoch": 0.02, "learning_rate": 1.1330935251798561e-06, "loss": 0.3112, "step": 63 }, { "epoch": 0.02, "learning_rate": 1.1510791366906476e-06, "loss": 0.3164, "step": 64 }, { "epoch": 0.02, "learning_rate": 1.1690647482014388e-06, "loss": 0.3068, "step": 65 }, { "epoch": 0.02, "learning_rate": 1.1870503597122303e-06, "loss": 0.3103, "step": 66 }, { "epoch": 0.02, "learning_rate": 1.2050359712230217e-06, "loss": 0.2831, "step": 67 }, { "epoch": 0.02, "learning_rate": 1.2230215827338131e-06, "loss": 0.3012, "step": 68 }, { "epoch": 0.02, "learning_rate": 1.2410071942446044e-06, "loss": 0.3195, "step": 69 }, { "epoch": 0.02, "learning_rate": 1.2589928057553958e-06, "loss": 0.2929, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.2769784172661873e-06, "loss": 0.2855, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.2949640287769785e-06, "loss": 0.3095, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.3129496402877697e-06, "loss": 0.3233, "step": 73 }, { "epoch": 0.02, "learning_rate": 1.3309352517985614e-06, "loss": 0.3271, "step": 74 }, { "epoch": 0.02, "learning_rate": 1.3489208633093526e-06, "loss": 0.3073, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.366906474820144e-06, "loss": 0.3034, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.3848920863309353e-06, "loss": 0.2921, "step": 77 }, { "epoch": 0.03, "learning_rate": 1.4028776978417265e-06, "loss": 0.3145, "step": 78 }, { "epoch": 0.03, "learning_rate": 1.4208633093525182e-06, "loss": 0.2933, "step": 79 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "loss": 0.3058, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.4568345323741009e-06, "loss": 0.2902, "step": 81 }, { "epoch": 0.03, "learning_rate": 1.474820143884892e-06, "loss": 0.3111, "step": 82 }, { "epoch": 0.03, "learning_rate": 1.4928057553956835e-06, "loss": 0.3133, "step": 83 }, { "epoch": 0.03, "learning_rate": 1.510791366906475e-06, "loss": 0.2731, "step": 84 }, { "epoch": 0.03, "learning_rate": 1.5287769784172662e-06, "loss": 0.3184, "step": 85 }, { "epoch": 0.03, "learning_rate": 1.5467625899280579e-06, "loss": 0.2866, "step": 86 }, { "epoch": 0.03, "learning_rate": 1.5647482014388491e-06, "loss": 0.2898, "step": 87 }, { "epoch": 0.03, "learning_rate": 1.5827338129496403e-06, "loss": 0.2764, "step": 88 }, { "epoch": 0.03, "learning_rate": 1.6007194244604318e-06, "loss": 0.2844, "step": 89 }, { "epoch": 0.03, "learning_rate": 1.618705035971223e-06, "loss": 0.282, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.6366906474820147e-06, "loss": 0.2559, "step": 91 }, { "epoch": 0.03, "learning_rate": 1.654676258992806e-06, "loss": 0.2992, "step": 92 }, { "epoch": 0.03, "learning_rate": 1.6726618705035971e-06, "loss": 0.2915, "step": 93 }, { "epoch": 0.03, "learning_rate": 1.6906474820143886e-06, "loss": 0.2917, "step": 94 }, { "epoch": 0.03, "learning_rate": 1.7086330935251798e-06, "loss": 0.2822, "step": 95 }, { "epoch": 0.03, "learning_rate": 1.7266187050359715e-06, "loss": 0.2865, "step": 96 }, { "epoch": 0.03, "learning_rate": 1.7446043165467627e-06, "loss": 0.252, "step": 97 }, { "epoch": 0.03, "learning_rate": 1.762589928057554e-06, "loss": 0.2745, "step": 98 }, { "epoch": 0.03, "learning_rate": 1.7805755395683456e-06, "loss": 0.3058, "step": 99 }, { "epoch": 0.03, "learning_rate": 1.7985611510791368e-06, "loss": 0.2927, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.8165467625899283e-06, "loss": 0.3076, "step": 101 }, { "epoch": 0.03, "learning_rate": 1.8345323741007195e-06, "loss": 0.2691, "step": 102 }, { "epoch": 0.03, "learning_rate": 1.8525179856115107e-06, "loss": 0.2867, "step": 103 }, { "epoch": 0.03, "learning_rate": 1.8705035971223024e-06, "loss": 0.2657, "step": 104 }, { "epoch": 0.03, "learning_rate": 1.8884892086330936e-06, "loss": 0.2846, "step": 105 }, { "epoch": 0.03, "learning_rate": 1.906474820143885e-06, "loss": 0.2756, "step": 106 }, { "epoch": 0.03, "learning_rate": 1.9244604316546765e-06, "loss": 0.2772, "step": 107 }, { "epoch": 0.03, "learning_rate": 1.942446043165468e-06, "loss": 0.2748, "step": 108 }, { "epoch": 0.04, "learning_rate": 1.960431654676259e-06, "loss": 0.2504, "step": 109 }, { "epoch": 0.04, "learning_rate": 1.9784172661870504e-06, "loss": 0.2623, "step": 110 }, { "epoch": 0.04, "learning_rate": 1.996402877697842e-06, "loss": 0.2377, "step": 111 }, { "epoch": 0.04, "learning_rate": 2.0143884892086333e-06, "loss": 0.2775, "step": 112 }, { "epoch": 0.04, "learning_rate": 2.0323741007194248e-06, "loss": 0.2791, "step": 113 }, { "epoch": 0.04, "learning_rate": 2.050359712230216e-06, "loss": 0.2689, "step": 114 }, { "epoch": 0.04, "learning_rate": 2.0683453237410072e-06, "loss": 0.2944, "step": 115 }, { "epoch": 0.04, "learning_rate": 2.0863309352517987e-06, "loss": 0.2637, "step": 116 }, { "epoch": 0.04, "learning_rate": 2.10431654676259e-06, "loss": 0.2853, "step": 117 }, { "epoch": 0.04, "learning_rate": 2.1223021582733816e-06, "loss": 0.2964, "step": 118 }, { "epoch": 0.04, "learning_rate": 2.140287769784173e-06, "loss": 0.2855, "step": 119 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "loss": 0.2721, "step": 120 }, { "epoch": 0.04, "learning_rate": 2.1762589928057555e-06, "loss": 0.27, "step": 121 }, { "epoch": 0.04, "learning_rate": 2.194244604316547e-06, "loss": 0.2722, "step": 122 }, { "epoch": 0.04, "learning_rate": 2.2122302158273384e-06, "loss": 0.2685, "step": 123 }, { "epoch": 0.04, "learning_rate": 2.23021582733813e-06, "loss": 0.3013, "step": 124 }, { "epoch": 0.04, "learning_rate": 2.248201438848921e-06, "loss": 0.2766, "step": 125 }, { "epoch": 0.04, "learning_rate": 2.2661870503597123e-06, "loss": 0.2826, "step": 126 }, { "epoch": 0.04, "learning_rate": 2.2841726618705037e-06, "loss": 0.2561, "step": 127 }, { "epoch": 0.04, "learning_rate": 2.302158273381295e-06, "loss": 0.2802, "step": 128 }, { "epoch": 0.04, "learning_rate": 2.3201438848920866e-06, "loss": 0.265, "step": 129 }, { "epoch": 0.04, "learning_rate": 2.3381294964028776e-06, "loss": 0.2691, "step": 130 }, { "epoch": 0.04, "learning_rate": 2.3561151079136695e-06, "loss": 0.2366, "step": 131 }, { "epoch": 0.04, "learning_rate": 2.3741007194244605e-06, "loss": 0.2565, "step": 132 }, { "epoch": 0.04, "learning_rate": 2.392086330935252e-06, "loss": 0.2829, "step": 133 }, { "epoch": 0.04, "learning_rate": 2.4100719424460434e-06, "loss": 0.2771, "step": 134 }, { "epoch": 0.04, "learning_rate": 2.4280575539568344e-06, "loss": 0.2506, "step": 135 }, { "epoch": 0.04, "learning_rate": 2.4460431654676263e-06, "loss": 0.2394, "step": 136 }, { "epoch": 0.04, "learning_rate": 2.4640287769784173e-06, "loss": 0.2752, "step": 137 }, { "epoch": 0.04, "learning_rate": 2.4820143884892088e-06, "loss": 0.2764, "step": 138 }, { "epoch": 0.05, "learning_rate": 2.5e-06, "loss": 0.2857, "step": 139 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "loss": 0.2663, "step": 140 }, { "epoch": 0.05, "learning_rate": 2.5359712230215827e-06, "loss": 0.292, "step": 141 }, { "epoch": 0.05, "learning_rate": 2.5539568345323745e-06, "loss": 0.2489, "step": 142 }, { "epoch": 0.05, "learning_rate": 2.571942446043166e-06, "loss": 0.2708, "step": 143 }, { "epoch": 0.05, "learning_rate": 2.589928057553957e-06, "loss": 0.2664, "step": 144 }, { "epoch": 0.05, "learning_rate": 2.6079136690647484e-06, "loss": 0.2676, "step": 145 }, { "epoch": 0.05, "learning_rate": 2.6258992805755395e-06, "loss": 0.2776, "step": 146 }, { "epoch": 0.05, "learning_rate": 2.6438848920863313e-06, "loss": 0.292, "step": 147 }, { "epoch": 0.05, "learning_rate": 2.6618705035971228e-06, "loss": 0.2474, "step": 148 }, { "epoch": 0.05, "learning_rate": 2.679856115107914e-06, "loss": 0.2623, "step": 149 }, { "epoch": 0.05, "learning_rate": 2.6978417266187052e-06, "loss": 0.2844, "step": 150 }, { "epoch": 0.05, "learning_rate": 2.7158273381294963e-06, "loss": 0.2533, "step": 151 }, { "epoch": 0.05, "learning_rate": 2.733812949640288e-06, "loss": 0.2478, "step": 152 }, { "epoch": 0.05, "learning_rate": 2.7517985611510796e-06, "loss": 0.2472, "step": 153 }, { "epoch": 0.05, "learning_rate": 2.7697841726618706e-06, "loss": 0.2667, "step": 154 }, { "epoch": 0.05, "learning_rate": 2.787769784172662e-06, "loss": 0.2539, "step": 155 }, { "epoch": 0.05, "learning_rate": 2.805755395683453e-06, "loss": 0.2505, "step": 156 }, { "epoch": 0.05, "learning_rate": 2.823741007194245e-06, "loss": 0.2573, "step": 157 }, { "epoch": 0.05, "learning_rate": 2.8417266187050364e-06, "loss": 0.2863, "step": 158 }, { "epoch": 0.05, "learning_rate": 2.8597122302158274e-06, "loss": 0.253, "step": 159 }, { "epoch": 0.05, "learning_rate": 2.877697841726619e-06, "loss": 0.2438, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.89568345323741e-06, "loss": 0.2569, "step": 161 }, { "epoch": 0.05, "learning_rate": 2.9136690647482017e-06, "loss": 0.2416, "step": 162 }, { "epoch": 0.05, "learning_rate": 2.931654676258993e-06, "loss": 0.2527, "step": 163 }, { "epoch": 0.05, "learning_rate": 2.949640287769784e-06, "loss": 0.2398, "step": 164 }, { "epoch": 0.05, "learning_rate": 2.9676258992805756e-06, "loss": 0.2427, "step": 165 }, { "epoch": 0.05, "learning_rate": 2.985611510791367e-06, "loss": 0.2463, "step": 166 }, { "epoch": 0.05, "learning_rate": 3.0035971223021585e-06, "loss": 0.2394, "step": 167 }, { "epoch": 0.05, "learning_rate": 3.02158273381295e-06, "loss": 0.2628, "step": 168 }, { "epoch": 0.05, "learning_rate": 3.0395683453237414e-06, "loss": 0.2564, "step": 169 }, { "epoch": 0.06, "learning_rate": 3.0575539568345324e-06, "loss": 0.2478, "step": 170 }, { "epoch": 0.06, "learning_rate": 3.075539568345324e-06, "loss": 0.2726, "step": 171 }, { "epoch": 0.06, "learning_rate": 3.0935251798561158e-06, "loss": 0.2546, "step": 172 }, { "epoch": 0.06, "learning_rate": 3.1115107913669068e-06, "loss": 0.2409, "step": 173 }, { "epoch": 0.06, "learning_rate": 3.1294964028776982e-06, "loss": 0.2588, "step": 174 }, { "epoch": 0.06, "learning_rate": 3.1474820143884892e-06, "loss": 0.282, "step": 175 }, { "epoch": 0.06, "learning_rate": 3.1654676258992807e-06, "loss": 0.2837, "step": 176 }, { "epoch": 0.06, "learning_rate": 3.1834532374100726e-06, "loss": 0.2577, "step": 177 }, { "epoch": 0.06, "learning_rate": 3.2014388489208636e-06, "loss": 0.2658, "step": 178 }, { "epoch": 0.06, "learning_rate": 3.219424460431655e-06, "loss": 0.2564, "step": 179 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "loss": 0.2376, "step": 180 }, { "epoch": 0.06, "learning_rate": 3.2553956834532375e-06, "loss": 0.2378, "step": 181 }, { "epoch": 0.06, "learning_rate": 3.2733812949640294e-06, "loss": 0.2475, "step": 182 }, { "epoch": 0.06, "learning_rate": 3.2913669064748204e-06, "loss": 0.2496, "step": 183 }, { "epoch": 0.06, "learning_rate": 3.309352517985612e-06, "loss": 0.2638, "step": 184 }, { "epoch": 0.06, "learning_rate": 3.327338129496403e-06, "loss": 0.244, "step": 185 }, { "epoch": 0.06, "learning_rate": 3.3453237410071943e-06, "loss": 0.2424, "step": 186 }, { "epoch": 0.06, "learning_rate": 3.363309352517986e-06, "loss": 0.2535, "step": 187 }, { "epoch": 0.06, "learning_rate": 3.381294964028777e-06, "loss": 0.2791, "step": 188 }, { "epoch": 0.06, "learning_rate": 3.3992805755395686e-06, "loss": 0.2483, "step": 189 }, { "epoch": 0.06, "learning_rate": 3.4172661870503596e-06, "loss": 0.2548, "step": 190 }, { "epoch": 0.06, "learning_rate": 3.435251798561151e-06, "loss": 0.2317, "step": 191 }, { "epoch": 0.06, "learning_rate": 3.453237410071943e-06, "loss": 0.2607, "step": 192 }, { "epoch": 0.06, "learning_rate": 3.471223021582734e-06, "loss": 0.254, "step": 193 }, { "epoch": 0.06, "learning_rate": 3.4892086330935254e-06, "loss": 0.2525, "step": 194 }, { "epoch": 0.06, "learning_rate": 3.507194244604317e-06, "loss": 0.2785, "step": 195 }, { "epoch": 0.06, "learning_rate": 3.525179856115108e-06, "loss": 0.242, "step": 196 }, { "epoch": 0.06, "learning_rate": 3.5431654676258998e-06, "loss": 0.2585, "step": 197 }, { "epoch": 0.06, "learning_rate": 3.561151079136691e-06, "loss": 0.2444, "step": 198 }, { "epoch": 0.06, "learning_rate": 3.5791366906474822e-06, "loss": 0.2523, "step": 199 }, { "epoch": 0.06, "learning_rate": 3.5971223021582737e-06, "loss": 0.2348, "step": 200 }, { "epoch": 0.07, "learning_rate": 3.6151079136690647e-06, "loss": 0.2934, "step": 201 }, { "epoch": 0.07, "learning_rate": 3.6330935251798566e-06, "loss": 0.2449, "step": 202 }, { "epoch": 0.07, "learning_rate": 3.651079136690648e-06, "loss": 0.2542, "step": 203 }, { "epoch": 0.07, "learning_rate": 3.669064748201439e-06, "loss": 0.2721, "step": 204 }, { "epoch": 0.07, "learning_rate": 3.6870503597122305e-06, "loss": 0.2613, "step": 205 }, { "epoch": 0.07, "learning_rate": 3.7050359712230215e-06, "loss": 0.2625, "step": 206 }, { "epoch": 0.07, "learning_rate": 3.7230215827338134e-06, "loss": 0.2437, "step": 207 }, { "epoch": 0.07, "learning_rate": 3.741007194244605e-06, "loss": 0.2534, "step": 208 }, { "epoch": 0.07, "learning_rate": 3.758992805755396e-06, "loss": 0.237, "step": 209 }, { "epoch": 0.07, "learning_rate": 3.7769784172661873e-06, "loss": 0.2583, "step": 210 }, { "epoch": 0.07, "learning_rate": 3.794964028776979e-06, "loss": 0.2475, "step": 211 }, { "epoch": 0.07, "learning_rate": 3.81294964028777e-06, "loss": 0.246, "step": 212 }, { "epoch": 0.07, "learning_rate": 3.830935251798562e-06, "loss": 0.2676, "step": 213 }, { "epoch": 0.07, "learning_rate": 3.848920863309353e-06, "loss": 0.253, "step": 214 }, { "epoch": 0.07, "learning_rate": 3.866906474820144e-06, "loss": 0.2515, "step": 215 }, { "epoch": 0.07, "learning_rate": 3.884892086330936e-06, "loss": 0.2552, "step": 216 }, { "epoch": 0.07, "learning_rate": 3.902877697841727e-06, "loss": 0.2508, "step": 217 }, { "epoch": 0.07, "learning_rate": 3.920863309352518e-06, "loss": 0.271, "step": 218 }, { "epoch": 0.07, "learning_rate": 3.938848920863309e-06, "loss": 0.2452, "step": 219 }, { "epoch": 0.07, "learning_rate": 3.956834532374101e-06, "loss": 0.2636, "step": 220 }, { "epoch": 0.07, "learning_rate": 3.974820143884892e-06, "loss": 0.2661, "step": 221 }, { "epoch": 0.07, "learning_rate": 3.992805755395684e-06, "loss": 0.2615, "step": 222 }, { "epoch": 0.07, "learning_rate": 4.010791366906475e-06, "loss": 0.2271, "step": 223 }, { "epoch": 0.07, "learning_rate": 4.028776978417267e-06, "loss": 0.2492, "step": 224 }, { "epoch": 0.07, "learning_rate": 4.046762589928058e-06, "loss": 0.264, "step": 225 }, { "epoch": 0.07, "learning_rate": 4.0647482014388495e-06, "loss": 0.268, "step": 226 }, { "epoch": 0.07, "learning_rate": 4.082733812949641e-06, "loss": 0.2515, "step": 227 }, { "epoch": 0.07, "learning_rate": 4.100719424460432e-06, "loss": 0.2698, "step": 228 }, { "epoch": 0.07, "learning_rate": 4.118705035971223e-06, "loss": 0.2648, "step": 229 }, { "epoch": 0.07, "learning_rate": 4.1366906474820145e-06, "loss": 0.2518, "step": 230 }, { "epoch": 0.07, "learning_rate": 4.154676258992807e-06, "loss": 0.2418, "step": 231 }, { "epoch": 0.08, "learning_rate": 4.172661870503597e-06, "loss": 0.2576, "step": 232 }, { "epoch": 0.08, "learning_rate": 4.190647482014389e-06, "loss": 0.2426, "step": 233 }, { "epoch": 0.08, "learning_rate": 4.20863309352518e-06, "loss": 0.2437, "step": 234 }, { "epoch": 0.08, "learning_rate": 4.226618705035972e-06, "loss": 0.2449, "step": 235 }, { "epoch": 0.08, "learning_rate": 4.244604316546763e-06, "loss": 0.2166, "step": 236 }, { "epoch": 0.08, "learning_rate": 4.2625899280575546e-06, "loss": 0.2658, "step": 237 }, { "epoch": 0.08, "learning_rate": 4.280575539568346e-06, "loss": 0.2394, "step": 238 }, { "epoch": 0.08, "learning_rate": 4.298561151079137e-06, "loss": 0.24, "step": 239 }, { "epoch": 0.08, "learning_rate": 4.316546762589928e-06, "loss": 0.2431, "step": 240 }, { "epoch": 0.08, "learning_rate": 4.33453237410072e-06, "loss": 0.2249, "step": 241 }, { "epoch": 0.08, "learning_rate": 4.352517985611511e-06, "loss": 0.2501, "step": 242 }, { "epoch": 0.08, "learning_rate": 4.370503597122302e-06, "loss": 0.2476, "step": 243 }, { "epoch": 0.08, "learning_rate": 4.388489208633094e-06, "loss": 0.2506, "step": 244 }, { "epoch": 0.08, "learning_rate": 4.406474820143885e-06, "loss": 0.2722, "step": 245 }, { "epoch": 0.08, "learning_rate": 4.424460431654677e-06, "loss": 0.246, "step": 246 }, { "epoch": 0.08, "learning_rate": 4.442446043165468e-06, "loss": 0.2479, "step": 247 }, { "epoch": 0.08, "learning_rate": 4.46043165467626e-06, "loss": 0.2559, "step": 248 }, { "epoch": 0.08, "learning_rate": 4.478417266187051e-06, "loss": 0.2641, "step": 249 }, { "epoch": 0.08, "learning_rate": 4.496402877697842e-06, "loss": 0.2244, "step": 250 }, { "epoch": 0.08, "learning_rate": 4.514388489208634e-06, "loss": 0.2345, "step": 251 }, { "epoch": 0.08, "learning_rate": 4.5323741007194245e-06, "loss": 0.2467, "step": 252 }, { "epoch": 0.08, "learning_rate": 4.550359712230216e-06, "loss": 0.2498, "step": 253 }, { "epoch": 0.08, "learning_rate": 4.5683453237410074e-06, "loss": 0.2545, "step": 254 }, { "epoch": 0.08, "learning_rate": 4.586330935251799e-06, "loss": 0.2318, "step": 255 }, { "epoch": 0.08, "learning_rate": 4.60431654676259e-06, "loss": 0.2391, "step": 256 }, { "epoch": 0.08, "learning_rate": 4.622302158273382e-06, "loss": 0.234, "step": 257 }, { "epoch": 0.08, "learning_rate": 4.640287769784173e-06, "loss": 0.228, "step": 258 }, { "epoch": 0.08, "learning_rate": 4.658273381294965e-06, "loss": 0.2304, "step": 259 }, { "epoch": 0.08, "learning_rate": 4.676258992805755e-06, "loss": 0.244, "step": 260 }, { "epoch": 0.08, "learning_rate": 4.6942446043165475e-06, "loss": 0.252, "step": 261 }, { "epoch": 0.08, "learning_rate": 4.712230215827339e-06, "loss": 0.248, "step": 262 }, { "epoch": 0.09, "learning_rate": 4.73021582733813e-06, "loss": 0.2457, "step": 263 }, { "epoch": 0.09, "learning_rate": 4.748201438848921e-06, "loss": 0.2421, "step": 264 }, { "epoch": 0.09, "learning_rate": 4.7661870503597125e-06, "loss": 0.2446, "step": 265 }, { "epoch": 0.09, "learning_rate": 4.784172661870504e-06, "loss": 0.2274, "step": 266 }, { "epoch": 0.09, "learning_rate": 4.802158273381295e-06, "loss": 0.2479, "step": 267 }, { "epoch": 0.09, "learning_rate": 4.820143884892087e-06, "loss": 0.2604, "step": 268 }, { "epoch": 0.09, "learning_rate": 4.838129496402878e-06, "loss": 0.2433, "step": 269 }, { "epoch": 0.09, "learning_rate": 4.856115107913669e-06, "loss": 0.2395, "step": 270 }, { "epoch": 0.09, "learning_rate": 4.874100719424461e-06, "loss": 0.2384, "step": 271 }, { "epoch": 0.09, "learning_rate": 4.892086330935253e-06, "loss": 0.2304, "step": 272 }, { "epoch": 0.09, "learning_rate": 4.910071942446043e-06, "loss": 0.2407, "step": 273 }, { "epoch": 0.09, "learning_rate": 4.928057553956835e-06, "loss": 0.2487, "step": 274 }, { "epoch": 0.09, "learning_rate": 4.946043165467626e-06, "loss": 0.231, "step": 275 }, { "epoch": 0.09, "learning_rate": 4.9640287769784175e-06, "loss": 0.2349, "step": 276 }, { "epoch": 0.09, "learning_rate": 4.982014388489209e-06, "loss": 0.2378, "step": 277 }, { "epoch": 0.09, "learning_rate": 5e-06, "loss": 0.2526, "step": 278 }, { "epoch": 0.09, "learning_rate": 4.999999847012101e-06, "loss": 0.2495, "step": 279 }, { "epoch": 0.09, "learning_rate": 4.9999993880484235e-06, "loss": 0.2386, "step": 280 }, { "epoch": 0.09, "learning_rate": 4.999998623109022e-06, "loss": 0.2314, "step": 281 }, { "epoch": 0.09, "learning_rate": 4.99999755219399e-06, "loss": 0.2379, "step": 282 }, { "epoch": 0.09, "learning_rate": 4.9999961753034595e-06, "loss": 0.2485, "step": 283 }, { "epoch": 0.09, "learning_rate": 4.9999944924376e-06, "loss": 0.2525, "step": 284 }, { "epoch": 0.09, "learning_rate": 4.999992503596616e-06, "loss": 0.2453, "step": 285 }, { "epoch": 0.09, "learning_rate": 4.999990208780751e-06, "loss": 0.2484, "step": 286 }, { "epoch": 0.09, "learning_rate": 4.999987607990287e-06, "loss": 0.2186, "step": 287 }, { "epoch": 0.09, "learning_rate": 4.999984701225542e-06, "loss": 0.2471, "step": 288 }, { "epoch": 0.09, "learning_rate": 4.9999814884868705e-06, "loss": 0.2252, "step": 289 }, { "epoch": 0.09, "learning_rate": 4.999977969774666e-06, "loss": 0.2601, "step": 290 }, { "epoch": 0.09, "learning_rate": 4.99997414508936e-06, "loss": 0.256, "step": 291 }, { "epoch": 0.09, "learning_rate": 4.999970014431421e-06, "loss": 0.2453, "step": 292 }, { "epoch": 0.09, "learning_rate": 4.999965577801354e-06, "loss": 0.2338, "step": 293 }, { "epoch": 0.1, "learning_rate": 4.999960835199701e-06, "loss": 0.2517, "step": 294 }, { "epoch": 0.1, "learning_rate": 4.999955786627042e-06, "loss": 0.2455, "step": 295 }, { "epoch": 0.1, "learning_rate": 4.999950432083998e-06, "loss": 0.2456, "step": 296 }, { "epoch": 0.1, "learning_rate": 4.999944771571222e-06, "loss": 0.2253, "step": 297 }, { "epoch": 0.1, "learning_rate": 4.999938805089407e-06, "loss": 0.2427, "step": 298 }, { "epoch": 0.1, "learning_rate": 4.999932532639285e-06, "loss": 0.2474, "step": 299 }, { "epoch": 0.1, "learning_rate": 4.99992595422162e-06, "loss": 0.237, "step": 300 }, { "epoch": 0.1, "learning_rate": 4.9999190698372216e-06, "loss": 0.2251, "step": 301 }, { "epoch": 0.1, "learning_rate": 4.9999118794869285e-06, "loss": 0.2473, "step": 302 }, { "epoch": 0.1, "learning_rate": 4.999904383171623e-06, "loss": 0.2292, "step": 303 }, { "epoch": 0.1, "learning_rate": 4.999896580892221e-06, "loss": 0.225, "step": 304 }, { "epoch": 0.1, "learning_rate": 4.99988847264968e-06, "loss": 0.2519, "step": 305 }, { "epoch": 0.1, "learning_rate": 4.99988005844499e-06, "loss": 0.236, "step": 306 }, { "epoch": 0.1, "learning_rate": 4.999871338279181e-06, "loss": 0.2497, "step": 307 }, { "epoch": 0.1, "learning_rate": 4.999862312153322e-06, "loss": 0.2349, "step": 308 }, { "epoch": 0.1, "learning_rate": 4.999852980068516e-06, "loss": 0.2547, "step": 309 }, { "epoch": 0.1, "learning_rate": 4.9998433420259055e-06, "loss": 0.252, "step": 310 }, { "epoch": 0.1, "learning_rate": 4.99983339802667e-06, "loss": 0.2527, "step": 311 }, { "epoch": 0.1, "learning_rate": 4.999823148072027e-06, "loss": 0.219, "step": 312 }, { "epoch": 0.1, "learning_rate": 4.999812592163232e-06, "loss": 0.2314, "step": 313 }, { "epoch": 0.1, "learning_rate": 4.9998017303015735e-06, "loss": 0.2579, "step": 314 }, { "epoch": 0.1, "learning_rate": 4.999790562488385e-06, "loss": 0.2388, "step": 315 }, { "epoch": 0.1, "learning_rate": 4.999779088725031e-06, "loss": 0.2607, "step": 316 }, { "epoch": 0.1, "learning_rate": 4.999767309012916e-06, "loss": 0.2305, "step": 317 }, { "epoch": 0.1, "learning_rate": 4.999755223353483e-06, "loss": 0.2347, "step": 318 }, { "epoch": 0.1, "learning_rate": 4.9997428317482086e-06, "loss": 0.2206, "step": 319 }, { "epoch": 0.1, "learning_rate": 4.999730134198612e-06, "loss": 0.2296, "step": 320 }, { "epoch": 0.1, "learning_rate": 4.999717130706247e-06, "loss": 0.2305, "step": 321 }, { "epoch": 0.1, "learning_rate": 4.999703821272702e-06, "loss": 0.2486, "step": 322 }, { "epoch": 0.1, "learning_rate": 4.99969020589961e-06, "loss": 0.2124, "step": 323 }, { "epoch": 0.1, "learning_rate": 4.999676284588635e-06, "loss": 0.2318, "step": 324 }, { "epoch": 0.11, "learning_rate": 4.999662057341482e-06, "loss": 0.2648, "step": 325 }, { "epoch": 0.11, "learning_rate": 4.999647524159892e-06, "loss": 0.2445, "step": 326 }, { "epoch": 0.11, "learning_rate": 4.9996326850456435e-06, "loss": 0.2525, "step": 327 }, { "epoch": 0.11, "learning_rate": 4.999617540000552e-06, "loss": 0.2466, "step": 328 }, { "epoch": 0.11, "learning_rate": 4.999602089026472e-06, "loss": 0.2331, "step": 329 }, { "epoch": 0.11, "learning_rate": 4.999586332125294e-06, "loss": 0.2716, "step": 330 }, { "epoch": 0.11, "learning_rate": 4.9995702692989476e-06, "loss": 0.2448, "step": 331 }, { "epoch": 0.11, "learning_rate": 4.999553900549398e-06, "loss": 0.2369, "step": 332 }, { "epoch": 0.11, "learning_rate": 4.999537225878648e-06, "loss": 0.2274, "step": 333 }, { "epoch": 0.11, "learning_rate": 4.999520245288739e-06, "loss": 0.2388, "step": 334 }, { "epoch": 0.11, "learning_rate": 4.999502958781749e-06, "loss": 0.2319, "step": 335 }, { "epoch": 0.11, "learning_rate": 4.999485366359794e-06, "loss": 0.2389, "step": 336 }, { "epoch": 0.11, "learning_rate": 4.999467468025028e-06, "loss": 0.2285, "step": 337 }, { "epoch": 0.11, "learning_rate": 4.99944926377964e-06, "loss": 0.2237, "step": 338 }, { "epoch": 0.11, "learning_rate": 4.999430753625858e-06, "loss": 0.244, "step": 339 }, { "epoch": 0.11, "learning_rate": 4.999411937565949e-06, "loss": 0.2407, "step": 340 }, { "epoch": 0.11, "learning_rate": 4.999392815602214e-06, "loss": 0.2369, "step": 341 }, { "epoch": 0.11, "learning_rate": 4.999373387736996e-06, "loss": 0.2429, "step": 342 }, { "epoch": 0.11, "learning_rate": 4.999353653972669e-06, "loss": 0.2339, "step": 343 }, { "epoch": 0.11, "learning_rate": 4.999333614311652e-06, "loss": 0.2421, "step": 344 }, { "epoch": 0.11, "learning_rate": 4.999313268756396e-06, "loss": 0.2513, "step": 345 }, { "epoch": 0.11, "learning_rate": 4.99929261730939e-06, "loss": 0.2213, "step": 346 }, { "epoch": 0.11, "learning_rate": 4.999271659973164e-06, "loss": 0.2139, "step": 347 }, { "epoch": 0.11, "learning_rate": 4.999250396750281e-06, "loss": 0.2387, "step": 348 }, { "epoch": 0.11, "learning_rate": 4.999228827643344e-06, "loss": 0.2473, "step": 349 }, { "epoch": 0.11, "learning_rate": 4.999206952654993e-06, "loss": 0.2589, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.999184771787905e-06, "loss": 0.2235, "step": 351 }, { "epoch": 0.11, "learning_rate": 4.999162285044795e-06, "loss": 0.2343, "step": 352 }, { "epoch": 0.11, "learning_rate": 4.9991394924284155e-06, "loss": 0.2352, "step": 353 }, { "epoch": 0.11, "learning_rate": 4.999116393941556e-06, "loss": 0.2108, "step": 354 }, { "epoch": 0.12, "learning_rate": 4.999092989587042e-06, "loss": 0.2327, "step": 355 }, { "epoch": 0.12, "learning_rate": 4.9990692793677395e-06, "loss": 0.2531, "step": 356 }, { "epoch": 0.12, "learning_rate": 4.999045263286551e-06, "loss": 0.2337, "step": 357 }, { "epoch": 0.12, "learning_rate": 4.999020941346414e-06, "loss": 0.2183, "step": 358 }, { "epoch": 0.12, "learning_rate": 4.998996313550306e-06, "loss": 0.2369, "step": 359 }, { "epoch": 0.12, "learning_rate": 4.998971379901242e-06, "loss": 0.2484, "step": 360 }, { "epoch": 0.12, "learning_rate": 4.998946140402273e-06, "loss": 0.2468, "step": 361 }, { "epoch": 0.12, "learning_rate": 4.998920595056488e-06, "loss": 0.2248, "step": 362 }, { "epoch": 0.12, "learning_rate": 4.998894743867013e-06, "loss": 0.2149, "step": 363 }, { "epoch": 0.12, "learning_rate": 4.998868586837013e-06, "loss": 0.2272, "step": 364 }, { "epoch": 0.12, "learning_rate": 4.998842123969689e-06, "loss": 0.2264, "step": 365 }, { "epoch": 0.12, "learning_rate": 4.998815355268279e-06, "loss": 0.2324, "step": 366 }, { "epoch": 0.12, "learning_rate": 4.998788280736061e-06, "loss": 0.2278, "step": 367 }, { "epoch": 0.12, "learning_rate": 4.998760900376347e-06, "loss": 0.2597, "step": 368 }, { "epoch": 0.12, "learning_rate": 4.99873321419249e-06, "loss": 0.2388, "step": 369 }, { "epoch": 0.12, "learning_rate": 4.998705222187875e-06, "loss": 0.2394, "step": 370 }, { "epoch": 0.12, "learning_rate": 4.998676924365931e-06, "loss": 0.2288, "step": 371 }, { "epoch": 0.12, "learning_rate": 4.998648320730121e-06, "loss": 0.2102, "step": 372 }, { "epoch": 0.12, "learning_rate": 4.998619411283945e-06, "loss": 0.2417, "step": 373 }, { "epoch": 0.12, "learning_rate": 4.998590196030942e-06, "loss": 0.2336, "step": 374 }, { "epoch": 0.12, "learning_rate": 4.998560674974686e-06, "loss": 0.2492, "step": 375 }, { "epoch": 0.12, "learning_rate": 4.998530848118792e-06, "loss": 0.2183, "step": 376 }, { "epoch": 0.12, "learning_rate": 4.99850071546691e-06, "loss": 0.2603, "step": 377 }, { "epoch": 0.12, "learning_rate": 4.998470277022728e-06, "loss": 0.2227, "step": 378 }, { "epoch": 0.12, "learning_rate": 4.99843953278997e-06, "loss": 0.2264, "step": 379 }, { "epoch": 0.12, "learning_rate": 4.998408482772401e-06, "loss": 0.2304, "step": 380 }, { "epoch": 0.12, "learning_rate": 4.99837712697382e-06, "loss": 0.2524, "step": 381 }, { "epoch": 0.12, "learning_rate": 4.998345465398066e-06, "loss": 0.2416, "step": 382 }, { "epoch": 0.12, "learning_rate": 4.998313498049011e-06, "loss": 0.2367, "step": 383 }, { "epoch": 0.12, "learning_rate": 4.9982812249305704e-06, "loss": 0.2314, "step": 384 }, { "epoch": 0.12, "learning_rate": 4.998248646046693e-06, "loss": 0.246, "step": 385 }, { "epoch": 0.13, "learning_rate": 4.998215761401366e-06, "loss": 0.2367, "step": 386 }, { "epoch": 0.13, "learning_rate": 4.9981825709986145e-06, "loss": 0.2201, "step": 387 }, { "epoch": 0.13, "learning_rate": 4.9981490748425e-06, "loss": 0.2316, "step": 388 }, { "epoch": 0.13, "learning_rate": 4.998115272937123e-06, "loss": 0.2299, "step": 389 }, { "epoch": 0.13, "learning_rate": 4.998081165286621e-06, "loss": 0.2318, "step": 390 }, { "epoch": 0.13, "learning_rate": 4.9980467518951666e-06, "loss": 0.2224, "step": 391 }, { "epoch": 0.13, "learning_rate": 4.998012032766974e-06, "loss": 0.2188, "step": 392 }, { "epoch": 0.13, "learning_rate": 4.997977007906291e-06, "loss": 0.2141, "step": 393 }, { "epoch": 0.13, "learning_rate": 4.997941677317403e-06, "loss": 0.2487, "step": 394 }, { "epoch": 0.13, "learning_rate": 4.997906041004637e-06, "loss": 0.2283, "step": 395 }, { "epoch": 0.13, "learning_rate": 4.997870098972353e-06, "loss": 0.2262, "step": 396 }, { "epoch": 0.13, "learning_rate": 4.99783385122495e-06, "loss": 0.2383, "step": 397 }, { "epoch": 0.13, "learning_rate": 4.997797297766864e-06, "loss": 0.2121, "step": 398 }, { "epoch": 0.13, "learning_rate": 4.9977604386025704e-06, "loss": 0.2292, "step": 399 }, { "epoch": 0.13, "learning_rate": 4.997723273736579e-06, "loss": 0.2393, "step": 400 }, { "epoch": 0.13, "learning_rate": 4.9976858031734375e-06, "loss": 0.2299, "step": 401 }, { "epoch": 0.13, "learning_rate": 4.9976480269177345e-06, "loss": 0.2231, "step": 402 }, { "epoch": 0.13, "learning_rate": 4.997609944974092e-06, "loss": 0.2281, "step": 403 }, { "epoch": 0.13, "learning_rate": 4.99757155734717e-06, "loss": 0.2281, "step": 404 }, { "epoch": 0.13, "learning_rate": 4.997532864041669e-06, "loss": 0.2285, "step": 405 }, { "epoch": 0.13, "learning_rate": 4.997493865062323e-06, "loss": 0.2323, "step": 406 }, { "epoch": 0.13, "learning_rate": 4.9974545604139055e-06, "loss": 0.2209, "step": 407 }, { "epoch": 0.13, "learning_rate": 4.997414950101227e-06, "loss": 0.222, "step": 408 }, { "epoch": 0.13, "learning_rate": 4.997375034129135e-06, "loss": 0.222, "step": 409 }, { "epoch": 0.13, "learning_rate": 4.997334812502516e-06, "loss": 0.2258, "step": 410 }, { "epoch": 0.13, "learning_rate": 4.9972942852262915e-06, "loss": 0.2029, "step": 411 }, { "epoch": 0.13, "learning_rate": 4.997253452305423e-06, "loss": 0.2255, "step": 412 }, { "epoch": 0.13, "learning_rate": 4.9972123137449065e-06, "loss": 0.2376, "step": 413 }, { "epoch": 0.13, "learning_rate": 4.997170869549778e-06, "loss": 0.238, "step": 414 }, { "epoch": 0.13, "learning_rate": 4.99712911972511e-06, "loss": 0.2347, "step": 415 }, { "epoch": 0.13, "learning_rate": 4.99708706427601e-06, "loss": 0.2269, "step": 416 }, { "epoch": 0.14, "learning_rate": 4.997044703207629e-06, "loss": 0.2284, "step": 417 }, { "epoch": 0.14, "learning_rate": 4.9970020365251485e-06, "loss": 0.2314, "step": 418 }, { "epoch": 0.14, "learning_rate": 4.996959064233792e-06, "loss": 0.2222, "step": 419 }, { "epoch": 0.14, "learning_rate": 4.996915786338818e-06, "loss": 0.233, "step": 420 }, { "epoch": 0.14, "learning_rate": 4.9968722028455245e-06, "loss": 0.2384, "step": 421 }, { "epoch": 0.14, "learning_rate": 4.996828313759245e-06, "loss": 0.203, "step": 422 }, { "epoch": 0.14, "learning_rate": 4.99678411908535e-06, "loss": 0.217, "step": 423 }, { "epoch": 0.14, "learning_rate": 4.996739618829251e-06, "loss": 0.2244, "step": 424 }, { "epoch": 0.14, "learning_rate": 4.996694812996391e-06, "loss": 0.2221, "step": 425 }, { "epoch": 0.14, "learning_rate": 4.996649701592258e-06, "loss": 0.2144, "step": 426 }, { "epoch": 0.14, "learning_rate": 4.99660428462237e-06, "loss": 0.2415, "step": 427 }, { "epoch": 0.14, "learning_rate": 4.996558562092286e-06, "loss": 0.2267, "step": 428 }, { "epoch": 0.14, "learning_rate": 4.996512534007602e-06, "loss": 0.2191, "step": 429 }, { "epoch": 0.14, "learning_rate": 4.996466200373954e-06, "loss": 0.2444, "step": 430 }, { "epoch": 0.14, "learning_rate": 4.99641956119701e-06, "loss": 0.2081, "step": 431 }, { "epoch": 0.14, "learning_rate": 4.996372616482478e-06, "loss": 0.2263, "step": 432 }, { "epoch": 0.14, "learning_rate": 4.996325366236105e-06, "loss": 0.2383, "step": 433 }, { "epoch": 0.14, "learning_rate": 4.996277810463675e-06, "loss": 0.2485, "step": 434 }, { "epoch": 0.14, "learning_rate": 4.996229949171004e-06, "loss": 0.2217, "step": 435 }, { "epoch": 0.14, "learning_rate": 4.996181782363955e-06, "loss": 0.22, "step": 436 }, { "epoch": 0.14, "learning_rate": 4.99613331004842e-06, "loss": 0.2319, "step": 437 }, { "epoch": 0.14, "learning_rate": 4.996084532230332e-06, "loss": 0.2231, "step": 438 }, { "epoch": 0.14, "learning_rate": 4.996035448915661e-06, "loss": 0.2347, "step": 439 }, { "epoch": 0.14, "learning_rate": 4.995986060110415e-06, "loss": 0.2542, "step": 440 }, { "epoch": 0.14, "learning_rate": 4.995936365820638e-06, "loss": 0.2047, "step": 441 }, { "epoch": 0.14, "learning_rate": 4.9958863660524125e-06, "loss": 0.2239, "step": 442 }, { "epoch": 0.14, "learning_rate": 4.995836060811859e-06, "loss": 0.2175, "step": 443 }, { "epoch": 0.14, "learning_rate": 4.995785450105131e-06, "loss": 0.2149, "step": 444 }, { "epoch": 0.14, "learning_rate": 4.995734533938427e-06, "loss": 0.2183, "step": 445 }, { "epoch": 0.14, "learning_rate": 4.995683312317975e-06, "loss": 0.2081, "step": 446 }, { "epoch": 0.14, "learning_rate": 4.995631785250046e-06, "loss": 0.2292, "step": 447 }, { "epoch": 0.15, "learning_rate": 4.9955799527409465e-06, "loss": 0.225, "step": 448 }, { "epoch": 0.15, "learning_rate": 4.99552781479702e-06, "loss": 0.2003, "step": 449 }, { "epoch": 0.15, "learning_rate": 4.995475371424648e-06, "loss": 0.2459, "step": 450 }, { "epoch": 0.15, "learning_rate": 4.995422622630247e-06, "loss": 0.2421, "step": 451 }, { "epoch": 0.15, "learning_rate": 4.995369568420276e-06, "loss": 0.2329, "step": 452 }, { "epoch": 0.15, "learning_rate": 4.995316208801226e-06, "loss": 0.236, "step": 453 }, { "epoch": 0.15, "learning_rate": 4.99526254377963e-06, "loss": 0.2234, "step": 454 }, { "epoch": 0.15, "learning_rate": 4.995208573362053e-06, "loss": 0.2196, "step": 455 }, { "epoch": 0.15, "learning_rate": 4.995154297555103e-06, "loss": 0.2206, "step": 456 }, { "epoch": 0.15, "learning_rate": 4.995099716365421e-06, "loss": 0.2585, "step": 457 }, { "epoch": 0.15, "learning_rate": 4.995044829799689e-06, "loss": 0.2155, "step": 458 }, { "epoch": 0.15, "learning_rate": 4.994989637864624e-06, "loss": 0.2313, "step": 459 }, { "epoch": 0.15, "learning_rate": 4.99493414056698e-06, "loss": 0.2377, "step": 460 }, { "epoch": 0.15, "learning_rate": 4.99487833791355e-06, "loss": 0.2287, "step": 461 }, { "epoch": 0.15, "learning_rate": 4.9948222299111644e-06, "loss": 0.2033, "step": 462 }, { "epoch": 0.15, "learning_rate": 4.994765816566689e-06, "loss": 0.2464, "step": 463 }, { "epoch": 0.15, "learning_rate": 4.994709097887029e-06, "loss": 0.2363, "step": 464 }, { "epoch": 0.15, "learning_rate": 4.994652073879127e-06, "loss": 0.2197, "step": 465 }, { "epoch": 0.15, "learning_rate": 4.994594744549961e-06, "loss": 0.2408, "step": 466 }, { "epoch": 0.15, "learning_rate": 4.994537109906546e-06, "loss": 0.2155, "step": 467 }, { "epoch": 0.15, "learning_rate": 4.99447916995594e-06, "loss": 0.2325, "step": 468 }, { "epoch": 0.15, "learning_rate": 4.99442092470523e-06, "loss": 0.2426, "step": 469 }, { "epoch": 0.15, "learning_rate": 4.994362374161548e-06, "loss": 0.2488, "step": 470 }, { "epoch": 0.15, "learning_rate": 4.994303518332059e-06, "loss": 0.2411, "step": 471 }, { "epoch": 0.15, "learning_rate": 4.994244357223965e-06, "loss": 0.1947, "step": 472 }, { "epoch": 0.15, "learning_rate": 4.994184890844509e-06, "loss": 0.2239, "step": 473 }, { "epoch": 0.15, "learning_rate": 4.9941251192009665e-06, "loss": 0.2227, "step": 474 }, { "epoch": 0.15, "learning_rate": 4.994065042300655e-06, "loss": 0.2352, "step": 475 }, { "epoch": 0.15, "learning_rate": 4.994004660150927e-06, "loss": 0.2357, "step": 476 }, { "epoch": 0.15, "learning_rate": 4.993943972759173e-06, "loss": 0.2261, "step": 477 }, { "epoch": 0.15, "learning_rate": 4.993882980132819e-06, "loss": 0.2297, "step": 478 }, { "epoch": 0.16, "learning_rate": 4.993821682279332e-06, "loss": 0.2275, "step": 479 }, { "epoch": 0.16, "learning_rate": 4.993760079206212e-06, "loss": 0.2074, "step": 480 }, { "epoch": 0.16, "learning_rate": 4.993698170920999e-06, "loss": 0.2239, "step": 481 }, { "epoch": 0.16, "learning_rate": 4.993635957431273e-06, "loss": 0.2329, "step": 482 }, { "epoch": 0.16, "learning_rate": 4.993573438744645e-06, "loss": 0.2241, "step": 483 }, { "epoch": 0.16, "learning_rate": 4.993510614868767e-06, "loss": 0.2349, "step": 484 }, { "epoch": 0.16, "learning_rate": 4.99344748581133e-06, "loss": 0.2164, "step": 485 }, { "epoch": 0.16, "learning_rate": 4.993384051580059e-06, "loss": 0.2353, "step": 486 }, { "epoch": 0.16, "learning_rate": 4.993320312182718e-06, "loss": 0.2136, "step": 487 }, { "epoch": 0.16, "learning_rate": 4.993256267627108e-06, "loss": 0.2267, "step": 488 }, { "epoch": 0.16, "learning_rate": 4.993191917921066e-06, "loss": 0.2466, "step": 489 }, { "epoch": 0.16, "learning_rate": 4.9931272630724704e-06, "loss": 0.2258, "step": 490 }, { "epoch": 0.16, "learning_rate": 4.993062303089233e-06, "loss": 0.224, "step": 491 }, { "epoch": 0.16, "learning_rate": 4.992997037979304e-06, "loss": 0.2289, "step": 492 }, { "epoch": 0.16, "learning_rate": 4.992931467750673e-06, "loss": 0.2111, "step": 493 }, { "epoch": 0.16, "learning_rate": 4.992865592411362e-06, "loss": 0.2089, "step": 494 }, { "epoch": 0.16, "learning_rate": 4.992799411969436e-06, "loss": 0.2132, "step": 495 }, { "epoch": 0.16, "learning_rate": 4.992732926432995e-06, "loss": 0.2286, "step": 496 }, { "epoch": 0.16, "learning_rate": 4.9926661358101745e-06, "loss": 0.2184, "step": 497 }, { "epoch": 0.16, "learning_rate": 4.9925990401091505e-06, "loss": 0.2145, "step": 498 }, { "epoch": 0.16, "learning_rate": 4.992531639338133e-06, "loss": 0.2317, "step": 499 }, { "epoch": 0.16, "learning_rate": 4.992463933505374e-06, "loss": 0.2168, "step": 500 }, { "epoch": 0.16, "learning_rate": 4.9923959226191574e-06, "loss": 0.225, "step": 501 }, { "epoch": 0.16, "learning_rate": 4.992327606687808e-06, "loss": 0.2214, "step": 502 }, { "epoch": 0.16, "learning_rate": 4.992258985719688e-06, "loss": 0.2254, "step": 503 }, { "epoch": 0.16, "learning_rate": 4.992190059723194e-06, "loss": 0.2178, "step": 504 }, { "epoch": 0.16, "learning_rate": 4.992120828706763e-06, "loss": 0.2372, "step": 505 }, { "epoch": 0.16, "learning_rate": 4.99205129267887e-06, "loss": 0.2297, "step": 506 }, { "epoch": 0.16, "learning_rate": 4.991981451648022e-06, "loss": 0.2288, "step": 507 }, { "epoch": 0.16, "learning_rate": 4.9919113056227685e-06, "loss": 0.2182, "step": 508 }, { "epoch": 0.16, "learning_rate": 4.991840854611696e-06, "loss": 0.2428, "step": 509 }, { "epoch": 0.17, "learning_rate": 4.991770098623425e-06, "loss": 0.2328, "step": 510 }, { "epoch": 0.17, "learning_rate": 4.9916990376666156e-06, "loss": 0.2248, "step": 511 }, { "epoch": 0.17, "learning_rate": 4.991627671749966e-06, "loss": 0.2221, "step": 512 }, { "epoch": 0.17, "learning_rate": 4.9915560008822105e-06, "loss": 0.2145, "step": 513 }, { "epoch": 0.17, "learning_rate": 4.99148402507212e-06, "loss": 0.2414, "step": 514 }, { "epoch": 0.17, "learning_rate": 4.991411744328505e-06, "loss": 0.2104, "step": 515 }, { "epoch": 0.17, "learning_rate": 4.991339158660211e-06, "loss": 0.2187, "step": 516 }, { "epoch": 0.17, "learning_rate": 4.991266268076121e-06, "loss": 0.2239, "step": 517 }, { "epoch": 0.17, "learning_rate": 4.991193072585158e-06, "loss": 0.2418, "step": 518 }, { "epoch": 0.17, "learning_rate": 4.99111957219628e-06, "loss": 0.2254, "step": 519 }, { "epoch": 0.17, "learning_rate": 4.991045766918482e-06, "loss": 0.2174, "step": 520 }, { "epoch": 0.17, "learning_rate": 4.990971656760797e-06, "loss": 0.2048, "step": 521 }, { "epoch": 0.17, "learning_rate": 4.990897241732296e-06, "loss": 0.2345, "step": 522 }, { "epoch": 0.17, "learning_rate": 4.990822521842086e-06, "loss": 0.1999, "step": 523 }, { "epoch": 0.17, "learning_rate": 4.990747497099312e-06, "loss": 0.247, "step": 524 }, { "epoch": 0.17, "learning_rate": 4.990672167513158e-06, "loss": 0.2107, "step": 525 }, { "epoch": 0.17, "learning_rate": 4.990596533092841e-06, "loss": 0.2269, "step": 526 }, { "epoch": 0.17, "learning_rate": 4.9905205938476195e-06, "loss": 0.2069, "step": 527 }, { "epoch": 0.17, "learning_rate": 4.990444349786788e-06, "loss": 0.2242, "step": 528 }, { "epoch": 0.17, "learning_rate": 4.990367800919677e-06, "loss": 0.2094, "step": 529 }, { "epoch": 0.17, "learning_rate": 4.990290947255656e-06, "loss": 0.2317, "step": 530 }, { "epoch": 0.17, "learning_rate": 4.9902137888041304e-06, "loss": 0.2177, "step": 531 }, { "epoch": 0.17, "learning_rate": 4.990136325574545e-06, "loss": 0.2129, "step": 532 }, { "epoch": 0.17, "learning_rate": 4.990058557576379e-06, "loss": 0.2414, "step": 533 }, { "epoch": 0.17, "learning_rate": 4.989980484819152e-06, "loss": 0.2325, "step": 534 }, { "epoch": 0.17, "learning_rate": 4.9899021073124175e-06, "loss": 0.2242, "step": 535 }, { "epoch": 0.17, "learning_rate": 4.989823425065769e-06, "loss": 0.2138, "step": 536 }, { "epoch": 0.17, "learning_rate": 4.989744438088838e-06, "loss": 0.2123, "step": 537 }, { "epoch": 0.17, "learning_rate": 4.98966514639129e-06, "loss": 0.2071, "step": 538 }, { "epoch": 0.17, "learning_rate": 4.98958554998283e-06, "loss": 0.2309, "step": 539 }, { "epoch": 0.17, "learning_rate": 4.989505648873198e-06, "loss": 0.2258, "step": 540 }, { "epoch": 0.18, "learning_rate": 4.989425443072177e-06, "loss": 0.2129, "step": 541 }, { "epoch": 0.18, "learning_rate": 4.9893449325895804e-06, "loss": 0.1913, "step": 542 }, { "epoch": 0.18, "learning_rate": 4.989264117435263e-06, "loss": 0.2179, "step": 543 }, { "epoch": 0.18, "learning_rate": 4.9891829976191155e-06, "loss": 0.2252, "step": 544 }, { "epoch": 0.18, "learning_rate": 4.9891015731510665e-06, "loss": 0.2306, "step": 545 }, { "epoch": 0.18, "learning_rate": 4.989019844041081e-06, "loss": 0.2237, "step": 546 }, { "epoch": 0.18, "learning_rate": 4.988937810299161e-06, "loss": 0.207, "step": 547 }, { "epoch": 0.18, "learning_rate": 4.98885547193535e-06, "loss": 0.256, "step": 548 }, { "epoch": 0.18, "learning_rate": 4.988772828959722e-06, "loss": 0.2095, "step": 549 }, { "epoch": 0.18, "learning_rate": 4.988689881382392e-06, "loss": 0.2213, "step": 550 }, { "epoch": 0.18, "learning_rate": 4.988606629213515e-06, "loss": 0.2411, "step": 551 }, { "epoch": 0.18, "learning_rate": 4.9885230724632775e-06, "loss": 0.2412, "step": 552 }, { "epoch": 0.18, "learning_rate": 4.9884392111419056e-06, "loss": 0.2107, "step": 553 }, { "epoch": 0.18, "learning_rate": 4.988355045259665e-06, "loss": 0.2187, "step": 554 }, { "epoch": 0.18, "learning_rate": 4.988270574826857e-06, "loss": 0.2261, "step": 555 }, { "epoch": 0.18, "learning_rate": 4.9881857998538175e-06, "loss": 0.2067, "step": 556 }, { "epoch": 0.18, "learning_rate": 4.988100720350924e-06, "loss": 0.2015, "step": 557 }, { "epoch": 0.18, "learning_rate": 4.988015336328589e-06, "loss": 0.215, "step": 558 }, { "epoch": 0.18, "learning_rate": 4.987929647797263e-06, "loss": 0.2041, "step": 559 }, { "epoch": 0.18, "learning_rate": 4.987843654767432e-06, "loss": 0.2224, "step": 560 }, { "epoch": 0.18, "learning_rate": 4.987757357249623e-06, "loss": 0.2104, "step": 561 }, { "epoch": 0.18, "learning_rate": 4.987670755254397e-06, "loss": 0.2248, "step": 562 }, { "epoch": 0.18, "learning_rate": 4.987583848792353e-06, "loss": 0.2039, "step": 563 }, { "epoch": 0.18, "learning_rate": 4.987496637874127e-06, "loss": 0.2398, "step": 564 }, { "epoch": 0.18, "learning_rate": 4.987409122510394e-06, "loss": 0.2402, "step": 565 }, { "epoch": 0.18, "learning_rate": 4.9873213027118635e-06, "loss": 0.225, "step": 566 }, { "epoch": 0.18, "learning_rate": 4.987233178489285e-06, "loss": 0.2204, "step": 567 }, { "epoch": 0.18, "learning_rate": 4.987144749853444e-06, "loss": 0.2044, "step": 568 }, { "epoch": 0.18, "learning_rate": 4.987056016815163e-06, "loss": 0.2255, "step": 569 }, { "epoch": 0.18, "learning_rate": 4.986966979385302e-06, "loss": 0.2169, "step": 570 }, { "epoch": 0.19, "learning_rate": 4.986877637574758e-06, "loss": 0.22, "step": 571 }, { "epoch": 0.19, "learning_rate": 4.986787991394467e-06, "loss": 0.2134, "step": 572 }, { "epoch": 0.19, "learning_rate": 4.9866980408554e-06, "loss": 0.2274, "step": 573 }, { "epoch": 0.19, "learning_rate": 4.986607785968565e-06, "loss": 0.2003, "step": 574 }, { "epoch": 0.19, "learning_rate": 4.986517226745009e-06, "loss": 0.2268, "step": 575 }, { "epoch": 0.19, "learning_rate": 4.9864263631958165e-06, "loss": 0.2131, "step": 576 }, { "epoch": 0.19, "learning_rate": 4.986335195332107e-06, "loss": 0.2053, "step": 577 }, { "epoch": 0.19, "learning_rate": 4.986243723165039e-06, "loss": 0.2199, "step": 578 }, { "epoch": 0.19, "learning_rate": 4.9861519467058094e-06, "loss": 0.2258, "step": 579 }, { "epoch": 0.19, "learning_rate": 4.986059865965649e-06, "loss": 0.2274, "step": 580 }, { "epoch": 0.19, "learning_rate": 4.985967480955827e-06, "loss": 0.2153, "step": 581 }, { "epoch": 0.19, "learning_rate": 4.9858747916876515e-06, "loss": 0.2023, "step": 582 }, { "epoch": 0.19, "learning_rate": 4.985781798172467e-06, "loss": 0.2105, "step": 583 }, { "epoch": 0.19, "learning_rate": 4.9856885004216545e-06, "loss": 0.2475, "step": 584 }, { "epoch": 0.19, "learning_rate": 4.985594898446633e-06, "loss": 0.2367, "step": 585 }, { "epoch": 0.19, "learning_rate": 4.9855009922588585e-06, "loss": 0.2278, "step": 586 }, { "epoch": 0.19, "learning_rate": 4.985406781869824e-06, "loss": 0.197, "step": 587 }, { "epoch": 0.19, "learning_rate": 4.98531226729106e-06, "loss": 0.2048, "step": 588 }, { "epoch": 0.19, "learning_rate": 4.985217448534134e-06, "loss": 0.2016, "step": 589 }, { "epoch": 0.19, "learning_rate": 4.985122325610651e-06, "loss": 0.225, "step": 590 }, { "epoch": 0.19, "learning_rate": 4.985026898532253e-06, "loss": 0.2102, "step": 591 }, { "epoch": 0.19, "learning_rate": 4.98493116731062e-06, "loss": 0.2219, "step": 592 }, { "epoch": 0.19, "learning_rate": 4.984835131957468e-06, "loss": 0.2273, "step": 593 }, { "epoch": 0.19, "learning_rate": 4.98473879248455e-06, "loss": 0.2217, "step": 594 }, { "epoch": 0.19, "learning_rate": 4.984642148903659e-06, "loss": 0.2099, "step": 595 }, { "epoch": 0.19, "learning_rate": 4.984545201226623e-06, "loss": 0.1961, "step": 596 }, { "epoch": 0.19, "learning_rate": 4.984447949465305e-06, "loss": 0.2214, "step": 597 }, { "epoch": 0.19, "learning_rate": 4.98435039363161e-06, "loss": 0.2284, "step": 598 }, { "epoch": 0.19, "learning_rate": 4.984252533737477e-06, "loss": 0.2188, "step": 599 }, { "epoch": 0.19, "learning_rate": 4.984154369794883e-06, "loss": 0.2348, "step": 600 }, { "epoch": 0.19, "learning_rate": 4.984055901815844e-06, "loss": 0.2133, "step": 601 }, { "epoch": 0.2, "learning_rate": 4.983957129812409e-06, "loss": 0.2026, "step": 602 }, { "epoch": 0.2, "learning_rate": 4.9838580537966676e-06, "loss": 0.2372, "step": 603 }, { "epoch": 0.2, "learning_rate": 4.983758673780747e-06, "loss": 0.21, "step": 604 }, { "epoch": 0.2, "learning_rate": 4.9836589897768084e-06, "loss": 0.2173, "step": 605 }, { "epoch": 0.2, "learning_rate": 4.983559001797054e-06, "loss": 0.2354, "step": 606 }, { "epoch": 0.2, "learning_rate": 4.983458709853719e-06, "loss": 0.2017, "step": 607 }, { "epoch": 0.2, "learning_rate": 4.9833581139590814e-06, "loss": 0.2301, "step": 608 }, { "epoch": 0.2, "learning_rate": 4.983257214125451e-06, "loss": 0.224, "step": 609 }, { "epoch": 0.2, "learning_rate": 4.9831560103651765e-06, "loss": 0.2233, "step": 610 }, { "epoch": 0.2, "learning_rate": 4.983054502690646e-06, "loss": 0.2201, "step": 611 }, { "epoch": 0.2, "learning_rate": 4.9829526911142825e-06, "loss": 0.1927, "step": 612 }, { "epoch": 0.2, "learning_rate": 4.982850575648545e-06, "loss": 0.2163, "step": 613 }, { "epoch": 0.2, "learning_rate": 4.982748156305934e-06, "loss": 0.2124, "step": 614 }, { "epoch": 0.2, "learning_rate": 4.982645433098984e-06, "loss": 0.2161, "step": 615 }, { "epoch": 0.2, "learning_rate": 4.982542406040266e-06, "loss": 0.2327, "step": 616 }, { "epoch": 0.2, "learning_rate": 4.98243907514239e-06, "loss": 0.2204, "step": 617 }, { "epoch": 0.2, "learning_rate": 4.982335440418004e-06, "loss": 0.2271, "step": 618 }, { "epoch": 0.2, "learning_rate": 4.98223150187979e-06, "loss": 0.2033, "step": 619 }, { "epoch": 0.2, "learning_rate": 4.982127259540471e-06, "loss": 0.2298, "step": 620 }, { "epoch": 0.2, "learning_rate": 4.9820227134128045e-06, "loss": 0.2163, "step": 621 }, { "epoch": 0.2, "learning_rate": 4.981917863509585e-06, "loss": 0.1994, "step": 622 }, { "epoch": 0.2, "learning_rate": 4.981812709843646e-06, "loss": 0.2233, "step": 623 }, { "epoch": 0.2, "learning_rate": 4.981707252427857e-06, "loss": 0.2082, "step": 624 }, { "epoch": 0.2, "learning_rate": 4.981601491275125e-06, "loss": 0.221, "step": 625 }, { "epoch": 0.2, "learning_rate": 4.981495426398395e-06, "loss": 0.1968, "step": 626 }, { "epoch": 0.2, "learning_rate": 4.981389057810647e-06, "loss": 0.2045, "step": 627 }, { "epoch": 0.2, "learning_rate": 4.9812823855248996e-06, "loss": 0.1984, "step": 628 }, { "epoch": 0.2, "learning_rate": 4.98117540955421e-06, "loss": 0.2152, "step": 629 }, { "epoch": 0.2, "learning_rate": 4.981068129911669e-06, "loss": 0.2097, "step": 630 }, { "epoch": 0.2, "learning_rate": 4.980960546610408e-06, "loss": 0.2008, "step": 631 }, { "epoch": 0.2, "learning_rate": 4.980852659663593e-06, "loss": 0.2098, "step": 632 }, { "epoch": 0.21, "learning_rate": 4.9807444690844296e-06, "loss": 0.2254, "step": 633 }, { "epoch": 0.21, "learning_rate": 4.980635974886158e-06, "loss": 0.2289, "step": 634 }, { "epoch": 0.21, "learning_rate": 4.980527177082058e-06, "loss": 0.2051, "step": 635 }, { "epoch": 0.21, "learning_rate": 4.980418075685445e-06, "loss": 0.2162, "step": 636 }, { "epoch": 0.21, "learning_rate": 4.980308670709671e-06, "loss": 0.2224, "step": 637 }, { "epoch": 0.21, "learning_rate": 4.980198962168128e-06, "loss": 0.2173, "step": 638 }, { "epoch": 0.21, "learning_rate": 4.9800889500742415e-06, "loss": 0.2062, "step": 639 }, { "epoch": 0.21, "learning_rate": 4.979978634441477e-06, "loss": 0.2045, "step": 640 }, { "epoch": 0.21, "learning_rate": 4.979868015283336e-06, "loss": 0.2603, "step": 641 }, { "epoch": 0.21, "learning_rate": 4.979757092613357e-06, "loss": 0.2084, "step": 642 }, { "epoch": 0.21, "learning_rate": 4.979645866445114e-06, "loss": 0.2226, "step": 643 }, { "epoch": 0.21, "learning_rate": 4.9795343367922235e-06, "loss": 0.2044, "step": 644 }, { "epoch": 0.21, "learning_rate": 4.979422503668334e-06, "loss": 0.2225, "step": 645 }, { "epoch": 0.21, "learning_rate": 4.979310367087132e-06, "loss": 0.2129, "step": 646 }, { "epoch": 0.21, "learning_rate": 4.979197927062343e-06, "loss": 0.221, "step": 647 }, { "epoch": 0.21, "learning_rate": 4.979085183607728e-06, "loss": 0.2231, "step": 648 }, { "epoch": 0.21, "learning_rate": 4.978972136737086e-06, "loss": 0.209, "step": 649 }, { "epoch": 0.21, "learning_rate": 4.978858786464252e-06, "loss": 0.2229, "step": 650 }, { "epoch": 0.21, "learning_rate": 4.978745132803101e-06, "loss": 0.2142, "step": 651 }, { "epoch": 0.21, "learning_rate": 4.9786311757675425e-06, "loss": 0.2172, "step": 652 }, { "epoch": 0.21, "learning_rate": 4.978516915371522e-06, "loss": 0.2068, "step": 653 }, { "epoch": 0.21, "learning_rate": 4.978402351629024e-06, "loss": 0.2281, "step": 654 }, { "epoch": 0.21, "learning_rate": 4.9782874845540715e-06, "loss": 0.2399, "step": 655 }, { "epoch": 0.21, "learning_rate": 4.978172314160724e-06, "loss": 0.221, "step": 656 }, { "epoch": 0.21, "learning_rate": 4.9780568404630746e-06, "loss": 0.2146, "step": 657 }, { "epoch": 0.21, "learning_rate": 4.977941063475258e-06, "loss": 0.2001, "step": 658 }, { "epoch": 0.21, "learning_rate": 4.977824983211443e-06, "loss": 0.2265, "step": 659 }, { "epoch": 0.21, "learning_rate": 4.977708599685837e-06, "loss": 0.2188, "step": 660 }, { "epoch": 0.21, "learning_rate": 4.977591912912685e-06, "loss": 0.227, "step": 661 }, { "epoch": 0.21, "learning_rate": 4.977474922906268e-06, "loss": 0.2059, "step": 662 }, { "epoch": 0.21, "learning_rate": 4.977357629680903e-06, "loss": 0.2202, "step": 663 }, { "epoch": 0.22, "learning_rate": 4.977240033250948e-06, "loss": 0.2266, "step": 664 }, { "epoch": 0.22, "learning_rate": 4.977122133630795e-06, "loss": 0.2062, "step": 665 }, { "epoch": 0.22, "learning_rate": 4.9770039308348725e-06, "loss": 0.2171, "step": 666 }, { "epoch": 0.22, "learning_rate": 4.9768854248776475e-06, "loss": 0.1995, "step": 667 }, { "epoch": 0.22, "learning_rate": 4.976766615773626e-06, "loss": 0.1996, "step": 668 }, { "epoch": 0.22, "learning_rate": 4.976647503537347e-06, "loss": 0.2345, "step": 669 }, { "epoch": 0.22, "learning_rate": 4.9765280881833885e-06, "loss": 0.2238, "step": 670 }, { "epoch": 0.22, "learning_rate": 4.976408369726368e-06, "loss": 0.1869, "step": 671 }, { "epoch": 0.22, "learning_rate": 4.976288348180935e-06, "loss": 0.2202, "step": 672 }, { "epoch": 0.22, "learning_rate": 4.976168023561782e-06, "loss": 0.215, "step": 673 }, { "epoch": 0.22, "learning_rate": 4.976047395883634e-06, "loss": 0.1982, "step": 674 }, { "epoch": 0.22, "learning_rate": 4.975926465161254e-06, "loss": 0.207, "step": 675 }, { "epoch": 0.22, "learning_rate": 4.975805231409444e-06, "loss": 0.1988, "step": 676 }, { "epoch": 0.22, "learning_rate": 4.975683694643041e-06, "loss": 0.2166, "step": 677 }, { "epoch": 0.22, "learning_rate": 4.97556185487692e-06, "loss": 0.2215, "step": 678 }, { "epoch": 0.22, "learning_rate": 4.9754397121259935e-06, "loss": 0.2219, "step": 679 }, { "epoch": 0.22, "learning_rate": 4.975317266405211e-06, "loss": 0.2114, "step": 680 }, { "epoch": 0.22, "learning_rate": 4.975194517729557e-06, "loss": 0.2021, "step": 681 }, { "epoch": 0.22, "learning_rate": 4.975071466114057e-06, "loss": 0.2473, "step": 682 }, { "epoch": 0.22, "learning_rate": 4.974948111573768e-06, "loss": 0.2186, "step": 683 }, { "epoch": 0.22, "learning_rate": 4.9748244541237915e-06, "loss": 0.2261, "step": 684 }, { "epoch": 0.22, "learning_rate": 4.97470049377926e-06, "loss": 0.2031, "step": 685 }, { "epoch": 0.22, "learning_rate": 4.974576230555344e-06, "loss": 0.2158, "step": 686 }, { "epoch": 0.22, "learning_rate": 4.974451664467253e-06, "loss": 0.2124, "step": 687 }, { "epoch": 0.22, "learning_rate": 4.974326795530234e-06, "loss": 0.2006, "step": 688 }, { "epoch": 0.22, "learning_rate": 4.974201623759568e-06, "loss": 0.2112, "step": 689 }, { "epoch": 0.22, "learning_rate": 4.974076149170575e-06, "loss": 0.2267, "step": 690 }, { "epoch": 0.22, "learning_rate": 4.973950371778612e-06, "loss": 0.2206, "step": 691 }, { "epoch": 0.22, "learning_rate": 4.973824291599074e-06, "loss": 0.1969, "step": 692 }, { "epoch": 0.22, "learning_rate": 4.973697908647391e-06, "loss": 0.2093, "step": 693 }, { "epoch": 0.22, "learning_rate": 4.973571222939031e-06, "loss": 0.1896, "step": 694 }, { "epoch": 0.23, "learning_rate": 4.973444234489499e-06, "loss": 0.2168, "step": 695 }, { "epoch": 0.23, "learning_rate": 4.973316943314338e-06, "loss": 0.2116, "step": 696 }, { "epoch": 0.23, "learning_rate": 4.9731893494291275e-06, "loss": 0.2147, "step": 697 }, { "epoch": 0.23, "learning_rate": 4.973061452849481e-06, "loss": 0.1906, "step": 698 }, { "epoch": 0.23, "learning_rate": 4.972933253591056e-06, "loss": 0.2056, "step": 699 }, { "epoch": 0.23, "learning_rate": 4.972804751669539e-06, "loss": 0.192, "step": 700 }, { "epoch": 0.23, "learning_rate": 4.972675947100659e-06, "loss": 0.2184, "step": 701 }, { "epoch": 0.23, "learning_rate": 4.972546839900181e-06, "loss": 0.2014, "step": 702 }, { "epoch": 0.23, "learning_rate": 4.972417430083906e-06, "loss": 0.1983, "step": 703 }, { "epoch": 0.23, "learning_rate": 4.972287717667672e-06, "loss": 0.2176, "step": 704 }, { "epoch": 0.23, "learning_rate": 4.972157702667356e-06, "loss": 0.2389, "step": 705 }, { "epoch": 0.23, "learning_rate": 4.972027385098868e-06, "loss": 0.2095, "step": 706 }, { "epoch": 0.23, "learning_rate": 4.97189676497816e-06, "loss": 0.2292, "step": 707 }, { "epoch": 0.23, "learning_rate": 4.971765842321218e-06, "loss": 0.2246, "step": 708 }, { "epoch": 0.23, "learning_rate": 4.971634617144065e-06, "loss": 0.2135, "step": 709 }, { "epoch": 0.23, "learning_rate": 4.971503089462762e-06, "loss": 0.2271, "step": 710 }, { "epoch": 0.23, "learning_rate": 4.9713712592934075e-06, "loss": 0.2233, "step": 711 }, { "epoch": 0.23, "learning_rate": 4.971239126652135e-06, "loss": 0.2427, "step": 712 }, { "epoch": 0.23, "learning_rate": 4.971106691555116e-06, "loss": 0.2146, "step": 713 }, { "epoch": 0.23, "learning_rate": 4.9709739540185616e-06, "loss": 0.2002, "step": 714 }, { "epoch": 0.23, "learning_rate": 4.970840914058716e-06, "loss": 0.2238, "step": 715 }, { "epoch": 0.23, "learning_rate": 4.970707571691862e-06, "loss": 0.2195, "step": 716 }, { "epoch": 0.23, "learning_rate": 4.970573926934319e-06, "loss": 0.2176, "step": 717 }, { "epoch": 0.23, "learning_rate": 4.970439979802445e-06, "loss": 0.2152, "step": 718 }, { "epoch": 0.23, "learning_rate": 4.970305730312632e-06, "loss": 0.1932, "step": 719 }, { "epoch": 0.23, "learning_rate": 4.9701711784813135e-06, "loss": 0.2055, "step": 720 }, { "epoch": 0.23, "learning_rate": 4.970036324324955e-06, "loss": 0.219, "step": 721 }, { "epoch": 0.23, "learning_rate": 4.969901167860063e-06, "loss": 0.2181, "step": 722 }, { "epoch": 0.23, "learning_rate": 4.969765709103177e-06, "loss": 0.2023, "step": 723 }, { "epoch": 0.23, "learning_rate": 4.9696299480708785e-06, "loss": 0.2075, "step": 724 }, { "epoch": 0.23, "learning_rate": 4.969493884779783e-06, "loss": 0.2098, "step": 725 }, { "epoch": 0.24, "learning_rate": 4.969357519246542e-06, "loss": 0.2113, "step": 726 }, { "epoch": 0.24, "learning_rate": 4.9692208514878445e-06, "loss": 0.233, "step": 727 }, { "epoch": 0.24, "learning_rate": 4.96908388152042e-06, "loss": 0.2242, "step": 728 }, { "epoch": 0.24, "learning_rate": 4.968946609361031e-06, "loss": 0.2421, "step": 729 }, { "epoch": 0.24, "learning_rate": 4.968809035026477e-06, "loss": 0.2301, "step": 730 }, { "epoch": 0.24, "learning_rate": 4.968671158533599e-06, "loss": 0.2085, "step": 731 }, { "epoch": 0.24, "learning_rate": 4.968532979899269e-06, "loss": 0.2102, "step": 732 }, { "epoch": 0.24, "learning_rate": 4.9683944991403985e-06, "loss": 0.2107, "step": 733 }, { "epoch": 0.24, "learning_rate": 4.968255716273938e-06, "loss": 0.2274, "step": 734 }, { "epoch": 0.24, "learning_rate": 4.968116631316873e-06, "loss": 0.2286, "step": 735 }, { "epoch": 0.24, "learning_rate": 4.967977244286225e-06, "loss": 0.2203, "step": 736 }, { "epoch": 0.24, "learning_rate": 4.967837555199054e-06, "loss": 0.2206, "step": 737 }, { "epoch": 0.24, "learning_rate": 4.967697564072457e-06, "loss": 0.2066, "step": 738 }, { "epoch": 0.24, "learning_rate": 4.9675572709235665e-06, "loss": 0.2021, "step": 739 }, { "epoch": 0.24, "learning_rate": 4.967416675769555e-06, "loss": 0.2129, "step": 740 }, { "epoch": 0.24, "learning_rate": 4.967275778627628e-06, "loss": 0.2338, "step": 741 }, { "epoch": 0.24, "learning_rate": 4.967134579515032e-06, "loss": 0.2188, "step": 742 }, { "epoch": 0.24, "learning_rate": 4.966993078449046e-06, "loss": 0.2177, "step": 743 }, { "epoch": 0.24, "learning_rate": 4.96685127544699e-06, "loss": 0.2052, "step": 744 }, { "epoch": 0.24, "learning_rate": 4.966709170526219e-06, "loss": 0.2021, "step": 745 }, { "epoch": 0.24, "learning_rate": 4.966566763704124e-06, "loss": 0.2175, "step": 746 }, { "epoch": 0.24, "learning_rate": 4.966424054998137e-06, "loss": 0.1973, "step": 747 }, { "epoch": 0.24, "learning_rate": 4.966281044425722e-06, "loss": 0.203, "step": 748 }, { "epoch": 0.24, "learning_rate": 4.9661377320043815e-06, "loss": 0.2049, "step": 749 }, { "epoch": 0.24, "learning_rate": 4.965994117751658e-06, "loss": 0.201, "step": 750 }, { "epoch": 0.24, "learning_rate": 4.965850201685126e-06, "loss": 0.2091, "step": 751 }, { "epoch": 0.24, "learning_rate": 4.965705983822401e-06, "loss": 0.2261, "step": 752 }, { "epoch": 0.24, "learning_rate": 4.965561464181134e-06, "loss": 0.2147, "step": 753 }, { "epoch": 0.24, "learning_rate": 4.965416642779012e-06, "loss": 0.237, "step": 754 }, { "epoch": 0.24, "learning_rate": 4.96527151963376e-06, "loss": 0.2189, "step": 755 }, { "epoch": 0.24, "learning_rate": 4.9651260947631395e-06, "loss": 0.211, "step": 756 }, { "epoch": 0.25, "learning_rate": 4.9649803681849495e-06, "loss": 0.1863, "step": 757 }, { "epoch": 0.25, "learning_rate": 4.9648343399170254e-06, "loss": 0.2267, "step": 758 }, { "epoch": 0.25, "learning_rate": 4.964688009977239e-06, "loss": 0.2016, "step": 759 }, { "epoch": 0.25, "learning_rate": 4.9645413783835006e-06, "loss": 0.2304, "step": 760 }, { "epoch": 0.25, "learning_rate": 4.964394445153756e-06, "loss": 0.2105, "step": 761 }, { "epoch": 0.25, "learning_rate": 4.964247210305989e-06, "loss": 0.2258, "step": 762 }, { "epoch": 0.25, "learning_rate": 4.964099673858219e-06, "loss": 0.2011, "step": 763 }, { "epoch": 0.25, "learning_rate": 4.963951835828503e-06, "loss": 0.2156, "step": 764 }, { "epoch": 0.25, "learning_rate": 4.963803696234935e-06, "loss": 0.1968, "step": 765 }, { "epoch": 0.25, "learning_rate": 4.9636552550956465e-06, "loss": 0.2331, "step": 766 }, { "epoch": 0.25, "learning_rate": 4.963506512428804e-06, "loss": 0.2303, "step": 767 }, { "epoch": 0.25, "learning_rate": 4.963357468252614e-06, "loss": 0.2212, "step": 768 }, { "epoch": 0.25, "learning_rate": 4.9632081225853165e-06, "loss": 0.213, "step": 769 }, { "epoch": 0.25, "learning_rate": 4.9630584754451906e-06, "loss": 0.1903, "step": 770 }, { "epoch": 0.25, "learning_rate": 4.962908526850552e-06, "loss": 0.2026, "step": 771 }, { "epoch": 0.25, "learning_rate": 4.962758276819752e-06, "loss": 0.2039, "step": 772 }, { "epoch": 0.25, "learning_rate": 4.9626077253711805e-06, "loss": 0.2077, "step": 773 }, { "epoch": 0.25, "learning_rate": 4.962456872523263e-06, "loss": 0.2213, "step": 774 }, { "epoch": 0.25, "learning_rate": 4.962305718294462e-06, "loss": 0.2316, "step": 775 }, { "epoch": 0.25, "learning_rate": 4.96215426270328e-06, "loss": 0.2073, "step": 776 }, { "epoch": 0.25, "learning_rate": 4.962002505768251e-06, "loss": 0.2146, "step": 777 }, { "epoch": 0.25, "learning_rate": 4.961850447507948e-06, "loss": 0.2346, "step": 778 }, { "epoch": 0.25, "learning_rate": 4.961698087940984e-06, "loss": 0.221, "step": 779 }, { "epoch": 0.25, "learning_rate": 4.961545427086006e-06, "loss": 0.2129, "step": 780 }, { "epoch": 0.25, "learning_rate": 4.961392464961695e-06, "loss": 0.2064, "step": 781 }, { "epoch": 0.25, "learning_rate": 4.961239201586776e-06, "loss": 0.225, "step": 782 }, { "epoch": 0.25, "learning_rate": 4.961085636980005e-06, "loss": 0.225, "step": 783 }, { "epoch": 0.25, "learning_rate": 4.960931771160177e-06, "loss": 0.2034, "step": 784 }, { "epoch": 0.25, "learning_rate": 4.960777604146124e-06, "loss": 0.2332, "step": 785 }, { "epoch": 0.25, "learning_rate": 4.9606231359567146e-06, "loss": 0.2128, "step": 786 }, { "epoch": 0.26, "learning_rate": 4.960468366610854e-06, "loss": 0.2017, "step": 787 }, { "epoch": 0.26, "learning_rate": 4.960313296127485e-06, "loss": 0.2091, "step": 788 }, { "epoch": 0.26, "learning_rate": 4.960157924525585e-06, "loss": 0.1996, "step": 789 }, { "epoch": 0.26, "learning_rate": 4.960002251824172e-06, "loss": 0.2294, "step": 790 }, { "epoch": 0.26, "learning_rate": 4.959846278042298e-06, "loss": 0.2104, "step": 791 }, { "epoch": 0.26, "learning_rate": 4.959690003199052e-06, "loss": 0.2029, "step": 792 }, { "epoch": 0.26, "learning_rate": 4.959533427313562e-06, "loss": 0.2296, "step": 793 }, { "epoch": 0.26, "learning_rate": 4.95937655040499e-06, "loss": 0.2083, "step": 794 }, { "epoch": 0.26, "learning_rate": 4.959219372492539e-06, "loss": 0.2282, "step": 795 }, { "epoch": 0.26, "learning_rate": 4.9590618935954415e-06, "loss": 0.2045, "step": 796 }, { "epoch": 0.26, "learning_rate": 4.958904113732975e-06, "loss": 0.2085, "step": 797 }, { "epoch": 0.26, "learning_rate": 4.958746032924449e-06, "loss": 0.22, "step": 798 }, { "epoch": 0.26, "learning_rate": 4.95858765118921e-06, "loss": 0.2169, "step": 799 }, { "epoch": 0.26, "learning_rate": 4.9584289685466444e-06, "loss": 0.1975, "step": 800 }, { "epoch": 0.26, "learning_rate": 4.958269985016172e-06, "loss": 0.2135, "step": 801 }, { "epoch": 0.26, "learning_rate": 4.958110700617251e-06, "loss": 0.205, "step": 802 }, { "epoch": 0.26, "learning_rate": 4.957951115369378e-06, "loss": 0.2184, "step": 803 }, { "epoch": 0.26, "learning_rate": 4.957791229292082e-06, "loss": 0.2104, "step": 804 }, { "epoch": 0.26, "learning_rate": 4.957631042404934e-06, "loss": 0.2207, "step": 805 }, { "epoch": 0.26, "learning_rate": 4.957470554727536e-06, "loss": 0.202, "step": 806 }, { "epoch": 0.26, "learning_rate": 4.9573097662795344e-06, "loss": 0.244, "step": 807 }, { "epoch": 0.26, "learning_rate": 4.957148677080605e-06, "loss": 0.2128, "step": 808 }, { "epoch": 0.26, "learning_rate": 4.956987287150465e-06, "loss": 0.1872, "step": 809 }, { "epoch": 0.26, "learning_rate": 4.956825596508867e-06, "loss": 0.2155, "step": 810 }, { "epoch": 0.26, "learning_rate": 4.956663605175599e-06, "loss": 0.2192, "step": 811 }, { "epoch": 0.26, "learning_rate": 4.956501313170487e-06, "loss": 0.21, "step": 812 }, { "epoch": 0.26, "learning_rate": 4.956338720513397e-06, "loss": 0.2146, "step": 813 }, { "epoch": 0.26, "learning_rate": 4.956175827224226e-06, "loss": 0.2088, "step": 814 }, { "epoch": 0.26, "learning_rate": 4.956012633322912e-06, "loss": 0.2093, "step": 815 }, { "epoch": 0.26, "learning_rate": 4.955849138829428e-06, "loss": 0.2302, "step": 816 }, { "epoch": 0.26, "learning_rate": 4.955685343763782e-06, "loss": 0.217, "step": 817 }, { "epoch": 0.27, "learning_rate": 4.9555212481460245e-06, "loss": 0.2073, "step": 818 }, { "epoch": 0.27, "learning_rate": 4.955356851996236e-06, "loss": 0.2122, "step": 819 }, { "epoch": 0.27, "learning_rate": 4.955192155334539e-06, "loss": 0.2041, "step": 820 }, { "epoch": 0.27, "learning_rate": 4.955027158181092e-06, "loss": 0.2138, "step": 821 }, { "epoch": 0.27, "learning_rate": 4.9548618605560855e-06, "loss": 0.2228, "step": 822 }, { "epoch": 0.27, "learning_rate": 4.954696262479753e-06, "loss": 0.2096, "step": 823 }, { "epoch": 0.27, "learning_rate": 4.954530363972361e-06, "loss": 0.2094, "step": 824 }, { "epoch": 0.27, "learning_rate": 4.954364165054214e-06, "loss": 0.2217, "step": 825 }, { "epoch": 0.27, "learning_rate": 4.9541976657456535e-06, "loss": 0.211, "step": 826 }, { "epoch": 0.27, "learning_rate": 4.954030866067057e-06, "loss": 0.2124, "step": 827 }, { "epoch": 0.27, "learning_rate": 4.95386376603884e-06, "loss": 0.2153, "step": 828 }, { "epoch": 0.27, "learning_rate": 4.953696365681452e-06, "loss": 0.1913, "step": 829 }, { "epoch": 0.27, "learning_rate": 4.953528665015383e-06, "loss": 0.2074, "step": 830 }, { "epoch": 0.27, "learning_rate": 4.953360664061159e-06, "loss": 0.2068, "step": 831 }, { "epoch": 0.27, "learning_rate": 4.953192362839338e-06, "loss": 0.2138, "step": 832 }, { "epoch": 0.27, "learning_rate": 4.953023761370521e-06, "loss": 0.224, "step": 833 }, { "epoch": 0.27, "learning_rate": 4.952854859675343e-06, "loss": 0.2182, "step": 834 }, { "epoch": 0.27, "learning_rate": 4.952685657774476e-06, "loss": 0.2096, "step": 835 }, { "epoch": 0.27, "learning_rate": 4.952516155688628e-06, "loss": 0.2057, "step": 836 }, { "epoch": 0.27, "learning_rate": 4.9523463534385444e-06, "loss": 0.2262, "step": 837 }, { "epoch": 0.27, "learning_rate": 4.952176251045008e-06, "loss": 0.1987, "step": 838 }, { "epoch": 0.27, "learning_rate": 4.952005848528838e-06, "loss": 0.2102, "step": 839 }, { "epoch": 0.27, "learning_rate": 4.951835145910888e-06, "loss": 0.1988, "step": 840 }, { "epoch": 0.27, "learning_rate": 4.951664143212053e-06, "loss": 0.1942, "step": 841 }, { "epoch": 0.27, "learning_rate": 4.95149284045326e-06, "loss": 0.2266, "step": 842 }, { "epoch": 0.27, "learning_rate": 4.951321237655477e-06, "loss": 0.247, "step": 843 }, { "epoch": 0.27, "learning_rate": 4.951149334839703e-06, "loss": 0.1965, "step": 844 }, { "epoch": 0.27, "learning_rate": 4.950977132026981e-06, "loss": 0.2064, "step": 845 }, { "epoch": 0.27, "learning_rate": 4.9508046292383846e-06, "loss": 0.2218, "step": 846 }, { "epoch": 0.27, "learning_rate": 4.950631826495027e-06, "loss": 0.2139, "step": 847 }, { "epoch": 0.27, "learning_rate": 4.950458723818058e-06, "loss": 0.2339, "step": 848 }, { "epoch": 0.28, "learning_rate": 4.950285321228664e-06, "loss": 0.1915, "step": 849 }, { "epoch": 0.28, "learning_rate": 4.950111618748067e-06, "loss": 0.2039, "step": 850 }, { "epoch": 0.28, "learning_rate": 4.949937616397527e-06, "loss": 0.2151, "step": 851 }, { "epoch": 0.28, "learning_rate": 4.949763314198339e-06, "loss": 0.2081, "step": 852 }, { "epoch": 0.28, "learning_rate": 4.949588712171838e-06, "loss": 0.218, "step": 853 }, { "epoch": 0.28, "learning_rate": 4.949413810339392e-06, "loss": 0.2099, "step": 854 }, { "epoch": 0.28, "learning_rate": 4.949238608722408e-06, "loss": 0.2117, "step": 855 }, { "epoch": 0.28, "learning_rate": 4.949063107342329e-06, "loss": 0.1978, "step": 856 }, { "epoch": 0.28, "learning_rate": 4.948887306220634e-06, "loss": 0.2038, "step": 857 }, { "epoch": 0.28, "learning_rate": 4.94871120537884e-06, "loss": 0.2068, "step": 858 }, { "epoch": 0.28, "learning_rate": 4.9485348048385e-06, "loss": 0.2071, "step": 859 }, { "epoch": 0.28, "learning_rate": 4.9483581046212025e-06, "loss": 0.2295, "step": 860 }, { "epoch": 0.28, "learning_rate": 4.948181104748576e-06, "loss": 0.1878, "step": 861 }, { "epoch": 0.28, "learning_rate": 4.948003805242282e-06, "loss": 0.2266, "step": 862 }, { "epoch": 0.28, "learning_rate": 4.9478262061240216e-06, "loss": 0.203, "step": 863 }, { "epoch": 0.28, "learning_rate": 4.947648307415529e-06, "loss": 0.2298, "step": 864 }, { "epoch": 0.28, "learning_rate": 4.947470109138579e-06, "loss": 0.1959, "step": 865 }, { "epoch": 0.28, "learning_rate": 4.947291611314981e-06, "loss": 0.2, "step": 866 }, { "epoch": 0.28, "learning_rate": 4.9471128139665826e-06, "loss": 0.2085, "step": 867 }, { "epoch": 0.28, "learning_rate": 4.9469337171152645e-06, "loss": 0.2138, "step": 868 }, { "epoch": 0.28, "learning_rate": 4.946754320782948e-06, "loss": 0.2224, "step": 869 }, { "epoch": 0.28, "learning_rate": 4.946574624991589e-06, "loss": 0.2023, "step": 870 }, { "epoch": 0.28, "learning_rate": 4.946394629763181e-06, "loss": 0.2148, "step": 871 }, { "epoch": 0.28, "learning_rate": 4.946214335119752e-06, "loss": 0.189, "step": 872 }, { "epoch": 0.28, "learning_rate": 4.94603374108337e-06, "loss": 0.1997, "step": 873 }, { "epoch": 0.28, "learning_rate": 4.945852847676138e-06, "loss": 0.205, "step": 874 }, { "epoch": 0.28, "learning_rate": 4.945671654920195e-06, "loss": 0.2088, "step": 875 }, { "epoch": 0.28, "learning_rate": 4.945490162837718e-06, "loss": 0.2054, "step": 876 }, { "epoch": 0.28, "learning_rate": 4.945308371450919e-06, "loss": 0.219, "step": 877 }, { "epoch": 0.28, "learning_rate": 4.945126280782047e-06, "loss": 0.2181, "step": 878 }, { "epoch": 0.28, "learning_rate": 4.944943890853389e-06, "loss": 0.2057, "step": 879 }, { "epoch": 0.29, "learning_rate": 4.944761201687268e-06, "loss": 0.2226, "step": 880 }, { "epoch": 0.29, "learning_rate": 4.944578213306043e-06, "loss": 0.2083, "step": 881 }, { "epoch": 0.29, "learning_rate": 4.94439492573211e-06, "loss": 0.1871, "step": 882 }, { "epoch": 0.29, "learning_rate": 4.944211338987901e-06, "loss": 0.2305, "step": 883 }, { "epoch": 0.29, "learning_rate": 4.944027453095887e-06, "loss": 0.2032, "step": 884 }, { "epoch": 0.29, "learning_rate": 4.943843268078572e-06, "loss": 0.2086, "step": 885 }, { "epoch": 0.29, "learning_rate": 4.9436587839585e-06, "loss": 0.2036, "step": 886 }, { "epoch": 0.29, "learning_rate": 4.9434740007582485e-06, "loss": 0.2087, "step": 887 }, { "epoch": 0.29, "learning_rate": 4.943288918500434e-06, "loss": 0.2197, "step": 888 }, { "epoch": 0.29, "learning_rate": 4.94310353720771e-06, "loss": 0.2101, "step": 889 }, { "epoch": 0.29, "learning_rate": 4.942917856902763e-06, "loss": 0.2078, "step": 890 }, { "epoch": 0.29, "learning_rate": 4.942731877608319e-06, "loss": 0.1991, "step": 891 }, { "epoch": 0.29, "learning_rate": 4.942545599347142e-06, "loss": 0.2055, "step": 892 }, { "epoch": 0.29, "learning_rate": 4.942359022142028e-06, "loss": 0.2064, "step": 893 }, { "epoch": 0.29, "learning_rate": 4.942172146015814e-06, "loss": 0.2034, "step": 894 }, { "epoch": 0.29, "learning_rate": 4.941984970991372e-06, "loss": 0.2249, "step": 895 }, { "epoch": 0.29, "learning_rate": 4.9417974970916096e-06, "loss": 0.2226, "step": 896 }, { "epoch": 0.29, "learning_rate": 4.9416097243394725e-06, "loss": 0.2043, "step": 897 }, { "epoch": 0.29, "learning_rate": 4.94142165275794e-06, "loss": 0.1856, "step": 898 }, { "epoch": 0.29, "learning_rate": 4.941233282370034e-06, "loss": 0.2093, "step": 899 }, { "epoch": 0.29, "learning_rate": 4.941044613198807e-06, "loss": 0.2201, "step": 900 }, { "epoch": 0.29, "learning_rate": 4.940855645267349e-06, "loss": 0.2074, "step": 901 }, { "epoch": 0.29, "learning_rate": 4.94066637859879e-06, "loss": 0.1975, "step": 902 }, { "epoch": 0.29, "learning_rate": 4.940476813216294e-06, "loss": 0.2159, "step": 903 }, { "epoch": 0.29, "learning_rate": 4.940286949143061e-06, "loss": 0.2028, "step": 904 }, { "epoch": 0.29, "learning_rate": 4.940096786402331e-06, "loss": 0.2059, "step": 905 }, { "epoch": 0.29, "learning_rate": 4.939906325017374e-06, "loss": 0.2149, "step": 906 }, { "epoch": 0.29, "learning_rate": 4.939715565011504e-06, "loss": 0.2407, "step": 907 }, { "epoch": 0.29, "learning_rate": 4.939524506408068e-06, "loss": 0.2247, "step": 908 }, { "epoch": 0.29, "learning_rate": 4.939333149230447e-06, "loss": 0.2021, "step": 909 }, { "epoch": 0.29, "learning_rate": 4.9391414935020656e-06, "loss": 0.202, "step": 910 }, { "epoch": 0.3, "learning_rate": 4.938949539246376e-06, "loss": 0.2226, "step": 911 }, { "epoch": 0.3, "learning_rate": 4.938757286486874e-06, "loss": 0.2132, "step": 912 }, { "epoch": 0.3, "learning_rate": 4.93856473524709e-06, "loss": 0.1904, "step": 913 }, { "epoch": 0.3, "learning_rate": 4.938371885550589e-06, "loss": 0.1954, "step": 914 }, { "epoch": 0.3, "learning_rate": 4.938178737420974e-06, "loss": 0.1937, "step": 915 }, { "epoch": 0.3, "learning_rate": 4.937985290881886e-06, "loss": 0.2087, "step": 916 }, { "epoch": 0.3, "learning_rate": 4.9377915459569995e-06, "loss": 0.2077, "step": 917 }, { "epoch": 0.3, "learning_rate": 4.937597502670027e-06, "loss": 0.2319, "step": 918 }, { "epoch": 0.3, "learning_rate": 4.9374031610447185e-06, "loss": 0.2013, "step": 919 }, { "epoch": 0.3, "learning_rate": 4.937208521104858e-06, "loss": 0.1944, "step": 920 }, { "epoch": 0.3, "learning_rate": 4.937013582874269e-06, "loss": 0.2106, "step": 921 }, { "epoch": 0.3, "learning_rate": 4.93681834637681e-06, "loss": 0.2187, "step": 922 }, { "epoch": 0.3, "learning_rate": 4.936622811636376e-06, "loss": 0.2272, "step": 923 }, { "epoch": 0.3, "learning_rate": 4.936426978676897e-06, "loss": 0.1927, "step": 924 }, { "epoch": 0.3, "learning_rate": 4.936230847522343e-06, "loss": 0.1985, "step": 925 }, { "epoch": 0.3, "learning_rate": 4.936034418196718e-06, "loss": 0.2019, "step": 926 }, { "epoch": 0.3, "learning_rate": 4.935837690724063e-06, "loss": 0.1983, "step": 927 }, { "epoch": 0.3, "learning_rate": 4.935640665128454e-06, "loss": 0.1918, "step": 928 }, { "epoch": 0.3, "learning_rate": 4.935443341434008e-06, "loss": 0.2246, "step": 929 }, { "epoch": 0.3, "learning_rate": 4.935245719664873e-06, "loss": 0.2248, "step": 930 }, { "epoch": 0.3, "learning_rate": 4.935047799845238e-06, "loss": 0.2075, "step": 931 }, { "epoch": 0.3, "learning_rate": 4.9348495819993235e-06, "loss": 0.1959, "step": 932 }, { "epoch": 0.3, "learning_rate": 4.9346510661513924e-06, "loss": 0.1985, "step": 933 }, { "epoch": 0.3, "learning_rate": 4.93445225232574e-06, "loss": 0.2046, "step": 934 }, { "epoch": 0.3, "learning_rate": 4.9342531405467e-06, "loss": 0.1953, "step": 935 }, { "epoch": 0.3, "learning_rate": 4.934053730838639e-06, "loss": 0.2116, "step": 936 }, { "epoch": 0.3, "learning_rate": 4.9338540232259664e-06, "loss": 0.2027, "step": 937 }, { "epoch": 0.3, "learning_rate": 4.9336540177331225e-06, "loss": 0.2144, "step": 938 }, { "epoch": 0.3, "learning_rate": 4.9334537143845876e-06, "loss": 0.2091, "step": 939 }, { "epoch": 0.3, "learning_rate": 4.933253113204874e-06, "loss": 0.21, "step": 940 }, { "epoch": 0.3, "learning_rate": 4.933052214218535e-06, "loss": 0.2053, "step": 941 }, { "epoch": 0.31, "learning_rate": 4.93285101745016e-06, "loss": 0.1969, "step": 942 }, { "epoch": 0.31, "learning_rate": 4.932649522924372e-06, "loss": 0.2273, "step": 943 }, { "epoch": 0.31, "learning_rate": 4.932447730665832e-06, "loss": 0.224, "step": 944 }, { "epoch": 0.31, "learning_rate": 4.932245640699238e-06, "loss": 0.2055, "step": 945 }, { "epoch": 0.31, "learning_rate": 4.932043253049323e-06, "loss": 0.2226, "step": 946 }, { "epoch": 0.31, "learning_rate": 4.931840567740858e-06, "loss": 0.2094, "step": 947 }, { "epoch": 0.31, "learning_rate": 4.93163758479865e-06, "loss": 0.1954, "step": 948 }, { "epoch": 0.31, "learning_rate": 4.931434304247541e-06, "loss": 0.2139, "step": 949 }, { "epoch": 0.31, "learning_rate": 4.931230726112412e-06, "loss": 0.2122, "step": 950 }, { "epoch": 0.31, "learning_rate": 4.9310268504181764e-06, "loss": 0.1949, "step": 951 }, { "epoch": 0.31, "learning_rate": 4.930822677189791e-06, "loss": 0.2021, "step": 952 }, { "epoch": 0.31, "learning_rate": 4.93061820645224e-06, "loss": 0.2222, "step": 953 }, { "epoch": 0.31, "learning_rate": 4.930413438230552e-06, "loss": 0.2057, "step": 954 }, { "epoch": 0.31, "learning_rate": 4.930208372549787e-06, "loss": 0.1969, "step": 955 }, { "epoch": 0.31, "learning_rate": 4.930003009435043e-06, "loss": 0.2057, "step": 956 }, { "epoch": 0.31, "learning_rate": 4.9297973489114565e-06, "loss": 0.2086, "step": 957 }, { "epoch": 0.31, "learning_rate": 4.929591391004196e-06, "loss": 0.2047, "step": 958 }, { "epoch": 0.31, "learning_rate": 4.929385135738469e-06, "loss": 0.2208, "step": 959 }, { "epoch": 0.31, "learning_rate": 4.92917858313952e-06, "loss": 0.2249, "step": 960 }, { "epoch": 0.31, "learning_rate": 4.928971733232628e-06, "loss": 0.2175, "step": 961 }, { "epoch": 0.31, "learning_rate": 4.928764586043111e-06, "loss": 0.2088, "step": 962 }, { "epoch": 0.31, "learning_rate": 4.9285571415963205e-06, "loss": 0.1915, "step": 963 }, { "epoch": 0.31, "learning_rate": 4.928349399917646e-06, "loss": 0.231, "step": 964 }, { "epoch": 0.31, "learning_rate": 4.928141361032513e-06, "loss": 0.1876, "step": 965 }, { "epoch": 0.31, "learning_rate": 4.927933024966385e-06, "loss": 0.2059, "step": 966 }, { "epoch": 0.31, "learning_rate": 4.927724391744758e-06, "loss": 0.2119, "step": 967 }, { "epoch": 0.31, "learning_rate": 4.927515461393167e-06, "loss": 0.2192, "step": 968 }, { "epoch": 0.31, "learning_rate": 4.927306233937185e-06, "loss": 0.2038, "step": 969 }, { "epoch": 0.31, "learning_rate": 4.927096709402417e-06, "loss": 0.2162, "step": 970 }, { "epoch": 0.31, "learning_rate": 4.926886887814509e-06, "loss": 0.2104, "step": 971 }, { "epoch": 0.31, "learning_rate": 4.926676769199139e-06, "loss": 0.2171, "step": 972 }, { "epoch": 0.32, "learning_rate": 4.9264663535820256e-06, "loss": 0.1906, "step": 973 }, { "epoch": 0.32, "learning_rate": 4.926255640988919e-06, "loss": 0.2023, "step": 974 }, { "epoch": 0.32, "learning_rate": 4.926044631445611e-06, "loss": 0.2082, "step": 975 }, { "epoch": 0.32, "learning_rate": 4.925833324977926e-06, "loss": 0.2117, "step": 976 }, { "epoch": 0.32, "learning_rate": 4.925621721611726e-06, "loss": 0.1948, "step": 977 }, { "epoch": 0.32, "learning_rate": 4.925409821372908e-06, "loss": 0.1923, "step": 978 }, { "epoch": 0.32, "learning_rate": 4.925197624287409e-06, "loss": 0.1908, "step": 979 }, { "epoch": 0.32, "learning_rate": 4.924985130381198e-06, "loss": 0.2, "step": 980 }, { "epoch": 0.32, "learning_rate": 4.924772339680283e-06, "loss": 0.2176, "step": 981 }, { "epoch": 0.32, "learning_rate": 4.9245592522107065e-06, "loss": 0.2199, "step": 982 }, { "epoch": 0.32, "learning_rate": 4.92434586799855e-06, "loss": 0.2033, "step": 983 }, { "epoch": 0.32, "learning_rate": 4.924132187069928e-06, "loss": 0.2081, "step": 984 }, { "epoch": 0.32, "learning_rate": 4.923918209450994e-06, "loss": 0.2193, "step": 985 }, { "epoch": 0.32, "learning_rate": 4.9237039351679365e-06, "loss": 0.1969, "step": 986 }, { "epoch": 0.32, "learning_rate": 4.923489364246981e-06, "loss": 0.1931, "step": 987 }, { "epoch": 0.32, "learning_rate": 4.923274496714387e-06, "loss": 0.2098, "step": 988 }, { "epoch": 0.32, "learning_rate": 4.923059332596456e-06, "loss": 0.1995, "step": 989 }, { "epoch": 0.32, "learning_rate": 4.922843871919518e-06, "loss": 0.1844, "step": 990 }, { "epoch": 0.32, "learning_rate": 4.922628114709945e-06, "loss": 0.1967, "step": 991 }, { "epoch": 0.32, "learning_rate": 4.922412060994145e-06, "loss": 0.2197, "step": 992 }, { "epoch": 0.32, "learning_rate": 4.922195710798559e-06, "loss": 0.208, "step": 993 }, { "epoch": 0.32, "learning_rate": 4.9219790641496656e-06, "loss": 0.2136, "step": 994 }, { "epoch": 0.32, "learning_rate": 4.9217621210739826e-06, "loss": 0.1907, "step": 995 }, { "epoch": 0.32, "learning_rate": 4.921544881598059e-06, "loss": 0.1958, "step": 996 }, { "epoch": 0.32, "learning_rate": 4.921327345748486e-06, "loss": 0.2032, "step": 997 }, { "epoch": 0.32, "learning_rate": 4.921109513551885e-06, "loss": 0.201, "step": 998 }, { "epoch": 0.32, "learning_rate": 4.920891385034918e-06, "loss": 0.2019, "step": 999 }, { "epoch": 0.32, "learning_rate": 4.920672960224282e-06, "loss": 0.1836, "step": 1000 }, { "epoch": 0.32, "learning_rate": 4.920454239146709e-06, "loss": 0.2088, "step": 1001 }, { "epoch": 0.32, "learning_rate": 4.92023522182897e-06, "loss": 0.2139, "step": 1002 }, { "epoch": 0.33, "learning_rate": 4.9200159082978685e-06, "loss": 0.2092, "step": 1003 }, { "epoch": 0.33, "learning_rate": 4.919796298580247e-06, "loss": 0.1941, "step": 1004 }, { "epoch": 0.33, "learning_rate": 4.919576392702984e-06, "loss": 0.2007, "step": 1005 }, { "epoch": 0.33, "learning_rate": 4.9193561906929945e-06, "loss": 0.1941, "step": 1006 }, { "epoch": 0.33, "learning_rate": 4.919135692577229e-06, "loss": 0.2131, "step": 1007 }, { "epoch": 0.33, "learning_rate": 4.918914898382673e-06, "loss": 0.2105, "step": 1008 }, { "epoch": 0.33, "learning_rate": 4.91869380813635e-06, "loss": 0.1879, "step": 1009 }, { "epoch": 0.33, "learning_rate": 4.91847242186532e-06, "loss": 0.1995, "step": 1010 }, { "epoch": 0.33, "learning_rate": 4.918250739596678e-06, "loss": 0.2025, "step": 1011 }, { "epoch": 0.33, "learning_rate": 4.918028761357557e-06, "loss": 0.2241, "step": 1012 }, { "epoch": 0.33, "learning_rate": 4.917806487175123e-06, "loss": 0.1903, "step": 1013 }, { "epoch": 0.33, "learning_rate": 4.917583917076581e-06, "loss": 0.2048, "step": 1014 }, { "epoch": 0.33, "learning_rate": 4.917361051089172e-06, "loss": 0.2221, "step": 1015 }, { "epoch": 0.33, "learning_rate": 4.917137889240172e-06, "loss": 0.2008, "step": 1016 }, { "epoch": 0.33, "learning_rate": 4.916914431556895e-06, "loss": 0.2367, "step": 1017 }, { "epoch": 0.33, "learning_rate": 4.916690678066688e-06, "loss": 0.2049, "step": 1018 }, { "epoch": 0.33, "learning_rate": 4.916466628796938e-06, "loss": 0.1867, "step": 1019 }, { "epoch": 0.33, "learning_rate": 4.9162422837750654e-06, "loss": 0.2012, "step": 1020 }, { "epoch": 0.33, "learning_rate": 4.916017643028529e-06, "loss": 0.1901, "step": 1021 }, { "epoch": 0.33, "learning_rate": 4.915792706584821e-06, "loss": 0.1884, "step": 1022 }, { "epoch": 0.33, "learning_rate": 4.9155674744714725e-06, "loss": 0.2055, "step": 1023 }, { "epoch": 0.33, "learning_rate": 4.91534194671605e-06, "loss": 0.1984, "step": 1024 }, { "epoch": 0.33, "learning_rate": 4.915116123346155e-06, "loss": 0.2176, "step": 1025 }, { "epoch": 0.33, "learning_rate": 4.9148900043894275e-06, "loss": 0.1983, "step": 1026 }, { "epoch": 0.33, "learning_rate": 4.914663589873541e-06, "loss": 0.2027, "step": 1027 }, { "epoch": 0.33, "learning_rate": 4.914436879826207e-06, "loss": 0.2138, "step": 1028 }, { "epoch": 0.33, "learning_rate": 4.9142098742751726e-06, "loss": 0.2169, "step": 1029 }, { "epoch": 0.33, "learning_rate": 4.9139825732482205e-06, "loss": 0.1964, "step": 1030 }, { "epoch": 0.33, "learning_rate": 4.91375497677317e-06, "loss": 0.2131, "step": 1031 }, { "epoch": 0.33, "learning_rate": 4.913527084877879e-06, "loss": 0.1995, "step": 1032 }, { "epoch": 0.33, "learning_rate": 4.913298897590237e-06, "loss": 0.205, "step": 1033 }, { "epoch": 0.34, "learning_rate": 4.913070414938172e-06, "loss": 0.2083, "step": 1034 }, { "epoch": 0.34, "learning_rate": 4.912841636949649e-06, "loss": 0.1951, "step": 1035 }, { "epoch": 0.34, "learning_rate": 4.912612563652667e-06, "loss": 0.2066, "step": 1036 }, { "epoch": 0.34, "learning_rate": 4.912383195075264e-06, "loss": 0.2322, "step": 1037 }, { "epoch": 0.34, "learning_rate": 4.912153531245511e-06, "loss": 0.2036, "step": 1038 }, { "epoch": 0.34, "learning_rate": 4.9119235721915174e-06, "loss": 0.2038, "step": 1039 }, { "epoch": 0.34, "learning_rate": 4.911693317941428e-06, "loss": 0.1868, "step": 1040 }, { "epoch": 0.34, "learning_rate": 4.911462768523423e-06, "loss": 0.2048, "step": 1041 }, { "epoch": 0.34, "learning_rate": 4.9112319239657204e-06, "loss": 0.2086, "step": 1042 }, { "epoch": 0.34, "learning_rate": 4.911000784296572e-06, "loss": 0.1994, "step": 1043 }, { "epoch": 0.34, "learning_rate": 4.910769349544269e-06, "loss": 0.209, "step": 1044 }, { "epoch": 0.34, "learning_rate": 4.9105376197371355e-06, "loss": 0.1967, "step": 1045 }, { "epoch": 0.34, "learning_rate": 4.9103055949035326e-06, "loss": 0.2325, "step": 1046 }, { "epoch": 0.34, "learning_rate": 4.910073275071858e-06, "loss": 0.2089, "step": 1047 }, { "epoch": 0.34, "learning_rate": 4.909840660270547e-06, "loss": 0.2038, "step": 1048 }, { "epoch": 0.34, "learning_rate": 4.909607750528068e-06, "loss": 0.1942, "step": 1049 }, { "epoch": 0.34, "learning_rate": 4.909374545872927e-06, "loss": 0.2075, "step": 1050 }, { "epoch": 0.34, "learning_rate": 4.909141046333666e-06, "loss": 0.2044, "step": 1051 }, { "epoch": 0.34, "learning_rate": 4.908907251938864e-06, "loss": 0.2011, "step": 1052 }, { "epoch": 0.34, "learning_rate": 4.908673162717133e-06, "loss": 0.213, "step": 1053 }, { "epoch": 0.34, "learning_rate": 4.908438778697125e-06, "loss": 0.2023, "step": 1054 }, { "epoch": 0.34, "learning_rate": 4.908204099907527e-06, "loss": 0.2153, "step": 1055 }, { "epoch": 0.34, "learning_rate": 4.907969126377059e-06, "loss": 0.2051, "step": 1056 }, { "epoch": 0.34, "learning_rate": 4.907733858134482e-06, "loss": 0.2236, "step": 1057 }, { "epoch": 0.34, "learning_rate": 4.907498295208589e-06, "loss": 0.2185, "step": 1058 }, { "epoch": 0.34, "learning_rate": 4.907262437628211e-06, "loss": 0.2096, "step": 1059 }, { "epoch": 0.34, "learning_rate": 4.907026285422215e-06, "loss": 0.2255, "step": 1060 }, { "epoch": 0.34, "learning_rate": 4.906789838619504e-06, "loss": 0.209, "step": 1061 }, { "epoch": 0.34, "learning_rate": 4.906553097249015e-06, "loss": 0.2139, "step": 1062 }, { "epoch": 0.34, "learning_rate": 4.906316061339724e-06, "loss": 0.2147, "step": 1063 }, { "epoch": 0.34, "learning_rate": 4.9060787309206436e-06, "loss": 0.208, "step": 1064 }, { "epoch": 0.35, "learning_rate": 4.905841106020818e-06, "loss": 0.2152, "step": 1065 }, { "epoch": 0.35, "learning_rate": 4.905603186669332e-06, "loss": 0.1918, "step": 1066 }, { "epoch": 0.35, "learning_rate": 4.905364972895304e-06, "loss": 0.2109, "step": 1067 }, { "epoch": 0.35, "learning_rate": 4.9051264647278886e-06, "loss": 0.2193, "step": 1068 }, { "epoch": 0.35, "learning_rate": 4.904887662196277e-06, "loss": 0.1814, "step": 1069 }, { "epoch": 0.35, "learning_rate": 4.904648565329697e-06, "loss": 0.2255, "step": 1070 }, { "epoch": 0.35, "learning_rate": 4.904409174157412e-06, "loss": 0.2171, "step": 1071 }, { "epoch": 0.35, "learning_rate": 4.90416948870872e-06, "loss": 0.2152, "step": 1072 }, { "epoch": 0.35, "learning_rate": 4.903929509012957e-06, "loss": 0.1974, "step": 1073 }, { "epoch": 0.35, "learning_rate": 4.9036892350994935e-06, "loss": 0.2019, "step": 1074 }, { "epoch": 0.35, "learning_rate": 4.9034486669977375e-06, "loss": 0.2032, "step": 1075 }, { "epoch": 0.35, "learning_rate": 4.903207804737132e-06, "loss": 0.214, "step": 1076 }, { "epoch": 0.35, "learning_rate": 4.902966648347156e-06, "loss": 0.1848, "step": 1077 }, { "epoch": 0.35, "learning_rate": 4.902725197857325e-06, "loss": 0.2005, "step": 1078 }, { "epoch": 0.35, "learning_rate": 4.902483453297189e-06, "loss": 0.2186, "step": 1079 }, { "epoch": 0.35, "learning_rate": 4.902241414696337e-06, "loss": 0.1972, "step": 1080 }, { "epoch": 0.35, "learning_rate": 4.901999082084391e-06, "loss": 0.2211, "step": 1081 }, { "epoch": 0.35, "learning_rate": 4.901756455491011e-06, "loss": 0.2005, "step": 1082 }, { "epoch": 0.35, "learning_rate": 4.901513534945891e-06, "loss": 0.2111, "step": 1083 }, { "epoch": 0.35, "learning_rate": 4.901270320478763e-06, "loss": 0.1884, "step": 1084 }, { "epoch": 0.35, "learning_rate": 4.901026812119394e-06, "loss": 0.2186, "step": 1085 }, { "epoch": 0.35, "learning_rate": 4.9007830098975875e-06, "loss": 0.1967, "step": 1086 }, { "epoch": 0.35, "learning_rate": 4.900538913843181e-06, "loss": 0.2042, "step": 1087 }, { "epoch": 0.35, "learning_rate": 4.900294523986051e-06, "loss": 0.2062, "step": 1088 }, { "epoch": 0.35, "learning_rate": 4.900049840356107e-06, "loss": 0.2, "step": 1089 }, { "epoch": 0.35, "learning_rate": 4.899804862983298e-06, "loss": 0.2024, "step": 1090 }, { "epoch": 0.35, "learning_rate": 4.899559591897604e-06, "loss": 0.199, "step": 1091 }, { "epoch": 0.35, "learning_rate": 4.899314027129047e-06, "loss": 0.1886, "step": 1092 }, { "epoch": 0.35, "learning_rate": 4.89906816870768e-06, "loss": 0.2073, "step": 1093 }, { "epoch": 0.35, "learning_rate": 4.898822016663595e-06, "loss": 0.2014, "step": 1094 }, { "epoch": 0.35, "learning_rate": 4.898575571026916e-06, "loss": 0.2119, "step": 1095 }, { "epoch": 0.36, "learning_rate": 4.898328831827808e-06, "loss": 0.1861, "step": 1096 }, { "epoch": 0.36, "learning_rate": 4.898081799096467e-06, "loss": 0.2027, "step": 1097 }, { "epoch": 0.36, "learning_rate": 4.897834472863131e-06, "loss": 0.1935, "step": 1098 }, { "epoch": 0.36, "learning_rate": 4.897586853158067e-06, "loss": 0.2081, "step": 1099 }, { "epoch": 0.36, "learning_rate": 4.897338940011583e-06, "loss": 0.1866, "step": 1100 }, { "epoch": 0.36, "learning_rate": 4.897090733454021e-06, "loss": 0.1966, "step": 1101 }, { "epoch": 0.36, "learning_rate": 4.896842233515759e-06, "loss": 0.2193, "step": 1102 }, { "epoch": 0.36, "learning_rate": 4.89659344022721e-06, "loss": 0.1866, "step": 1103 }, { "epoch": 0.36, "learning_rate": 4.896344353618826e-06, "loss": 0.1853, "step": 1104 }, { "epoch": 0.36, "learning_rate": 4.896094973721091e-06, "loss": 0.2203, "step": 1105 }, { "epoch": 0.36, "learning_rate": 4.8958453005645265e-06, "loss": 0.2093, "step": 1106 }, { "epoch": 0.36, "learning_rate": 4.895595334179692e-06, "loss": 0.2156, "step": 1107 }, { "epoch": 0.36, "learning_rate": 4.89534507459718e-06, "loss": 0.202, "step": 1108 }, { "epoch": 0.36, "learning_rate": 4.895094521847617e-06, "loss": 0.1948, "step": 1109 }, { "epoch": 0.36, "learning_rate": 4.894843675961673e-06, "loss": 0.195, "step": 1110 }, { "epoch": 0.36, "learning_rate": 4.894592536970047e-06, "loss": 0.2182, "step": 1111 }, { "epoch": 0.36, "learning_rate": 4.894341104903476e-06, "loss": 0.2065, "step": 1112 }, { "epoch": 0.36, "learning_rate": 4.894089379792731e-06, "loss": 0.2025, "step": 1113 }, { "epoch": 0.36, "learning_rate": 4.893837361668624e-06, "loss": 0.2001, "step": 1114 }, { "epoch": 0.36, "learning_rate": 4.8935850505619985e-06, "loss": 0.1923, "step": 1115 }, { "epoch": 0.36, "learning_rate": 4.8933324465037334e-06, "loss": 0.1933, "step": 1116 }, { "epoch": 0.36, "learning_rate": 4.893079549524747e-06, "loss": 0.1962, "step": 1117 }, { "epoch": 0.36, "learning_rate": 4.89282635965599e-06, "loss": 0.1752, "step": 1118 }, { "epoch": 0.36, "learning_rate": 4.8925728769284504e-06, "loss": 0.2077, "step": 1119 }, { "epoch": 0.36, "learning_rate": 4.892319101373154e-06, "loss": 0.2223, "step": 1120 }, { "epoch": 0.36, "learning_rate": 4.892065033021158e-06, "loss": 0.1999, "step": 1121 }, { "epoch": 0.36, "learning_rate": 4.8918106719035594e-06, "loss": 0.2093, "step": 1122 }, { "epoch": 0.36, "learning_rate": 4.891556018051489e-06, "loss": 0.2004, "step": 1123 }, { "epoch": 0.36, "learning_rate": 4.891301071496113e-06, "loss": 0.1934, "step": 1124 }, { "epoch": 0.36, "learning_rate": 4.891045832268637e-06, "loss": 0.1937, "step": 1125 }, { "epoch": 0.36, "learning_rate": 4.890790300400297e-06, "loss": 0.1934, "step": 1126 }, { "epoch": 0.37, "learning_rate": 4.8905344759223696e-06, "loss": 0.1935, "step": 1127 }, { "epoch": 0.37, "learning_rate": 4.890278358866165e-06, "loss": 0.1988, "step": 1128 }, { "epoch": 0.37, "learning_rate": 4.890021949263027e-06, "loss": 0.2093, "step": 1129 }, { "epoch": 0.37, "learning_rate": 4.889765247144341e-06, "loss": 0.1986, "step": 1130 }, { "epoch": 0.37, "learning_rate": 4.889508252541524e-06, "loss": 0.213, "step": 1131 }, { "epoch": 0.37, "learning_rate": 4.889250965486029e-06, "loss": 0.2156, "step": 1132 }, { "epoch": 0.37, "learning_rate": 4.888993386009345e-06, "loss": 0.1995, "step": 1133 }, { "epoch": 0.37, "learning_rate": 4.888735514142998e-06, "loss": 0.2066, "step": 1134 }, { "epoch": 0.37, "learning_rate": 4.8884773499185485e-06, "loss": 0.2003, "step": 1135 }, { "epoch": 0.37, "learning_rate": 4.8882188933675935e-06, "loss": 0.2169, "step": 1136 }, { "epoch": 0.37, "learning_rate": 4.887960144521766e-06, "loss": 0.2198, "step": 1137 }, { "epoch": 0.37, "learning_rate": 4.887701103412734e-06, "loss": 0.2231, "step": 1138 }, { "epoch": 0.37, "learning_rate": 4.8874417700722025e-06, "loss": 0.2011, "step": 1139 }, { "epoch": 0.37, "learning_rate": 4.887182144531909e-06, "loss": 0.2034, "step": 1140 }, { "epoch": 0.37, "learning_rate": 4.886922226823632e-06, "loss": 0.2038, "step": 1141 }, { "epoch": 0.37, "learning_rate": 4.8866620169791815e-06, "loss": 0.2004, "step": 1142 }, { "epoch": 0.37, "learning_rate": 4.886401515030404e-06, "loss": 0.2082, "step": 1143 }, { "epoch": 0.37, "learning_rate": 4.886140721009184e-06, "loss": 0.1929, "step": 1144 }, { "epoch": 0.37, "learning_rate": 4.885879634947439e-06, "loss": 0.2236, "step": 1145 }, { "epoch": 0.37, "learning_rate": 4.885618256877123e-06, "loss": 0.1901, "step": 1146 }, { "epoch": 0.37, "learning_rate": 4.885356586830229e-06, "loss": 0.1915, "step": 1147 }, { "epoch": 0.37, "learning_rate": 4.8850946248387795e-06, "loss": 0.2027, "step": 1148 }, { "epoch": 0.37, "learning_rate": 4.884832370934838e-06, "loss": 0.2217, "step": 1149 }, { "epoch": 0.37, "learning_rate": 4.8845698251505e-06, "loss": 0.1896, "step": 1150 }, { "epoch": 0.37, "learning_rate": 4.8843069875179005e-06, "loss": 0.1869, "step": 1151 }, { "epoch": 0.37, "learning_rate": 4.884043858069208e-06, "loss": 0.179, "step": 1152 }, { "epoch": 0.37, "learning_rate": 4.883780436836627e-06, "loss": 0.1906, "step": 1153 }, { "epoch": 0.37, "learning_rate": 4.883516723852396e-06, "loss": 0.1969, "step": 1154 }, { "epoch": 0.37, "learning_rate": 4.883252719148794e-06, "loss": 0.1933, "step": 1155 }, { "epoch": 0.37, "learning_rate": 4.8829884227581294e-06, "loss": 0.2067, "step": 1156 }, { "epoch": 0.37, "learning_rate": 4.88272383471275e-06, "loss": 0.2012, "step": 1157 }, { "epoch": 0.38, "learning_rate": 4.8824589550450415e-06, "loss": 0.2019, "step": 1158 }, { "epoch": 0.38, "learning_rate": 4.882193783787421e-06, "loss": 0.1827, "step": 1159 }, { "epoch": 0.38, "learning_rate": 4.881928320972342e-06, "loss": 0.2161, "step": 1160 }, { "epoch": 0.38, "learning_rate": 4.881662566632296e-06, "loss": 0.1966, "step": 1161 }, { "epoch": 0.38, "learning_rate": 4.881396520799808e-06, "loss": 0.1937, "step": 1162 }, { "epoch": 0.38, "learning_rate": 4.8811301835074384e-06, "loss": 0.2049, "step": 1163 }, { "epoch": 0.38, "learning_rate": 4.880863554787787e-06, "loss": 0.1918, "step": 1164 }, { "epoch": 0.38, "learning_rate": 4.880596634673484e-06, "loss": 0.1797, "step": 1165 }, { "epoch": 0.38, "learning_rate": 4.8803294231972e-06, "loss": 0.1847, "step": 1166 }, { "epoch": 0.38, "learning_rate": 4.8800619203916376e-06, "loss": 0.2055, "step": 1167 }, { "epoch": 0.38, "learning_rate": 4.8797941262895365e-06, "loss": 0.2041, "step": 1168 }, { "epoch": 0.38, "learning_rate": 4.8795260409236725e-06, "loss": 0.1955, "step": 1169 }, { "epoch": 0.38, "learning_rate": 4.879257664326856e-06, "loss": 0.2175, "step": 1170 }, { "epoch": 0.38, "learning_rate": 4.8789889965319355e-06, "loss": 0.1986, "step": 1171 }, { "epoch": 0.38, "learning_rate": 4.878720037571792e-06, "loss": 0.1964, "step": 1172 }, { "epoch": 0.38, "learning_rate": 4.878450787479344e-06, "loss": 0.2113, "step": 1173 }, { "epoch": 0.38, "learning_rate": 4.878181246287544e-06, "loss": 0.2153, "step": 1174 }, { "epoch": 0.38, "learning_rate": 4.877911414029382e-06, "loss": 0.2105, "step": 1175 }, { "epoch": 0.38, "learning_rate": 4.8776412907378845e-06, "loss": 0.2196, "step": 1176 }, { "epoch": 0.38, "learning_rate": 4.877370876446109e-06, "loss": 0.2006, "step": 1177 }, { "epoch": 0.38, "learning_rate": 4.877100171187154e-06, "loss": 0.2167, "step": 1178 }, { "epoch": 0.38, "learning_rate": 4.876829174994149e-06, "loss": 0.2045, "step": 1179 }, { "epoch": 0.38, "learning_rate": 4.8765578879002625e-06, "loss": 0.1913, "step": 1180 }, { "epoch": 0.38, "learning_rate": 4.8762863099386984e-06, "loss": 0.1968, "step": 1181 }, { "epoch": 0.38, "learning_rate": 4.876014441142693e-06, "loss": 0.1993, "step": 1182 }, { "epoch": 0.38, "learning_rate": 4.8757422815455215e-06, "loss": 0.1979, "step": 1183 }, { "epoch": 0.38, "learning_rate": 4.875469831180495e-06, "loss": 0.1967, "step": 1184 }, { "epoch": 0.38, "learning_rate": 4.875197090080957e-06, "loss": 0.2024, "step": 1185 }, { "epoch": 0.38, "learning_rate": 4.874924058280288e-06, "loss": 0.2143, "step": 1186 }, { "epoch": 0.38, "learning_rate": 4.874650735811906e-06, "loss": 0.2097, "step": 1187 }, { "epoch": 0.38, "learning_rate": 4.874377122709263e-06, "loss": 0.199, "step": 1188 }, { "epoch": 0.39, "learning_rate": 4.874103219005845e-06, "loss": 0.1898, "step": 1189 }, { "epoch": 0.39, "learning_rate": 4.873829024735176e-06, "loss": 0.2213, "step": 1190 }, { "epoch": 0.39, "learning_rate": 4.873554539930815e-06, "loss": 0.1785, "step": 1191 }, { "epoch": 0.39, "learning_rate": 4.873279764626357e-06, "loss": 0.1996, "step": 1192 }, { "epoch": 0.39, "learning_rate": 4.87300469885543e-06, "loss": 0.2278, "step": 1193 }, { "epoch": 0.39, "learning_rate": 4.872729342651701e-06, "loss": 0.1887, "step": 1194 }, { "epoch": 0.39, "learning_rate": 4.87245369604887e-06, "loss": 0.201, "step": 1195 }, { "epoch": 0.39, "learning_rate": 4.872177759080673e-06, "loss": 0.2372, "step": 1196 }, { "epoch": 0.39, "learning_rate": 4.8719015317808835e-06, "loss": 0.1959, "step": 1197 }, { "epoch": 0.39, "learning_rate": 4.871625014183308e-06, "loss": 0.2189, "step": 1198 }, { "epoch": 0.39, "learning_rate": 4.8713482063217895e-06, "loss": 0.2071, "step": 1199 }, { "epoch": 0.39, "learning_rate": 4.871071108230208e-06, "loss": 0.2036, "step": 1200 }, { "epoch": 0.39, "learning_rate": 4.8707937199424756e-06, "loss": 0.2129, "step": 1201 }, { "epoch": 0.39, "learning_rate": 4.870516041492543e-06, "loss": 0.1989, "step": 1202 }, { "epoch": 0.39, "learning_rate": 4.870238072914396e-06, "loss": 0.1957, "step": 1203 }, { "epoch": 0.39, "learning_rate": 4.869959814242054e-06, "loss": 0.1838, "step": 1204 }, { "epoch": 0.39, "learning_rate": 4.8696812655095744e-06, "loss": 0.2131, "step": 1205 }, { "epoch": 0.39, "learning_rate": 4.869402426751048e-06, "loss": 0.1828, "step": 1206 }, { "epoch": 0.39, "learning_rate": 4.8691232980006015e-06, "loss": 0.1934, "step": 1207 }, { "epoch": 0.39, "learning_rate": 4.868843879292399e-06, "loss": 0.209, "step": 1208 }, { "epoch": 0.39, "learning_rate": 4.868564170660637e-06, "loss": 0.2154, "step": 1209 }, { "epoch": 0.39, "learning_rate": 4.868284172139551e-06, "loss": 0.2115, "step": 1210 }, { "epoch": 0.39, "learning_rate": 4.868003883763408e-06, "loss": 0.2028, "step": 1211 }, { "epoch": 0.39, "learning_rate": 4.867723305566514e-06, "loss": 0.1982, "step": 1212 }, { "epoch": 0.39, "learning_rate": 4.86744243758321e-06, "loss": 0.2072, "step": 1213 }, { "epoch": 0.39, "learning_rate": 4.8671612798478685e-06, "loss": 0.2008, "step": 1214 }, { "epoch": 0.39, "learning_rate": 4.866879832394903e-06, "loss": 0.1974, "step": 1215 }, { "epoch": 0.39, "learning_rate": 4.86659809525876e-06, "loss": 0.2251, "step": 1216 }, { "epoch": 0.39, "learning_rate": 4.866316068473919e-06, "loss": 0.2152, "step": 1217 }, { "epoch": 0.39, "learning_rate": 4.8660337520749e-06, "loss": 0.1826, "step": 1218 }, { "epoch": 0.4, "learning_rate": 4.865751146096255e-06, "loss": 0.1874, "step": 1219 }, { "epoch": 0.4, "learning_rate": 4.865468250572571e-06, "loss": 0.2011, "step": 1220 }, { "epoch": 0.4, "learning_rate": 4.865185065538472e-06, "loss": 0.2081, "step": 1221 }, { "epoch": 0.4, "learning_rate": 4.86490159102862e-06, "loss": 0.1934, "step": 1222 }, { "epoch": 0.4, "learning_rate": 4.8646178270777055e-06, "loss": 0.2068, "step": 1223 }, { "epoch": 0.4, "learning_rate": 4.864333773720461e-06, "loss": 0.2143, "step": 1224 }, { "epoch": 0.4, "learning_rate": 4.8640494309916506e-06, "loss": 0.1973, "step": 1225 }, { "epoch": 0.4, "learning_rate": 4.863764798926076e-06, "loss": 0.1932, "step": 1226 }, { "epoch": 0.4, "learning_rate": 4.863479877558573e-06, "loss": 0.2031, "step": 1227 }, { "epoch": 0.4, "learning_rate": 4.863194666924013e-06, "loss": 0.2028, "step": 1228 }, { "epoch": 0.4, "learning_rate": 4.862909167057304e-06, "loss": 0.1942, "step": 1229 }, { "epoch": 0.4, "learning_rate": 4.862623377993387e-06, "loss": 0.1951, "step": 1230 }, { "epoch": 0.4, "learning_rate": 4.862337299767241e-06, "loss": 0.2045, "step": 1231 }, { "epoch": 0.4, "learning_rate": 4.862050932413878e-06, "loss": 0.2154, "step": 1232 }, { "epoch": 0.4, "learning_rate": 4.8617642759683474e-06, "loss": 0.2133, "step": 1233 }, { "epoch": 0.4, "learning_rate": 4.861477330465734e-06, "loss": 0.1916, "step": 1234 }, { "epoch": 0.4, "learning_rate": 4.861190095941155e-06, "loss": 0.1951, "step": 1235 }, { "epoch": 0.4, "learning_rate": 4.860902572429767e-06, "loss": 0.1924, "step": 1236 }, { "epoch": 0.4, "learning_rate": 4.86061475996676e-06, "loss": 0.2232, "step": 1237 }, { "epoch": 0.4, "learning_rate": 4.860326658587358e-06, "loss": 0.201, "step": 1238 }, { "epoch": 0.4, "learning_rate": 4.860038268326823e-06, "loss": 0.2, "step": 1239 }, { "epoch": 0.4, "learning_rate": 4.85974958922045e-06, "loss": 0.1906, "step": 1240 }, { "epoch": 0.4, "learning_rate": 4.859460621303572e-06, "loss": 0.1856, "step": 1241 }, { "epoch": 0.4, "learning_rate": 4.859171364611556e-06, "loss": 0.2039, "step": 1242 }, { "epoch": 0.4, "learning_rate": 4.8588818191798035e-06, "loss": 0.1923, "step": 1243 }, { "epoch": 0.4, "learning_rate": 4.858591985043751e-06, "loss": 0.2011, "step": 1244 }, { "epoch": 0.4, "learning_rate": 4.858301862238874e-06, "loss": 0.2141, "step": 1245 }, { "epoch": 0.4, "learning_rate": 4.858011450800678e-06, "loss": 0.2019, "step": 1246 }, { "epoch": 0.4, "learning_rate": 4.857720750764708e-06, "loss": 0.2116, "step": 1247 }, { "epoch": 0.4, "learning_rate": 4.857429762166543e-06, "loss": 0.1842, "step": 1248 }, { "epoch": 0.4, "learning_rate": 4.857138485041797e-06, "loss": 0.1929, "step": 1249 }, { "epoch": 0.41, "learning_rate": 4.856846919426118e-06, "loss": 0.1821, "step": 1250 }, { "epoch": 0.41, "learning_rate": 4.856555065355193e-06, "loss": 0.2071, "step": 1251 }, { "epoch": 0.41, "learning_rate": 4.856262922864741e-06, "loss": 0.2032, "step": 1252 }, { "epoch": 0.41, "learning_rate": 4.855970491990518e-06, "loss": 0.1861, "step": 1253 }, { "epoch": 0.41, "learning_rate": 4.855677772768315e-06, "loss": 0.194, "step": 1254 }, { "epoch": 0.41, "learning_rate": 4.855384765233956e-06, "loss": 0.2053, "step": 1255 }, { "epoch": 0.41, "learning_rate": 4.8550914694233045e-06, "loss": 0.2072, "step": 1256 }, { "epoch": 0.41, "learning_rate": 4.854797885372255e-06, "loss": 0.21, "step": 1257 }, { "epoch": 0.41, "learning_rate": 4.854504013116741e-06, "loss": 0.1956, "step": 1258 }, { "epoch": 0.41, "learning_rate": 4.8542098526927304e-06, "loss": 0.1915, "step": 1259 }, { "epoch": 0.41, "learning_rate": 4.853915404136223e-06, "loss": 0.1919, "step": 1260 }, { "epoch": 0.41, "learning_rate": 4.853620667483259e-06, "loss": 0.1859, "step": 1261 }, { "epoch": 0.41, "learning_rate": 4.853325642769908e-06, "loss": 0.1957, "step": 1262 }, { "epoch": 0.41, "learning_rate": 4.853030330032283e-06, "loss": 0.1817, "step": 1263 }, { "epoch": 0.41, "learning_rate": 4.852734729306523e-06, "loss": 0.203, "step": 1264 }, { "epoch": 0.41, "learning_rate": 4.852438840628808e-06, "loss": 0.1967, "step": 1265 }, { "epoch": 0.41, "learning_rate": 4.852142664035353e-06, "loss": 0.2188, "step": 1266 }, { "epoch": 0.41, "learning_rate": 4.8518461995624064e-06, "loss": 0.194, "step": 1267 }, { "epoch": 0.41, "learning_rate": 4.851549447246253e-06, "loss": 0.2041, "step": 1268 }, { "epoch": 0.41, "learning_rate": 4.851252407123211e-06, "loss": 0.187, "step": 1269 }, { "epoch": 0.41, "learning_rate": 4.850955079229637e-06, "loss": 0.1941, "step": 1270 }, { "epoch": 0.41, "learning_rate": 4.850657463601921e-06, "loss": 0.2164, "step": 1271 }, { "epoch": 0.41, "learning_rate": 4.850359560276486e-06, "loss": 0.2033, "step": 1272 }, { "epoch": 0.41, "learning_rate": 4.850061369289795e-06, "loss": 0.179, "step": 1273 }, { "epoch": 0.41, "learning_rate": 4.8497628906783425e-06, "loss": 0.1855, "step": 1274 }, { "epoch": 0.41, "learning_rate": 4.84946412447866e-06, "loss": 0.192, "step": 1275 }, { "epoch": 0.41, "learning_rate": 4.849165070727313e-06, "loss": 0.1976, "step": 1276 }, { "epoch": 0.41, "learning_rate": 4.848865729460903e-06, "loss": 0.1899, "step": 1277 }, { "epoch": 0.41, "learning_rate": 4.848566100716066e-06, "loss": 0.211, "step": 1278 }, { "epoch": 0.41, "learning_rate": 4.848266184529475e-06, "loss": 0.2067, "step": 1279 }, { "epoch": 0.41, "learning_rate": 4.847965980937836e-06, "loss": 0.2025, "step": 1280 }, { "epoch": 0.42, "learning_rate": 4.847665489977891e-06, "loss": 0.2024, "step": 1281 }, { "epoch": 0.42, "learning_rate": 4.847364711686417e-06, "loss": 0.2257, "step": 1282 }, { "epoch": 0.42, "learning_rate": 4.847063646100226e-06, "loss": 0.1749, "step": 1283 }, { "epoch": 0.42, "learning_rate": 4.846762293256167e-06, "loss": 0.2043, "step": 1284 }, { "epoch": 0.42, "learning_rate": 4.846460653191121e-06, "loss": 0.1956, "step": 1285 }, { "epoch": 0.42, "learning_rate": 4.846158725942006e-06, "loss": 0.1927, "step": 1286 }, { "epoch": 0.42, "learning_rate": 4.845856511545777e-06, "loss": 0.1946, "step": 1287 }, { "epoch": 0.42, "learning_rate": 4.84555401003942e-06, "loss": 0.1911, "step": 1288 }, { "epoch": 0.42, "learning_rate": 4.845251221459958e-06, "loss": 0.2085, "step": 1289 }, { "epoch": 0.42, "learning_rate": 4.844948145844452e-06, "loss": 0.1844, "step": 1290 }, { "epoch": 0.42, "learning_rate": 4.844644783229993e-06, "loss": 0.2108, "step": 1291 }, { "epoch": 0.42, "learning_rate": 4.844341133653709e-06, "loss": 0.2041, "step": 1292 }, { "epoch": 0.42, "learning_rate": 4.844037197152767e-06, "loss": 0.2122, "step": 1293 }, { "epoch": 0.42, "learning_rate": 4.843732973764363e-06, "loss": 0.1892, "step": 1294 }, { "epoch": 0.42, "learning_rate": 4.8434284635257335e-06, "loss": 0.2377, "step": 1295 }, { "epoch": 0.42, "learning_rate": 4.843123666474146e-06, "loss": 0.1976, "step": 1296 }, { "epoch": 0.42, "learning_rate": 4.842818582646904e-06, "loss": 0.1948, "step": 1297 }, { "epoch": 0.42, "learning_rate": 4.842513212081348e-06, "loss": 0.2011, "step": 1298 }, { "epoch": 0.42, "learning_rate": 4.8422075548148525e-06, "loss": 0.2094, "step": 1299 }, { "epoch": 0.42, "learning_rate": 4.841901610884826e-06, "loss": 0.1766, "step": 1300 }, { "epoch": 0.42, "learning_rate": 4.841595380328714e-06, "loss": 0.1864, "step": 1301 }, { "epoch": 0.42, "learning_rate": 4.841288863183996e-06, "loss": 0.2044, "step": 1302 }, { "epoch": 0.42, "learning_rate": 4.840982059488186e-06, "loss": 0.1867, "step": 1303 }, { "epoch": 0.42, "learning_rate": 4.840674969278836e-06, "loss": 0.2352, "step": 1304 }, { "epoch": 0.42, "learning_rate": 4.8403675925935275e-06, "loss": 0.201, "step": 1305 }, { "epoch": 0.42, "learning_rate": 4.8400599294698825e-06, "loss": 0.1937, "step": 1306 }, { "epoch": 0.42, "learning_rate": 4.839751979945556e-06, "loss": 0.2125, "step": 1307 }, { "epoch": 0.42, "learning_rate": 4.839443744058238e-06, "loss": 0.2082, "step": 1308 }, { "epoch": 0.42, "learning_rate": 4.839135221845654e-06, "loss": 0.2005, "step": 1309 }, { "epoch": 0.42, "learning_rate": 4.838826413345561e-06, "loss": 0.1971, "step": 1310 }, { "epoch": 0.42, "learning_rate": 4.838517318595758e-06, "loss": 0.1859, "step": 1311 }, { "epoch": 0.43, "learning_rate": 4.838207937634074e-06, "loss": 0.2017, "step": 1312 }, { "epoch": 0.43, "learning_rate": 4.837898270498374e-06, "loss": 0.2163, "step": 1313 }, { "epoch": 0.43, "learning_rate": 4.837588317226558e-06, "loss": 0.2185, "step": 1314 }, { "epoch": 0.43, "learning_rate": 4.837278077856562e-06, "loss": 0.1813, "step": 1315 }, { "epoch": 0.43, "learning_rate": 4.836967552426355e-06, "loss": 0.199, "step": 1316 }, { "epoch": 0.43, "learning_rate": 4.836656740973944e-06, "loss": 0.1974, "step": 1317 }, { "epoch": 0.43, "learning_rate": 4.836345643537368e-06, "loss": 0.1945, "step": 1318 }, { "epoch": 0.43, "learning_rate": 4.836034260154704e-06, "loss": 0.183, "step": 1319 }, { "epoch": 0.43, "learning_rate": 4.83572259086406e-06, "loss": 0.1898, "step": 1320 }, { "epoch": 0.43, "learning_rate": 4.835410635703582e-06, "loss": 0.2087, "step": 1321 }, { "epoch": 0.43, "learning_rate": 4.835098394711451e-06, "loss": 0.2037, "step": 1322 }, { "epoch": 0.43, "learning_rate": 4.834785867925883e-06, "loss": 0.1829, "step": 1323 }, { "epoch": 0.43, "learning_rate": 4.8344730553851275e-06, "loss": 0.1976, "step": 1324 }, { "epoch": 0.43, "learning_rate": 4.834159957127468e-06, "loss": 0.2157, "step": 1325 }, { "epoch": 0.43, "learning_rate": 4.833846573191227e-06, "loss": 0.2, "step": 1326 }, { "epoch": 0.43, "learning_rate": 4.833532903614758e-06, "loss": 0.196, "step": 1327 }, { "epoch": 0.43, "learning_rate": 4.833218948436453e-06, "loss": 0.1941, "step": 1328 }, { "epoch": 0.43, "learning_rate": 4.832904707694736e-06, "loss": 0.2124, "step": 1329 }, { "epoch": 0.43, "learning_rate": 4.832590181428066e-06, "loss": 0.2171, "step": 1330 }, { "epoch": 0.43, "learning_rate": 4.832275369674939e-06, "loss": 0.1828, "step": 1331 }, { "epoch": 0.43, "learning_rate": 4.831960272473886e-06, "loss": 0.195, "step": 1332 }, { "epoch": 0.43, "learning_rate": 4.831644889863471e-06, "loss": 0.1786, "step": 1333 }, { "epoch": 0.43, "learning_rate": 4.831329221882291e-06, "loss": 0.1948, "step": 1334 }, { "epoch": 0.43, "learning_rate": 4.831013268568986e-06, "loss": 0.2043, "step": 1335 }, { "epoch": 0.43, "learning_rate": 4.830697029962222e-06, "loss": 0.204, "step": 1336 }, { "epoch": 0.43, "learning_rate": 4.830380506100704e-06, "loss": 0.1914, "step": 1337 }, { "epoch": 0.43, "learning_rate": 4.830063697023173e-06, "loss": 0.2201, "step": 1338 }, { "epoch": 0.43, "learning_rate": 4.829746602768401e-06, "loss": 0.1787, "step": 1339 }, { "epoch": 0.43, "learning_rate": 4.8294292233752e-06, "loss": 0.1927, "step": 1340 }, { "epoch": 0.43, "learning_rate": 4.829111558882411e-06, "loss": 0.2086, "step": 1341 }, { "epoch": 0.43, "learning_rate": 4.828793609328916e-06, "loss": 0.1885, "step": 1342 }, { "epoch": 0.44, "learning_rate": 4.828475374753627e-06, "loss": 0.2096, "step": 1343 }, { "epoch": 0.44, "learning_rate": 4.828156855195493e-06, "loss": 0.2029, "step": 1344 }, { "epoch": 0.44, "learning_rate": 4.827838050693499e-06, "loss": 0.2044, "step": 1345 }, { "epoch": 0.44, "learning_rate": 4.827518961286663e-06, "loss": 0.1943, "step": 1346 }, { "epoch": 0.44, "learning_rate": 4.827199587014038e-06, "loss": 0.2104, "step": 1347 }, { "epoch": 0.44, "learning_rate": 4.826879927914713e-06, "loss": 0.2139, "step": 1348 }, { "epoch": 0.44, "learning_rate": 4.82655998402781e-06, "loss": 0.1975, "step": 1349 }, { "epoch": 0.44, "learning_rate": 4.826239755392488e-06, "loss": 0.2219, "step": 1350 }, { "epoch": 0.44, "learning_rate": 4.8259192420479395e-06, "loss": 0.202, "step": 1351 }, { "epoch": 0.44, "learning_rate": 4.825598444033393e-06, "loss": 0.2066, "step": 1352 }, { "epoch": 0.44, "learning_rate": 4.82527736138811e-06, "loss": 0.2078, "step": 1353 }, { "epoch": 0.44, "learning_rate": 4.824955994151389e-06, "loss": 0.201, "step": 1354 }, { "epoch": 0.44, "learning_rate": 4.824634342362561e-06, "loss": 0.21, "step": 1355 }, { "epoch": 0.44, "learning_rate": 4.824312406060995e-06, "loss": 0.1874, "step": 1356 }, { "epoch": 0.44, "learning_rate": 4.82399018528609e-06, "loss": 0.1796, "step": 1357 }, { "epoch": 0.44, "learning_rate": 4.823667680077285e-06, "loss": 0.2174, "step": 1358 }, { "epoch": 0.44, "learning_rate": 4.8233448904740505e-06, "loss": 0.2069, "step": 1359 }, { "epoch": 0.44, "learning_rate": 4.823021816515893e-06, "loss": 0.1977, "step": 1360 }, { "epoch": 0.44, "learning_rate": 4.8226984582423545e-06, "loss": 0.2349, "step": 1361 }, { "epoch": 0.44, "learning_rate": 4.82237481569301e-06, "loss": 0.2066, "step": 1362 }, { "epoch": 0.44, "learning_rate": 4.822050888907469e-06, "loss": 0.1989, "step": 1363 }, { "epoch": 0.44, "learning_rate": 4.82172667792538e-06, "loss": 0.1942, "step": 1364 }, { "epoch": 0.44, "learning_rate": 4.821402182786421e-06, "loss": 0.2068, "step": 1365 }, { "epoch": 0.44, "learning_rate": 4.8210774035303085e-06, "loss": 0.1995, "step": 1366 }, { "epoch": 0.44, "learning_rate": 4.82075234019679e-06, "loss": 0.1872, "step": 1367 }, { "epoch": 0.44, "learning_rate": 4.820426992825653e-06, "loss": 0.1991, "step": 1368 }, { "epoch": 0.44, "learning_rate": 4.820101361456715e-06, "loss": 0.1945, "step": 1369 }, { "epoch": 0.44, "learning_rate": 4.819775446129832e-06, "loss": 0.2061, "step": 1370 }, { "epoch": 0.44, "learning_rate": 4.8194492468848895e-06, "loss": 0.1865, "step": 1371 }, { "epoch": 0.44, "learning_rate": 4.8191227637618145e-06, "loss": 0.2091, "step": 1372 }, { "epoch": 0.44, "learning_rate": 4.818795996800564e-06, "loss": 0.229, "step": 1373 }, { "epoch": 0.45, "learning_rate": 4.8184689460411306e-06, "loss": 0.2104, "step": 1374 }, { "epoch": 0.45, "learning_rate": 4.818141611523543e-06, "loss": 0.1909, "step": 1375 }, { "epoch": 0.45, "learning_rate": 4.817813993287863e-06, "loss": 0.1879, "step": 1376 }, { "epoch": 0.45, "learning_rate": 4.817486091374189e-06, "loss": 0.2034, "step": 1377 }, { "epoch": 0.45, "learning_rate": 4.817157905822652e-06, "loss": 0.2027, "step": 1378 }, { "epoch": 0.45, "learning_rate": 4.816829436673421e-06, "loss": 0.214, "step": 1379 }, { "epoch": 0.45, "learning_rate": 4.816500683966694e-06, "loss": 0.1853, "step": 1380 }, { "epoch": 0.45, "learning_rate": 4.816171647742708e-06, "loss": 0.2137, "step": 1381 }, { "epoch": 0.45, "learning_rate": 4.815842328041736e-06, "loss": 0.1859, "step": 1382 }, { "epoch": 0.45, "learning_rate": 4.815512724904081e-06, "loss": 0.1823, "step": 1383 }, { "epoch": 0.45, "learning_rate": 4.815182838370085e-06, "loss": 0.1904, "step": 1384 }, { "epoch": 0.45, "learning_rate": 4.814852668480122e-06, "loss": 0.1882, "step": 1385 }, { "epoch": 0.45, "learning_rate": 4.814522215274603e-06, "loss": 0.1985, "step": 1386 }, { "epoch": 0.45, "learning_rate": 4.81419147879397e-06, "loss": 0.2057, "step": 1387 }, { "epoch": 0.45, "learning_rate": 4.813860459078703e-06, "loss": 0.2016, "step": 1388 }, { "epoch": 0.45, "learning_rate": 4.813529156169317e-06, "loss": 0.2119, "step": 1389 }, { "epoch": 0.45, "learning_rate": 4.813197570106357e-06, "loss": 0.197, "step": 1390 }, { "epoch": 0.45, "learning_rate": 4.8128657009304096e-06, "loss": 0.1964, "step": 1391 }, { "epoch": 0.45, "learning_rate": 4.8125335486820905e-06, "loss": 0.1773, "step": 1392 }, { "epoch": 0.45, "learning_rate": 4.8122011134020505e-06, "loss": 0.2065, "step": 1393 }, { "epoch": 0.45, "learning_rate": 4.8118683951309795e-06, "loss": 0.1906, "step": 1394 }, { "epoch": 0.45, "learning_rate": 4.811535393909598e-06, "loss": 0.1948, "step": 1395 }, { "epoch": 0.45, "learning_rate": 4.811202109778661e-06, "loss": 0.2077, "step": 1396 }, { "epoch": 0.45, "learning_rate": 4.810868542778959e-06, "loss": 0.1778, "step": 1397 }, { "epoch": 0.45, "learning_rate": 4.81053469295132e-06, "loss": 0.1908, "step": 1398 }, { "epoch": 0.45, "learning_rate": 4.810200560336601e-06, "loss": 0.1978, "step": 1399 }, { "epoch": 0.45, "learning_rate": 4.809866144975699e-06, "loss": 0.2111, "step": 1400 }, { "epoch": 0.45, "learning_rate": 4.809531446909541e-06, "loss": 0.2044, "step": 1401 }, { "epoch": 0.45, "learning_rate": 4.8091964661790926e-06, "loss": 0.2077, "step": 1402 }, { "epoch": 0.45, "learning_rate": 4.808861202825351e-06, "loss": 0.2157, "step": 1403 }, { "epoch": 0.45, "learning_rate": 4.80852565688935e-06, "loss": 0.194, "step": 1404 }, { "epoch": 0.46, "learning_rate": 4.808189828412157e-06, "loss": 0.2317, "step": 1405 }, { "epoch": 0.46, "learning_rate": 4.807853717434874e-06, "loss": 0.2038, "step": 1406 }, { "epoch": 0.46, "learning_rate": 4.807517323998637e-06, "loss": 0.1912, "step": 1407 }, { "epoch": 0.46, "learning_rate": 4.8071806481446194e-06, "loss": 0.1933, "step": 1408 }, { "epoch": 0.46, "learning_rate": 4.806843689914025e-06, "loss": 0.2047, "step": 1409 }, { "epoch": 0.46, "learning_rate": 4.806506449348094e-06, "loss": 0.1904, "step": 1410 }, { "epoch": 0.46, "learning_rate": 4.8061689264881036e-06, "loss": 0.2071, "step": 1411 }, { "epoch": 0.46, "learning_rate": 4.805831121375361e-06, "loss": 0.2043, "step": 1412 }, { "epoch": 0.46, "learning_rate": 4.805493034051212e-06, "loss": 0.1817, "step": 1413 }, { "epoch": 0.46, "learning_rate": 4.805154664557034e-06, "loss": 0.2085, "step": 1414 }, { "epoch": 0.46, "learning_rate": 4.804816012934242e-06, "loss": 0.1933, "step": 1415 }, { "epoch": 0.46, "learning_rate": 4.8044770792242815e-06, "loss": 0.1715, "step": 1416 }, { "epoch": 0.46, "learning_rate": 4.8041378634686355e-06, "loss": 0.2034, "step": 1417 }, { "epoch": 0.46, "learning_rate": 4.803798365708821e-06, "loss": 0.2082, "step": 1418 }, { "epoch": 0.46, "learning_rate": 4.803458585986389e-06, "loss": 0.1721, "step": 1419 }, { "epoch": 0.46, "learning_rate": 4.803118524342925e-06, "loss": 0.2111, "step": 1420 }, { "epoch": 0.46, "learning_rate": 4.80277818082005e-06, "loss": 0.2103, "step": 1421 }, { "epoch": 0.46, "learning_rate": 4.802437555459418e-06, "loss": 0.1626, "step": 1422 }, { "epoch": 0.46, "learning_rate": 4.802096648302718e-06, "loss": 0.2012, "step": 1423 }, { "epoch": 0.46, "learning_rate": 4.801755459391675e-06, "loss": 0.2205, "step": 1424 }, { "epoch": 0.46, "learning_rate": 4.801413988768047e-06, "loss": 0.2234, "step": 1425 }, { "epoch": 0.46, "learning_rate": 4.801072236473625e-06, "loss": 0.2028, "step": 1426 }, { "epoch": 0.46, "learning_rate": 4.800730202550237e-06, "loss": 0.1936, "step": 1427 }, { "epoch": 0.46, "learning_rate": 4.800387887039747e-06, "loss": 0.195, "step": 1428 }, { "epoch": 0.46, "learning_rate": 4.800045289984047e-06, "loss": 0.1842, "step": 1429 }, { "epoch": 0.46, "learning_rate": 4.799702411425071e-06, "loss": 0.1984, "step": 1430 }, { "epoch": 0.46, "learning_rate": 4.7993592514047825e-06, "loss": 0.2006, "step": 1431 }, { "epoch": 0.46, "learning_rate": 4.7990158099651815e-06, "loss": 0.1849, "step": 1432 }, { "epoch": 0.46, "learning_rate": 4.798672087148301e-06, "loss": 0.2077, "step": 1433 }, { "epoch": 0.46, "learning_rate": 4.79832808299621e-06, "loss": 0.1966, "step": 1434 }, { "epoch": 0.47, "learning_rate": 4.797983797551011e-06, "loss": 0.2019, "step": 1435 }, { "epoch": 0.47, "learning_rate": 4.7976392308548416e-06, "loss": 0.198, "step": 1436 }, { "epoch": 0.47, "learning_rate": 4.797294382949873e-06, "loss": 0.2, "step": 1437 }, { "epoch": 0.47, "learning_rate": 4.796949253878311e-06, "loss": 0.1763, "step": 1438 }, { "epoch": 0.47, "learning_rate": 4.796603843682397e-06, "loss": 0.2045, "step": 1439 }, { "epoch": 0.47, "learning_rate": 4.796258152404406e-06, "loss": 0.198, "step": 1440 }, { "epoch": 0.47, "learning_rate": 4.795912180086646e-06, "loss": 0.2092, "step": 1441 }, { "epoch": 0.47, "learning_rate": 4.795565926771461e-06, "loss": 0.1826, "step": 1442 }, { "epoch": 0.47, "learning_rate": 4.79521939250123e-06, "loss": 0.1951, "step": 1443 }, { "epoch": 0.47, "learning_rate": 4.7948725773183645e-06, "loss": 0.1773, "step": 1444 }, { "epoch": 0.47, "learning_rate": 4.794525481265312e-06, "loss": 0.2107, "step": 1445 }, { "epoch": 0.47, "learning_rate": 4.794178104384554e-06, "loss": 0.1818, "step": 1446 }, { "epoch": 0.47, "learning_rate": 4.7938304467186036e-06, "loss": 0.2026, "step": 1447 }, { "epoch": 0.47, "learning_rate": 4.793482508310014e-06, "loss": 0.2199, "step": 1448 }, { "epoch": 0.47, "learning_rate": 4.793134289201367e-06, "loss": 0.2129, "step": 1449 }, { "epoch": 0.47, "learning_rate": 4.792785789435283e-06, "loss": 0.1977, "step": 1450 }, { "epoch": 0.47, "learning_rate": 4.792437009054413e-06, "loss": 0.2, "step": 1451 }, { "epoch": 0.47, "learning_rate": 4.792087948101447e-06, "loss": 0.1977, "step": 1452 }, { "epoch": 0.47, "learning_rate": 4.791738606619105e-06, "loss": 0.1887, "step": 1453 }, { "epoch": 0.47, "learning_rate": 4.791388984650143e-06, "loss": 0.1886, "step": 1454 }, { "epoch": 0.47, "learning_rate": 4.791039082237352e-06, "loss": 0.2072, "step": 1455 }, { "epoch": 0.47, "learning_rate": 4.790688899423556e-06, "loss": 0.1997, "step": 1456 }, { "epoch": 0.47, "learning_rate": 4.7903384362516135e-06, "loss": 0.1951, "step": 1457 }, { "epoch": 0.47, "learning_rate": 4.78998769276442e-06, "loss": 0.193, "step": 1458 }, { "epoch": 0.47, "learning_rate": 4.7896366690049016e-06, "loss": 0.1967, "step": 1459 }, { "epoch": 0.47, "learning_rate": 4.789285365016019e-06, "loss": 0.2013, "step": 1460 }, { "epoch": 0.47, "learning_rate": 4.788933780840771e-06, "loss": 0.1946, "step": 1461 }, { "epoch": 0.47, "learning_rate": 4.788581916522186e-06, "loss": 0.1967, "step": 1462 }, { "epoch": 0.47, "learning_rate": 4.78822977210333e-06, "loss": 0.1934, "step": 1463 }, { "epoch": 0.47, "learning_rate": 4.787877347627302e-06, "loss": 0.198, "step": 1464 }, { "epoch": 0.47, "learning_rate": 4.787524643137235e-06, "loss": 0.2081, "step": 1465 }, { "epoch": 0.48, "learning_rate": 4.7871716586762965e-06, "loss": 0.1919, "step": 1466 }, { "epoch": 0.48, "learning_rate": 4.786818394287688e-06, "loss": 0.221, "step": 1467 }, { "epoch": 0.48, "learning_rate": 4.786464850014646e-06, "loss": 0.1885, "step": 1468 }, { "epoch": 0.48, "learning_rate": 4.786111025900442e-06, "loss": 0.1938, "step": 1469 }, { "epoch": 0.48, "learning_rate": 4.785756921988379e-06, "loss": 0.1972, "step": 1470 }, { "epoch": 0.48, "learning_rate": 4.785402538321798e-06, "loss": 0.2045, "step": 1471 }, { "epoch": 0.48, "learning_rate": 4.785047874944069e-06, "loss": 0.2066, "step": 1472 }, { "epoch": 0.48, "learning_rate": 4.784692931898601e-06, "loss": 0.1733, "step": 1473 }, { "epoch": 0.48, "learning_rate": 4.7843377092288365e-06, "loss": 0.2131, "step": 1474 }, { "epoch": 0.48, "learning_rate": 4.7839822069782505e-06, "loss": 0.1745, "step": 1475 }, { "epoch": 0.48, "learning_rate": 4.783626425190353e-06, "loss": 0.1889, "step": 1476 }, { "epoch": 0.48, "learning_rate": 4.783270363908687e-06, "loss": 0.201, "step": 1477 }, { "epoch": 0.48, "learning_rate": 4.782914023176834e-06, "loss": 0.2076, "step": 1478 }, { "epoch": 0.48, "learning_rate": 4.782557403038404e-06, "loss": 0.1925, "step": 1479 }, { "epoch": 0.48, "learning_rate": 4.7822005035370455e-06, "loss": 0.2023, "step": 1480 }, { "epoch": 0.48, "learning_rate": 4.781843324716437e-06, "loss": 0.188, "step": 1481 }, { "epoch": 0.48, "learning_rate": 4.7814858666202975e-06, "loss": 0.1948, "step": 1482 }, { "epoch": 0.48, "learning_rate": 4.781128129292374e-06, "loss": 0.2049, "step": 1483 }, { "epoch": 0.48, "learning_rate": 4.7807701127764506e-06, "loss": 0.1836, "step": 1484 }, { "epoch": 0.48, "learning_rate": 4.780411817116344e-06, "loss": 0.1931, "step": 1485 }, { "epoch": 0.48, "learning_rate": 4.780053242355908e-06, "loss": 0.2155, "step": 1486 }, { "epoch": 0.48, "learning_rate": 4.779694388539027e-06, "loss": 0.2102, "step": 1487 }, { "epoch": 0.48, "learning_rate": 4.779335255709623e-06, "loss": 0.2011, "step": 1488 }, { "epoch": 0.48, "learning_rate": 4.778975843911649e-06, "loss": 0.172, "step": 1489 }, { "epoch": 0.48, "learning_rate": 4.778616153189093e-06, "loss": 0.1925, "step": 1490 }, { "epoch": 0.48, "learning_rate": 4.7782561835859795e-06, "loss": 0.1858, "step": 1491 }, { "epoch": 0.48, "learning_rate": 4.777895935146364e-06, "loss": 0.1836, "step": 1492 }, { "epoch": 0.48, "learning_rate": 4.777535407914338e-06, "loss": 0.1987, "step": 1493 }, { "epoch": 0.48, "learning_rate": 4.777174601934026e-06, "loss": 0.1763, "step": 1494 }, { "epoch": 0.48, "learning_rate": 4.776813517249588e-06, "loss": 0.2112, "step": 1495 }, { "epoch": 0.48, "learning_rate": 4.776452153905216e-06, "loss": 0.2045, "step": 1496 }, { "epoch": 0.49, "learning_rate": 4.776090511945139e-06, "loss": 0.21, "step": 1497 }, { "epoch": 0.49, "learning_rate": 4.775728591413616e-06, "loss": 0.216, "step": 1498 }, { "epoch": 0.49, "learning_rate": 4.775366392354946e-06, "loss": 0.2166, "step": 1499 }, { "epoch": 0.49, "learning_rate": 4.775003914813456e-06, "loss": 0.1736, "step": 1500 }, { "epoch": 0.49, "learning_rate": 4.7746411588335105e-06, "loss": 0.2032, "step": 1501 }, { "epoch": 0.49, "learning_rate": 4.774278124459509e-06, "loss": 0.2072, "step": 1502 }, { "epoch": 0.49, "learning_rate": 4.773914811735879e-06, "loss": 0.1911, "step": 1503 }, { "epoch": 0.49, "learning_rate": 4.773551220707091e-06, "loss": 0.1938, "step": 1504 }, { "epoch": 0.49, "learning_rate": 4.773187351417643e-06, "loss": 0.196, "step": 1505 }, { "epoch": 0.49, "learning_rate": 4.772823203912069e-06, "loss": 0.1974, "step": 1506 }, { "epoch": 0.49, "learning_rate": 4.772458778234938e-06, "loss": 0.19, "step": 1507 }, { "epoch": 0.49, "learning_rate": 4.772094074430852e-06, "loss": 0.2039, "step": 1508 }, { "epoch": 0.49, "learning_rate": 4.771729092544446e-06, "loss": 0.2107, "step": 1509 }, { "epoch": 0.49, "learning_rate": 4.771363832620391e-06, "loss": 0.1959, "step": 1510 }, { "epoch": 0.49, "learning_rate": 4.770998294703392e-06, "loss": 0.1946, "step": 1511 }, { "epoch": 0.49, "learning_rate": 4.7706324788381865e-06, "loss": 0.2011, "step": 1512 }, { "epoch": 0.49, "learning_rate": 4.770266385069547e-06, "loss": 0.2049, "step": 1513 }, { "epoch": 0.49, "learning_rate": 4.769900013442279e-06, "loss": 0.2114, "step": 1514 }, { "epoch": 0.49, "learning_rate": 4.769533364001225e-06, "loss": 0.1892, "step": 1515 }, { "epoch": 0.49, "learning_rate": 4.769166436791257e-06, "loss": 0.1948, "step": 1516 }, { "epoch": 0.49, "learning_rate": 4.768799231857285e-06, "loss": 0.1955, "step": 1517 }, { "epoch": 0.49, "learning_rate": 4.768431749244251e-06, "loss": 0.1853, "step": 1518 }, { "epoch": 0.49, "learning_rate": 4.76806398899713e-06, "loss": 0.1738, "step": 1519 }, { "epoch": 0.49, "learning_rate": 4.767695951160934e-06, "loss": 0.1987, "step": 1520 }, { "epoch": 0.49, "learning_rate": 4.767327635780707e-06, "loss": 0.2173, "step": 1521 }, { "epoch": 0.49, "learning_rate": 4.7669590429015265e-06, "loss": 0.2027, "step": 1522 }, { "epoch": 0.49, "learning_rate": 4.7665901725685045e-06, "loss": 0.2007, "step": 1523 }, { "epoch": 0.49, "learning_rate": 4.766221024826788e-06, "loss": 0.1898, "step": 1524 }, { "epoch": 0.49, "learning_rate": 4.765851599721557e-06, "loss": 0.188, "step": 1525 }, { "epoch": 0.49, "learning_rate": 4.765481897298025e-06, "loss": 0.1923, "step": 1526 }, { "epoch": 0.49, "learning_rate": 4.76511191760144e-06, "loss": 0.2043, "step": 1527 }, { "epoch": 0.5, "learning_rate": 4.764741660677085e-06, "loss": 0.2098, "step": 1528 }, { "epoch": 0.5, "learning_rate": 4.764371126570275e-06, "loss": 0.1828, "step": 1529 }, { "epoch": 0.5, "learning_rate": 4.76400031532636e-06, "loss": 0.1712, "step": 1530 }, { "epoch": 0.5, "learning_rate": 4.763629226990724e-06, "loss": 0.1993, "step": 1531 }, { "epoch": 0.5, "learning_rate": 4.763257861608783e-06, "loss": 0.2036, "step": 1532 }, { "epoch": 0.5, "learning_rate": 4.762886219225991e-06, "loss": 0.2036, "step": 1533 }, { "epoch": 0.5, "learning_rate": 4.762514299887831e-06, "loss": 0.1976, "step": 1534 }, { "epoch": 0.5, "learning_rate": 4.762142103639824e-06, "loss": 0.2022, "step": 1535 }, { "epoch": 0.5, "learning_rate": 4.761769630527523e-06, "loss": 0.1901, "step": 1536 }, { "epoch": 0.5, "learning_rate": 4.761396880596515e-06, "loss": 0.1677, "step": 1537 }, { "epoch": 0.5, "learning_rate": 4.76102385389242e-06, "loss": 0.2105, "step": 1538 }, { "epoch": 0.5, "learning_rate": 4.760650550460895e-06, "loss": 0.2155, "step": 1539 }, { "epoch": 0.5, "learning_rate": 4.760276970347627e-06, "loss": 0.1932, "step": 1540 }, { "epoch": 0.5, "learning_rate": 4.759903113598338e-06, "loss": 0.1787, "step": 1541 }, { "epoch": 0.5, "learning_rate": 4.759528980258786e-06, "loss": 0.2073, "step": 1542 }, { "epoch": 0.5, "learning_rate": 4.759154570374761e-06, "loss": 0.1981, "step": 1543 }, { "epoch": 0.5, "learning_rate": 4.758779883992087e-06, "loss": 0.1861, "step": 1544 }, { "epoch": 0.5, "learning_rate": 4.758404921156622e-06, "loss": 0.1943, "step": 1545 }, { "epoch": 0.5, "learning_rate": 4.7580296819142565e-06, "loss": 0.2021, "step": 1546 }, { "epoch": 0.5, "learning_rate": 4.757654166310919e-06, "loss": 0.1761, "step": 1547 }, { "epoch": 0.5, "learning_rate": 4.757278374392567e-06, "loss": 0.1959, "step": 1548 }, { "epoch": 0.5, "learning_rate": 4.7569023062051936e-06, "loss": 0.1985, "step": 1549 }, { "epoch": 0.5, "learning_rate": 4.756525961794826e-06, "loss": 0.2134, "step": 1550 }, { "epoch": 0.5, "learning_rate": 4.756149341207526e-06, "loss": 0.1731, "step": 1551 }, { "epoch": 0.5, "learning_rate": 4.755772444489388e-06, "loss": 0.1866, "step": 1552 }, { "epoch": 0.5, "learning_rate": 4.75539527168654e-06, "loss": 0.1877, "step": 1553 }, { "epoch": 0.5, "learning_rate": 4.755017822845145e-06, "loss": 0.1679, "step": 1554 }, { "epoch": 0.5, "learning_rate": 4.754640098011399e-06, "loss": 0.2055, "step": 1555 }, { "epoch": 0.5, "learning_rate": 4.754262097231531e-06, "loss": 0.1935, "step": 1556 }, { "epoch": 0.5, "learning_rate": 4.753883820551806e-06, "loss": 0.1665, "step": 1557 }, { "epoch": 0.5, "learning_rate": 4.75350526801852e-06, "loss": 0.2075, "step": 1558 }, { "epoch": 0.51, "learning_rate": 4.753126439678005e-06, "loss": 0.2029, "step": 1559 }, { "epoch": 0.51, "learning_rate": 4.752747335576626e-06, "loss": 0.2013, "step": 1560 }, { "epoch": 0.51, "learning_rate": 4.752367955760781e-06, "loss": 0.1781, "step": 1561 }, { "epoch": 0.51, "learning_rate": 4.751988300276903e-06, "loss": 0.2087, "step": 1562 }, { "epoch": 0.51, "learning_rate": 4.751608369171458e-06, "loss": 0.1868, "step": 1563 }, { "epoch": 0.51, "learning_rate": 4.751228162490946e-06, "loss": 0.202, "step": 1564 }, { "epoch": 0.51, "learning_rate": 4.750847680281901e-06, "loss": 0.2006, "step": 1565 }, { "epoch": 0.51, "learning_rate": 4.750466922590888e-06, "loss": 0.1847, "step": 1566 }, { "epoch": 0.51, "learning_rate": 4.750085889464512e-06, "loss": 0.2177, "step": 1567 }, { "epoch": 0.51, "learning_rate": 4.749704580949404e-06, "loss": 0.1899, "step": 1568 }, { "epoch": 0.51, "learning_rate": 4.749322997092235e-06, "loss": 0.1912, "step": 1569 }, { "epoch": 0.51, "learning_rate": 4.748941137939706e-06, "loss": 0.1939, "step": 1570 }, { "epoch": 0.51, "learning_rate": 4.748559003538553e-06, "loss": 0.2016, "step": 1571 }, { "epoch": 0.51, "learning_rate": 4.748176593935546e-06, "loss": 0.1882, "step": 1572 }, { "epoch": 0.51, "learning_rate": 4.7477939091774885e-06, "loss": 0.1797, "step": 1573 }, { "epoch": 0.51, "learning_rate": 4.7474109493112154e-06, "loss": 0.1927, "step": 1574 }, { "epoch": 0.51, "learning_rate": 4.7470277143836e-06, "loss": 0.2101, "step": 1575 }, { "epoch": 0.51, "learning_rate": 4.746644204441545e-06, "loss": 0.2099, "step": 1576 }, { "epoch": 0.51, "learning_rate": 4.746260419531989e-06, "loss": 0.1786, "step": 1577 }, { "epoch": 0.51, "learning_rate": 4.745876359701902e-06, "loss": 0.2014, "step": 1578 }, { "epoch": 0.51, "learning_rate": 4.745492024998291e-06, "loss": 0.1932, "step": 1579 }, { "epoch": 0.51, "learning_rate": 4.745107415468194e-06, "loss": 0.2014, "step": 1580 }, { "epoch": 0.51, "learning_rate": 4.744722531158683e-06, "loss": 0.2015, "step": 1581 }, { "epoch": 0.51, "learning_rate": 4.744337372116866e-06, "loss": 0.1847, "step": 1582 }, { "epoch": 0.51, "learning_rate": 4.743951938389881e-06, "loss": 0.2009, "step": 1583 }, { "epoch": 0.51, "learning_rate": 4.743566230024902e-06, "loss": 0.1768, "step": 1584 }, { "epoch": 0.51, "learning_rate": 4.7431802470691355e-06, "loss": 0.2069, "step": 1585 }, { "epoch": 0.51, "learning_rate": 4.7427939895698235e-06, "loss": 0.1674, "step": 1586 }, { "epoch": 0.51, "learning_rate": 4.742407457574238e-06, "loss": 0.185, "step": 1587 }, { "epoch": 0.51, "learning_rate": 4.7420206511296885e-06, "loss": 0.1885, "step": 1588 }, { "epoch": 0.51, "learning_rate": 4.7416335702835155e-06, "loss": 0.2005, "step": 1589 }, { "epoch": 0.52, "learning_rate": 4.741246215083094e-06, "loss": 0.2057, "step": 1590 }, { "epoch": 0.52, "learning_rate": 4.740858585575832e-06, "loss": 0.2004, "step": 1591 }, { "epoch": 0.52, "learning_rate": 4.7404706818091736e-06, "loss": 0.2123, "step": 1592 }, { "epoch": 0.52, "learning_rate": 4.740082503830593e-06, "loss": 0.18, "step": 1593 }, { "epoch": 0.52, "learning_rate": 4.7396940516875996e-06, "loss": 0.2081, "step": 1594 }, { "epoch": 0.52, "learning_rate": 4.739305325427736e-06, "loss": 0.1994, "step": 1595 }, { "epoch": 0.52, "learning_rate": 4.738916325098579e-06, "loss": 0.2045, "step": 1596 }, { "epoch": 0.52, "learning_rate": 4.738527050747738e-06, "loss": 0.2217, "step": 1597 }, { "epoch": 0.52, "learning_rate": 4.738137502422856e-06, "loss": 0.2079, "step": 1598 }, { "epoch": 0.52, "learning_rate": 4.737747680171611e-06, "loss": 0.1895, "step": 1599 }, { "epoch": 0.52, "learning_rate": 4.737357584041713e-06, "loss": 0.1833, "step": 1600 }, { "epoch": 0.52, "learning_rate": 4.7369672140809065e-06, "loss": 0.2108, "step": 1601 }, { "epoch": 0.52, "learning_rate": 4.736576570336968e-06, "loss": 0.1999, "step": 1602 }, { "epoch": 0.52, "learning_rate": 4.736185652857709e-06, "loss": 0.2087, "step": 1603 }, { "epoch": 0.52, "learning_rate": 4.7357944616909745e-06, "loss": 0.1846, "step": 1604 }, { "epoch": 0.52, "learning_rate": 4.735402996884642e-06, "loss": 0.1975, "step": 1605 }, { "epoch": 0.52, "learning_rate": 4.7350112584866225e-06, "loss": 0.21, "step": 1606 }, { "epoch": 0.52, "learning_rate": 4.734619246544862e-06, "loss": 0.2036, "step": 1607 }, { "epoch": 0.52, "learning_rate": 4.734226961107338e-06, "loss": 0.1851, "step": 1608 }, { "epoch": 0.52, "learning_rate": 4.733834402222064e-06, "loss": 0.1856, "step": 1609 }, { "epoch": 0.52, "learning_rate": 4.7334415699370825e-06, "loss": 0.2047, "step": 1610 }, { "epoch": 0.52, "learning_rate": 4.733048464300476e-06, "loss": 0.2122, "step": 1611 }, { "epoch": 0.52, "learning_rate": 4.732655085360355e-06, "loss": 0.2001, "step": 1612 }, { "epoch": 0.52, "learning_rate": 4.7322614331648645e-06, "loss": 0.2155, "step": 1613 }, { "epoch": 0.52, "learning_rate": 4.731867507762184e-06, "loss": 0.1804, "step": 1614 }, { "epoch": 0.52, "learning_rate": 4.731473309200528e-06, "loss": 0.1967, "step": 1615 }, { "epoch": 0.52, "learning_rate": 4.731078837528141e-06, "loss": 0.1782, "step": 1616 }, { "epoch": 0.52, "learning_rate": 4.730684092793302e-06, "loss": 0.1792, "step": 1617 }, { "epoch": 0.52, "learning_rate": 4.730289075044326e-06, "loss": 0.2085, "step": 1618 }, { "epoch": 0.52, "learning_rate": 4.729893784329557e-06, "loss": 0.2036, "step": 1619 }, { "epoch": 0.52, "learning_rate": 4.729498220697377e-06, "loss": 0.1856, "step": 1620 }, { "epoch": 0.53, "learning_rate": 4.729102384196197e-06, "loss": 0.1845, "step": 1621 }, { "epoch": 0.53, "learning_rate": 4.728706274874465e-06, "loss": 0.1771, "step": 1622 }, { "epoch": 0.53, "learning_rate": 4.72830989278066e-06, "loss": 0.1884, "step": 1623 }, { "epoch": 0.53, "learning_rate": 4.727913237963296e-06, "loss": 0.1792, "step": 1624 }, { "epoch": 0.53, "learning_rate": 4.72751631047092e-06, "loss": 0.1977, "step": 1625 }, { "epoch": 0.53, "learning_rate": 4.727119110352112e-06, "loss": 0.1878, "step": 1626 }, { "epoch": 0.53, "learning_rate": 4.726721637655484e-06, "loss": 0.1994, "step": 1627 }, { "epoch": 0.53, "learning_rate": 4.7263238924296835e-06, "loss": 0.1908, "step": 1628 }, { "epoch": 0.53, "learning_rate": 4.725925874723393e-06, "loss": 0.1778, "step": 1629 }, { "epoch": 0.53, "learning_rate": 4.725527584585322e-06, "loss": 0.1974, "step": 1630 }, { "epoch": 0.53, "learning_rate": 4.725129022064221e-06, "loss": 0.1949, "step": 1631 }, { "epoch": 0.53, "learning_rate": 4.724730187208868e-06, "loss": 0.2084, "step": 1632 }, { "epoch": 0.53, "learning_rate": 4.724331080068077e-06, "loss": 0.1967, "step": 1633 }, { "epoch": 0.53, "learning_rate": 4.723931700690695e-06, "loss": 0.1818, "step": 1634 }, { "epoch": 0.53, "learning_rate": 4.7235320491256026e-06, "loss": 0.1888, "step": 1635 }, { "epoch": 0.53, "learning_rate": 4.723132125421712e-06, "loss": 0.1834, "step": 1636 }, { "epoch": 0.53, "learning_rate": 4.722731929627971e-06, "loss": 0.1882, "step": 1637 }, { "epoch": 0.53, "learning_rate": 4.722331461793361e-06, "loss": 0.2064, "step": 1638 }, { "epoch": 0.53, "learning_rate": 4.721930721966893e-06, "loss": 0.1888, "step": 1639 }, { "epoch": 0.53, "learning_rate": 4.7215297101976145e-06, "loss": 0.1938, "step": 1640 }, { "epoch": 0.53, "learning_rate": 4.721128426534605e-06, "loss": 0.1839, "step": 1641 }, { "epoch": 0.53, "learning_rate": 4.720726871026978e-06, "loss": 0.2022, "step": 1642 }, { "epoch": 0.53, "learning_rate": 4.720325043723881e-06, "loss": 0.2064, "step": 1643 }, { "epoch": 0.53, "learning_rate": 4.719922944674494e-06, "loss": 0.2015, "step": 1644 }, { "epoch": 0.53, "learning_rate": 4.719520573928028e-06, "loss": 0.1962, "step": 1645 }, { "epoch": 0.53, "learning_rate": 4.71911793153373e-06, "loss": 0.2104, "step": 1646 }, { "epoch": 0.53, "learning_rate": 4.7187150175408805e-06, "loss": 0.1983, "step": 1647 }, { "epoch": 0.53, "learning_rate": 4.718311831998792e-06, "loss": 0.1949, "step": 1648 }, { "epoch": 0.53, "learning_rate": 4.71790837495681e-06, "loss": 0.1907, "step": 1649 }, { "epoch": 0.53, "learning_rate": 4.717504646464314e-06, "loss": 0.1758, "step": 1650 }, { "epoch": 0.53, "learning_rate": 4.717100646570716e-06, "loss": 0.1997, "step": 1651 }, { "epoch": 0.54, "learning_rate": 4.7166963753254616e-06, "loss": 0.2045, "step": 1652 }, { "epoch": 0.54, "learning_rate": 4.716291832778031e-06, "loss": 0.1808, "step": 1653 }, { "epoch": 0.54, "learning_rate": 4.715887018977935e-06, "loss": 0.2071, "step": 1654 }, { "epoch": 0.54, "learning_rate": 4.715481933974719e-06, "loss": 0.2148, "step": 1655 }, { "epoch": 0.54, "learning_rate": 4.715076577817963e-06, "loss": 0.1788, "step": 1656 }, { "epoch": 0.54, "learning_rate": 4.714670950557276e-06, "loss": 0.1907, "step": 1657 }, { "epoch": 0.54, "learning_rate": 4.714265052242306e-06, "loss": 0.1886, "step": 1658 }, { "epoch": 0.54, "learning_rate": 4.7138588829227285e-06, "loss": 0.2148, "step": 1659 }, { "epoch": 0.54, "learning_rate": 4.713452442648255e-06, "loss": 0.2022, "step": 1660 }, { "epoch": 0.54, "learning_rate": 4.7130457314686316e-06, "loss": 0.2044, "step": 1661 }, { "epoch": 0.54, "learning_rate": 4.712638749433634e-06, "loss": 0.2029, "step": 1662 }, { "epoch": 0.54, "learning_rate": 4.7122314965930724e-06, "loss": 0.1991, "step": 1663 }, { "epoch": 0.54, "learning_rate": 4.711823972996793e-06, "loss": 0.1931, "step": 1664 }, { "epoch": 0.54, "learning_rate": 4.711416178694671e-06, "loss": 0.2047, "step": 1665 }, { "epoch": 0.54, "learning_rate": 4.711008113736617e-06, "loss": 0.1915, "step": 1666 }, { "epoch": 0.54, "learning_rate": 4.710599778172575e-06, "loss": 0.2012, "step": 1667 }, { "epoch": 0.54, "learning_rate": 4.7101911720525186e-06, "loss": 0.199, "step": 1668 }, { "epoch": 0.54, "learning_rate": 4.70978229542646e-06, "loss": 0.1844, "step": 1669 }, { "epoch": 0.54, "learning_rate": 4.709373148344441e-06, "loss": 0.1947, "step": 1670 }, { "epoch": 0.54, "learning_rate": 4.708963730856536e-06, "loss": 0.1966, "step": 1671 }, { "epoch": 0.54, "learning_rate": 4.708554043012857e-06, "loss": 0.1883, "step": 1672 }, { "epoch": 0.54, "learning_rate": 4.708144084863541e-06, "loss": 0.2005, "step": 1673 }, { "epoch": 0.54, "learning_rate": 4.707733856458767e-06, "loss": 0.1748, "step": 1674 }, { "epoch": 0.54, "learning_rate": 4.707323357848741e-06, "loss": 0.2035, "step": 1675 }, { "epoch": 0.54, "learning_rate": 4.706912589083704e-06, "loss": 0.1849, "step": 1676 }, { "epoch": 0.54, "learning_rate": 4.706501550213932e-06, "loss": 0.2054, "step": 1677 }, { "epoch": 0.54, "learning_rate": 4.70609024128973e-06, "loss": 0.1942, "step": 1678 }, { "epoch": 0.54, "learning_rate": 4.7056786623614395e-06, "loss": 0.1745, "step": 1679 }, { "epoch": 0.54, "learning_rate": 4.705266813479434e-06, "loss": 0.1916, "step": 1680 }, { "epoch": 0.54, "learning_rate": 4.704854694694117e-06, "loss": 0.1928, "step": 1681 }, { "epoch": 0.55, "learning_rate": 4.704442306055932e-06, "loss": 0.1959, "step": 1682 }, { "epoch": 0.55, "learning_rate": 4.704029647615348e-06, "loss": 0.1983, "step": 1683 }, { "epoch": 0.55, "learning_rate": 4.703616719422873e-06, "loss": 0.1936, "step": 1684 }, { "epoch": 0.55, "learning_rate": 4.703203521529044e-06, "loss": 0.1814, "step": 1685 }, { "epoch": 0.55, "learning_rate": 4.702790053984432e-06, "loss": 0.1953, "step": 1686 }, { "epoch": 0.55, "learning_rate": 4.702376316839642e-06, "loss": 0.1977, "step": 1687 }, { "epoch": 0.55, "learning_rate": 4.701962310145312e-06, "loss": 0.1907, "step": 1688 }, { "epoch": 0.55, "learning_rate": 4.7015480339521115e-06, "loss": 0.1933, "step": 1689 }, { "epoch": 0.55, "learning_rate": 4.701133488310744e-06, "loss": 0.1904, "step": 1690 }, { "epoch": 0.55, "learning_rate": 4.700718673271947e-06, "loss": 0.1776, "step": 1691 }, { "epoch": 0.55, "learning_rate": 4.700303588886489e-06, "loss": 0.1934, "step": 1692 }, { "epoch": 0.55, "learning_rate": 4.699888235205172e-06, "loss": 0.1965, "step": 1693 }, { "epoch": 0.55, "learning_rate": 4.699472612278831e-06, "loss": 0.2013, "step": 1694 }, { "epoch": 0.55, "learning_rate": 4.699056720158336e-06, "loss": 0.1816, "step": 1695 }, { "epoch": 0.55, "learning_rate": 4.698640558894586e-06, "loss": 0.1903, "step": 1696 }, { "epoch": 0.55, "learning_rate": 4.698224128538517e-06, "loss": 0.2038, "step": 1697 }, { "epoch": 0.55, "learning_rate": 4.6978074291410936e-06, "loss": 0.1891, "step": 1698 }, { "epoch": 0.55, "learning_rate": 4.697390460753318e-06, "loss": 0.1908, "step": 1699 }, { "epoch": 0.55, "learning_rate": 4.696973223426224e-06, "loss": 0.1965, "step": 1700 }, { "epoch": 0.55, "learning_rate": 4.696555717210873e-06, "loss": 0.19, "step": 1701 }, { "epoch": 0.55, "learning_rate": 4.6961379421583685e-06, "loss": 0.1751, "step": 1702 }, { "epoch": 0.55, "learning_rate": 4.695719898319839e-06, "loss": 0.2049, "step": 1703 }, { "epoch": 0.55, "learning_rate": 4.695301585746451e-06, "loss": 0.2029, "step": 1704 }, { "epoch": 0.55, "learning_rate": 4.6948830044894016e-06, "loss": 0.1973, "step": 1705 }, { "epoch": 0.55, "learning_rate": 4.6944641545999194e-06, "loss": 0.2079, "step": 1706 }, { "epoch": 0.55, "learning_rate": 4.694045036129269e-06, "loss": 0.183, "step": 1707 }, { "epoch": 0.55, "learning_rate": 4.693625649128746e-06, "loss": 0.1657, "step": 1708 }, { "epoch": 0.55, "learning_rate": 4.69320599364968e-06, "loss": 0.1759, "step": 1709 }, { "epoch": 0.55, "learning_rate": 4.692786069743432e-06, "loss": 0.1931, "step": 1710 }, { "epoch": 0.55, "learning_rate": 4.692365877461397e-06, "loss": 0.1862, "step": 1711 }, { "epoch": 0.55, "learning_rate": 4.691945416855002e-06, "loss": 0.1965, "step": 1712 }, { "epoch": 0.56, "learning_rate": 4.6915246879757084e-06, "loss": 0.1804, "step": 1713 }, { "epoch": 0.56, "learning_rate": 4.691103690875007e-06, "loss": 0.1895, "step": 1714 }, { "epoch": 0.56, "learning_rate": 4.690682425604427e-06, "loss": 0.1908, "step": 1715 }, { "epoch": 0.56, "learning_rate": 4.690260892215525e-06, "loss": 0.1981, "step": 1716 }, { "epoch": 0.56, "learning_rate": 4.689839090759893e-06, "loss": 0.2046, "step": 1717 }, { "epoch": 0.56, "learning_rate": 4.689417021289157e-06, "loss": 0.1939, "step": 1718 }, { "epoch": 0.56, "learning_rate": 4.68899468385497e-06, "loss": 0.1929, "step": 1719 }, { "epoch": 0.56, "learning_rate": 4.688572078509027e-06, "loss": 0.1871, "step": 1720 }, { "epoch": 0.56, "learning_rate": 4.688149205303048e-06, "loss": 0.196, "step": 1721 }, { "epoch": 0.56, "learning_rate": 4.687726064288789e-06, "loss": 0.2059, "step": 1722 }, { "epoch": 0.56, "learning_rate": 4.6873026555180386e-06, "loss": 0.2058, "step": 1723 }, { "epoch": 0.56, "learning_rate": 4.6868789790426185e-06, "loss": 0.19, "step": 1724 }, { "epoch": 0.56, "learning_rate": 4.6864550349143815e-06, "loss": 0.1928, "step": 1725 }, { "epoch": 0.56, "learning_rate": 4.686030823185215e-06, "loss": 0.2229, "step": 1726 }, { "epoch": 0.56, "learning_rate": 4.685606343907038e-06, "loss": 0.2074, "step": 1727 }, { "epoch": 0.56, "learning_rate": 4.685181597131802e-06, "loss": 0.1825, "step": 1728 }, { "epoch": 0.56, "learning_rate": 4.684756582911494e-06, "loss": 0.1797, "step": 1729 }, { "epoch": 0.56, "learning_rate": 4.6843313012981295e-06, "loss": 0.1894, "step": 1730 }, { "epoch": 0.56, "learning_rate": 4.6839057523437606e-06, "loss": 0.2066, "step": 1731 }, { "epoch": 0.56, "learning_rate": 4.683479936100468e-06, "loss": 0.2039, "step": 1732 }, { "epoch": 0.56, "learning_rate": 4.68305385262037e-06, "loss": 0.2045, "step": 1733 }, { "epoch": 0.56, "learning_rate": 4.682627501955614e-06, "loss": 0.184, "step": 1734 }, { "epoch": 0.56, "learning_rate": 4.682200884158381e-06, "loss": 0.17, "step": 1735 }, { "epoch": 0.56, "learning_rate": 4.6817739992808855e-06, "loss": 0.1725, "step": 1736 }, { "epoch": 0.56, "learning_rate": 4.681346847375373e-06, "loss": 0.1783, "step": 1737 }, { "epoch": 0.56, "learning_rate": 4.6809194284941236e-06, "loss": 0.1871, "step": 1738 }, { "epoch": 0.56, "learning_rate": 4.6804917426894495e-06, "loss": 0.2067, "step": 1739 }, { "epoch": 0.56, "learning_rate": 4.6800637900136944e-06, "loss": 0.1849, "step": 1740 }, { "epoch": 0.56, "learning_rate": 4.679635570519236e-06, "loss": 0.1925, "step": 1741 }, { "epoch": 0.56, "learning_rate": 4.6792070842584855e-06, "loss": 0.1906, "step": 1742 }, { "epoch": 0.56, "learning_rate": 4.678778331283883e-06, "loss": 0.1961, "step": 1743 }, { "epoch": 0.57, "learning_rate": 4.678349311647905e-06, "loss": 0.1909, "step": 1744 }, { "epoch": 0.57, "learning_rate": 4.67792002540306e-06, "loss": 0.1936, "step": 1745 }, { "epoch": 0.57, "learning_rate": 4.677490472601888e-06, "loss": 0.1901, "step": 1746 }, { "epoch": 0.57, "learning_rate": 4.677060653296961e-06, "loss": 0.225, "step": 1747 }, { "epoch": 0.57, "learning_rate": 4.676630567540886e-06, "loss": 0.1846, "step": 1748 }, { "epoch": 0.57, "learning_rate": 4.6762002153863e-06, "loss": 0.2078, "step": 1749 }, { "epoch": 0.57, "learning_rate": 4.675769596885877e-06, "loss": 0.1969, "step": 1750 }, { "epoch": 0.57, "learning_rate": 4.675338712092316e-06, "loss": 0.1872, "step": 1751 }, { "epoch": 0.57, "learning_rate": 4.674907561058358e-06, "loss": 0.1822, "step": 1752 }, { "epoch": 0.57, "learning_rate": 4.674476143836768e-06, "loss": 0.1914, "step": 1753 }, { "epoch": 0.57, "learning_rate": 4.674044460480348e-06, "loss": 0.2025, "step": 1754 }, { "epoch": 0.57, "learning_rate": 4.673612511041933e-06, "loss": 0.2226, "step": 1755 }, { "epoch": 0.57, "learning_rate": 4.673180295574389e-06, "loss": 0.2048, "step": 1756 }, { "epoch": 0.57, "learning_rate": 4.672747814130615e-06, "loss": 0.1998, "step": 1757 }, { "epoch": 0.57, "learning_rate": 4.672315066763542e-06, "loss": 0.1949, "step": 1758 }, { "epoch": 0.57, "learning_rate": 4.671882053526135e-06, "loss": 0.1913, "step": 1759 }, { "epoch": 0.57, "learning_rate": 4.671448774471389e-06, "loss": 0.193, "step": 1760 }, { "epoch": 0.57, "learning_rate": 4.671015229652335e-06, "loss": 0.1734, "step": 1761 }, { "epoch": 0.57, "learning_rate": 4.670581419122034e-06, "loss": 0.1839, "step": 1762 }, { "epoch": 0.57, "learning_rate": 4.67014734293358e-06, "loss": 0.2164, "step": 1763 }, { "epoch": 0.57, "learning_rate": 4.6697130011401e-06, "loss": 0.1696, "step": 1764 }, { "epoch": 0.57, "learning_rate": 4.669278393794753e-06, "loss": 0.1722, "step": 1765 }, { "epoch": 0.57, "learning_rate": 4.6688435209507305e-06, "loss": 0.1906, "step": 1766 }, { "epoch": 0.57, "learning_rate": 4.668408382661257e-06, "loss": 0.1889, "step": 1767 }, { "epoch": 0.57, "learning_rate": 4.66797297897959e-06, "loss": 0.1873, "step": 1768 }, { "epoch": 0.57, "learning_rate": 4.667537309959018e-06, "loss": 0.2116, "step": 1769 }, { "epoch": 0.57, "learning_rate": 4.667101375652862e-06, "loss": 0.1926, "step": 1770 }, { "epoch": 0.57, "learning_rate": 4.666665176114477e-06, "loss": 0.1954, "step": 1771 }, { "epoch": 0.57, "learning_rate": 4.666228711397249e-06, "loss": 0.2063, "step": 1772 }, { "epoch": 0.57, "learning_rate": 4.665791981554598e-06, "loss": 0.1881, "step": 1773 }, { "epoch": 0.57, "learning_rate": 4.665354986639975e-06, "loss": 0.2054, "step": 1774 }, { "epoch": 0.58, "learning_rate": 4.664917726706864e-06, "loss": 0.1884, "step": 1775 }, { "epoch": 0.58, "learning_rate": 4.6644802018087806e-06, "loss": 0.1737, "step": 1776 }, { "epoch": 0.58, "learning_rate": 4.664042411999276e-06, "loss": 0.207, "step": 1777 }, { "epoch": 0.58, "learning_rate": 4.663604357331928e-06, "loss": 0.1895, "step": 1778 }, { "epoch": 0.58, "learning_rate": 4.6631660378603526e-06, "loss": 0.196, "step": 1779 }, { "epoch": 0.58, "learning_rate": 4.662727453638195e-06, "loss": 0.1815, "step": 1780 }, { "epoch": 0.58, "learning_rate": 4.662288604719134e-06, "loss": 0.2003, "step": 1781 }, { "epoch": 0.58, "learning_rate": 4.66184949115688e-06, "loss": 0.1867, "step": 1782 }, { "epoch": 0.58, "learning_rate": 4.661410113005177e-06, "loss": 0.1999, "step": 1783 }, { "epoch": 0.58, "learning_rate": 4.6609704703178e-06, "loss": 0.2032, "step": 1784 }, { "epoch": 0.58, "learning_rate": 4.660530563148557e-06, "loss": 0.185, "step": 1785 }, { "epoch": 0.58, "learning_rate": 4.66009039155129e-06, "loss": 0.1771, "step": 1786 }, { "epoch": 0.58, "learning_rate": 4.659649955579869e-06, "loss": 0.1928, "step": 1787 }, { "epoch": 0.58, "learning_rate": 4.659209255288201e-06, "loss": 0.1871, "step": 1788 }, { "epoch": 0.58, "learning_rate": 4.658768290730222e-06, "loss": 0.1916, "step": 1789 }, { "epoch": 0.58, "learning_rate": 4.658327061959904e-06, "loss": 0.1938, "step": 1790 }, { "epoch": 0.58, "learning_rate": 4.6578855690312474e-06, "loss": 0.1873, "step": 1791 }, { "epoch": 0.58, "learning_rate": 4.657443811998287e-06, "loss": 0.1952, "step": 1792 }, { "epoch": 0.58, "learning_rate": 4.65700179091509e-06, "loss": 0.1969, "step": 1793 }, { "epoch": 0.58, "learning_rate": 4.656559505835755e-06, "loss": 0.1895, "step": 1794 }, { "epoch": 0.58, "learning_rate": 4.656116956814414e-06, "loss": 0.1979, "step": 1795 }, { "epoch": 0.58, "learning_rate": 4.655674143905229e-06, "loss": 0.1811, "step": 1796 }, { "epoch": 0.58, "learning_rate": 4.655231067162398e-06, "loss": 0.194, "step": 1797 }, { "epoch": 0.58, "learning_rate": 4.65478772664015e-06, "loss": 0.1816, "step": 1798 }, { "epoch": 0.58, "learning_rate": 4.654344122392742e-06, "loss": 0.1802, "step": 1799 }, { "epoch": 0.58, "learning_rate": 4.6539002544744705e-06, "loss": 0.1944, "step": 1800 }, { "epoch": 0.58, "learning_rate": 4.653456122939659e-06, "loss": 0.1976, "step": 1801 }, { "epoch": 0.58, "learning_rate": 4.653011727842665e-06, "loss": 0.1949, "step": 1802 }, { "epoch": 0.58, "learning_rate": 4.652567069237877e-06, "loss": 0.1962, "step": 1803 }, { "epoch": 0.58, "learning_rate": 4.652122147179721e-06, "loss": 0.2025, "step": 1804 }, { "epoch": 0.58, "learning_rate": 4.651676961722647e-06, "loss": 0.2059, "step": 1805 }, { "epoch": 0.59, "learning_rate": 4.651231512921142e-06, "loss": 0.1884, "step": 1806 }, { "epoch": 0.59, "learning_rate": 4.650785800829726e-06, "loss": 0.1878, "step": 1807 }, { "epoch": 0.59, "learning_rate": 4.650339825502949e-06, "loss": 0.1825, "step": 1808 }, { "epoch": 0.59, "learning_rate": 4.6498935869953945e-06, "loss": 0.1757, "step": 1809 }, { "epoch": 0.59, "learning_rate": 4.649447085361677e-06, "loss": 0.2012, "step": 1810 }, { "epoch": 0.59, "learning_rate": 4.649000320656445e-06, "loss": 0.1999, "step": 1811 }, { "epoch": 0.59, "learning_rate": 4.648553292934377e-06, "loss": 0.1679, "step": 1812 }, { "epoch": 0.59, "learning_rate": 4.648106002250186e-06, "loss": 0.1789, "step": 1813 }, { "epoch": 0.59, "learning_rate": 4.647658448658616e-06, "loss": 0.1943, "step": 1814 }, { "epoch": 0.59, "learning_rate": 4.647210632214443e-06, "loss": 0.2053, "step": 1815 }, { "epoch": 0.59, "learning_rate": 4.646762552972475e-06, "loss": 0.1761, "step": 1816 }, { "epoch": 0.59, "learning_rate": 4.646314210987552e-06, "loss": 0.1949, "step": 1817 }, { "epoch": 0.59, "learning_rate": 4.645865606314548e-06, "loss": 0.198, "step": 1818 }, { "epoch": 0.59, "learning_rate": 4.645416739008367e-06, "loss": 0.1808, "step": 1819 }, { "epoch": 0.59, "learning_rate": 4.644967609123947e-06, "loss": 0.2208, "step": 1820 }, { "epoch": 0.59, "learning_rate": 4.644518216716256e-06, "loss": 0.1996, "step": 1821 }, { "epoch": 0.59, "learning_rate": 4.644068561840297e-06, "loss": 0.1906, "step": 1822 }, { "epoch": 0.59, "learning_rate": 4.643618644551101e-06, "loss": 0.1926, "step": 1823 }, { "epoch": 0.59, "learning_rate": 4.643168464903736e-06, "loss": 0.195, "step": 1824 }, { "epoch": 0.59, "learning_rate": 4.642718022953297e-06, "loss": 0.1784, "step": 1825 }, { "epoch": 0.59, "learning_rate": 4.642267318754915e-06, "loss": 0.1834, "step": 1826 }, { "epoch": 0.59, "learning_rate": 4.641816352363753e-06, "loss": 0.2033, "step": 1827 }, { "epoch": 0.59, "learning_rate": 4.641365123835004e-06, "loss": 0.1737, "step": 1828 }, { "epoch": 0.59, "learning_rate": 4.640913633223893e-06, "loss": 0.1787, "step": 1829 }, { "epoch": 0.59, "learning_rate": 4.64046188058568e-06, "loss": 0.1848, "step": 1830 }, { "epoch": 0.59, "learning_rate": 4.6400098659756525e-06, "loss": 0.19, "step": 1831 }, { "epoch": 0.59, "learning_rate": 4.639557589449135e-06, "loss": 0.1696, "step": 1832 }, { "epoch": 0.59, "learning_rate": 4.639105051061481e-06, "loss": 0.1879, "step": 1833 }, { "epoch": 0.59, "learning_rate": 4.638652250868078e-06, "loss": 0.1945, "step": 1834 }, { "epoch": 0.59, "learning_rate": 4.6381991889243416e-06, "loss": 0.1932, "step": 1835 }, { "epoch": 0.59, "learning_rate": 4.637745865285725e-06, "loss": 0.1969, "step": 1836 }, { "epoch": 0.6, "learning_rate": 4.637292280007709e-06, "loss": 0.2136, "step": 1837 }, { "epoch": 0.6, "learning_rate": 4.6368384331458085e-06, "loss": 0.1991, "step": 1838 }, { "epoch": 0.6, "learning_rate": 4.63638432475557e-06, "loss": 0.2063, "step": 1839 }, { "epoch": 0.6, "learning_rate": 4.635929954892572e-06, "loss": 0.1848, "step": 1840 }, { "epoch": 0.6, "learning_rate": 4.6354753236124254e-06, "loss": 0.1915, "step": 1841 }, { "epoch": 0.6, "learning_rate": 4.635020430970771e-06, "loss": 0.1859, "step": 1842 }, { "epoch": 0.6, "learning_rate": 4.6345652770232856e-06, "loss": 0.1902, "step": 1843 }, { "epoch": 0.6, "learning_rate": 4.6341098618256745e-06, "loss": 0.193, "step": 1844 }, { "epoch": 0.6, "learning_rate": 4.633654185433676e-06, "loss": 0.2138, "step": 1845 }, { "epoch": 0.6, "learning_rate": 4.63319824790306e-06, "loss": 0.1954, "step": 1846 }, { "epoch": 0.6, "learning_rate": 4.6327420492896295e-06, "loss": 0.204, "step": 1847 }, { "epoch": 0.6, "learning_rate": 4.632285589649219e-06, "loss": 0.1977, "step": 1848 }, { "epoch": 0.6, "learning_rate": 4.631828869037694e-06, "loss": 0.1739, "step": 1849 }, { "epoch": 0.6, "learning_rate": 4.631371887510954e-06, "loss": 0.194, "step": 1850 }, { "epoch": 0.6, "learning_rate": 4.630914645124928e-06, "loss": 0.1929, "step": 1851 }, { "epoch": 0.6, "learning_rate": 4.630457141935577e-06, "loss": 0.1883, "step": 1852 }, { "epoch": 0.6, "learning_rate": 4.629999377998898e-06, "loss": 0.1939, "step": 1853 }, { "epoch": 0.6, "learning_rate": 4.629541353370914e-06, "loss": 0.2048, "step": 1854 }, { "epoch": 0.6, "learning_rate": 4.629083068107684e-06, "loss": 0.1909, "step": 1855 }, { "epoch": 0.6, "learning_rate": 4.628624522265298e-06, "loss": 0.196, "step": 1856 }, { "epoch": 0.6, "learning_rate": 4.628165715899877e-06, "loss": 0.192, "step": 1857 }, { "epoch": 0.6, "learning_rate": 4.627706649067575e-06, "loss": 0.1822, "step": 1858 }, { "epoch": 0.6, "learning_rate": 4.627247321824576e-06, "loss": 0.1822, "step": 1859 }, { "epoch": 0.6, "learning_rate": 4.6267877342271e-06, "loss": 0.202, "step": 1860 }, { "epoch": 0.6, "learning_rate": 4.626327886331392e-06, "loss": 0.19, "step": 1861 }, { "epoch": 0.6, "learning_rate": 4.625867778193737e-06, "loss": 0.1827, "step": 1862 }, { "epoch": 0.6, "learning_rate": 4.625407409870444e-06, "loss": 0.1852, "step": 1863 }, { "epoch": 0.6, "learning_rate": 4.624946781417861e-06, "loss": 0.179, "step": 1864 }, { "epoch": 0.6, "learning_rate": 4.624485892892363e-06, "loss": 0.1927, "step": 1865 }, { "epoch": 0.6, "learning_rate": 4.624024744350358e-06, "loss": 0.1835, "step": 1866 }, { "epoch": 0.6, "learning_rate": 4.623563335848286e-06, "loss": 0.1781, "step": 1867 }, { "epoch": 0.61, "learning_rate": 4.62310166744262e-06, "loss": 0.1915, "step": 1868 }, { "epoch": 0.61, "learning_rate": 4.622639739189863e-06, "loss": 0.1981, "step": 1869 }, { "epoch": 0.61, "learning_rate": 4.62217755114655e-06, "loss": 0.18, "step": 1870 }, { "epoch": 0.61, "learning_rate": 4.62171510336925e-06, "loss": 0.2216, "step": 1871 }, { "epoch": 0.61, "learning_rate": 4.621252395914561e-06, "loss": 0.1743, "step": 1872 }, { "epoch": 0.61, "learning_rate": 4.620789428839114e-06, "loss": 0.2059, "step": 1873 }, { "epoch": 0.61, "learning_rate": 4.620326202199572e-06, "loss": 0.1939, "step": 1874 }, { "epoch": 0.61, "learning_rate": 4.619862716052629e-06, "loss": 0.1888, "step": 1875 }, { "epoch": 0.61, "learning_rate": 4.6193989704550105e-06, "loss": 0.1864, "step": 1876 }, { "epoch": 0.61, "learning_rate": 4.6189349654634766e-06, "loss": 0.1834, "step": 1877 }, { "epoch": 0.61, "learning_rate": 4.618470701134815e-06, "loss": 0.2063, "step": 1878 }, { "epoch": 0.61, "learning_rate": 4.618006177525849e-06, "loss": 0.1837, "step": 1879 }, { "epoch": 0.61, "learning_rate": 4.61754139469343e-06, "loss": 0.1965, "step": 1880 }, { "epoch": 0.61, "learning_rate": 4.6170763526944425e-06, "loss": 0.1958, "step": 1881 }, { "epoch": 0.61, "learning_rate": 4.616611051585806e-06, "loss": 0.1905, "step": 1882 }, { "epoch": 0.61, "learning_rate": 4.6161454914244665e-06, "loss": 0.1914, "step": 1883 }, { "epoch": 0.61, "learning_rate": 4.615679672267405e-06, "loss": 0.1862, "step": 1884 }, { "epoch": 0.61, "learning_rate": 4.615213594171633e-06, "loss": 0.1981, "step": 1885 }, { "epoch": 0.61, "learning_rate": 4.614747257194194e-06, "loss": 0.1827, "step": 1886 }, { "epoch": 0.61, "learning_rate": 4.614280661392163e-06, "loss": 0.1849, "step": 1887 }, { "epoch": 0.61, "learning_rate": 4.613813806822647e-06, "loss": 0.2053, "step": 1888 }, { "epoch": 0.61, "learning_rate": 4.613346693542784e-06, "loss": 0.1903, "step": 1889 }, { "epoch": 0.61, "learning_rate": 4.6128793216097445e-06, "loss": 0.1967, "step": 1890 }, { "epoch": 0.61, "learning_rate": 4.61241169108073e-06, "loss": 0.1691, "step": 1891 }, { "epoch": 0.61, "learning_rate": 4.611943802012975e-06, "loss": 0.195, "step": 1892 }, { "epoch": 0.61, "learning_rate": 4.611475654463743e-06, "loss": 0.1977, "step": 1893 }, { "epoch": 0.61, "learning_rate": 4.6110072484903326e-06, "loss": 0.2088, "step": 1894 }, { "epoch": 0.61, "learning_rate": 4.610538584150071e-06, "loss": 0.1746, "step": 1895 }, { "epoch": 0.61, "learning_rate": 4.610069661500317e-06, "loss": 0.1941, "step": 1896 }, { "epoch": 0.61, "learning_rate": 4.609600480598464e-06, "loss": 0.1838, "step": 1897 }, { "epoch": 0.62, "learning_rate": 4.6091310415019355e-06, "loss": 0.1782, "step": 1898 }, { "epoch": 0.62, "learning_rate": 4.608661344268185e-06, "loss": 0.1942, "step": 1899 }, { "epoch": 0.62, "learning_rate": 4.608191388954699e-06, "loss": 0.2111, "step": 1900 }, { "epoch": 0.62, "learning_rate": 4.607721175618997e-06, "loss": 0.1948, "step": 1901 }, { "epoch": 0.62, "learning_rate": 4.6072507043186265e-06, "loss": 0.1821, "step": 1902 }, { "epoch": 0.62, "learning_rate": 4.60677997511117e-06, "loss": 0.1777, "step": 1903 }, { "epoch": 0.62, "learning_rate": 4.606308988054239e-06, "loss": 0.1789, "step": 1904 }, { "epoch": 0.62, "learning_rate": 4.605837743205479e-06, "loss": 0.1644, "step": 1905 }, { "epoch": 0.62, "learning_rate": 4.605366240622565e-06, "loss": 0.2017, "step": 1906 }, { "epoch": 0.62, "learning_rate": 4.604894480363205e-06, "loss": 0.1798, "step": 1907 }, { "epoch": 0.62, "learning_rate": 4.604422462485138e-06, "loss": 0.1919, "step": 1908 }, { "epoch": 0.62, "learning_rate": 4.603950187046134e-06, "loss": 0.222, "step": 1909 }, { "epoch": 0.62, "learning_rate": 4.603477654103994e-06, "loss": 0.1857, "step": 1910 }, { "epoch": 0.62, "learning_rate": 4.603004863716553e-06, "loss": 0.1598, "step": 1911 }, { "epoch": 0.62, "learning_rate": 4.602531815941676e-06, "loss": 0.1898, "step": 1912 }, { "epoch": 0.62, "learning_rate": 4.602058510837257e-06, "loss": 0.2076, "step": 1913 }, { "epoch": 0.62, "learning_rate": 4.6015849484612265e-06, "loss": 0.1837, "step": 1914 }, { "epoch": 0.62, "learning_rate": 4.601111128871544e-06, "loss": 0.1908, "step": 1915 }, { "epoch": 0.62, "learning_rate": 4.600637052126199e-06, "loss": 0.1897, "step": 1916 }, { "epoch": 0.62, "learning_rate": 4.600162718283215e-06, "loss": 0.193, "step": 1917 }, { "epoch": 0.62, "learning_rate": 4.599688127400645e-06, "loss": 0.1932, "step": 1918 }, { "epoch": 0.62, "learning_rate": 4.599213279536575e-06, "loss": 0.1748, "step": 1919 }, { "epoch": 0.62, "learning_rate": 4.598738174749121e-06, "loss": 0.1866, "step": 1920 }, { "epoch": 0.62, "learning_rate": 4.598262813096432e-06, "loss": 0.1911, "step": 1921 }, { "epoch": 0.62, "learning_rate": 4.597787194636688e-06, "loss": 0.1839, "step": 1922 }, { "epoch": 0.62, "learning_rate": 4.597311319428099e-06, "loss": 0.1805, "step": 1923 }, { "epoch": 0.62, "learning_rate": 4.596835187528908e-06, "loss": 0.1982, "step": 1924 }, { "epoch": 0.62, "learning_rate": 4.59635879899739e-06, "loss": 0.2086, "step": 1925 }, { "epoch": 0.62, "learning_rate": 4.595882153891849e-06, "loss": 0.1881, "step": 1926 }, { "epoch": 0.62, "learning_rate": 4.595405252270622e-06, "loss": 0.2073, "step": 1927 }, { "epoch": 0.62, "learning_rate": 4.594928094192076e-06, "loss": 0.1896, "step": 1928 }, { "epoch": 0.63, "learning_rate": 4.594450679714613e-06, "loss": 0.2012, "step": 1929 }, { "epoch": 0.63, "learning_rate": 4.593973008896662e-06, "loss": 0.1936, "step": 1930 }, { "epoch": 0.63, "learning_rate": 4.593495081796686e-06, "loss": 0.1839, "step": 1931 }, { "epoch": 0.63, "learning_rate": 4.59301689847318e-06, "loss": 0.1834, "step": 1932 }, { "epoch": 0.63, "learning_rate": 4.592538458984666e-06, "loss": 0.2091, "step": 1933 }, { "epoch": 0.63, "learning_rate": 4.5920597633897015e-06, "loss": 0.1973, "step": 1934 }, { "epoch": 0.63, "learning_rate": 4.5915808117468766e-06, "loss": 0.1852, "step": 1935 }, { "epoch": 0.63, "learning_rate": 4.591101604114807e-06, "loss": 0.2019, "step": 1936 }, { "epoch": 0.63, "learning_rate": 4.590622140552144e-06, "loss": 0.1933, "step": 1937 }, { "epoch": 0.63, "learning_rate": 4.5901424211175715e-06, "loss": 0.1864, "step": 1938 }, { "epoch": 0.63, "learning_rate": 4.5896624458698e-06, "loss": 0.1885, "step": 1939 }, { "epoch": 0.63, "learning_rate": 4.5891822148675745e-06, "loss": 0.1748, "step": 1940 }, { "epoch": 0.63, "learning_rate": 4.588701728169671e-06, "loss": 0.1748, "step": 1941 }, { "epoch": 0.63, "learning_rate": 4.5882209858348956e-06, "loss": 0.1921, "step": 1942 }, { "epoch": 0.63, "learning_rate": 4.587739987922087e-06, "loss": 0.1812, "step": 1943 }, { "epoch": 0.63, "learning_rate": 4.587258734490115e-06, "loss": 0.2017, "step": 1944 }, { "epoch": 0.63, "learning_rate": 4.586777225597881e-06, "loss": 0.1892, "step": 1945 }, { "epoch": 0.63, "learning_rate": 4.586295461304315e-06, "loss": 0.1753, "step": 1946 }, { "epoch": 0.63, "learning_rate": 4.585813441668383e-06, "loss": 0.1791, "step": 1947 }, { "epoch": 0.63, "learning_rate": 4.585331166749077e-06, "loss": 0.1901, "step": 1948 }, { "epoch": 0.63, "learning_rate": 4.584848636605423e-06, "loss": 0.2037, "step": 1949 }, { "epoch": 0.63, "learning_rate": 4.58436585129648e-06, "loss": 0.1911, "step": 1950 }, { "epoch": 0.63, "learning_rate": 4.583882810881334e-06, "loss": 0.171, "step": 1951 }, { "epoch": 0.63, "learning_rate": 4.583399515419106e-06, "loss": 0.2096, "step": 1952 }, { "epoch": 0.63, "learning_rate": 4.582915964968946e-06, "loss": 0.1909, "step": 1953 }, { "epoch": 0.63, "learning_rate": 4.582432159590037e-06, "loss": 0.1874, "step": 1954 }, { "epoch": 0.63, "learning_rate": 4.58194809934159e-06, "loss": 0.1848, "step": 1955 }, { "epoch": 0.63, "learning_rate": 4.5814637842828506e-06, "loss": 0.1993, "step": 1956 }, { "epoch": 0.63, "learning_rate": 4.580979214473095e-06, "loss": 0.1792, "step": 1957 }, { "epoch": 0.63, "learning_rate": 4.580494389971628e-06, "loss": 0.1927, "step": 1958 }, { "epoch": 0.63, "learning_rate": 4.580009310837789e-06, "loss": 0.1882, "step": 1959 }, { "epoch": 0.64, "learning_rate": 4.579523977130946e-06, "loss": 0.1507, "step": 1960 }, { "epoch": 0.64, "learning_rate": 4.579038388910499e-06, "loss": 0.1869, "step": 1961 }, { "epoch": 0.64, "learning_rate": 4.578552546235882e-06, "loss": 0.1594, "step": 1962 }, { "epoch": 0.64, "learning_rate": 4.578066449166554e-06, "loss": 0.199, "step": 1963 }, { "epoch": 0.64, "learning_rate": 4.57758009776201e-06, "loss": 0.2066, "step": 1964 }, { "epoch": 0.64, "learning_rate": 4.577093492081774e-06, "loss": 0.202, "step": 1965 }, { "epoch": 0.64, "learning_rate": 4.576606632185403e-06, "loss": 0.1798, "step": 1966 }, { "epoch": 0.64, "learning_rate": 4.576119518132483e-06, "loss": 0.1857, "step": 1967 }, { "epoch": 0.64, "learning_rate": 4.575632149982631e-06, "loss": 0.2091, "step": 1968 }, { "epoch": 0.64, "learning_rate": 4.5751445277955e-06, "loss": 0.1823, "step": 1969 }, { "epoch": 0.64, "learning_rate": 4.574656651630767e-06, "loss": 0.1839, "step": 1970 }, { "epoch": 0.64, "learning_rate": 4.574168521548144e-06, "loss": 0.1777, "step": 1971 }, { "epoch": 0.64, "learning_rate": 4.573680137607373e-06, "loss": 0.1979, "step": 1972 }, { "epoch": 0.64, "learning_rate": 4.573191499868228e-06, "loss": 0.2026, "step": 1973 }, { "epoch": 0.64, "learning_rate": 4.572702608390513e-06, "loss": 0.199, "step": 1974 }, { "epoch": 0.64, "learning_rate": 4.572213463234065e-06, "loss": 0.1895, "step": 1975 }, { "epoch": 0.64, "learning_rate": 4.5717240644587495e-06, "loss": 0.201, "step": 1976 }, { "epoch": 0.64, "learning_rate": 4.571234412124464e-06, "loss": 0.1975, "step": 1977 }, { "epoch": 0.64, "learning_rate": 4.570744506291138e-06, "loss": 0.1956, "step": 1978 }, { "epoch": 0.64, "learning_rate": 4.570254347018731e-06, "loss": 0.1789, "step": 1979 }, { "epoch": 0.64, "learning_rate": 4.5697639343672325e-06, "loss": 0.2013, "step": 1980 }, { "epoch": 0.64, "learning_rate": 4.569273268396667e-06, "loss": 0.1829, "step": 1981 }, { "epoch": 0.64, "learning_rate": 4.568782349167084e-06, "loss": 0.1845, "step": 1982 }, { "epoch": 0.64, "learning_rate": 4.56829117673857e-06, "loss": 0.1683, "step": 1983 }, { "epoch": 0.64, "learning_rate": 4.567799751171237e-06, "loss": 0.1887, "step": 1984 }, { "epoch": 0.64, "learning_rate": 4.567308072525233e-06, "loss": 0.1862, "step": 1985 }, { "epoch": 0.64, "learning_rate": 4.566816140860735e-06, "loss": 0.1785, "step": 1986 }, { "epoch": 0.64, "learning_rate": 4.566323956237948e-06, "loss": 0.1965, "step": 1987 }, { "epoch": 0.64, "learning_rate": 4.565831518717114e-06, "loss": 0.1965, "step": 1988 }, { "epoch": 0.64, "learning_rate": 4.5653388283585e-06, "loss": 0.1882, "step": 1989 }, { "epoch": 0.64, "learning_rate": 4.564845885222407e-06, "loss": 0.1971, "step": 1990 }, { "epoch": 0.65, "learning_rate": 4.564352689369168e-06, "loss": 0.1758, "step": 1991 }, { "epoch": 0.65, "learning_rate": 4.563859240859144e-06, "loss": 0.1914, "step": 1992 }, { "epoch": 0.65, "learning_rate": 4.563365539752728e-06, "loss": 0.1943, "step": 1993 }, { "epoch": 0.65, "learning_rate": 4.5628715861103455e-06, "loss": 0.1762, "step": 1994 }, { "epoch": 0.65, "learning_rate": 4.562377379992451e-06, "loss": 0.1756, "step": 1995 }, { "epoch": 0.65, "learning_rate": 4.56188292145953e-06, "loss": 0.202, "step": 1996 }, { "epoch": 0.65, "learning_rate": 4.561388210572101e-06, "loss": 0.1977, "step": 1997 }, { "epoch": 0.65, "learning_rate": 4.56089324739071e-06, "loss": 0.1807, "step": 1998 }, { "epoch": 0.65, "learning_rate": 4.560398031975937e-06, "loss": 0.1782, "step": 1999 }, { "epoch": 0.65, "learning_rate": 4.55990256438839e-06, "loss": 0.1906, "step": 2000 }, { "epoch": 0.65, "learning_rate": 4.559406844688711e-06, "loss": 0.1899, "step": 2001 }, { "epoch": 0.65, "learning_rate": 4.558910872937572e-06, "loss": 0.1886, "step": 2002 }, { "epoch": 0.65, "learning_rate": 4.558414649195673e-06, "loss": 0.1823, "step": 2003 }, { "epoch": 0.65, "learning_rate": 4.557918173523747e-06, "loss": 0.1879, "step": 2004 }, { "epoch": 0.65, "learning_rate": 4.55742144598256e-06, "loss": 0.189, "step": 2005 }, { "epoch": 0.65, "learning_rate": 4.5569244666329055e-06, "loss": 0.1916, "step": 2006 }, { "epoch": 0.65, "learning_rate": 4.5564272355356085e-06, "loss": 0.1935, "step": 2007 }, { "epoch": 0.65, "learning_rate": 4.555929752751526e-06, "loss": 0.1971, "step": 2008 }, { "epoch": 0.65, "learning_rate": 4.5554320183415435e-06, "loss": 0.1805, "step": 2009 }, { "epoch": 0.65, "learning_rate": 4.5549340323665815e-06, "loss": 0.1994, "step": 2010 }, { "epoch": 0.65, "learning_rate": 4.554435794887586e-06, "loss": 0.1783, "step": 2011 }, { "epoch": 0.65, "learning_rate": 4.553937305965539e-06, "loss": 0.1943, "step": 2012 }, { "epoch": 0.65, "learning_rate": 4.553438565661448e-06, "loss": 0.179, "step": 2013 }, { "epoch": 0.65, "learning_rate": 4.552939574036356e-06, "loss": 0.1957, "step": 2014 }, { "epoch": 0.65, "learning_rate": 4.552440331151334e-06, "loss": 0.1939, "step": 2015 }, { "epoch": 0.65, "learning_rate": 4.551940837067486e-06, "loss": 0.1872, "step": 2016 }, { "epoch": 0.65, "learning_rate": 4.551441091845942e-06, "loss": 0.1927, "step": 2017 }, { "epoch": 0.65, "learning_rate": 4.550941095547869e-06, "loss": 0.1921, "step": 2018 }, { "epoch": 0.65, "learning_rate": 4.55044084823446e-06, "loss": 0.2038, "step": 2019 }, { "epoch": 0.65, "learning_rate": 4.5499403499669415e-06, "loss": 0.1866, "step": 2020 }, { "epoch": 0.65, "learning_rate": 4.549439600806568e-06, "loss": 0.1907, "step": 2021 }, { "epoch": 0.66, "learning_rate": 4.548938600814629e-06, "loss": 0.2031, "step": 2022 }, { "epoch": 0.66, "learning_rate": 4.5484373500524395e-06, "loss": 0.192, "step": 2023 }, { "epoch": 0.66, "learning_rate": 4.547935848581349e-06, "loss": 0.1891, "step": 2024 }, { "epoch": 0.66, "learning_rate": 4.5474340964627365e-06, "loss": 0.2099, "step": 2025 }, { "epoch": 0.66, "learning_rate": 4.5469320937580105e-06, "loss": 0.1842, "step": 2026 }, { "epoch": 0.66, "learning_rate": 4.546429840528612e-06, "loss": 0.1847, "step": 2027 }, { "epoch": 0.66, "learning_rate": 4.545927336836013e-06, "loss": 0.1805, "step": 2028 }, { "epoch": 0.66, "learning_rate": 4.545424582741714e-06, "loss": 0.1861, "step": 2029 }, { "epoch": 0.66, "learning_rate": 4.544921578307246e-06, "loss": 0.1946, "step": 2030 }, { "epoch": 0.66, "learning_rate": 4.544418323594175e-06, "loss": 0.2121, "step": 2031 }, { "epoch": 0.66, "learning_rate": 4.543914818664092e-06, "loss": 0.181, "step": 2032 }, { "epoch": 0.66, "learning_rate": 4.543411063578621e-06, "loss": 0.1907, "step": 2033 }, { "epoch": 0.66, "learning_rate": 4.5429070583994185e-06, "loss": 0.192, "step": 2034 }, { "epoch": 0.66, "learning_rate": 4.542402803188168e-06, "loss": 0.1793, "step": 2035 }, { "epoch": 0.66, "learning_rate": 4.5418982980065874e-06, "loss": 0.1815, "step": 2036 }, { "epoch": 0.66, "learning_rate": 4.541393542916423e-06, "loss": 0.1914, "step": 2037 }, { "epoch": 0.66, "learning_rate": 4.540888537979449e-06, "loss": 0.1903, "step": 2038 }, { "epoch": 0.66, "learning_rate": 4.540383283257477e-06, "loss": 0.1969, "step": 2039 }, { "epoch": 0.66, "learning_rate": 4.539877778812342e-06, "loss": 0.1971, "step": 2040 }, { "epoch": 0.66, "learning_rate": 4.539372024705916e-06, "loss": 0.1969, "step": 2041 }, { "epoch": 0.66, "learning_rate": 4.538866021000096e-06, "loss": 0.1825, "step": 2042 }, { "epoch": 0.66, "learning_rate": 4.538359767756813e-06, "loss": 0.1888, "step": 2043 }, { "epoch": 0.66, "learning_rate": 4.537853265038027e-06, "loss": 0.187, "step": 2044 }, { "epoch": 0.66, "learning_rate": 4.537346512905729e-06, "loss": 0.1856, "step": 2045 }, { "epoch": 0.66, "learning_rate": 4.536839511421941e-06, "loss": 0.1778, "step": 2046 }, { "epoch": 0.66, "learning_rate": 4.536332260648716e-06, "loss": 0.1727, "step": 2047 }, { "epoch": 0.66, "learning_rate": 4.535824760648135e-06, "loss": 0.1896, "step": 2048 }, { "epoch": 0.66, "learning_rate": 4.535317011482311e-06, "loss": 0.168, "step": 2049 }, { "epoch": 0.66, "learning_rate": 4.534809013213389e-06, "loss": 0.1854, "step": 2050 }, { "epoch": 0.66, "learning_rate": 4.534300765903542e-06, "loss": 0.196, "step": 2051 }, { "epoch": 0.66, "learning_rate": 4.533792269614974e-06, "loss": 0.1847, "step": 2052 }, { "epoch": 0.67, "learning_rate": 4.533283524409922e-06, "loss": 0.1859, "step": 2053 }, { "epoch": 0.67, "learning_rate": 4.53277453035065e-06, "loss": 0.1881, "step": 2054 }, { "epoch": 0.67, "learning_rate": 4.532265287499454e-06, "loss": 0.2021, "step": 2055 }, { "epoch": 0.67, "learning_rate": 4.531755795918661e-06, "loss": 0.1787, "step": 2056 }, { "epoch": 0.67, "learning_rate": 4.531246055670627e-06, "loss": 0.1958, "step": 2057 }, { "epoch": 0.67, "learning_rate": 4.53073606681774e-06, "loss": 0.1799, "step": 2058 }, { "epoch": 0.67, "learning_rate": 4.530225829422418e-06, "loss": 0.1824, "step": 2059 }, { "epoch": 0.67, "learning_rate": 4.529715343547107e-06, "loss": 0.1918, "step": 2060 }, { "epoch": 0.67, "learning_rate": 4.5292046092542885e-06, "loss": 0.1868, "step": 2061 }, { "epoch": 0.67, "learning_rate": 4.52869362660647e-06, "loss": 0.1719, "step": 2062 }, { "epoch": 0.67, "learning_rate": 4.5281823956661905e-06, "loss": 0.184, "step": 2063 }, { "epoch": 0.67, "learning_rate": 4.527670916496021e-06, "loss": 0.1886, "step": 2064 }, { "epoch": 0.67, "learning_rate": 4.52715918915856e-06, "loss": 0.1672, "step": 2065 }, { "epoch": 0.67, "learning_rate": 4.526647213716438e-06, "loss": 0.1977, "step": 2066 }, { "epoch": 0.67, "learning_rate": 4.526134990232317e-06, "loss": 0.1815, "step": 2067 }, { "epoch": 0.67, "learning_rate": 4.525622518768888e-06, "loss": 0.1623, "step": 2068 }, { "epoch": 0.67, "learning_rate": 4.5251097993888726e-06, "loss": 0.1906, "step": 2069 }, { "epoch": 0.67, "learning_rate": 4.524596832155022e-06, "loss": 0.2122, "step": 2070 }, { "epoch": 0.67, "learning_rate": 4.524083617130118e-06, "loss": 0.2146, "step": 2071 }, { "epoch": 0.67, "learning_rate": 4.523570154376975e-06, "loss": 0.1819, "step": 2072 }, { "epoch": 0.67, "learning_rate": 4.5230564439584335e-06, "loss": 0.1844, "step": 2073 }, { "epoch": 0.67, "learning_rate": 4.522542485937369e-06, "loss": 0.1727, "step": 2074 }, { "epoch": 0.67, "learning_rate": 4.522028280376683e-06, "loss": 0.2052, "step": 2075 }, { "epoch": 0.67, "learning_rate": 4.521513827339311e-06, "loss": 0.1896, "step": 2076 }, { "epoch": 0.67, "learning_rate": 4.5209991268882165e-06, "loss": 0.183, "step": 2077 }, { "epoch": 0.67, "learning_rate": 4.520484179086394e-06, "loss": 0.1864, "step": 2078 }, { "epoch": 0.67, "learning_rate": 4.519968983996867e-06, "loss": 0.1952, "step": 2079 }, { "epoch": 0.67, "learning_rate": 4.519453541682691e-06, "loss": 0.1939, "step": 2080 }, { "epoch": 0.67, "learning_rate": 4.518937852206952e-06, "loss": 0.1799, "step": 2081 }, { "epoch": 0.67, "learning_rate": 4.518421915632764e-06, "loss": 0.174, "step": 2082 }, { "epoch": 0.67, "learning_rate": 4.5179057320232735e-06, "loss": 0.165, "step": 2083 }, { "epoch": 0.68, "learning_rate": 4.517389301441657e-06, "loss": 0.1888, "step": 2084 }, { "epoch": 0.68, "learning_rate": 4.51687262395112e-06, "loss": 0.1893, "step": 2085 }, { "epoch": 0.68, "learning_rate": 4.516355699614897e-06, "loss": 0.174, "step": 2086 }, { "epoch": 0.68, "learning_rate": 4.515838528496257e-06, "loss": 0.1907, "step": 2087 }, { "epoch": 0.68, "learning_rate": 4.5153211106584965e-06, "loss": 0.1898, "step": 2088 }, { "epoch": 0.68, "learning_rate": 4.514803446164941e-06, "loss": 0.1884, "step": 2089 }, { "epoch": 0.68, "learning_rate": 4.514285535078949e-06, "loss": 0.1879, "step": 2090 }, { "epoch": 0.68, "learning_rate": 4.513767377463908e-06, "loss": 0.1947, "step": 2091 }, { "epoch": 0.68, "learning_rate": 4.513248973383234e-06, "loss": 0.181, "step": 2092 }, { "epoch": 0.68, "learning_rate": 4.512730322900375e-06, "loss": 0.1948, "step": 2093 }, { "epoch": 0.68, "learning_rate": 4.51221142607881e-06, "loss": 0.198, "step": 2094 }, { "epoch": 0.68, "learning_rate": 4.511692282982047e-06, "loss": 0.1922, "step": 2095 }, { "epoch": 0.68, "learning_rate": 4.511172893673621e-06, "loss": 0.1843, "step": 2096 }, { "epoch": 0.68, "learning_rate": 4.510653258217103e-06, "loss": 0.1805, "step": 2097 }, { "epoch": 0.68, "learning_rate": 4.5101333766760926e-06, "loss": 0.1987, "step": 2098 }, { "epoch": 0.68, "learning_rate": 4.509613249114215e-06, "loss": 0.1772, "step": 2099 }, { "epoch": 0.68, "learning_rate": 4.509092875595131e-06, "loss": 0.1917, "step": 2100 }, { "epoch": 0.68, "learning_rate": 4.508572256182528e-06, "loss": 0.1715, "step": 2101 }, { "epoch": 0.68, "learning_rate": 4.508051390940125e-06, "loss": 0.1909, "step": 2102 }, { "epoch": 0.68, "learning_rate": 4.507530279931673e-06, "loss": 0.183, "step": 2103 }, { "epoch": 0.68, "learning_rate": 4.5070089232209465e-06, "loss": 0.1714, "step": 2104 }, { "epoch": 0.68, "learning_rate": 4.506487320871758e-06, "loss": 0.2183, "step": 2105 }, { "epoch": 0.68, "learning_rate": 4.5059654729479474e-06, "loss": 0.1865, "step": 2106 }, { "epoch": 0.68, "learning_rate": 4.505443379513381e-06, "loss": 0.1932, "step": 2107 }, { "epoch": 0.68, "learning_rate": 4.5049210406319585e-06, "loss": 0.1807, "step": 2108 }, { "epoch": 0.68, "learning_rate": 4.5043984563676105e-06, "loss": 0.1952, "step": 2109 }, { "epoch": 0.68, "learning_rate": 4.503875626784295e-06, "loss": 0.1814, "step": 2110 }, { "epoch": 0.68, "learning_rate": 4.503352551946003e-06, "loss": 0.1857, "step": 2111 }, { "epoch": 0.68, "learning_rate": 4.5028292319167515e-06, "loss": 0.1854, "step": 2112 }, { "epoch": 0.68, "learning_rate": 4.502305666760592e-06, "loss": 0.1878, "step": 2113 }, { "epoch": 0.69, "learning_rate": 4.501781856541601e-06, "loss": 0.1752, "step": 2114 }, { "epoch": 0.69, "learning_rate": 4.50125780132389e-06, "loss": 0.1988, "step": 2115 }, { "epoch": 0.69, "learning_rate": 4.500733501171599e-06, "loss": 0.2068, "step": 2116 }, { "epoch": 0.69, "learning_rate": 4.500208956148895e-06, "loss": 0.1975, "step": 2117 }, { "epoch": 0.69, "learning_rate": 4.499684166319978e-06, "loss": 0.1882, "step": 2118 }, { "epoch": 0.69, "learning_rate": 4.499159131749079e-06, "loss": 0.194, "step": 2119 }, { "epoch": 0.69, "learning_rate": 4.498633852500455e-06, "loss": 0.179, "step": 2120 }, { "epoch": 0.69, "learning_rate": 4.498108328638395e-06, "loss": 0.1898, "step": 2121 }, { "epoch": 0.69, "learning_rate": 4.4975825602272185e-06, "loss": 0.1856, "step": 2122 }, { "epoch": 0.69, "learning_rate": 4.497056547331276e-06, "loss": 0.204, "step": 2123 }, { "epoch": 0.69, "learning_rate": 4.496530290014945e-06, "loss": 0.1798, "step": 2124 }, { "epoch": 0.69, "learning_rate": 4.496003788342633e-06, "loss": 0.184, "step": 2125 }, { "epoch": 0.69, "learning_rate": 4.495477042378781e-06, "loss": 0.2051, "step": 2126 }, { "epoch": 0.69, "learning_rate": 4.494950052187857e-06, "loss": 0.195, "step": 2127 }, { "epoch": 0.69, "learning_rate": 4.494422817834359e-06, "loss": 0.1789, "step": 2128 }, { "epoch": 0.69, "learning_rate": 4.493895339382815e-06, "loss": 0.1616, "step": 2129 }, { "epoch": 0.69, "learning_rate": 4.493367616897785e-06, "loss": 0.1865, "step": 2130 }, { "epoch": 0.69, "learning_rate": 4.4928396504438555e-06, "loss": 0.1876, "step": 2131 }, { "epoch": 0.69, "learning_rate": 4.4923114400856445e-06, "loss": 0.2038, "step": 2132 }, { "epoch": 0.69, "learning_rate": 4.491782985887802e-06, "loss": 0.1807, "step": 2133 }, { "epoch": 0.69, "learning_rate": 4.491254287915003e-06, "loss": 0.1835, "step": 2134 }, { "epoch": 0.69, "learning_rate": 4.490725346231954e-06, "loss": 0.1786, "step": 2135 }, { "epoch": 0.69, "learning_rate": 4.4901961609033965e-06, "loss": 0.1913, "step": 2136 }, { "epoch": 0.69, "learning_rate": 4.489666731994095e-06, "loss": 0.1875, "step": 2137 }, { "epoch": 0.69, "learning_rate": 4.489137059568847e-06, "loss": 0.1861, "step": 2138 }, { "epoch": 0.69, "learning_rate": 4.48860714369248e-06, "loss": 0.1641, "step": 2139 }, { "epoch": 0.69, "learning_rate": 4.488076984429849e-06, "loss": 0.178, "step": 2140 }, { "epoch": 0.69, "learning_rate": 4.4875465818458404e-06, "loss": 0.1982, "step": 2141 }, { "epoch": 0.69, "learning_rate": 4.4870159360053725e-06, "loss": 0.1922, "step": 2142 }, { "epoch": 0.69, "learning_rate": 4.4864850469733886e-06, "loss": 0.1916, "step": 2143 }, { "epoch": 0.69, "learning_rate": 4.485953914814867e-06, "loss": 0.1856, "step": 2144 }, { "epoch": 0.7, "learning_rate": 4.485422539594811e-06, "loss": 0.2036, "step": 2145 }, { "epoch": 0.7, "learning_rate": 4.4848909213782566e-06, "loss": 0.2066, "step": 2146 }, { "epoch": 0.7, "learning_rate": 4.484359060230269e-06, "loss": 0.1884, "step": 2147 }, { "epoch": 0.7, "learning_rate": 4.483826956215942e-06, "loss": 0.1911, "step": 2148 }, { "epoch": 0.7, "learning_rate": 4.4832946094004e-06, "loss": 0.172, "step": 2149 }, { "epoch": 0.7, "learning_rate": 4.482762019848799e-06, "loss": 0.2226, "step": 2150 }, { "epoch": 0.7, "learning_rate": 4.48222918762632e-06, "loss": 0.1759, "step": 2151 }, { "epoch": 0.7, "learning_rate": 4.481696112798179e-06, "loss": 0.184, "step": 2152 }, { "epoch": 0.7, "learning_rate": 4.481162795429618e-06, "loss": 0.1924, "step": 2153 }, { "epoch": 0.7, "learning_rate": 4.480629235585909e-06, "loss": 0.1854, "step": 2154 }, { "epoch": 0.7, "learning_rate": 4.480095433332357e-06, "loss": 0.1989, "step": 2155 }, { "epoch": 0.7, "learning_rate": 4.4795613887342916e-06, "loss": 0.1823, "step": 2156 }, { "epoch": 0.7, "learning_rate": 4.479027101857076e-06, "loss": 0.2074, "step": 2157 }, { "epoch": 0.7, "learning_rate": 4.4784925727661025e-06, "loss": 0.1799, "step": 2158 }, { "epoch": 0.7, "learning_rate": 4.47795780152679e-06, "loss": 0.1968, "step": 2159 }, { "epoch": 0.7, "learning_rate": 4.477422788204592e-06, "loss": 0.1976, "step": 2160 }, { "epoch": 0.7, "learning_rate": 4.476887532864986e-06, "loss": 0.1851, "step": 2161 }, { "epoch": 0.7, "learning_rate": 4.476352035573486e-06, "loss": 0.1912, "step": 2162 }, { "epoch": 0.7, "learning_rate": 4.475816296395627e-06, "loss": 0.1793, "step": 2163 }, { "epoch": 0.7, "learning_rate": 4.475280315396982e-06, "loss": 0.1843, "step": 2164 }, { "epoch": 0.7, "learning_rate": 4.474744092643149e-06, "loss": 0.199, "step": 2165 }, { "epoch": 0.7, "learning_rate": 4.474207628199756e-06, "loss": 0.1835, "step": 2166 }, { "epoch": 0.7, "learning_rate": 4.47367092213246e-06, "loss": 0.17, "step": 2167 }, { "epoch": 0.7, "learning_rate": 4.473133974506951e-06, "loss": 0.1962, "step": 2168 }, { "epoch": 0.7, "learning_rate": 4.472596785388944e-06, "loss": 0.1655, "step": 2169 }, { "epoch": 0.7, "learning_rate": 4.472059354844187e-06, "loss": 0.173, "step": 2170 }, { "epoch": 0.7, "learning_rate": 4.4715216829384566e-06, "loss": 0.1945, "step": 2171 }, { "epoch": 0.7, "learning_rate": 4.470983769737557e-06, "loss": 0.1922, "step": 2172 }, { "epoch": 0.7, "learning_rate": 4.470445615307325e-06, "loss": 0.1901, "step": 2173 }, { "epoch": 0.7, "learning_rate": 4.4699072197136255e-06, "loss": 0.1946, "step": 2174 }, { "epoch": 0.7, "learning_rate": 4.469368583022352e-06, "loss": 0.201, "step": 2175 }, { "epoch": 0.71, "learning_rate": 4.468829705299429e-06, "loss": 0.1851, "step": 2176 }, { "epoch": 0.71, "learning_rate": 4.4682905866108094e-06, "loss": 0.1796, "step": 2177 }, { "epoch": 0.71, "learning_rate": 4.467751227022478e-06, "loss": 0.1829, "step": 2178 }, { "epoch": 0.71, "learning_rate": 4.467211626600444e-06, "loss": 0.1871, "step": 2179 }, { "epoch": 0.71, "learning_rate": 4.466671785410752e-06, "loss": 0.1909, "step": 2180 }, { "epoch": 0.71, "learning_rate": 4.4661317035194716e-06, "loss": 0.1911, "step": 2181 }, { "epoch": 0.71, "learning_rate": 4.4655913809927045e-06, "loss": 0.1994, "step": 2182 }, { "epoch": 0.71, "learning_rate": 4.4650508178965814e-06, "loss": 0.1946, "step": 2183 }, { "epoch": 0.71, "learning_rate": 4.464510014297261e-06, "loss": 0.1887, "step": 2184 }, { "epoch": 0.71, "learning_rate": 4.4639689702609326e-06, "loss": 0.1934, "step": 2185 }, { "epoch": 0.71, "learning_rate": 4.463427685853815e-06, "loss": 0.2028, "step": 2186 }, { "epoch": 0.71, "learning_rate": 4.462886161142157e-06, "loss": 0.1833, "step": 2187 }, { "epoch": 0.71, "learning_rate": 4.4623443961922334e-06, "loss": 0.2066, "step": 2188 }, { "epoch": 0.71, "learning_rate": 4.461802391070354e-06, "loss": 0.1854, "step": 2189 }, { "epoch": 0.71, "learning_rate": 4.4612601458428525e-06, "loss": 0.1713, "step": 2190 }, { "epoch": 0.71, "learning_rate": 4.460717660576097e-06, "loss": 0.1877, "step": 2191 }, { "epoch": 0.71, "learning_rate": 4.46017493533648e-06, "loss": 0.1872, "step": 2192 }, { "epoch": 0.71, "learning_rate": 4.459631970190428e-06, "loss": 0.1815, "step": 2193 }, { "epoch": 0.71, "learning_rate": 4.4590887652043925e-06, "loss": 0.1816, "step": 2194 }, { "epoch": 0.71, "learning_rate": 4.458545320444857e-06, "loss": 0.198, "step": 2195 }, { "epoch": 0.71, "learning_rate": 4.458001635978335e-06, "loss": 0.2025, "step": 2196 }, { "epoch": 0.71, "learning_rate": 4.457457711871369e-06, "loss": 0.1678, "step": 2197 }, { "epoch": 0.71, "learning_rate": 4.4569135481905274e-06, "loss": 0.2162, "step": 2198 }, { "epoch": 0.71, "learning_rate": 4.456369145002412e-06, "loss": 0.182, "step": 2199 }, { "epoch": 0.71, "learning_rate": 4.455824502373653e-06, "loss": 0.1803, "step": 2200 }, { "epoch": 0.71, "learning_rate": 4.455279620370908e-06, "loss": 0.2125, "step": 2201 }, { "epoch": 0.71, "learning_rate": 4.454734499060867e-06, "loss": 0.1769, "step": 2202 }, { "epoch": 0.71, "learning_rate": 4.454189138510246e-06, "loss": 0.1954, "step": 2203 }, { "epoch": 0.71, "learning_rate": 4.453643538785793e-06, "loss": 0.2029, "step": 2204 }, { "epoch": 0.71, "learning_rate": 4.453097699954282e-06, "loss": 0.1791, "step": 2205 }, { "epoch": 0.71, "learning_rate": 4.452551622082522e-06, "loss": 0.1853, "step": 2206 }, { "epoch": 0.72, "learning_rate": 4.452005305237344e-06, "loss": 0.1913, "step": 2207 }, { "epoch": 0.72, "learning_rate": 4.451458749485614e-06, "loss": 0.1998, "step": 2208 }, { "epoch": 0.72, "learning_rate": 4.4509119548942245e-06, "loss": 0.191, "step": 2209 }, { "epoch": 0.72, "learning_rate": 4.450364921530099e-06, "loss": 0.1731, "step": 2210 }, { "epoch": 0.72, "learning_rate": 4.449817649460187e-06, "loss": 0.2068, "step": 2211 }, { "epoch": 0.72, "learning_rate": 4.449270138751471e-06, "loss": 0.2091, "step": 2212 }, { "epoch": 0.72, "learning_rate": 4.4487223894709606e-06, "loss": 0.1763, "step": 2213 }, { "epoch": 0.72, "learning_rate": 4.448174401685694e-06, "loss": 0.2071, "step": 2214 }, { "epoch": 0.72, "learning_rate": 4.447626175462741e-06, "loss": 0.1823, "step": 2215 }, { "epoch": 0.72, "learning_rate": 4.447077710869199e-06, "loss": 0.1758, "step": 2216 }, { "epoch": 0.72, "learning_rate": 4.4465290079721935e-06, "loss": 0.1989, "step": 2217 }, { "epoch": 0.72, "learning_rate": 4.445980066838882e-06, "loss": 0.1973, "step": 2218 }, { "epoch": 0.72, "learning_rate": 4.4454308875364486e-06, "loss": 0.1961, "step": 2219 }, { "epoch": 0.72, "learning_rate": 4.444881470132108e-06, "loss": 0.1953, "step": 2220 }, { "epoch": 0.72, "learning_rate": 4.444331814693103e-06, "loss": 0.2133, "step": 2221 }, { "epoch": 0.72, "learning_rate": 4.443781921286706e-06, "loss": 0.1998, "step": 2222 }, { "epoch": 0.72, "learning_rate": 4.4432317899802205e-06, "loss": 0.1909, "step": 2223 }, { "epoch": 0.72, "learning_rate": 4.442681420840974e-06, "loss": 0.1943, "step": 2224 }, { "epoch": 0.72, "learning_rate": 4.44213081393633e-06, "loss": 0.1823, "step": 2225 }, { "epoch": 0.72, "learning_rate": 4.441579969333675e-06, "loss": 0.1852, "step": 2226 }, { "epoch": 0.72, "learning_rate": 4.441028887100427e-06, "loss": 0.181, "step": 2227 }, { "epoch": 0.72, "learning_rate": 4.4404775673040346e-06, "loss": 0.1891, "step": 2228 }, { "epoch": 0.72, "learning_rate": 4.4399260100119726e-06, "loss": 0.1961, "step": 2229 }, { "epoch": 0.72, "learning_rate": 4.439374215291748e-06, "loss": 0.1879, "step": 2230 }, { "epoch": 0.72, "learning_rate": 4.438822183210894e-06, "loss": 0.1884, "step": 2231 }, { "epoch": 0.72, "learning_rate": 4.438269913836972e-06, "loss": 0.1984, "step": 2232 }, { "epoch": 0.72, "learning_rate": 4.437717407237578e-06, "loss": 0.1927, "step": 2233 }, { "epoch": 0.72, "learning_rate": 4.437164663480332e-06, "loss": 0.1809, "step": 2234 }, { "epoch": 0.72, "learning_rate": 4.436611682632884e-06, "loss": 0.2048, "step": 2235 }, { "epoch": 0.72, "learning_rate": 4.436058464762915e-06, "loss": 0.1975, "step": 2236 }, { "epoch": 0.72, "learning_rate": 4.435505009938131e-06, "loss": 0.1871, "step": 2237 }, { "epoch": 0.73, "learning_rate": 4.434951318226272e-06, "loss": 0.2039, "step": 2238 }, { "epoch": 0.73, "learning_rate": 4.434397389695102e-06, "loss": 0.1936, "step": 2239 }, { "epoch": 0.73, "learning_rate": 4.433843224412419e-06, "loss": 0.1932, "step": 2240 }, { "epoch": 0.73, "learning_rate": 4.4332888224460466e-06, "loss": 0.188, "step": 2241 }, { "epoch": 0.73, "learning_rate": 4.432734183863837e-06, "loss": 0.1933, "step": 2242 }, { "epoch": 0.73, "learning_rate": 4.432179308733674e-06, "loss": 0.174, "step": 2243 }, { "epoch": 0.73, "learning_rate": 4.43162419712347e-06, "loss": 0.1886, "step": 2244 }, { "epoch": 0.73, "learning_rate": 4.431068849101162e-06, "loss": 0.1825, "step": 2245 }, { "epoch": 0.73, "learning_rate": 4.4305132647347215e-06, "loss": 0.1785, "step": 2246 }, { "epoch": 0.73, "learning_rate": 4.429957444092146e-06, "loss": 0.1878, "step": 2247 }, { "epoch": 0.73, "learning_rate": 4.429401387241464e-06, "loss": 0.1897, "step": 2248 }, { "epoch": 0.73, "learning_rate": 4.428845094250729e-06, "loss": 0.1787, "step": 2249 }, { "epoch": 0.73, "learning_rate": 4.428288565188028e-06, "loss": 0.1889, "step": 2250 }, { "epoch": 0.73, "learning_rate": 4.427731800121473e-06, "loss": 0.1962, "step": 2251 }, { "epoch": 0.73, "learning_rate": 4.427174799119208e-06, "loss": 0.1857, "step": 2252 }, { "epoch": 0.73, "learning_rate": 4.426617562249405e-06, "loss": 0.1922, "step": 2253 }, { "epoch": 0.73, "learning_rate": 4.426060089580262e-06, "loss": 0.1799, "step": 2254 }, { "epoch": 0.73, "learning_rate": 4.42550238118001e-06, "loss": 0.1754, "step": 2255 }, { "epoch": 0.73, "learning_rate": 4.424944437116907e-06, "loss": 0.185, "step": 2256 }, { "epoch": 0.73, "learning_rate": 4.424386257459241e-06, "loss": 0.1773, "step": 2257 }, { "epoch": 0.73, "learning_rate": 4.423827842275325e-06, "loss": 0.2028, "step": 2258 }, { "epoch": 0.73, "learning_rate": 4.4232691916335055e-06, "loss": 0.1989, "step": 2259 }, { "epoch": 0.73, "learning_rate": 4.422710305602156e-06, "loss": 0.204, "step": 2260 }, { "epoch": 0.73, "learning_rate": 4.422151184249679e-06, "loss": 0.1876, "step": 2261 }, { "epoch": 0.73, "learning_rate": 4.421591827644503e-06, "loss": 0.1723, "step": 2262 }, { "epoch": 0.73, "learning_rate": 4.4210322358550915e-06, "loss": 0.1871, "step": 2263 }, { "epoch": 0.73, "learning_rate": 4.420472408949931e-06, "loss": 0.1959, "step": 2264 }, { "epoch": 0.73, "learning_rate": 4.419912346997539e-06, "loss": 0.208, "step": 2265 }, { "epoch": 0.73, "learning_rate": 4.419352050066462e-06, "loss": 0.1844, "step": 2266 }, { "epoch": 0.73, "learning_rate": 4.418791518225275e-06, "loss": 0.175, "step": 2267 }, { "epoch": 0.73, "learning_rate": 4.418230751542581e-06, "loss": 0.1944, "step": 2268 }, { "epoch": 0.74, "learning_rate": 4.417669750087014e-06, "loss": 0.1677, "step": 2269 }, { "epoch": 0.74, "learning_rate": 4.417108513927233e-06, "loss": 0.1852, "step": 2270 }, { "epoch": 0.74, "learning_rate": 4.416547043131929e-06, "loss": 0.1589, "step": 2271 }, { "epoch": 0.74, "learning_rate": 4.41598533776982e-06, "loss": 0.1837, "step": 2272 }, { "epoch": 0.74, "learning_rate": 4.415423397909655e-06, "loss": 0.1935, "step": 2273 }, { "epoch": 0.74, "learning_rate": 4.414861223620209e-06, "loss": 0.1709, "step": 2274 }, { "epoch": 0.74, "learning_rate": 4.414298814970286e-06, "loss": 0.1944, "step": 2275 }, { "epoch": 0.74, "learning_rate": 4.41373617202872e-06, "loss": 0.1848, "step": 2276 }, { "epoch": 0.74, "learning_rate": 4.413173294864373e-06, "loss": 0.1865, "step": 2277 }, { "epoch": 0.74, "learning_rate": 4.412610183546135e-06, "loss": 0.1867, "step": 2278 }, { "epoch": 0.74, "learning_rate": 4.412046838142927e-06, "loss": 0.1932, "step": 2279 }, { "epoch": 0.74, "learning_rate": 4.411483258723695e-06, "loss": 0.1981, "step": 2280 }, { "epoch": 0.74, "learning_rate": 4.410919445357418e-06, "loss": 0.1769, "step": 2281 }, { "epoch": 0.74, "learning_rate": 4.410355398113099e-06, "loss": 0.2006, "step": 2282 }, { "epoch": 0.74, "learning_rate": 4.409791117059773e-06, "loss": 0.1878, "step": 2283 }, { "epoch": 0.74, "learning_rate": 4.409226602266503e-06, "loss": 0.1903, "step": 2284 }, { "epoch": 0.74, "learning_rate": 4.408661853802379e-06, "loss": 0.2073, "step": 2285 }, { "epoch": 0.74, "learning_rate": 4.408096871736522e-06, "loss": 0.1651, "step": 2286 }, { "epoch": 0.74, "learning_rate": 4.407531656138079e-06, "loss": 0.176, "step": 2287 }, { "epoch": 0.74, "learning_rate": 4.406966207076229e-06, "loss": 0.1848, "step": 2288 }, { "epoch": 0.74, "learning_rate": 4.406400524620174e-06, "loss": 0.172, "step": 2289 }, { "epoch": 0.74, "learning_rate": 4.405834608839152e-06, "loss": 0.193, "step": 2290 }, { "epoch": 0.74, "learning_rate": 4.405268459802423e-06, "loss": 0.1667, "step": 2291 }, { "epoch": 0.74, "learning_rate": 4.404702077579279e-06, "loss": 0.1937, "step": 2292 }, { "epoch": 0.74, "learning_rate": 4.4041354622390395e-06, "loss": 0.1779, "step": 2293 }, { "epoch": 0.74, "learning_rate": 4.403568613851054e-06, "loss": 0.1812, "step": 2294 }, { "epoch": 0.74, "learning_rate": 4.403001532484697e-06, "loss": 0.1916, "step": 2295 }, { "epoch": 0.74, "learning_rate": 4.4024342182093745e-06, "loss": 0.1696, "step": 2296 }, { "epoch": 0.74, "learning_rate": 4.401866671094522e-06, "loss": 0.1807, "step": 2297 }, { "epoch": 0.74, "learning_rate": 4.4012988912096e-06, "loss": 0.1979, "step": 2298 }, { "epoch": 0.74, "learning_rate": 4.4007308786241e-06, "loss": 0.2168, "step": 2299 }, { "epoch": 0.75, "learning_rate": 4.40016263340754e-06, "loss": 0.1966, "step": 2300 }, { "epoch": 0.75, "learning_rate": 4.399594155629469e-06, "loss": 0.188, "step": 2301 }, { "epoch": 0.75, "learning_rate": 4.3990254453594634e-06, "loss": 0.1826, "step": 2302 }, { "epoch": 0.75, "learning_rate": 4.398456502667127e-06, "loss": 0.1738, "step": 2303 }, { "epoch": 0.75, "learning_rate": 4.397887327622093e-06, "loss": 0.1827, "step": 2304 }, { "epoch": 0.75, "learning_rate": 4.397317920294023e-06, "loss": 0.1781, "step": 2305 }, { "epoch": 0.75, "learning_rate": 4.396748280752608e-06, "loss": 0.2078, "step": 2306 }, { "epoch": 0.75, "learning_rate": 4.396178409067564e-06, "loss": 0.2073, "step": 2307 }, { "epoch": 0.75, "learning_rate": 4.395608305308639e-06, "loss": 0.172, "step": 2308 }, { "epoch": 0.75, "learning_rate": 4.395037969545609e-06, "loss": 0.1823, "step": 2309 }, { "epoch": 0.75, "learning_rate": 4.394467401848277e-06, "loss": 0.1693, "step": 2310 }, { "epoch": 0.75, "learning_rate": 4.393896602286475e-06, "loss": 0.2029, "step": 2311 }, { "epoch": 0.75, "learning_rate": 4.3933255709300635e-06, "loss": 0.1879, "step": 2312 }, { "epoch": 0.75, "learning_rate": 4.3927543078489295e-06, "loss": 0.1858, "step": 2313 }, { "epoch": 0.75, "learning_rate": 4.392182813112993e-06, "loss": 0.1927, "step": 2314 }, { "epoch": 0.75, "learning_rate": 4.391611086792198e-06, "loss": 0.1811, "step": 2315 }, { "epoch": 0.75, "learning_rate": 4.391039128956517e-06, "loss": 0.1868, "step": 2316 }, { "epoch": 0.75, "learning_rate": 4.390466939675954e-06, "loss": 0.1781, "step": 2317 }, { "epoch": 0.75, "learning_rate": 4.389894519020539e-06, "loss": 0.1793, "step": 2318 }, { "epoch": 0.75, "learning_rate": 4.38932186706033e-06, "loss": 0.1818, "step": 2319 }, { "epoch": 0.75, "learning_rate": 4.388748983865414e-06, "loss": 0.1899, "step": 2320 }, { "epoch": 0.75, "learning_rate": 4.388175869505908e-06, "loss": 0.1848, "step": 2321 }, { "epoch": 0.75, "learning_rate": 4.387602524051954e-06, "loss": 0.1972, "step": 2322 }, { "epoch": 0.75, "learning_rate": 4.387028947573724e-06, "loss": 0.1755, "step": 2323 }, { "epoch": 0.75, "learning_rate": 4.3864551401414195e-06, "loss": 0.1837, "step": 2324 }, { "epoch": 0.75, "learning_rate": 4.385881101825268e-06, "loss": 0.1871, "step": 2325 }, { "epoch": 0.75, "learning_rate": 4.385306832695526e-06, "loss": 0.1576, "step": 2326 }, { "epoch": 0.75, "learning_rate": 4.384732332822479e-06, "loss": 0.2069, "step": 2327 }, { "epoch": 0.75, "learning_rate": 4.38415760227644e-06, "loss": 0.1853, "step": 2328 }, { "epoch": 0.75, "learning_rate": 4.38358264112775e-06, "loss": 0.1892, "step": 2329 }, { "epoch": 0.76, "learning_rate": 4.3830074494467815e-06, "loss": 0.2016, "step": 2330 }, { "epoch": 0.76, "learning_rate": 4.382432027303928e-06, "loss": 0.1844, "step": 2331 }, { "epoch": 0.76, "learning_rate": 4.381856374769617e-06, "loss": 0.1789, "step": 2332 }, { "epoch": 0.76, "learning_rate": 4.3812804919143055e-06, "loss": 0.1869, "step": 2333 }, { "epoch": 0.76, "learning_rate": 4.380704378808473e-06, "loss": 0.1857, "step": 2334 }, { "epoch": 0.76, "learning_rate": 4.380128035522632e-06, "loss": 0.1832, "step": 2335 }, { "epoch": 0.76, "learning_rate": 4.379551462127319e-06, "loss": 0.1981, "step": 2336 }, { "epoch": 0.76, "learning_rate": 4.3789746586931034e-06, "loss": 0.1719, "step": 2337 }, { "epoch": 0.76, "learning_rate": 4.37839762529058e-06, "loss": 0.1888, "step": 2338 }, { "epoch": 0.76, "learning_rate": 4.3778203619903716e-06, "loss": 0.1784, "step": 2339 }, { "epoch": 0.76, "learning_rate": 4.3772428688631285e-06, "loss": 0.1906, "step": 2340 }, { "epoch": 0.76, "learning_rate": 4.376665145979532e-06, "loss": 0.1878, "step": 2341 }, { "epoch": 0.76, "learning_rate": 4.376087193410289e-06, "loss": 0.1862, "step": 2342 }, { "epoch": 0.76, "learning_rate": 4.375509011226135e-06, "loss": 0.1916, "step": 2343 }, { "epoch": 0.76, "learning_rate": 4.374930599497835e-06, "loss": 0.1812, "step": 2344 }, { "epoch": 0.76, "learning_rate": 4.37435195829618e-06, "loss": 0.2139, "step": 2345 }, { "epoch": 0.76, "learning_rate": 4.373773087691992e-06, "loss": 0.1885, "step": 2346 }, { "epoch": 0.76, "learning_rate": 4.373193987756116e-06, "loss": 0.1894, "step": 2347 }, { "epoch": 0.76, "learning_rate": 4.3726146585594296e-06, "loss": 0.1649, "step": 2348 }, { "epoch": 0.76, "learning_rate": 4.372035100172838e-06, "loss": 0.1782, "step": 2349 }, { "epoch": 0.76, "learning_rate": 4.371455312667272e-06, "loss": 0.1736, "step": 2350 }, { "epoch": 0.76, "learning_rate": 4.370875296113694e-06, "loss": 0.1818, "step": 2351 }, { "epoch": 0.76, "learning_rate": 4.370295050583091e-06, "loss": 0.1793, "step": 2352 }, { "epoch": 0.76, "learning_rate": 4.3697145761464785e-06, "loss": 0.1956, "step": 2353 }, { "epoch": 0.76, "learning_rate": 4.369133872874903e-06, "loss": 0.1815, "step": 2354 }, { "epoch": 0.76, "learning_rate": 4.368552940839436e-06, "loss": 0.1818, "step": 2355 }, { "epoch": 0.76, "learning_rate": 4.367971780111179e-06, "loss": 0.1925, "step": 2356 }, { "epoch": 0.76, "learning_rate": 4.367390390761258e-06, "loss": 0.2044, "step": 2357 }, { "epoch": 0.76, "learning_rate": 4.3668087728608314e-06, "loss": 0.1814, "step": 2358 }, { "epoch": 0.76, "learning_rate": 4.366226926481083e-06, "loss": 0.1798, "step": 2359 }, { "epoch": 0.76, "learning_rate": 4.365644851693226e-06, "loss": 0.1924, "step": 2360 }, { "epoch": 0.77, "learning_rate": 4.3650625485685e-06, "loss": 0.17, "step": 2361 }, { "epoch": 0.77, "learning_rate": 4.364480017178172e-06, "loss": 0.1944, "step": 2362 }, { "epoch": 0.77, "learning_rate": 4.36389725759354e-06, "loss": 0.1929, "step": 2363 }, { "epoch": 0.77, "learning_rate": 4.363314269885928e-06, "loss": 0.1787, "step": 2364 }, { "epoch": 0.77, "learning_rate": 4.362731054126687e-06, "loss": 0.1709, "step": 2365 }, { "epoch": 0.77, "learning_rate": 4.362147610387198e-06, "loss": 0.1937, "step": 2366 }, { "epoch": 0.77, "learning_rate": 4.361563938738869e-06, "loss": 0.1681, "step": 2367 }, { "epoch": 0.77, "learning_rate": 4.3609800392531345e-06, "loss": 0.1847, "step": 2368 }, { "epoch": 0.77, "learning_rate": 4.36039591200146e-06, "loss": 0.1667, "step": 2369 }, { "epoch": 0.77, "learning_rate": 4.359811557055335e-06, "loss": 0.1836, "step": 2370 }, { "epoch": 0.77, "learning_rate": 4.3592269744862794e-06, "loss": 0.1987, "step": 2371 }, { "epoch": 0.77, "learning_rate": 4.3586421643658404e-06, "loss": 0.176, "step": 2372 }, { "epoch": 0.77, "learning_rate": 4.3580571267655945e-06, "loss": 0.1635, "step": 2373 }, { "epoch": 0.77, "learning_rate": 4.357471861757144e-06, "loss": 0.1803, "step": 2374 }, { "epoch": 0.77, "learning_rate": 4.3568863694121185e-06, "loss": 0.1945, "step": 2375 }, { "epoch": 0.77, "learning_rate": 4.356300649802178e-06, "loss": 0.1744, "step": 2376 }, { "epoch": 0.77, "learning_rate": 4.355714702999008e-06, "loss": 0.1898, "step": 2377 }, { "epoch": 0.77, "learning_rate": 4.355128529074323e-06, "loss": 0.1793, "step": 2378 }, { "epoch": 0.77, "learning_rate": 4.354542128099866e-06, "loss": 0.1875, "step": 2379 }, { "epoch": 0.77, "learning_rate": 4.353955500147405e-06, "loss": 0.1879, "step": 2380 }, { "epoch": 0.77, "learning_rate": 4.353368645288738e-06, "loss": 0.1945, "step": 2381 }, { "epoch": 0.77, "learning_rate": 4.352781563595691e-06, "loss": 0.1802, "step": 2382 }, { "epoch": 0.77, "learning_rate": 4.352194255140118e-06, "loss": 0.1731, "step": 2383 }, { "epoch": 0.77, "learning_rate": 4.351606719993899e-06, "loss": 0.1866, "step": 2384 }, { "epoch": 0.77, "learning_rate": 4.351018958228941e-06, "loss": 0.1895, "step": 2385 }, { "epoch": 0.77, "learning_rate": 4.350430969917182e-06, "loss": 0.191, "step": 2386 }, { "epoch": 0.77, "learning_rate": 4.349842755130587e-06, "loss": 0.1904, "step": 2387 }, { "epoch": 0.77, "learning_rate": 4.349254313941146e-06, "loss": 0.1878, "step": 2388 }, { "epoch": 0.77, "learning_rate": 4.3486656464208785e-06, "loss": 0.1841, "step": 2389 }, { "epoch": 0.77, "learning_rate": 4.348076752641834e-06, "loss": 0.2046, "step": 2390 }, { "epoch": 0.77, "learning_rate": 4.347487632676084e-06, "loss": 0.1881, "step": 2391 }, { "epoch": 0.78, "learning_rate": 4.346898286595733e-06, "loss": 0.1634, "step": 2392 }, { "epoch": 0.78, "learning_rate": 4.3463087144729115e-06, "loss": 0.1767, "step": 2393 }, { "epoch": 0.78, "learning_rate": 4.3457189163797776e-06, "loss": 0.1845, "step": 2394 }, { "epoch": 0.78, "learning_rate": 4.345128892388515e-06, "loss": 0.1889, "step": 2395 }, { "epoch": 0.78, "learning_rate": 4.344538642571339e-06, "loss": 0.1682, "step": 2396 }, { "epoch": 0.78, "learning_rate": 4.3439481670004895e-06, "loss": 0.1801, "step": 2397 }, { "epoch": 0.78, "learning_rate": 4.343357465748235e-06, "loss": 0.182, "step": 2398 }, { "epoch": 0.78, "learning_rate": 4.342766538886872e-06, "loss": 0.1748, "step": 2399 }, { "epoch": 0.78, "learning_rate": 4.342175386488724e-06, "loss": 0.192, "step": 2400 }, { "epoch": 0.78, "learning_rate": 4.341584008626143e-06, "loss": 0.1992, "step": 2401 }, { "epoch": 0.78, "learning_rate": 4.340992405371506e-06, "loss": 0.1748, "step": 2402 }, { "epoch": 0.78, "learning_rate": 4.340400576797221e-06, "loss": 0.1824, "step": 2403 }, { "epoch": 0.78, "learning_rate": 4.339808522975722e-06, "loss": 0.1893, "step": 2404 }, { "epoch": 0.78, "learning_rate": 4.339216243979471e-06, "loss": 0.1968, "step": 2405 }, { "epoch": 0.78, "learning_rate": 4.3386237398809576e-06, "loss": 0.1725, "step": 2406 }, { "epoch": 0.78, "learning_rate": 4.338031010752696e-06, "loss": 0.1973, "step": 2407 }, { "epoch": 0.78, "learning_rate": 4.337438056667233e-06, "loss": 0.1843, "step": 2408 }, { "epoch": 0.78, "learning_rate": 4.336844877697139e-06, "loss": 0.1894, "step": 2409 }, { "epoch": 0.78, "learning_rate": 4.336251473915015e-06, "loss": 0.2011, "step": 2410 }, { "epoch": 0.78, "learning_rate": 4.335657845393486e-06, "loss": 0.184, "step": 2411 }, { "epoch": 0.78, "learning_rate": 4.335063992205207e-06, "loss": 0.1587, "step": 2412 }, { "epoch": 0.78, "learning_rate": 4.3344699144228605e-06, "loss": 0.1828, "step": 2413 }, { "epoch": 0.78, "learning_rate": 4.333875612119156e-06, "loss": 0.1974, "step": 2414 }, { "epoch": 0.78, "learning_rate": 4.333281085366829e-06, "loss": 0.2027, "step": 2415 }, { "epoch": 0.78, "learning_rate": 4.332686334238646e-06, "loss": 0.1637, "step": 2416 }, { "epoch": 0.78, "learning_rate": 4.332091358807397e-06, "loss": 0.1954, "step": 2417 }, { "epoch": 0.78, "learning_rate": 4.3314961591459015e-06, "loss": 0.1748, "step": 2418 }, { "epoch": 0.78, "learning_rate": 4.330900735327006e-06, "loss": 0.1944, "step": 2419 }, { "epoch": 0.78, "learning_rate": 4.330305087423585e-06, "loss": 0.1828, "step": 2420 }, { "epoch": 0.78, "learning_rate": 4.329709215508541e-06, "loss": 0.1588, "step": 2421 }, { "epoch": 0.78, "learning_rate": 4.329113119654801e-06, "loss": 0.1751, "step": 2422 }, { "epoch": 0.79, "learning_rate": 4.328516799935323e-06, "loss": 0.177, "step": 2423 }, { "epoch": 0.79, "learning_rate": 4.327920256423089e-06, "loss": 0.177, "step": 2424 }, { "epoch": 0.79, "learning_rate": 4.3273234891911135e-06, "loss": 0.1813, "step": 2425 }, { "epoch": 0.79, "learning_rate": 4.3267264983124304e-06, "loss": 0.201, "step": 2426 }, { "epoch": 0.79, "learning_rate": 4.326129283860109e-06, "loss": 0.1766, "step": 2427 }, { "epoch": 0.79, "learning_rate": 4.3255318459072415e-06, "loss": 0.1925, "step": 2428 }, { "epoch": 0.79, "learning_rate": 4.324934184526949e-06, "loss": 0.1862, "step": 2429 }, { "epoch": 0.79, "learning_rate": 4.324336299792378e-06, "loss": 0.1949, "step": 2430 }, { "epoch": 0.79, "learning_rate": 4.3237381917767054e-06, "loss": 0.1993, "step": 2431 }, { "epoch": 0.79, "learning_rate": 4.323139860553133e-06, "loss": 0.1777, "step": 2432 }, { "epoch": 0.79, "learning_rate": 4.3225413061948915e-06, "loss": 0.1757, "step": 2433 }, { "epoch": 0.79, "learning_rate": 4.321942528775238e-06, "loss": 0.1849, "step": 2434 }, { "epoch": 0.79, "learning_rate": 4.3213435283674556e-06, "loss": 0.1732, "step": 2435 }, { "epoch": 0.79, "learning_rate": 4.320744305044858e-06, "loss": 0.1863, "step": 2436 }, { "epoch": 0.79, "learning_rate": 4.320144858880784e-06, "loss": 0.1899, "step": 2437 }, { "epoch": 0.79, "learning_rate": 4.319545189948599e-06, "loss": 0.1743, "step": 2438 }, { "epoch": 0.79, "learning_rate": 4.318945298321698e-06, "loss": 0.1853, "step": 2439 }, { "epoch": 0.79, "learning_rate": 4.3183451840735e-06, "loss": 0.1738, "step": 2440 }, { "epoch": 0.79, "learning_rate": 4.3177448472774566e-06, "loss": 0.1607, "step": 2441 }, { "epoch": 0.79, "learning_rate": 4.317144288007039e-06, "loss": 0.1892, "step": 2442 }, { "epoch": 0.79, "learning_rate": 4.316543506335752e-06, "loss": 0.1731, "step": 2443 }, { "epoch": 0.79, "learning_rate": 4.315942502337126e-06, "loss": 0.1847, "step": 2444 }, { "epoch": 0.79, "learning_rate": 4.315341276084717e-06, "loss": 0.1822, "step": 2445 }, { "epoch": 0.79, "learning_rate": 4.3147398276521105e-06, "loss": 0.1794, "step": 2446 }, { "epoch": 0.79, "learning_rate": 4.314138157112916e-06, "loss": 0.1857, "step": 2447 }, { "epoch": 0.79, "learning_rate": 4.313536264540774e-06, "loss": 0.1831, "step": 2448 }, { "epoch": 0.79, "learning_rate": 4.312934150009351e-06, "loss": 0.191, "step": 2449 }, { "epoch": 0.79, "learning_rate": 4.3123318135923355e-06, "loss": 0.1926, "step": 2450 }, { "epoch": 0.79, "learning_rate": 4.311729255363453e-06, "loss": 0.1912, "step": 2451 }, { "epoch": 0.79, "learning_rate": 4.3111264753964475e-06, "loss": 0.1956, "step": 2452 }, { "epoch": 0.79, "learning_rate": 4.310523473765095e-06, "loss": 0.1861, "step": 2453 }, { "epoch": 0.8, "learning_rate": 4.309920250543196e-06, "loss": 0.1764, "step": 2454 }, { "epoch": 0.8, "learning_rate": 4.30931680580458e-06, "loss": 0.1968, "step": 2455 }, { "epoch": 0.8, "learning_rate": 4.308713139623103e-06, "loss": 0.2062, "step": 2456 }, { "epoch": 0.8, "learning_rate": 4.308109252072647e-06, "loss": 0.1775, "step": 2457 }, { "epoch": 0.8, "learning_rate": 4.307505143227122e-06, "loss": 0.1923, "step": 2458 }, { "epoch": 0.8, "learning_rate": 4.306900813160466e-06, "loss": 0.1879, "step": 2459 }, { "epoch": 0.8, "learning_rate": 4.306296261946643e-06, "loss": 0.1827, "step": 2460 }, { "epoch": 0.8, "learning_rate": 4.305691489659643e-06, "loss": 0.1759, "step": 2461 }, { "epoch": 0.8, "learning_rate": 4.3050864963734854e-06, "loss": 0.1995, "step": 2462 }, { "epoch": 0.8, "learning_rate": 4.304481282162215e-06, "loss": 0.1894, "step": 2463 }, { "epoch": 0.8, "learning_rate": 4.3038758470999056e-06, "loss": 0.1954, "step": 2464 }, { "epoch": 0.8, "learning_rate": 4.303270191260654e-06, "loss": 0.2128, "step": 2465 }, { "epoch": 0.8, "learning_rate": 4.302664314718588e-06, "loss": 0.2059, "step": 2466 }, { "epoch": 0.8, "learning_rate": 4.302058217547862e-06, "loss": 0.172, "step": 2467 }, { "epoch": 0.8, "learning_rate": 4.301451899822655e-06, "loss": 0.1701, "step": 2468 }, { "epoch": 0.8, "learning_rate": 4.3008453616171746e-06, "loss": 0.1813, "step": 2469 }, { "epoch": 0.8, "learning_rate": 4.300238603005656e-06, "loss": 0.1784, "step": 2470 }, { "epoch": 0.8, "learning_rate": 4.299631624062359e-06, "loss": 0.1772, "step": 2471 }, { "epoch": 0.8, "learning_rate": 4.299024424861574e-06, "loss": 0.176, "step": 2472 }, { "epoch": 0.8, "learning_rate": 4.298417005477616e-06, "loss": 0.1952, "step": 2473 }, { "epoch": 0.8, "learning_rate": 4.2978093659848255e-06, "loss": 0.1989, "step": 2474 }, { "epoch": 0.8, "learning_rate": 4.2972015064575726e-06, "loss": 0.1785, "step": 2475 }, { "epoch": 0.8, "learning_rate": 4.2965934269702535e-06, "loss": 0.1864, "step": 2476 }, { "epoch": 0.8, "learning_rate": 4.295985127597291e-06, "loss": 0.1812, "step": 2477 }, { "epoch": 0.8, "learning_rate": 4.295376608413136e-06, "loss": 0.1961, "step": 2478 }, { "epoch": 0.8, "learning_rate": 4.294767869492265e-06, "loss": 0.1784, "step": 2479 }, { "epoch": 0.8, "learning_rate": 4.294158910909181e-06, "loss": 0.1709, "step": 2480 }, { "epoch": 0.8, "learning_rate": 4.293549732738415e-06, "loss": 0.1842, "step": 2481 }, { "epoch": 0.8, "learning_rate": 4.2929403350545255e-06, "loss": 0.1801, "step": 2482 }, { "epoch": 0.8, "learning_rate": 4.292330717932095e-06, "loss": 0.1754, "step": 2483 }, { "epoch": 0.8, "learning_rate": 4.2917208814457364e-06, "loss": 0.171, "step": 2484 }, { "epoch": 0.81, "learning_rate": 4.291110825670087e-06, "loss": 0.183, "step": 2485 }, { "epoch": 0.81, "learning_rate": 4.290500550679811e-06, "loss": 0.189, "step": 2486 }, { "epoch": 0.81, "learning_rate": 4.289890056549603e-06, "loss": 0.1817, "step": 2487 }, { "epoch": 0.81, "learning_rate": 4.289279343354178e-06, "loss": 0.1921, "step": 2488 }, { "epoch": 0.81, "learning_rate": 4.288668411168283e-06, "loss": 0.1545, "step": 2489 }, { "epoch": 0.81, "learning_rate": 4.28805726006669e-06, "loss": 0.1765, "step": 2490 }, { "epoch": 0.81, "learning_rate": 4.287445890124198e-06, "loss": 0.1725, "step": 2491 }, { "epoch": 0.81, "learning_rate": 4.286834301415634e-06, "loss": 0.2005, "step": 2492 }, { "epoch": 0.81, "learning_rate": 4.286222494015848e-06, "loss": 0.1803, "step": 2493 }, { "epoch": 0.81, "learning_rate": 4.285610467999722e-06, "loss": 0.1966, "step": 2494 }, { "epoch": 0.81, "learning_rate": 4.28499822344216e-06, "loss": 0.1897, "step": 2495 }, { "epoch": 0.81, "learning_rate": 4.2843857604180955e-06, "loss": 0.1988, "step": 2496 }, { "epoch": 0.81, "learning_rate": 4.283773079002488e-06, "loss": 0.1915, "step": 2497 }, { "epoch": 0.81, "learning_rate": 4.283160179270325e-06, "loss": 0.1968, "step": 2498 }, { "epoch": 0.81, "learning_rate": 4.282547061296618e-06, "loss": 0.1802, "step": 2499 }, { "epoch": 0.81, "learning_rate": 4.281933725156406e-06, "loss": 0.1939, "step": 2500 }, { "epoch": 0.81, "learning_rate": 4.281320170924758e-06, "loss": 0.1645, "step": 2501 }, { "epoch": 0.81, "learning_rate": 4.280706398676764e-06, "loss": 0.1874, "step": 2502 }, { "epoch": 0.81, "learning_rate": 4.2800924084875465e-06, "loss": 0.1933, "step": 2503 }, { "epoch": 0.81, "learning_rate": 4.27947820043225e-06, "loss": 0.1785, "step": 2504 }, { "epoch": 0.81, "learning_rate": 4.278863774586049e-06, "loss": 0.1825, "step": 2505 }, { "epoch": 0.81, "learning_rate": 4.2782491310241426e-06, "loss": 0.1928, "step": 2506 }, { "epoch": 0.81, "learning_rate": 4.2776342698217575e-06, "loss": 0.1798, "step": 2507 }, { "epoch": 0.81, "learning_rate": 4.277019191054146e-06, "loss": 0.1716, "step": 2508 }, { "epoch": 0.81, "learning_rate": 4.276403894796589e-06, "loss": 0.1875, "step": 2509 }, { "epoch": 0.81, "learning_rate": 4.275788381124393e-06, "loss": 0.177, "step": 2510 }, { "epoch": 0.81, "learning_rate": 4.275172650112889e-06, "loss": 0.1664, "step": 2511 }, { "epoch": 0.81, "learning_rate": 4.274556701837438e-06, "loss": 0.1758, "step": 2512 }, { "epoch": 0.81, "learning_rate": 4.273940536373426e-06, "loss": 0.1795, "step": 2513 }, { "epoch": 0.81, "learning_rate": 4.273324153796264e-06, "loss": 0.1856, "step": 2514 }, { "epoch": 0.81, "learning_rate": 4.2727075541813945e-06, "loss": 0.1868, "step": 2515 }, { "epoch": 0.82, "learning_rate": 4.27209073760428e-06, "loss": 0.1828, "step": 2516 }, { "epoch": 0.82, "learning_rate": 4.271473704140415e-06, "loss": 0.1723, "step": 2517 }, { "epoch": 0.82, "learning_rate": 4.270856453865318e-06, "loss": 0.1956, "step": 2518 }, { "epoch": 0.82, "learning_rate": 4.270238986854534e-06, "loss": 0.1659, "step": 2519 }, { "epoch": 0.82, "learning_rate": 4.2696213031836355e-06, "loss": 0.1516, "step": 2520 }, { "epoch": 0.82, "learning_rate": 4.2690034029282214e-06, "loss": 0.1835, "step": 2521 }, { "epoch": 0.82, "learning_rate": 4.268385286163915e-06, "loss": 0.188, "step": 2522 }, { "epoch": 0.82, "learning_rate": 4.267766952966369e-06, "loss": 0.1826, "step": 2523 }, { "epoch": 0.82, "learning_rate": 4.267148403411261e-06, "loss": 0.1852, "step": 2524 }, { "epoch": 0.82, "learning_rate": 4.266529637574297e-06, "loss": 0.1889, "step": 2525 }, { "epoch": 0.82, "learning_rate": 4.265910655531206e-06, "loss": 0.1768, "step": 2526 }, { "epoch": 0.82, "learning_rate": 4.265291457357746e-06, "loss": 0.2024, "step": 2527 }, { "epoch": 0.82, "learning_rate": 4.2646720431297006e-06, "loss": 0.1938, "step": 2528 }, { "epoch": 0.82, "learning_rate": 4.2640524129228815e-06, "loss": 0.1671, "step": 2529 }, { "epoch": 0.82, "learning_rate": 4.263432566813123e-06, "loss": 0.1845, "step": 2530 }, { "epoch": 0.82, "learning_rate": 4.262812504876291e-06, "loss": 0.1618, "step": 2531 }, { "epoch": 0.82, "learning_rate": 4.262192227188273e-06, "loss": 0.1782, "step": 2532 }, { "epoch": 0.82, "learning_rate": 4.261571733824986e-06, "loss": 0.182, "step": 2533 }, { "epoch": 0.82, "learning_rate": 4.260951024862372e-06, "loss": 0.1936, "step": 2534 }, { "epoch": 0.82, "learning_rate": 4.2603301003763994e-06, "loss": 0.1971, "step": 2535 }, { "epoch": 0.82, "learning_rate": 4.259708960443065e-06, "loss": 0.1803, "step": 2536 }, { "epoch": 0.82, "learning_rate": 4.259087605138388e-06, "loss": 0.16, "step": 2537 }, { "epoch": 0.82, "learning_rate": 4.2584660345384176e-06, "loss": 0.1824, "step": 2538 }, { "epoch": 0.82, "learning_rate": 4.257844248719229e-06, "loss": 0.1845, "step": 2539 }, { "epoch": 0.82, "learning_rate": 4.25722224775692e-06, "loss": 0.1945, "step": 2540 }, { "epoch": 0.82, "learning_rate": 4.25660003172762e-06, "loss": 0.1946, "step": 2541 }, { "epoch": 0.82, "learning_rate": 4.255977600707481e-06, "loss": 0.1847, "step": 2542 }, { "epoch": 0.82, "learning_rate": 4.255354954772684e-06, "loss": 0.2023, "step": 2543 }, { "epoch": 0.82, "learning_rate": 4.2547320939994315e-06, "loss": 0.1789, "step": 2544 }, { "epoch": 0.82, "learning_rate": 4.25410901846396e-06, "loss": 0.1934, "step": 2545 }, { "epoch": 0.83, "learning_rate": 4.253485728242525e-06, "loss": 0.1821, "step": 2546 }, { "epoch": 0.83, "learning_rate": 4.252862223411412e-06, "loss": 0.1925, "step": 2547 }, { "epoch": 0.83, "learning_rate": 4.252238504046931e-06, "loss": 0.1896, "step": 2548 }, { "epoch": 0.83, "learning_rate": 4.251614570225421e-06, "loss": 0.204, "step": 2549 }, { "epoch": 0.83, "learning_rate": 4.250990422023243e-06, "loss": 0.2042, "step": 2550 }, { "epoch": 0.83, "learning_rate": 4.250366059516791e-06, "loss": 0.1792, "step": 2551 }, { "epoch": 0.83, "learning_rate": 4.249741482782476e-06, "loss": 0.1879, "step": 2552 }, { "epoch": 0.83, "learning_rate": 4.249116691896743e-06, "loss": 0.187, "step": 2553 }, { "epoch": 0.83, "learning_rate": 4.248491686936059e-06, "loss": 0.166, "step": 2554 }, { "epoch": 0.83, "learning_rate": 4.2478664679769196e-06, "loss": 0.182, "step": 2555 }, { "epoch": 0.83, "learning_rate": 4.247241035095846e-06, "loss": 0.1715, "step": 2556 }, { "epoch": 0.83, "learning_rate": 4.246615388369384e-06, "loss": 0.2097, "step": 2557 }, { "epoch": 0.83, "learning_rate": 4.245989527874107e-06, "loss": 0.178, "step": 2558 }, { "epoch": 0.83, "learning_rate": 4.245363453686614e-06, "loss": 0.1806, "step": 2559 }, { "epoch": 0.83, "learning_rate": 4.24473716588353e-06, "loss": 0.18, "step": 2560 }, { "epoch": 0.83, "learning_rate": 4.2441106645415085e-06, "loss": 0.188, "step": 2561 }, { "epoch": 0.83, "learning_rate": 4.243483949737225e-06, "loss": 0.1755, "step": 2562 }, { "epoch": 0.83, "learning_rate": 4.242857021547385e-06, "loss": 0.1667, "step": 2563 }, { "epoch": 0.83, "learning_rate": 4.242229880048718e-06, "loss": 0.1728, "step": 2564 }, { "epoch": 0.83, "learning_rate": 4.241602525317979e-06, "loss": 0.1872, "step": 2565 }, { "epoch": 0.83, "learning_rate": 4.240974957431951e-06, "loss": 0.1842, "step": 2566 }, { "epoch": 0.83, "learning_rate": 4.240347176467442e-06, "loss": 0.1882, "step": 2567 }, { "epoch": 0.83, "learning_rate": 4.2397191825012865e-06, "loss": 0.1662, "step": 2568 }, { "epoch": 0.83, "learning_rate": 4.239090975610346e-06, "loss": 0.1894, "step": 2569 }, { "epoch": 0.83, "learning_rate": 4.2384625558715045e-06, "loss": 0.1769, "step": 2570 }, { "epoch": 0.83, "learning_rate": 4.237833923361676e-06, "loss": 0.1981, "step": 2571 }, { "epoch": 0.83, "learning_rate": 4.237205078157799e-06, "loss": 0.1846, "step": 2572 }, { "epoch": 0.83, "learning_rate": 4.236576020336838e-06, "loss": 0.1747, "step": 2573 }, { "epoch": 0.83, "learning_rate": 4.235946749975783e-06, "loss": 0.1823, "step": 2574 }, { "epoch": 0.83, "learning_rate": 4.235317267151652e-06, "loss": 0.196, "step": 2575 }, { "epoch": 0.83, "learning_rate": 4.234687571941486e-06, "loss": 0.2052, "step": 2576 }, { "epoch": 0.84, "learning_rate": 4.234057664422354e-06, "loss": 0.1879, "step": 2577 }, { "epoch": 0.84, "learning_rate": 4.2334275446713515e-06, "loss": 0.1839, "step": 2578 }, { "epoch": 0.84, "learning_rate": 4.232797212765598e-06, "loss": 0.1806, "step": 2579 }, { "epoch": 0.84, "learning_rate": 4.2321666687822405e-06, "loss": 0.1939, "step": 2580 }, { "epoch": 0.84, "learning_rate": 4.231535912798452e-06, "loss": 0.1891, "step": 2581 }, { "epoch": 0.84, "learning_rate": 4.23090494489143e-06, "loss": 0.1841, "step": 2582 }, { "epoch": 0.84, "learning_rate": 4.230273765138399e-06, "loss": 0.1814, "step": 2583 }, { "epoch": 0.84, "learning_rate": 4.229642373616609e-06, "loss": 0.1656, "step": 2584 }, { "epoch": 0.84, "learning_rate": 4.229010770403337e-06, "loss": 0.1837, "step": 2585 }, { "epoch": 0.84, "learning_rate": 4.228378955575885e-06, "loss": 0.1838, "step": 2586 }, { "epoch": 0.84, "learning_rate": 4.227746929211582e-06, "loss": 0.1839, "step": 2587 }, { "epoch": 0.84, "learning_rate": 4.227114691387779e-06, "loss": 0.1672, "step": 2588 }, { "epoch": 0.84, "learning_rate": 4.226482242181859e-06, "loss": 0.1811, "step": 2589 }, { "epoch": 0.84, "learning_rate": 4.225849581671225e-06, "loss": 0.1936, "step": 2590 }, { "epoch": 0.84, "learning_rate": 4.225216709933309e-06, "loss": 0.1804, "step": 2591 }, { "epoch": 0.84, "learning_rate": 4.2245836270455706e-06, "loss": 0.1863, "step": 2592 }, { "epoch": 0.84, "learning_rate": 4.223950333085492e-06, "loss": 0.1762, "step": 2593 }, { "epoch": 0.84, "learning_rate": 4.223316828130581e-06, "loss": 0.1689, "step": 2594 }, { "epoch": 0.84, "learning_rate": 4.222683112258372e-06, "loss": 0.1798, "step": 2595 }, { "epoch": 0.84, "learning_rate": 4.222049185546428e-06, "loss": 0.1932, "step": 2596 }, { "epoch": 0.84, "learning_rate": 4.221415048072335e-06, "loss": 0.1678, "step": 2597 }, { "epoch": 0.84, "learning_rate": 4.220780699913704e-06, "loss": 0.1849, "step": 2598 }, { "epoch": 0.84, "learning_rate": 4.220146141148174e-06, "loss": 0.1844, "step": 2599 }, { "epoch": 0.84, "learning_rate": 4.219511371853408e-06, "loss": 0.189, "step": 2600 }, { "epoch": 0.84, "learning_rate": 4.2188763921070974e-06, "loss": 0.1745, "step": 2601 }, { "epoch": 0.84, "learning_rate": 4.2182412019869556e-06, "loss": 0.1807, "step": 2602 }, { "epoch": 0.84, "learning_rate": 4.217605801570725e-06, "loss": 0.1813, "step": 2603 }, { "epoch": 0.84, "learning_rate": 4.216970190936171e-06, "loss": 0.1784, "step": 2604 }, { "epoch": 0.84, "learning_rate": 4.2163343701610884e-06, "loss": 0.2011, "step": 2605 }, { "epoch": 0.84, "learning_rate": 4.215698339323294e-06, "loss": 0.182, "step": 2606 }, { "epoch": 0.84, "learning_rate": 4.215062098500632e-06, "loss": 0.1844, "step": 2607 }, { "epoch": 0.85, "learning_rate": 4.214425647770972e-06, "loss": 0.1946, "step": 2608 }, { "epoch": 0.85, "learning_rate": 4.213788987212211e-06, "loss": 0.1734, "step": 2609 }, { "epoch": 0.85, "learning_rate": 4.213152116902267e-06, "loss": 0.1862, "step": 2610 }, { "epoch": 0.85, "learning_rate": 4.212515036919089e-06, "loss": 0.1713, "step": 2611 }, { "epoch": 0.85, "learning_rate": 4.211877747340649e-06, "loss": 0.1775, "step": 2612 }, { "epoch": 0.85, "learning_rate": 4.211240248244945e-06, "loss": 0.192, "step": 2613 }, { "epoch": 0.85, "learning_rate": 4.21060253971e-06, "loss": 0.1917, "step": 2614 }, { "epoch": 0.85, "learning_rate": 4.2099646218138655e-06, "loss": 0.1884, "step": 2615 }, { "epoch": 0.85, "learning_rate": 4.209326494634614e-06, "loss": 0.1913, "step": 2616 }, { "epoch": 0.85, "learning_rate": 4.208688158250348e-06, "loss": 0.1835, "step": 2617 }, { "epoch": 0.85, "learning_rate": 4.2080496127391914e-06, "loss": 0.188, "step": 2618 }, { "epoch": 0.85, "learning_rate": 4.207410858179298e-06, "loss": 0.1997, "step": 2619 }, { "epoch": 0.85, "learning_rate": 4.206771894648846e-06, "loss": 0.1892, "step": 2620 }, { "epoch": 0.85, "learning_rate": 4.206132722226035e-06, "loss": 0.1903, "step": 2621 }, { "epoch": 0.85, "learning_rate": 4.205493340989096e-06, "loss": 0.1859, "step": 2622 }, { "epoch": 0.85, "learning_rate": 4.204853751016282e-06, "loss": 0.1951, "step": 2623 }, { "epoch": 0.85, "learning_rate": 4.204213952385875e-06, "loss": 0.1711, "step": 2624 }, { "epoch": 0.85, "learning_rate": 4.203573945176177e-06, "loss": 0.1888, "step": 2625 }, { "epoch": 0.85, "learning_rate": 4.202933729465519e-06, "loss": 0.1956, "step": 2626 }, { "epoch": 0.85, "learning_rate": 4.20229330533226e-06, "loss": 0.1949, "step": 2627 }, { "epoch": 0.85, "learning_rate": 4.201652672854779e-06, "loss": 0.1886, "step": 2628 }, { "epoch": 0.85, "learning_rate": 4.201011832111485e-06, "loss": 0.1696, "step": 2629 }, { "epoch": 0.85, "learning_rate": 4.2003707831808086e-06, "loss": 0.1959, "step": 2630 }, { "epoch": 0.85, "learning_rate": 4.199729526141209e-06, "loss": 0.1766, "step": 2631 }, { "epoch": 0.85, "learning_rate": 4.199088061071172e-06, "loss": 0.1819, "step": 2632 }, { "epoch": 0.85, "learning_rate": 4.198446388049203e-06, "loss": 0.1977, "step": 2633 }, { "epoch": 0.85, "learning_rate": 4.197804507153838e-06, "loss": 0.1848, "step": 2634 }, { "epoch": 0.85, "learning_rate": 4.197162418463639e-06, "loss": 0.1809, "step": 2635 }, { "epoch": 0.85, "learning_rate": 4.1965201220571895e-06, "loss": 0.1653, "step": 2636 }, { "epoch": 0.85, "learning_rate": 4.1958776180131e-06, "loss": 0.1836, "step": 2637 }, { "epoch": 0.85, "learning_rate": 4.1952349064100074e-06, "loss": 0.1895, "step": 2638 }, { "epoch": 0.86, "learning_rate": 4.194591987326574e-06, "loss": 0.1772, "step": 2639 }, { "epoch": 0.86, "learning_rate": 4.193948860841485e-06, "loss": 0.2091, "step": 2640 }, { "epoch": 0.86, "learning_rate": 4.193305527033456e-06, "loss": 0.1938, "step": 2641 }, { "epoch": 0.86, "learning_rate": 4.192661985981221e-06, "loss": 0.2003, "step": 2642 }, { "epoch": 0.86, "learning_rate": 4.192018237763547e-06, "loss": 0.1797, "step": 2643 }, { "epoch": 0.86, "learning_rate": 4.19137428245922e-06, "loss": 0.1988, "step": 2644 }, { "epoch": 0.86, "learning_rate": 4.190730120147054e-06, "loss": 0.1654, "step": 2645 }, { "epoch": 0.86, "learning_rate": 4.190085750905889e-06, "loss": 0.185, "step": 2646 }, { "epoch": 0.86, "learning_rate": 4.189441174814589e-06, "loss": 0.1962, "step": 2647 }, { "epoch": 0.86, "learning_rate": 4.188796391952046e-06, "loss": 0.1871, "step": 2648 }, { "epoch": 0.86, "learning_rate": 4.188151402397172e-06, "loss": 0.1775, "step": 2649 }, { "epoch": 0.86, "learning_rate": 4.187506206228909e-06, "loss": 0.1728, "step": 2650 }, { "epoch": 0.86, "learning_rate": 4.1868608035262225e-06, "loss": 0.1965, "step": 2651 }, { "epoch": 0.86, "learning_rate": 4.186215194368105e-06, "loss": 0.1886, "step": 2652 }, { "epoch": 0.86, "learning_rate": 4.18556937883357e-06, "loss": 0.1791, "step": 2653 }, { "epoch": 0.86, "learning_rate": 4.184923357001661e-06, "loss": 0.1864, "step": 2654 }, { "epoch": 0.86, "learning_rate": 4.184277128951445e-06, "loss": 0.1811, "step": 2655 }, { "epoch": 0.86, "learning_rate": 4.1836306947620135e-06, "loss": 0.1835, "step": 2656 }, { "epoch": 0.86, "learning_rate": 4.182984054512483e-06, "loss": 0.1877, "step": 2657 }, { "epoch": 0.86, "learning_rate": 4.182337208281998e-06, "loss": 0.1836, "step": 2658 }, { "epoch": 0.86, "learning_rate": 4.181690156149724e-06, "loss": 0.191, "step": 2659 }, { "epoch": 0.86, "learning_rate": 4.1810428981948555e-06, "loss": 0.1903, "step": 2660 }, { "epoch": 0.86, "learning_rate": 4.1803954344966095e-06, "loss": 0.2199, "step": 2661 }, { "epoch": 0.86, "learning_rate": 4.17974776513423e-06, "loss": 0.1808, "step": 2662 }, { "epoch": 0.86, "learning_rate": 4.179099890186985e-06, "loss": 0.1742, "step": 2663 }, { "epoch": 0.86, "learning_rate": 4.178451809734168e-06, "loss": 0.1879, "step": 2664 }, { "epoch": 0.86, "learning_rate": 4.1778035238550995e-06, "loss": 0.1969, "step": 2665 }, { "epoch": 0.86, "learning_rate": 4.177155032629122e-06, "loss": 0.1933, "step": 2666 }, { "epoch": 0.86, "learning_rate": 4.176506336135603e-06, "loss": 0.1715, "step": 2667 }, { "epoch": 0.86, "learning_rate": 4.175857434453939e-06, "loss": 0.1764, "step": 2668 }, { "epoch": 0.86, "learning_rate": 4.175208327663549e-06, "loss": 0.2003, "step": 2669 }, { "epoch": 0.87, "learning_rate": 4.174559015843878e-06, "loss": 0.1889, "step": 2670 }, { "epoch": 0.87, "learning_rate": 4.173909499074392e-06, "loss": 0.1793, "step": 2671 }, { "epoch": 0.87, "learning_rate": 4.173259777434589e-06, "loss": 0.1807, "step": 2672 }, { "epoch": 0.87, "learning_rate": 4.1726098510039894e-06, "loss": 0.204, "step": 2673 }, { "epoch": 0.87, "learning_rate": 4.171959719862134e-06, "loss": 0.1918, "step": 2674 }, { "epoch": 0.87, "learning_rate": 4.171309384088596e-06, "loss": 0.1901, "step": 2675 }, { "epoch": 0.87, "learning_rate": 4.170658843762968e-06, "loss": 0.1882, "step": 2676 }, { "epoch": 0.87, "learning_rate": 4.170008098964871e-06, "loss": 0.1709, "step": 2677 }, { "epoch": 0.87, "learning_rate": 4.169357149773949e-06, "loss": 0.1848, "step": 2678 }, { "epoch": 0.87, "learning_rate": 4.168705996269874e-06, "loss": 0.1883, "step": 2679 }, { "epoch": 0.87, "learning_rate": 4.168054638532338e-06, "loss": 0.1824, "step": 2680 }, { "epoch": 0.87, "learning_rate": 4.167403076641063e-06, "loss": 0.1567, "step": 2681 }, { "epoch": 0.87, "learning_rate": 4.166751310675793e-06, "loss": 0.1874, "step": 2682 }, { "epoch": 0.87, "learning_rate": 4.166099340716298e-06, "loss": 0.1913, "step": 2683 }, { "epoch": 0.87, "learning_rate": 4.165447166842373e-06, "loss": 0.1828, "step": 2684 }, { "epoch": 0.87, "learning_rate": 4.164794789133837e-06, "loss": 0.1641, "step": 2685 }, { "epoch": 0.87, "learning_rate": 4.164142207670536e-06, "loss": 0.1991, "step": 2686 }, { "epoch": 0.87, "learning_rate": 4.163489422532338e-06, "loss": 0.2002, "step": 2687 }, { "epoch": 0.87, "learning_rate": 4.162836433799139e-06, "loss": 0.1812, "step": 2688 }, { "epoch": 0.87, "learning_rate": 4.162183241550858e-06, "loss": 0.1821, "step": 2689 }, { "epoch": 0.87, "learning_rate": 4.161529845867439e-06, "loss": 0.2016, "step": 2690 }, { "epoch": 0.87, "learning_rate": 4.160876246828853e-06, "loss": 0.1887, "step": 2691 }, { "epoch": 0.87, "learning_rate": 4.160222444515092e-06, "loss": 0.1954, "step": 2692 }, { "epoch": 0.87, "learning_rate": 4.159568439006176e-06, "loss": 0.1884, "step": 2693 }, { "epoch": 0.87, "learning_rate": 4.1589142303821485e-06, "loss": 0.1706, "step": 2694 }, { "epoch": 0.87, "learning_rate": 4.158259818723079e-06, "loss": 0.1628, "step": 2695 }, { "epoch": 0.87, "learning_rate": 4.157605204109062e-06, "loss": 0.1916, "step": 2696 }, { "epoch": 0.87, "learning_rate": 4.156950386620214e-06, "loss": 0.1824, "step": 2697 }, { "epoch": 0.87, "learning_rate": 4.156295366336679e-06, "loss": 0.186, "step": 2698 }, { "epoch": 0.87, "learning_rate": 4.155640143338625e-06, "loss": 0.1748, "step": 2699 }, { "epoch": 0.87, "learning_rate": 4.154984717706246e-06, "loss": 0.1745, "step": 2700 }, { "epoch": 0.88, "learning_rate": 4.15432908951976e-06, "loss": 0.1872, "step": 2701 }, { "epoch": 0.88, "learning_rate": 4.153673258859406e-06, "loss": 0.1751, "step": 2702 }, { "epoch": 0.88, "learning_rate": 4.153017225805456e-06, "loss": 0.1898, "step": 2703 }, { "epoch": 0.88, "learning_rate": 4.1523609904382e-06, "loss": 0.1756, "step": 2704 }, { "epoch": 0.88, "learning_rate": 4.1517045528379544e-06, "loss": 0.1791, "step": 2705 }, { "epoch": 0.88, "learning_rate": 4.151047913085061e-06, "loss": 0.1825, "step": 2706 }, { "epoch": 0.88, "learning_rate": 4.150391071259886e-06, "loss": 0.1718, "step": 2707 }, { "epoch": 0.88, "learning_rate": 4.149734027442821e-06, "loss": 0.178, "step": 2708 }, { "epoch": 0.88, "learning_rate": 4.149076781714283e-06, "loss": 0.189, "step": 2709 }, { "epoch": 0.88, "learning_rate": 4.1484193341547106e-06, "loss": 0.1711, "step": 2710 }, { "epoch": 0.88, "learning_rate": 4.147761684844569e-06, "loss": 0.1824, "step": 2711 }, { "epoch": 0.88, "learning_rate": 4.147103833864349e-06, "loss": 0.1852, "step": 2712 }, { "epoch": 0.88, "learning_rate": 4.146445781294566e-06, "loss": 0.1812, "step": 2713 }, { "epoch": 0.88, "learning_rate": 4.145787527215757e-06, "loss": 0.1709, "step": 2714 }, { "epoch": 0.88, "learning_rate": 4.145129071708487e-06, "loss": 0.1922, "step": 2715 }, { "epoch": 0.88, "learning_rate": 4.144470414853345e-06, "loss": 0.183, "step": 2716 }, { "epoch": 0.88, "learning_rate": 4.143811556730944e-06, "loss": 0.1984, "step": 2717 }, { "epoch": 0.88, "learning_rate": 4.143152497421922e-06, "loss": 0.1652, "step": 2718 }, { "epoch": 0.88, "learning_rate": 4.142493237006941e-06, "loss": 0.1861, "step": 2719 }, { "epoch": 0.88, "learning_rate": 4.141833775566688e-06, "loss": 0.1892, "step": 2720 }, { "epoch": 0.88, "learning_rate": 4.1411741131818765e-06, "loss": 0.1685, "step": 2721 }, { "epoch": 0.88, "learning_rate": 4.14051424993324e-06, "loss": 0.1848, "step": 2722 }, { "epoch": 0.88, "learning_rate": 4.1398541859015405e-06, "loss": 0.1819, "step": 2723 }, { "epoch": 0.88, "learning_rate": 4.139193921167565e-06, "loss": 0.1882, "step": 2724 }, { "epoch": 0.88, "learning_rate": 4.138533455812121e-06, "loss": 0.1784, "step": 2725 }, { "epoch": 0.88, "learning_rate": 4.137872789916044e-06, "loss": 0.1908, "step": 2726 }, { "epoch": 0.88, "learning_rate": 4.137211923560195e-06, "loss": 0.1803, "step": 2727 }, { "epoch": 0.88, "learning_rate": 4.136550856825455e-06, "loss": 0.1894, "step": 2728 }, { "epoch": 0.88, "learning_rate": 4.135889589792733e-06, "loss": 0.1712, "step": 2729 }, { "epoch": 0.88, "learning_rate": 4.135228122542962e-06, "loss": 0.2101, "step": 2730 }, { "epoch": 0.88, "learning_rate": 4.1345664551570985e-06, "loss": 0.1669, "step": 2731 }, { "epoch": 0.89, "learning_rate": 4.133904587716126e-06, "loss": 0.1733, "step": 2732 }, { "epoch": 0.89, "learning_rate": 4.133242520301049e-06, "loss": 0.1775, "step": 2733 }, { "epoch": 0.89, "learning_rate": 4.132580252992898e-06, "loss": 0.1845, "step": 2734 }, { "epoch": 0.89, "learning_rate": 4.131917785872728e-06, "loss": 0.1976, "step": 2735 }, { "epoch": 0.89, "learning_rate": 4.13125511902162e-06, "loss": 0.1811, "step": 2736 }, { "epoch": 0.89, "learning_rate": 4.130592252520677e-06, "loss": 0.1667, "step": 2737 }, { "epoch": 0.89, "learning_rate": 4.129929186451028e-06, "loss": 0.1694, "step": 2738 }, { "epoch": 0.89, "learning_rate": 4.129265920893826e-06, "loss": 0.1828, "step": 2739 }, { "epoch": 0.89, "learning_rate": 4.128602455930247e-06, "loss": 0.1702, "step": 2740 }, { "epoch": 0.89, "learning_rate": 4.127938791641493e-06, "loss": 0.1735, "step": 2741 }, { "epoch": 0.89, "learning_rate": 4.127274928108792e-06, "loss": 0.1928, "step": 2742 }, { "epoch": 0.89, "learning_rate": 4.126610865413392e-06, "loss": 0.194, "step": 2743 }, { "epoch": 0.89, "learning_rate": 4.125946603636569e-06, "loss": 0.2081, "step": 2744 }, { "epoch": 0.89, "learning_rate": 4.125282142859622e-06, "loss": 0.1852, "step": 2745 }, { "epoch": 0.89, "learning_rate": 4.124617483163876e-06, "loss": 0.1795, "step": 2746 }, { "epoch": 0.89, "learning_rate": 4.123952624630676e-06, "loss": 0.173, "step": 2747 }, { "epoch": 0.89, "learning_rate": 4.123287567341396e-06, "loss": 0.1727, "step": 2748 }, { "epoch": 0.89, "learning_rate": 4.122622311377433e-06, "loss": 0.1952, "step": 2749 }, { "epoch": 0.89, "learning_rate": 4.121956856820207e-06, "loss": 0.1816, "step": 2750 }, { "epoch": 0.89, "learning_rate": 4.1212912037511634e-06, "loss": 0.1782, "step": 2751 }, { "epoch": 0.89, "learning_rate": 4.1206253522517725e-06, "loss": 0.1815, "step": 2752 }, { "epoch": 0.89, "learning_rate": 4.119959302403527e-06, "loss": 0.1764, "step": 2753 }, { "epoch": 0.89, "learning_rate": 4.119293054287945e-06, "loss": 0.1878, "step": 2754 }, { "epoch": 0.89, "learning_rate": 4.118626607986569e-06, "loss": 0.1764, "step": 2755 }, { "epoch": 0.89, "learning_rate": 4.1179599635809654e-06, "loss": 0.1839, "step": 2756 }, { "epoch": 0.89, "learning_rate": 4.1172931211527254e-06, "loss": 0.1659, "step": 2757 }, { "epoch": 0.89, "learning_rate": 4.116626080783464e-06, "loss": 0.1785, "step": 2758 }, { "epoch": 0.89, "learning_rate": 4.1159588425548215e-06, "loss": 0.1956, "step": 2759 }, { "epoch": 0.89, "learning_rate": 4.11529140654846e-06, "loss": 0.1808, "step": 2760 }, { "epoch": 0.89, "learning_rate": 4.114623772846067e-06, "loss": 0.1846, "step": 2761 }, { "epoch": 0.9, "learning_rate": 4.113955941529355e-06, "loss": 0.189, "step": 2762 }, { "epoch": 0.9, "learning_rate": 4.113287912680061e-06, "loss": 0.1945, "step": 2763 }, { "epoch": 0.9, "learning_rate": 4.112619686379944e-06, "loss": 0.1676, "step": 2764 }, { "epoch": 0.9, "learning_rate": 4.111951262710788e-06, "loss": 0.1743, "step": 2765 }, { "epoch": 0.9, "learning_rate": 4.111282641754403e-06, "loss": 0.1925, "step": 2766 }, { "epoch": 0.9, "learning_rate": 4.110613823592621e-06, "loss": 0.1917, "step": 2767 }, { "epoch": 0.9, "learning_rate": 4.109944808307298e-06, "loss": 0.1723, "step": 2768 }, { "epoch": 0.9, "learning_rate": 4.109275595980316e-06, "loss": 0.1809, "step": 2769 }, { "epoch": 0.9, "learning_rate": 4.108606186693582e-06, "loss": 0.2015, "step": 2770 }, { "epoch": 0.9, "learning_rate": 4.1079365805290214e-06, "loss": 0.1741, "step": 2771 }, { "epoch": 0.9, "learning_rate": 4.10726677756859e-06, "loss": 0.1747, "step": 2772 }, { "epoch": 0.9, "learning_rate": 4.106596777894265e-06, "loss": 0.1896, "step": 2773 }, { "epoch": 0.9, "learning_rate": 4.105926581588046e-06, "loss": 0.172, "step": 2774 }, { "epoch": 0.9, "learning_rate": 4.105256188731962e-06, "loss": 0.1743, "step": 2775 }, { "epoch": 0.9, "learning_rate": 4.104585599408059e-06, "loss": 0.1807, "step": 2776 }, { "epoch": 0.9, "learning_rate": 4.1039148136984134e-06, "loss": 0.17, "step": 2777 }, { "epoch": 0.9, "learning_rate": 4.103243831685121e-06, "loss": 0.1667, "step": 2778 }, { "epoch": 0.9, "learning_rate": 4.102572653450304e-06, "loss": 0.191, "step": 2779 }, { "epoch": 0.9, "learning_rate": 4.101901279076108e-06, "loss": 0.1802, "step": 2780 }, { "epoch": 0.9, "learning_rate": 4.101229708644704e-06, "loss": 0.1915, "step": 2781 }, { "epoch": 0.9, "learning_rate": 4.100557942238284e-06, "loss": 0.1576, "step": 2782 }, { "epoch": 0.9, "learning_rate": 4.099885979939068e-06, "loss": 0.1931, "step": 2783 }, { "epoch": 0.9, "learning_rate": 4.099213821829295e-06, "loss": 0.1781, "step": 2784 }, { "epoch": 0.9, "learning_rate": 4.098541467991231e-06, "loss": 0.193, "step": 2785 }, { "epoch": 0.9, "learning_rate": 4.097868918507168e-06, "loss": 0.1787, "step": 2786 }, { "epoch": 0.9, "learning_rate": 4.097196173459417e-06, "loss": 0.1929, "step": 2787 }, { "epoch": 0.9, "learning_rate": 4.0965232329303175e-06, "loss": 0.1934, "step": 2788 }, { "epoch": 0.9, "learning_rate": 4.095850097002228e-06, "loss": 0.1733, "step": 2789 }, { "epoch": 0.9, "learning_rate": 4.095176765757537e-06, "loss": 0.1835, "step": 2790 }, { "epoch": 0.9, "learning_rate": 4.094503239278652e-06, "loss": 0.1916, "step": 2791 }, { "epoch": 0.9, "learning_rate": 4.0938295176480055e-06, "loss": 0.1735, "step": 2792 }, { "epoch": 0.91, "learning_rate": 4.093155600948057e-06, "loss": 0.1765, "step": 2793 }, { "epoch": 0.91, "learning_rate": 4.092481489261285e-06, "loss": 0.1876, "step": 2794 }, { "epoch": 0.91, "learning_rate": 4.0918071826701966e-06, "loss": 0.166, "step": 2795 }, { "epoch": 0.91, "learning_rate": 4.091132681257317e-06, "loss": 0.166, "step": 2796 }, { "epoch": 0.91, "learning_rate": 4.090457985105202e-06, "loss": 0.1817, "step": 2797 }, { "epoch": 0.91, "learning_rate": 4.089783094296425e-06, "loss": 0.186, "step": 2798 }, { "epoch": 0.91, "learning_rate": 4.089108008913589e-06, "loss": 0.1873, "step": 2799 }, { "epoch": 0.91, "learning_rate": 4.088432729039316e-06, "loss": 0.1894, "step": 2800 }, { "epoch": 0.91, "learning_rate": 4.087757254756254e-06, "loss": 0.2063, "step": 2801 }, { "epoch": 0.91, "learning_rate": 4.087081586147075e-06, "loss": 0.1846, "step": 2802 }, { "epoch": 0.91, "learning_rate": 4.086405723294474e-06, "loss": 0.2034, "step": 2803 }, { "epoch": 0.91, "learning_rate": 4.0857296662811696e-06, "loss": 0.1773, "step": 2804 }, { "epoch": 0.91, "learning_rate": 4.085053415189905e-06, "loss": 0.1734, "step": 2805 }, { "epoch": 0.91, "learning_rate": 4.084376970103448e-06, "loss": 0.1709, "step": 2806 }, { "epoch": 0.91, "learning_rate": 4.0837003311045865e-06, "loss": 0.1867, "step": 2807 }, { "epoch": 0.91, "learning_rate": 4.083023498276136e-06, "loss": 0.1699, "step": 2808 }, { "epoch": 0.91, "learning_rate": 4.082346471700935e-06, "loss": 0.1734, "step": 2809 }, { "epoch": 0.91, "learning_rate": 4.081669251461844e-06, "loss": 0.1782, "step": 2810 }, { "epoch": 0.91, "learning_rate": 4.080991837641748e-06, "loss": 0.1885, "step": 2811 }, { "epoch": 0.91, "learning_rate": 4.080314230323556e-06, "loss": 0.1949, "step": 2812 }, { "epoch": 0.91, "learning_rate": 4.079636429590201e-06, "loss": 0.169, "step": 2813 }, { "epoch": 0.91, "learning_rate": 4.07895843552464e-06, "loss": 0.1857, "step": 2814 }, { "epoch": 0.91, "learning_rate": 4.078280248209851e-06, "loss": 0.1702, "step": 2815 }, { "epoch": 0.91, "learning_rate": 4.077601867728839e-06, "loss": 0.1843, "step": 2816 }, { "epoch": 0.91, "learning_rate": 4.07692329416463e-06, "loss": 0.1528, "step": 2817 }, { "epoch": 0.91, "learning_rate": 4.0762445276002765e-06, "loss": 0.1788, "step": 2818 }, { "epoch": 0.91, "learning_rate": 4.075565568118852e-06, "loss": 0.2058, "step": 2819 }, { "epoch": 0.91, "learning_rate": 4.074886415803454e-06, "loss": 0.1816, "step": 2820 }, { "epoch": 0.91, "learning_rate": 4.074207070737205e-06, "loss": 0.1932, "step": 2821 }, { "epoch": 0.91, "learning_rate": 4.07352753300325e-06, "loss": 0.1948, "step": 2822 }, { "epoch": 0.91, "learning_rate": 4.072847802684758e-06, "loss": 0.1621, "step": 2823 }, { "epoch": 0.92, "learning_rate": 4.072167879864922e-06, "loss": 0.1666, "step": 2824 }, { "epoch": 0.92, "learning_rate": 4.071487764626957e-06, "loss": 0.1898, "step": 2825 }, { "epoch": 0.92, "learning_rate": 4.070807457054102e-06, "loss": 0.1951, "step": 2826 }, { "epoch": 0.92, "learning_rate": 4.070126957229622e-06, "loss": 0.1801, "step": 2827 }, { "epoch": 0.92, "learning_rate": 4.069446265236801e-06, "loss": 0.1868, "step": 2828 }, { "epoch": 0.92, "learning_rate": 4.068765381158951e-06, "loss": 0.1913, "step": 2829 }, { "epoch": 0.92, "learning_rate": 4.068084305079406e-06, "loss": 0.1785, "step": 2830 }, { "epoch": 0.92, "learning_rate": 4.067403037081522e-06, "loss": 0.1786, "step": 2831 }, { "epoch": 0.92, "learning_rate": 4.06672157724868e-06, "loss": 0.2106, "step": 2832 }, { "epoch": 0.92, "learning_rate": 4.066039925664283e-06, "loss": 0.1768, "step": 2833 }, { "epoch": 0.92, "learning_rate": 4.06535808241176e-06, "loss": 0.202, "step": 2834 }, { "epoch": 0.92, "learning_rate": 4.064676047574561e-06, "loss": 0.1969, "step": 2835 }, { "epoch": 0.92, "learning_rate": 4.063993821236162e-06, "loss": 0.1859, "step": 2836 }, { "epoch": 0.92, "learning_rate": 4.063311403480061e-06, "loss": 0.1967, "step": 2837 }, { "epoch": 0.92, "learning_rate": 4.0626287943897765e-06, "loss": 0.1966, "step": 2838 }, { "epoch": 0.92, "learning_rate": 4.061945994048855e-06, "loss": 0.1784, "step": 2839 }, { "epoch": 0.92, "learning_rate": 4.061263002540865e-06, "loss": 0.1777, "step": 2840 }, { "epoch": 0.92, "learning_rate": 4.060579819949398e-06, "loss": 0.1668, "step": 2841 }, { "epoch": 0.92, "learning_rate": 4.059896446358068e-06, "loss": 0.1656, "step": 2842 }, { "epoch": 0.92, "learning_rate": 4.059212881850515e-06, "loss": 0.1725, "step": 2843 }, { "epoch": 0.92, "learning_rate": 4.0585291265103985e-06, "loss": 0.1896, "step": 2844 }, { "epoch": 0.92, "learning_rate": 4.057845180421405e-06, "loss": 0.1762, "step": 2845 }, { "epoch": 0.92, "learning_rate": 4.057161043667243e-06, "loss": 0.1746, "step": 2846 }, { "epoch": 0.92, "learning_rate": 4.056476716331643e-06, "loss": 0.1771, "step": 2847 }, { "epoch": 0.92, "learning_rate": 4.05579219849836e-06, "loss": 0.1891, "step": 2848 }, { "epoch": 0.92, "learning_rate": 4.055107490251175e-06, "loss": 0.1827, "step": 2849 }, { "epoch": 0.92, "learning_rate": 4.054422591673887e-06, "loss": 0.1872, "step": 2850 }, { "epoch": 0.92, "learning_rate": 4.0537375028503225e-06, "loss": 0.1781, "step": 2851 }, { "epoch": 0.92, "learning_rate": 4.053052223864328e-06, "loss": 0.188, "step": 2852 }, { "epoch": 0.92, "learning_rate": 4.052366754799776e-06, "loss": 0.1813, "step": 2853 }, { "epoch": 0.92, "learning_rate": 4.051681095740561e-06, "loss": 0.1842, "step": 2854 }, { "epoch": 0.93, "learning_rate": 4.050995246770602e-06, "loss": 0.199, "step": 2855 }, { "epoch": 0.93, "learning_rate": 4.05030920797384e-06, "loss": 0.1596, "step": 2856 }, { "epoch": 0.93, "learning_rate": 4.049622979434239e-06, "loss": 0.1818, "step": 2857 }, { "epoch": 0.93, "learning_rate": 4.0489365612357854e-06, "loss": 0.1662, "step": 2858 }, { "epoch": 0.93, "learning_rate": 4.0482499534624934e-06, "loss": 0.1754, "step": 2859 }, { "epoch": 0.93, "learning_rate": 4.047563156198394e-06, "loss": 0.1734, "step": 2860 }, { "epoch": 0.93, "learning_rate": 4.046876169527547e-06, "loss": 0.1862, "step": 2861 }, { "epoch": 0.93, "learning_rate": 4.04618899353403e-06, "loss": 0.2002, "step": 2862 }, { "epoch": 0.93, "learning_rate": 4.04550162830195e-06, "loss": 0.1739, "step": 2863 }, { "epoch": 0.93, "learning_rate": 4.044814073915432e-06, "loss": 0.1842, "step": 2864 }, { "epoch": 0.93, "learning_rate": 4.044126330458626e-06, "loss": 0.1887, "step": 2865 }, { "epoch": 0.93, "learning_rate": 4.0434383980157055e-06, "loss": 0.1906, "step": 2866 }, { "epoch": 0.93, "learning_rate": 4.042750276670867e-06, "loss": 0.1869, "step": 2867 }, { "epoch": 0.93, "learning_rate": 4.04206196650833e-06, "loss": 0.171, "step": 2868 }, { "epoch": 0.93, "learning_rate": 4.041373467612337e-06, "loss": 0.1813, "step": 2869 }, { "epoch": 0.93, "learning_rate": 4.0406847800671515e-06, "loss": 0.1769, "step": 2870 }, { "epoch": 0.93, "learning_rate": 4.0399959039570646e-06, "loss": 0.1734, "step": 2871 }, { "epoch": 0.93, "learning_rate": 4.039306839366387e-06, "loss": 0.1864, "step": 2872 }, { "epoch": 0.93, "learning_rate": 4.038617586379455e-06, "loss": 0.1831, "step": 2873 }, { "epoch": 0.93, "learning_rate": 4.0379281450806255e-06, "loss": 0.1813, "step": 2874 }, { "epoch": 0.93, "learning_rate": 4.037238515554278e-06, "loss": 0.1772, "step": 2875 }, { "epoch": 0.93, "learning_rate": 4.0365486978848176e-06, "loss": 0.1681, "step": 2876 }, { "epoch": 0.93, "learning_rate": 4.035858692156673e-06, "loss": 0.1805, "step": 2877 }, { "epoch": 0.93, "learning_rate": 4.035168498454292e-06, "loss": 0.1886, "step": 2878 }, { "epoch": 0.93, "learning_rate": 4.034478116862149e-06, "loss": 0.1926, "step": 2879 }, { "epoch": 0.93, "learning_rate": 4.033787547464738e-06, "loss": 0.1979, "step": 2880 }, { "epoch": 0.93, "learning_rate": 4.033096790346581e-06, "loss": 0.1729, "step": 2881 }, { "epoch": 0.93, "learning_rate": 4.032405845592218e-06, "loss": 0.1896, "step": 2882 }, { "epoch": 0.93, "learning_rate": 4.0317147132862135e-06, "loss": 0.168, "step": 2883 }, { "epoch": 0.93, "learning_rate": 4.031023393513157e-06, "loss": 0.1969, "step": 2884 }, { "epoch": 0.93, "learning_rate": 4.030331886357659e-06, "loss": 0.1987, "step": 2885 }, { "epoch": 0.94, "learning_rate": 4.029640191904352e-06, "loss": 0.179, "step": 2886 }, { "epoch": 0.94, "learning_rate": 4.028948310237893e-06, "loss": 0.1819, "step": 2887 }, { "epoch": 0.94, "learning_rate": 4.0282562414429635e-06, "loss": 0.1786, "step": 2888 }, { "epoch": 0.94, "learning_rate": 4.027563985604264e-06, "loss": 0.2015, "step": 2889 }, { "epoch": 0.94, "learning_rate": 4.026871542806521e-06, "loss": 0.1862, "step": 2890 }, { "epoch": 0.94, "learning_rate": 4.026178913134482e-06, "loss": 0.1778, "step": 2891 }, { "epoch": 0.94, "learning_rate": 4.02548609667292e-06, "loss": 0.1888, "step": 2892 }, { "epoch": 0.94, "learning_rate": 4.024793093506626e-06, "loss": 0.16, "step": 2893 }, { "epoch": 0.94, "learning_rate": 4.024099903720419e-06, "loss": 0.1742, "step": 2894 }, { "epoch": 0.94, "learning_rate": 4.023406527399137e-06, "loss": 0.1689, "step": 2895 }, { "epoch": 0.94, "learning_rate": 4.022712964627645e-06, "loss": 0.1846, "step": 2896 }, { "epoch": 0.94, "learning_rate": 4.022019215490827e-06, "loss": 0.1786, "step": 2897 }, { "epoch": 0.94, "learning_rate": 4.021325280073592e-06, "loss": 0.1951, "step": 2898 }, { "epoch": 0.94, "learning_rate": 4.0206311584608705e-06, "loss": 0.1795, "step": 2899 }, { "epoch": 0.94, "learning_rate": 4.019936850737615e-06, "loss": 0.1628, "step": 2900 }, { "epoch": 0.94, "learning_rate": 4.019242356988803e-06, "loss": 0.1866, "step": 2901 }, { "epoch": 0.94, "learning_rate": 4.018547677299434e-06, "loss": 0.1757, "step": 2902 }, { "epoch": 0.94, "learning_rate": 4.01785281175453e-06, "loss": 0.1756, "step": 2903 }, { "epoch": 0.94, "learning_rate": 4.017157760439136e-06, "loss": 0.1899, "step": 2904 }, { "epoch": 0.94, "learning_rate": 4.01646252343832e-06, "loss": 0.1833, "step": 2905 }, { "epoch": 0.94, "learning_rate": 4.015767100837171e-06, "loss": 0.1994, "step": 2906 }, { "epoch": 0.94, "learning_rate": 4.015071492720802e-06, "loss": 0.1802, "step": 2907 }, { "epoch": 0.94, "learning_rate": 4.014375699174351e-06, "loss": 0.1682, "step": 2908 }, { "epoch": 0.94, "learning_rate": 4.013679720282973e-06, "loss": 0.2242, "step": 2909 }, { "epoch": 0.94, "learning_rate": 4.012983556131852e-06, "loss": 0.1815, "step": 2910 }, { "epoch": 0.94, "learning_rate": 4.01228720680619e-06, "loss": 0.2, "step": 2911 }, { "epoch": 0.94, "learning_rate": 4.011590672391213e-06, "loss": 0.1874, "step": 2912 }, { "epoch": 0.94, "learning_rate": 4.010893952972173e-06, "loss": 0.161, "step": 2913 }, { "epoch": 0.94, "learning_rate": 4.010197048634338e-06, "loss": 0.1842, "step": 2914 }, { "epoch": 0.94, "learning_rate": 4.009499959463005e-06, "loss": 0.1986, "step": 2915 }, { "epoch": 0.94, "learning_rate": 4.00880268554349e-06, "loss": 0.181, "step": 2916 }, { "epoch": 0.95, "learning_rate": 4.008105226961132e-06, "loss": 0.1706, "step": 2917 }, { "epoch": 0.95, "learning_rate": 4.007407583801295e-06, "loss": 0.174, "step": 2918 }, { "epoch": 0.95, "learning_rate": 4.006709756149362e-06, "loss": 0.1742, "step": 2919 }, { "epoch": 0.95, "learning_rate": 4.006011744090741e-06, "loss": 0.1799, "step": 2920 }, { "epoch": 0.95, "learning_rate": 4.005313547710861e-06, "loss": 0.1676, "step": 2921 }, { "epoch": 0.95, "learning_rate": 4.004615167095176e-06, "loss": 0.1812, "step": 2922 }, { "epoch": 0.95, "learning_rate": 4.003916602329161e-06, "loss": 0.1848, "step": 2923 }, { "epoch": 0.95, "learning_rate": 4.0032178534983115e-06, "loss": 0.1806, "step": 2924 }, { "epoch": 0.95, "learning_rate": 4.00251892068815e-06, "loss": 0.1695, "step": 2925 }, { "epoch": 0.95, "learning_rate": 4.001819803984218e-06, "loss": 0.1873, "step": 2926 }, { "epoch": 0.95, "learning_rate": 4.00112050347208e-06, "loss": 0.1805, "step": 2927 }, { "epoch": 0.95, "learning_rate": 4.000421019237326e-06, "loss": 0.1997, "step": 2928 }, { "epoch": 0.95, "learning_rate": 3.999721351365563e-06, "loss": 0.1837, "step": 2929 }, { "epoch": 0.95, "learning_rate": 3.999021499942425e-06, "loss": 0.1783, "step": 2930 }, { "epoch": 0.95, "learning_rate": 3.998321465053568e-06, "loss": 0.1847, "step": 2931 }, { "epoch": 0.95, "learning_rate": 3.9976212467846674e-06, "loss": 0.1821, "step": 2932 }, { "epoch": 0.95, "learning_rate": 3.996920845221425e-06, "loss": 0.187, "step": 2933 }, { "epoch": 0.95, "learning_rate": 3.996220260449563e-06, "loss": 0.176, "step": 2934 }, { "epoch": 0.95, "learning_rate": 3.9955194925548245e-06, "loss": 0.1678, "step": 2935 }, { "epoch": 0.95, "learning_rate": 3.994818541622979e-06, "loss": 0.1793, "step": 2936 }, { "epoch": 0.95, "learning_rate": 3.994117407739814e-06, "loss": 0.1986, "step": 2937 }, { "epoch": 0.95, "learning_rate": 3.993416090991143e-06, "loss": 0.1917, "step": 2938 }, { "epoch": 0.95, "learning_rate": 3.992714591462799e-06, "loss": 0.2039, "step": 2939 }, { "epoch": 0.95, "learning_rate": 3.992012909240641e-06, "loss": 0.1816, "step": 2940 }, { "epoch": 0.95, "learning_rate": 3.991311044410546e-06, "loss": 0.1726, "step": 2941 }, { "epoch": 0.95, "learning_rate": 3.990608997058416e-06, "loss": 0.1798, "step": 2942 }, { "epoch": 0.95, "learning_rate": 3.989906767270175e-06, "loss": 0.1724, "step": 2943 }, { "epoch": 0.95, "learning_rate": 3.989204355131769e-06, "loss": 0.1928, "step": 2944 }, { "epoch": 0.95, "learning_rate": 3.988501760729168e-06, "loss": 0.1976, "step": 2945 }, { "epoch": 0.95, "learning_rate": 3.98779898414836e-06, "loss": 0.185, "step": 2946 }, { "epoch": 0.95, "learning_rate": 3.98709602547536e-06, "loss": 0.1909, "step": 2947 }, { "epoch": 0.96, "learning_rate": 3.986392884796202e-06, "loss": 0.1709, "step": 2948 }, { "epoch": 0.96, "learning_rate": 3.9856895621969435e-06, "loss": 0.1781, "step": 2949 }, { "epoch": 0.96, "learning_rate": 3.984986057763667e-06, "loss": 0.1872, "step": 2950 }, { "epoch": 0.96, "learning_rate": 3.984282371582472e-06, "loss": 0.1646, "step": 2951 }, { "epoch": 0.96, "learning_rate": 3.983578503739483e-06, "loss": 0.1686, "step": 2952 }, { "epoch": 0.96, "learning_rate": 3.982874454320849e-06, "loss": 0.1767, "step": 2953 }, { "epoch": 0.96, "learning_rate": 3.982170223412735e-06, "loss": 0.1546, "step": 2954 }, { "epoch": 0.96, "learning_rate": 3.981465811101335e-06, "loss": 0.1786, "step": 2955 }, { "epoch": 0.96, "learning_rate": 3.9807612174728615e-06, "loss": 0.1785, "step": 2956 }, { "epoch": 0.96, "learning_rate": 3.98005644261355e-06, "loss": 0.1619, "step": 2957 }, { "epoch": 0.96, "learning_rate": 3.979351486609659e-06, "loss": 0.1832, "step": 2958 }, { "epoch": 0.96, "learning_rate": 3.978646349547466e-06, "loss": 0.184, "step": 2959 }, { "epoch": 0.96, "learning_rate": 3.977941031513275e-06, "loss": 0.1783, "step": 2960 }, { "epoch": 0.96, "learning_rate": 3.977235532593408e-06, "loss": 0.1698, "step": 2961 }, { "epoch": 0.96, "learning_rate": 3.976529852874214e-06, "loss": 0.206, "step": 2962 }, { "epoch": 0.96, "learning_rate": 3.975823992442058e-06, "loss": 0.1898, "step": 2963 }, { "epoch": 0.96, "learning_rate": 3.975117951383334e-06, "loss": 0.2017, "step": 2964 }, { "epoch": 0.96, "learning_rate": 3.974411729784453e-06, "loss": 0.1756, "step": 2965 }, { "epoch": 0.96, "learning_rate": 3.973705327731849e-06, "loss": 0.1762, "step": 2966 }, { "epoch": 0.96, "learning_rate": 3.97299874531198e-06, "loss": 0.1966, "step": 2967 }, { "epoch": 0.96, "learning_rate": 3.972291982611325e-06, "loss": 0.1654, "step": 2968 }, { "epoch": 0.96, "learning_rate": 3.971585039716382e-06, "loss": 0.1836, "step": 2969 }, { "epoch": 0.96, "learning_rate": 3.970877916713678e-06, "loss": 0.1722, "step": 2970 }, { "epoch": 0.96, "learning_rate": 3.9701706136897564e-06, "loss": 0.1898, "step": 2971 }, { "epoch": 0.96, "learning_rate": 3.969463130731183e-06, "loss": 0.1778, "step": 2972 }, { "epoch": 0.96, "learning_rate": 3.968755467924549e-06, "loss": 0.1862, "step": 2973 }, { "epoch": 0.96, "learning_rate": 3.968047625356463e-06, "loss": 0.1934, "step": 2974 }, { "epoch": 0.96, "learning_rate": 3.96733960311356e-06, "loss": 0.173, "step": 2975 }, { "epoch": 0.96, "learning_rate": 3.966631401282495e-06, "loss": 0.1812, "step": 2976 }, { "epoch": 0.96, "learning_rate": 3.965923019949944e-06, "loss": 0.1868, "step": 2977 }, { "epoch": 0.97, "learning_rate": 3.965214459202607e-06, "loss": 0.1712, "step": 2978 }, { "epoch": 0.97, "learning_rate": 3.964505719127205e-06, "loss": 0.1838, "step": 2979 }, { "epoch": 0.97, "learning_rate": 3.963796799810479e-06, "loss": 0.1877, "step": 2980 }, { "epoch": 0.97, "learning_rate": 3.9630877013391964e-06, "loss": 0.193, "step": 2981 }, { "epoch": 0.97, "learning_rate": 3.962378423800143e-06, "loss": 0.1736, "step": 2982 }, { "epoch": 0.97, "learning_rate": 3.961668967280128e-06, "loss": 0.2146, "step": 2983 }, { "epoch": 0.97, "learning_rate": 3.96095933186598e-06, "loss": 0.1732, "step": 2984 }, { "epoch": 0.97, "learning_rate": 3.960249517644553e-06, "loss": 0.1993, "step": 2985 }, { "epoch": 0.97, "learning_rate": 3.959539524702722e-06, "loss": 0.1645, "step": 2986 }, { "epoch": 0.97, "learning_rate": 3.958829353127383e-06, "loss": 0.1767, "step": 2987 }, { "epoch": 0.97, "learning_rate": 3.958119003005453e-06, "loss": 0.169, "step": 2988 }, { "epoch": 0.97, "learning_rate": 3.9574084744238735e-06, "loss": 0.1652, "step": 2989 }, { "epoch": 0.97, "learning_rate": 3.956697767469606e-06, "loss": 0.1925, "step": 2990 }, { "epoch": 0.97, "learning_rate": 3.955986882229632e-06, "loss": 0.1905, "step": 2991 }, { "epoch": 0.97, "learning_rate": 3.95527581879096e-06, "loss": 0.1887, "step": 2992 }, { "epoch": 0.97, "learning_rate": 3.954564577240615e-06, "loss": 0.177, "step": 2993 }, { "epoch": 0.97, "learning_rate": 3.9538531576656465e-06, "loss": 0.1728, "step": 2994 }, { "epoch": 0.97, "learning_rate": 3.953141560153128e-06, "loss": 0.1812, "step": 2995 }, { "epoch": 0.97, "learning_rate": 3.952429784790148e-06, "loss": 0.1848, "step": 2996 }, { "epoch": 0.97, "learning_rate": 3.951717831663825e-06, "loss": 0.181, "step": 2997 }, { "epoch": 0.97, "learning_rate": 3.951005700861291e-06, "loss": 0.1935, "step": 2998 }, { "epoch": 0.97, "learning_rate": 3.9502933924697076e-06, "loss": 0.1771, "step": 2999 }, { "epoch": 0.97, "learning_rate": 3.949580906576252e-06, "loss": 0.167, "step": 3000 }, { "epoch": 0.97, "learning_rate": 3.948868243268127e-06, "loss": 0.1961, "step": 3001 }, { "epoch": 0.97, "learning_rate": 3.948155402632554e-06, "loss": 0.1781, "step": 3002 }, { "epoch": 0.97, "learning_rate": 3.94744238475678e-06, "loss": 0.1725, "step": 3003 }, { "epoch": 0.97, "learning_rate": 3.94672918972807e-06, "loss": 0.1847, "step": 3004 }, { "epoch": 0.97, "learning_rate": 3.946015817633714e-06, "loss": 0.1934, "step": 3005 }, { "epoch": 0.97, "learning_rate": 3.945302268561019e-06, "loss": 0.2033, "step": 3006 }, { "epoch": 0.97, "learning_rate": 3.944588542597319e-06, "loss": 0.1711, "step": 3007 }, { "epoch": 0.97, "learning_rate": 3.943874639829964e-06, "loss": 0.1802, "step": 3008 }, { "epoch": 0.98, "learning_rate": 3.943160560346332e-06, "loss": 0.1739, "step": 3009 }, { "epoch": 0.98, "learning_rate": 3.942446304233819e-06, "loss": 0.1618, "step": 3010 }, { "epoch": 0.98, "learning_rate": 3.941731871579842e-06, "loss": 0.1829, "step": 3011 }, { "epoch": 0.98, "learning_rate": 3.94101726247184e-06, "loss": 0.2023, "step": 3012 }, { "epoch": 0.98, "learning_rate": 3.9403024769972766e-06, "loss": 0.172, "step": 3013 }, { "epoch": 0.98, "learning_rate": 3.939587515243632e-06, "loss": 0.1973, "step": 3014 }, { "epoch": 0.98, "learning_rate": 3.938872377298413e-06, "loss": 0.1788, "step": 3015 }, { "epoch": 0.98, "learning_rate": 3.938157063249144e-06, "loss": 0.1866, "step": 3016 }, { "epoch": 0.98, "learning_rate": 3.937441573183373e-06, "loss": 0.1753, "step": 3017 }, { "epoch": 0.98, "learning_rate": 3.936725907188668e-06, "loss": 0.2131, "step": 3018 }, { "epoch": 0.98, "learning_rate": 3.936010065352622e-06, "loss": 0.1644, "step": 3019 }, { "epoch": 0.98, "learning_rate": 3.935294047762844e-06, "loss": 0.1731, "step": 3020 }, { "epoch": 0.98, "learning_rate": 3.93457785450697e-06, "loss": 0.1919, "step": 3021 }, { "epoch": 0.98, "learning_rate": 3.933861485672656e-06, "loss": 0.1958, "step": 3022 }, { "epoch": 0.98, "learning_rate": 3.933144941347574e-06, "loss": 0.1756, "step": 3023 }, { "epoch": 0.98, "learning_rate": 3.932428221619427e-06, "loss": 0.1721, "step": 3024 }, { "epoch": 0.98, "learning_rate": 3.931711326575933e-06, "loss": 0.1767, "step": 3025 }, { "epoch": 0.98, "learning_rate": 3.9309942563048315e-06, "loss": 0.186, "step": 3026 }, { "epoch": 0.98, "learning_rate": 3.930277010893887e-06, "loss": 0.186, "step": 3027 }, { "epoch": 0.98, "learning_rate": 3.929559590430881e-06, "loss": 0.1713, "step": 3028 }, { "epoch": 0.98, "learning_rate": 3.928841995003622e-06, "loss": 0.1665, "step": 3029 }, { "epoch": 0.98, "learning_rate": 3.928124224699935e-06, "loss": 0.1871, "step": 3030 }, { "epoch": 0.98, "learning_rate": 3.927406279607668e-06, "loss": 0.1747, "step": 3031 }, { "epoch": 0.98, "learning_rate": 3.92668815981469e-06, "loss": 0.1864, "step": 3032 }, { "epoch": 0.98, "learning_rate": 3.925969865408893e-06, "loss": 0.1789, "step": 3033 }, { "epoch": 0.98, "learning_rate": 3.925251396478189e-06, "loss": 0.1807, "step": 3034 }, { "epoch": 0.98, "learning_rate": 3.9245327531105115e-06, "loss": 0.1808, "step": 3035 }, { "epoch": 0.98, "learning_rate": 3.923813935393816e-06, "loss": 0.1947, "step": 3036 }, { "epoch": 0.98, "learning_rate": 3.923094943416078e-06, "loss": 0.1861, "step": 3037 }, { "epoch": 0.98, "learning_rate": 3.922375777265296e-06, "loss": 0.1731, "step": 3038 }, { "epoch": 0.98, "learning_rate": 3.921656437029488e-06, "loss": 0.1749, "step": 3039 }, { "epoch": 0.99, "learning_rate": 3.9209369227966945e-06, "loss": 0.1781, "step": 3040 }, { "epoch": 0.99, "learning_rate": 3.920217234654978e-06, "loss": 0.1606, "step": 3041 }, { "epoch": 0.99, "learning_rate": 3.919497372692421e-06, "loss": 0.1695, "step": 3042 }, { "epoch": 0.99, "learning_rate": 3.918777336997127e-06, "loss": 0.1831, "step": 3043 }, { "epoch": 0.99, "learning_rate": 3.918057127657222e-06, "loss": 0.1683, "step": 3044 }, { "epoch": 0.99, "learning_rate": 3.9173367447608525e-06, "loss": 0.1892, "step": 3045 }, { "epoch": 0.99, "learning_rate": 3.916616188396185e-06, "loss": 0.1907, "step": 3046 }, { "epoch": 0.99, "learning_rate": 3.915895458651411e-06, "loss": 0.19, "step": 3047 }, { "epoch": 0.99, "learning_rate": 3.9151745556147404e-06, "loss": 0.1943, "step": 3048 }, { "epoch": 0.99, "learning_rate": 3.914453479374403e-06, "loss": 0.1737, "step": 3049 }, { "epoch": 0.99, "learning_rate": 3.913732230018654e-06, "loss": 0.1753, "step": 3050 }, { "epoch": 0.99, "learning_rate": 3.913010807635765e-06, "loss": 0.1803, "step": 3051 }, { "epoch": 0.99, "learning_rate": 3.9122892123140324e-06, "loss": 0.1754, "step": 3052 }, { "epoch": 0.99, "learning_rate": 3.911567444141771e-06, "loss": 0.1636, "step": 3053 }, { "epoch": 0.99, "learning_rate": 3.910845503207322e-06, "loss": 0.1813, "step": 3054 }, { "epoch": 0.99, "learning_rate": 3.9101233895990396e-06, "loss": 0.1736, "step": 3055 }, { "epoch": 0.99, "learning_rate": 3.909401103405307e-06, "loss": 0.1795, "step": 3056 }, { "epoch": 0.99, "learning_rate": 3.908678644714522e-06, "loss": 0.1919, "step": 3057 }, { "epoch": 0.99, "learning_rate": 3.907956013615108e-06, "loss": 0.1859, "step": 3058 }, { "epoch": 0.99, "learning_rate": 3.907233210195508e-06, "loss": 0.1844, "step": 3059 }, { "epoch": 0.99, "learning_rate": 3.906510234544186e-06, "loss": 0.1827, "step": 3060 }, { "epoch": 0.99, "learning_rate": 3.905787086749628e-06, "loss": 0.1903, "step": 3061 }, { "epoch": 0.99, "learning_rate": 3.90506376690034e-06, "loss": 0.1553, "step": 3062 }, { "epoch": 0.99, "learning_rate": 3.904340275084848e-06, "loss": 0.1801, "step": 3063 }, { "epoch": 0.99, "learning_rate": 3.9036166113917015e-06, "loss": 0.1762, "step": 3064 }, { "epoch": 0.99, "learning_rate": 3.90289277590947e-06, "loss": 0.1924, "step": 3065 }, { "epoch": 0.99, "learning_rate": 3.902168768726745e-06, "loss": 0.1755, "step": 3066 }, { "epoch": 0.99, "learning_rate": 3.9014445899321355e-06, "loss": 0.1768, "step": 3067 }, { "epoch": 0.99, "learning_rate": 3.900720239614275e-06, "loss": 0.1699, "step": 3068 }, { "epoch": 0.99, "learning_rate": 3.899995717861818e-06, "loss": 0.1755, "step": 3069 }, { "epoch": 0.99, "learning_rate": 3.899271024763438e-06, "loss": 0.1667, "step": 3070 }, { "epoch": 1.0, "learning_rate": 3.89854616040783e-06, "loss": 0.178, "step": 3071 }, { "epoch": 1.0, "learning_rate": 3.897821124883711e-06, "loss": 0.1826, "step": 3072 }, { "epoch": 1.0, "learning_rate": 3.897095918279818e-06, "loss": 0.194, "step": 3073 }, { "epoch": 1.0, "learning_rate": 3.896370540684911e-06, "loss": 0.1831, "step": 3074 }, { "epoch": 1.0, "learning_rate": 3.895644992187767e-06, "loss": 0.1958, "step": 3075 }, { "epoch": 1.0, "learning_rate": 3.894919272877187e-06, "loss": 0.1772, "step": 3076 }, { "epoch": 1.0, "learning_rate": 3.894193382841991e-06, "loss": 0.1748, "step": 3077 }, { "epoch": 1.0, "learning_rate": 3.893467322171022e-06, "loss": 0.1973, "step": 3078 }, { "epoch": 1.0, "learning_rate": 3.892741090953143e-06, "loss": 0.1727, "step": 3079 }, { "epoch": 1.0, "learning_rate": 3.892014689277238e-06, "loss": 0.1672, "step": 3080 }, { "epoch": 1.0, "learning_rate": 3.891288117232209e-06, "loss": 0.1696, "step": 3081 }, { "epoch": 1.0, "learning_rate": 3.890561374906985e-06, "loss": 0.1787, "step": 3082 }, { "epoch": 1.0, "learning_rate": 3.889834462390509e-06, "loss": 0.1786, "step": 3083 }, { "epoch": 1.0, "learning_rate": 3.889107379771749e-06, "loss": 0.1766, "step": 3084 }, { "epoch": 1.0, "learning_rate": 3.888380127139695e-06, "loss": 0.1878, "step": 3085 }, { "epoch": 1.0, "learning_rate": 3.887652704583354e-06, "loss": 0.1967, "step": 3086 }, { "epoch": 1.0, "learning_rate": 3.886925112191754e-06, "loss": 0.1804, "step": 3087 }, { "epoch": 1.0, "learning_rate": 3.886197350053948e-06, "loss": 0.1558, "step": 3088 }, { "epoch": 1.0, "learning_rate": 3.885469418259005e-06, "loss": 0.1629, "step": 3089 }, { "epoch": 1.0, "learning_rate": 3.8847413168960175e-06, "loss": 0.182, "step": 3090 }, { "epoch": 1.0, "learning_rate": 3.884013046054098e-06, "loss": 0.1486, "step": 3091 }, { "epoch": 1.0, "learning_rate": 3.8832846058223814e-06, "loss": 0.1488, "step": 3092 }, { "epoch": 1.0, "learning_rate": 3.882555996290019e-06, "loss": 0.1704, "step": 3093 }, { "epoch": 1.0, "learning_rate": 3.881827217546187e-06, "loss": 0.1603, "step": 3094 }, { "epoch": 1.0, "learning_rate": 3.881098269680081e-06, "loss": 0.1729, "step": 3095 }, { "epoch": 1.0, "learning_rate": 3.880369152780916e-06, "loss": 0.1781, "step": 3096 }, { "epoch": 1.0, "learning_rate": 3.879639866937931e-06, "loss": 0.1585, "step": 3097 }, { "epoch": 1.0, "learning_rate": 3.8789104122403815e-06, "loss": 0.149, "step": 3098 }, { "epoch": 1.0, "learning_rate": 3.878180788777546e-06, "loss": 0.1776, "step": 3099 }, { "epoch": 1.0, "learning_rate": 3.877450996638725e-06, "loss": 0.1658, "step": 3100 }, { "epoch": 1.0, "learning_rate": 3.876721035913236e-06, "loss": 0.1936, "step": 3101 }, { "epoch": 1.01, "learning_rate": 3.87599090669042e-06, "loss": 0.1793, "step": 3102 }, { "epoch": 1.01, "learning_rate": 3.875260609059638e-06, "loss": 0.1558, "step": 3103 }, { "epoch": 1.01, "learning_rate": 3.87453014311027e-06, "loss": 0.1741, "step": 3104 }, { "epoch": 1.01, "learning_rate": 3.87379950893172e-06, "loss": 0.1697, "step": 3105 }, { "epoch": 1.01, "learning_rate": 3.8730687066134086e-06, "loss": 0.1631, "step": 3106 }, { "epoch": 1.01, "learning_rate": 3.8723377362447805e-06, "loss": 0.1679, "step": 3107 }, { "epoch": 1.01, "learning_rate": 3.871606597915298e-06, "loss": 0.1665, "step": 3108 }, { "epoch": 1.01, "learning_rate": 3.870875291714448e-06, "loss": 0.1693, "step": 3109 }, { "epoch": 1.01, "learning_rate": 3.870143817731732e-06, "loss": 0.1655, "step": 3110 }, { "epoch": 1.01, "learning_rate": 3.8694121760566765e-06, "loss": 0.162, "step": 3111 }, { "epoch": 1.01, "learning_rate": 3.868680366778828e-06, "loss": 0.1599, "step": 3112 }, { "epoch": 1.01, "learning_rate": 3.867948389987752e-06, "loss": 0.1785, "step": 3113 }, { "epoch": 1.01, "learning_rate": 3.8672162457730365e-06, "loss": 0.166, "step": 3114 }, { "epoch": 1.01, "learning_rate": 3.866483934224288e-06, "loss": 0.1651, "step": 3115 }, { "epoch": 1.01, "learning_rate": 3.865751455431134e-06, "loss": 0.1494, "step": 3116 }, { "epoch": 1.01, "learning_rate": 3.865018809483224e-06, "loss": 0.1969, "step": 3117 }, { "epoch": 1.01, "learning_rate": 3.864285996470226e-06, "loss": 0.1689, "step": 3118 }, { "epoch": 1.01, "learning_rate": 3.863553016481829e-06, "loss": 0.1696, "step": 3119 }, { "epoch": 1.01, "learning_rate": 3.862819869607743e-06, "loss": 0.1663, "step": 3120 }, { "epoch": 1.01, "learning_rate": 3.862086555937699e-06, "loss": 0.1798, "step": 3121 }, { "epoch": 1.01, "learning_rate": 3.861353075561446e-06, "loss": 0.169, "step": 3122 }, { "epoch": 1.01, "learning_rate": 3.860619428568756e-06, "loss": 0.1843, "step": 3123 }, { "epoch": 1.01, "learning_rate": 3.859885615049419e-06, "loss": 0.1755, "step": 3124 }, { "epoch": 1.01, "learning_rate": 3.8591516350932476e-06, "loss": 0.1678, "step": 3125 }, { "epoch": 1.01, "learning_rate": 3.8584174887900735e-06, "loss": 0.1554, "step": 3126 }, { "epoch": 1.01, "learning_rate": 3.8576831762297495e-06, "loss": 0.1667, "step": 3127 }, { "epoch": 1.01, "learning_rate": 3.856948697502148e-06, "loss": 0.1586, "step": 3128 }, { "epoch": 1.01, "learning_rate": 3.8562140526971625e-06, "loss": 0.166, "step": 3129 }, { "epoch": 1.01, "learning_rate": 3.855479241904705e-06, "loss": 0.1681, "step": 3130 }, { "epoch": 1.01, "learning_rate": 3.8547442652147115e-06, "loss": 0.1712, "step": 3131 }, { "epoch": 1.01, "learning_rate": 3.854009122717135e-06, "loss": 0.1832, "step": 3132 }, { "epoch": 1.02, "learning_rate": 3.8532738145019484e-06, "loss": 0.1814, "step": 3133 }, { "epoch": 1.02, "learning_rate": 3.852538340659149e-06, "loss": 0.1609, "step": 3134 }, { "epoch": 1.02, "learning_rate": 3.85180270127875e-06, "loss": 0.1647, "step": 3135 }, { "epoch": 1.02, "learning_rate": 3.851066896450787e-06, "loss": 0.1592, "step": 3136 }, { "epoch": 1.02, "learning_rate": 3.850330926265314e-06, "loss": 0.1731, "step": 3137 }, { "epoch": 1.02, "learning_rate": 3.849594790812409e-06, "loss": 0.1616, "step": 3138 }, { "epoch": 1.02, "learning_rate": 3.848858490182167e-06, "loss": 0.1895, "step": 3139 }, { "epoch": 1.02, "learning_rate": 3.8481220244647025e-06, "loss": 0.1706, "step": 3140 }, { "epoch": 1.02, "learning_rate": 3.847385393750154e-06, "loss": 0.1618, "step": 3141 }, { "epoch": 1.02, "learning_rate": 3.846648598128677e-06, "loss": 0.1793, "step": 3142 }, { "epoch": 1.02, "learning_rate": 3.8459116376904475e-06, "loss": 0.1657, "step": 3143 }, { "epoch": 1.02, "learning_rate": 3.8451745125256635e-06, "loss": 0.1722, "step": 3144 }, { "epoch": 1.02, "learning_rate": 3.8444372227245415e-06, "loss": 0.1752, "step": 3145 }, { "epoch": 1.02, "learning_rate": 3.843699768377318e-06, "loss": 0.1512, "step": 3146 }, { "epoch": 1.02, "learning_rate": 3.842962149574252e-06, "loss": 0.1858, "step": 3147 }, { "epoch": 1.02, "learning_rate": 3.842224366405619e-06, "loss": 0.1734, "step": 3148 }, { "epoch": 1.02, "learning_rate": 3.841486418961717e-06, "loss": 0.1598, "step": 3149 }, { "epoch": 1.02, "learning_rate": 3.840748307332865e-06, "loss": 0.1651, "step": 3150 }, { "epoch": 1.02, "learning_rate": 3.840010031609398e-06, "loss": 0.1687, "step": 3151 }, { "epoch": 1.02, "learning_rate": 3.8392715918816755e-06, "loss": 0.1632, "step": 3152 }, { "epoch": 1.02, "learning_rate": 3.838532988240077e-06, "loss": 0.161, "step": 3153 }, { "epoch": 1.02, "learning_rate": 3.837794220774998e-06, "loss": 0.1736, "step": 3154 }, { "epoch": 1.02, "learning_rate": 3.8370552895768565e-06, "loss": 0.1682, "step": 3155 }, { "epoch": 1.02, "learning_rate": 3.836316194736093e-06, "loss": 0.1757, "step": 3156 }, { "epoch": 1.02, "learning_rate": 3.835576936343162e-06, "loss": 0.154, "step": 3157 }, { "epoch": 1.02, "learning_rate": 3.8348375144885445e-06, "loss": 0.1758, "step": 3158 }, { "epoch": 1.02, "learning_rate": 3.834097929262737e-06, "loss": 0.1671, "step": 3159 }, { "epoch": 1.02, "learning_rate": 3.833358180756258e-06, "loss": 0.1606, "step": 3160 }, { "epoch": 1.02, "learning_rate": 3.832618269059645e-06, "loss": 0.1799, "step": 3161 }, { "epoch": 1.02, "learning_rate": 3.831878194263458e-06, "loss": 0.1568, "step": 3162 }, { "epoch": 1.02, "learning_rate": 3.831137956458272e-06, "loss": 0.1484, "step": 3163 }, { "epoch": 1.03, "learning_rate": 3.830397555734687e-06, "loss": 0.1775, "step": 3164 }, { "epoch": 1.03, "learning_rate": 3.8296569921833214e-06, "loss": 0.1827, "step": 3165 }, { "epoch": 1.03, "learning_rate": 3.8289162658948114e-06, "loss": 0.1647, "step": 3166 }, { "epoch": 1.03, "learning_rate": 3.828175376959815e-06, "loss": 0.1527, "step": 3167 }, { "epoch": 1.03, "learning_rate": 3.827434325469011e-06, "loss": 0.1827, "step": 3168 }, { "epoch": 1.03, "learning_rate": 3.8266931115130955e-06, "loss": 0.1706, "step": 3169 }, { "epoch": 1.03, "learning_rate": 3.8259517351827866e-06, "loss": 0.1658, "step": 3170 }, { "epoch": 1.03, "learning_rate": 3.825210196568823e-06, "loss": 0.176, "step": 3171 }, { "epoch": 1.03, "learning_rate": 3.824468495761958e-06, "loss": 0.1809, "step": 3172 }, { "epoch": 1.03, "learning_rate": 3.823726632852972e-06, "loss": 0.158, "step": 3173 }, { "epoch": 1.03, "learning_rate": 3.822984607932661e-06, "loss": 0.1673, "step": 3174 }, { "epoch": 1.03, "learning_rate": 3.8222424210918404e-06, "loss": 0.1707, "step": 3175 }, { "epoch": 1.03, "learning_rate": 3.821500072421349e-06, "loss": 0.1545, "step": 3176 }, { "epoch": 1.03, "learning_rate": 3.820757562012042e-06, "loss": 0.1825, "step": 3177 }, { "epoch": 1.03, "learning_rate": 3.820014889954794e-06, "loss": 0.1706, "step": 3178 }, { "epoch": 1.03, "learning_rate": 3.819272056340504e-06, "loss": 0.1522, "step": 3179 }, { "epoch": 1.03, "learning_rate": 3.818529061260084e-06, "loss": 0.1652, "step": 3180 }, { "epoch": 1.03, "learning_rate": 3.817785904804473e-06, "loss": 0.1625, "step": 3181 }, { "epoch": 1.03, "learning_rate": 3.817042587064623e-06, "loss": 0.145, "step": 3182 }, { "epoch": 1.03, "learning_rate": 3.81629910813151e-06, "loss": 0.1606, "step": 3183 }, { "epoch": 1.03, "learning_rate": 3.815555468096131e-06, "loss": 0.1701, "step": 3184 }, { "epoch": 1.03, "learning_rate": 3.814811667049497e-06, "loss": 0.1683, "step": 3185 }, { "epoch": 1.03, "learning_rate": 3.814067705082643e-06, "loss": 0.1953, "step": 3186 }, { "epoch": 1.03, "learning_rate": 3.8133235822866234e-06, "loss": 0.1655, "step": 3187 }, { "epoch": 1.03, "learning_rate": 3.812579298752511e-06, "loss": 0.1685, "step": 3188 }, { "epoch": 1.03, "learning_rate": 3.8118348545714e-06, "loss": 0.1757, "step": 3189 }, { "epoch": 1.03, "learning_rate": 3.8110902498344023e-06, "loss": 0.1766, "step": 3190 }, { "epoch": 1.03, "learning_rate": 3.8103454846326493e-06, "loss": 0.1561, "step": 3191 }, { "epoch": 1.03, "learning_rate": 3.809600559057295e-06, "loss": 0.1732, "step": 3192 }, { "epoch": 1.03, "learning_rate": 3.80885547319951e-06, "loss": 0.1649, "step": 3193 }, { "epoch": 1.03, "learning_rate": 3.808110227150485e-06, "loss": 0.1727, "step": 3194 }, { "epoch": 1.04, "learning_rate": 3.8073648210014323e-06, "loss": 0.191, "step": 3195 }, { "epoch": 1.04, "learning_rate": 3.806619254843582e-06, "loss": 0.1535, "step": 3196 }, { "epoch": 1.04, "learning_rate": 3.8058735287681835e-06, "loss": 0.185, "step": 3197 }, { "epoch": 1.04, "learning_rate": 3.8051276428665074e-06, "loss": 0.1602, "step": 3198 }, { "epoch": 1.04, "learning_rate": 3.8043815972298424e-06, "loss": 0.1732, "step": 3199 }, { "epoch": 1.04, "learning_rate": 3.8036353919494973e-06, "loss": 0.1639, "step": 3200 }, { "epoch": 1.04, "learning_rate": 3.8028890271168e-06, "loss": 0.1657, "step": 3201 }, { "epoch": 1.04, "learning_rate": 3.8021425028230994e-06, "loss": 0.1726, "step": 3202 }, { "epoch": 1.04, "learning_rate": 3.801395819159761e-06, "loss": 0.1535, "step": 3203 }, { "epoch": 1.04, "learning_rate": 3.8006489762181744e-06, "loss": 0.1749, "step": 3204 }, { "epoch": 1.04, "learning_rate": 3.7999019740897423e-06, "loss": 0.1603, "step": 3205 }, { "epoch": 1.04, "learning_rate": 3.799154812865894e-06, "loss": 0.1671, "step": 3206 }, { "epoch": 1.04, "learning_rate": 3.7984074926380733e-06, "loss": 0.1619, "step": 3207 }, { "epoch": 1.04, "learning_rate": 3.7976600134977455e-06, "loss": 0.1666, "step": 3208 }, { "epoch": 1.04, "learning_rate": 3.7969123755363935e-06, "loss": 0.1586, "step": 3209 }, { "epoch": 1.04, "learning_rate": 3.7961645788455225e-06, "loss": 0.1752, "step": 3210 }, { "epoch": 1.04, "learning_rate": 3.7954166235166545e-06, "loss": 0.1725, "step": 3211 }, { "epoch": 1.04, "learning_rate": 3.794668509641332e-06, "loss": 0.1543, "step": 3212 }, { "epoch": 1.04, "learning_rate": 3.793920237311118e-06, "loss": 0.1626, "step": 3213 }, { "epoch": 1.04, "learning_rate": 3.793171806617593e-06, "loss": 0.1682, "step": 3214 }, { "epoch": 1.04, "learning_rate": 3.7924232176523574e-06, "loss": 0.1762, "step": 3215 }, { "epoch": 1.04, "learning_rate": 3.7916744705070318e-06, "loss": 0.1587, "step": 3216 }, { "epoch": 1.04, "learning_rate": 3.790925565273255e-06, "loss": 0.1651, "step": 3217 }, { "epoch": 1.04, "learning_rate": 3.790176502042686e-06, "loss": 0.1605, "step": 3218 }, { "epoch": 1.04, "learning_rate": 3.789427280907004e-06, "loss": 0.1634, "step": 3219 }, { "epoch": 1.04, "learning_rate": 3.7886779019579045e-06, "loss": 0.1608, "step": 3220 }, { "epoch": 1.04, "learning_rate": 3.787928365287106e-06, "loss": 0.1704, "step": 3221 }, { "epoch": 1.04, "learning_rate": 3.7871786709863435e-06, "loss": 0.1496, "step": 3222 }, { "epoch": 1.04, "learning_rate": 3.7864288191473718e-06, "loss": 0.1642, "step": 3223 }, { "epoch": 1.04, "learning_rate": 3.7856788098619667e-06, "loss": 0.169, "step": 3224 }, { "epoch": 1.05, "learning_rate": 3.7849286432219216e-06, "loss": 0.1639, "step": 3225 }, { "epoch": 1.05, "learning_rate": 3.78417831931905e-06, "loss": 0.1651, "step": 3226 }, { "epoch": 1.05, "learning_rate": 3.783427838245184e-06, "loss": 0.1887, "step": 3227 }, { "epoch": 1.05, "learning_rate": 3.7826772000921742e-06, "loss": 0.1714, "step": 3228 }, { "epoch": 1.05, "learning_rate": 3.781926404951893e-06, "loss": 0.1659, "step": 3229 }, { "epoch": 1.05, "learning_rate": 3.7811754529162294e-06, "loss": 0.1695, "step": 3230 }, { "epoch": 1.05, "learning_rate": 3.7804243440770936e-06, "loss": 0.1809, "step": 3231 }, { "epoch": 1.05, "learning_rate": 3.779673078526414e-06, "loss": 0.1537, "step": 3232 }, { "epoch": 1.05, "learning_rate": 3.7789216563561373e-06, "loss": 0.165, "step": 3233 }, { "epoch": 1.05, "learning_rate": 3.778170077658231e-06, "loss": 0.1657, "step": 3234 }, { "epoch": 1.05, "learning_rate": 3.77741834252468e-06, "loss": 0.1613, "step": 3235 }, { "epoch": 1.05, "learning_rate": 3.7766664510474903e-06, "loss": 0.1572, "step": 3236 }, { "epoch": 1.05, "learning_rate": 3.775914403318687e-06, "loss": 0.1888, "step": 3237 }, { "epoch": 1.05, "learning_rate": 3.7751621994303123e-06, "loss": 0.1672, "step": 3238 }, { "epoch": 1.05, "learning_rate": 3.7744098394744287e-06, "loss": 0.1632, "step": 3239 }, { "epoch": 1.05, "learning_rate": 3.7736573235431174e-06, "loss": 0.165, "step": 3240 }, { "epoch": 1.05, "learning_rate": 3.7729046517284805e-06, "loss": 0.1843, "step": 3241 }, { "epoch": 1.05, "learning_rate": 3.7721518241226375e-06, "loss": 0.1744, "step": 3242 }, { "epoch": 1.05, "learning_rate": 3.771398840817725e-06, "loss": 0.1691, "step": 3243 }, { "epoch": 1.05, "learning_rate": 3.770645701905904e-06, "loss": 0.1768, "step": 3244 }, { "epoch": 1.05, "learning_rate": 3.7698924074793484e-06, "loss": 0.1742, "step": 3245 }, { "epoch": 1.05, "learning_rate": 3.7691389576302567e-06, "loss": 0.1809, "step": 3246 }, { "epoch": 1.05, "learning_rate": 3.768385352450842e-06, "loss": 0.1635, "step": 3247 }, { "epoch": 1.05, "learning_rate": 3.7676315920333396e-06, "loss": 0.1735, "step": 3248 }, { "epoch": 1.05, "learning_rate": 3.7668776764700023e-06, "loss": 0.1575, "step": 3249 }, { "epoch": 1.05, "learning_rate": 3.766123605853101e-06, "loss": 0.1436, "step": 3250 }, { "epoch": 1.05, "learning_rate": 3.765369380274928e-06, "loss": 0.1608, "step": 3251 }, { "epoch": 1.05, "learning_rate": 3.7646149998277924e-06, "loss": 0.1678, "step": 3252 }, { "epoch": 1.05, "learning_rate": 3.7638604646040232e-06, "loss": 0.1603, "step": 3253 }, { "epoch": 1.05, "learning_rate": 3.763105774695968e-06, "loss": 0.1771, "step": 3254 }, { "epoch": 1.05, "learning_rate": 3.7623509301959935e-06, "loss": 0.1594, "step": 3255 }, { "epoch": 1.06, "learning_rate": 3.7615959311964865e-06, "loss": 0.1748, "step": 3256 }, { "epoch": 1.06, "learning_rate": 3.760840777789851e-06, "loss": 0.1699, "step": 3257 }, { "epoch": 1.06, "learning_rate": 3.7600854700685095e-06, "loss": 0.1666, "step": 3258 }, { "epoch": 1.06, "learning_rate": 3.759330008124905e-06, "loss": 0.1773, "step": 3259 }, { "epoch": 1.06, "learning_rate": 3.7585743920514985e-06, "loss": 0.1741, "step": 3260 }, { "epoch": 1.06, "learning_rate": 3.757818621940771e-06, "loss": 0.1758, "step": 3261 }, { "epoch": 1.06, "learning_rate": 3.7570626978852203e-06, "loss": 0.1725, "step": 3262 }, { "epoch": 1.06, "learning_rate": 3.7563066199773645e-06, "loss": 0.161, "step": 3263 }, { "epoch": 1.06, "learning_rate": 3.7555503883097414e-06, "loss": 0.1696, "step": 3264 }, { "epoch": 1.06, "learning_rate": 3.7547940029749054e-06, "loss": 0.1538, "step": 3265 }, { "epoch": 1.06, "learning_rate": 3.75403746406543e-06, "loss": 0.1653, "step": 3266 }, { "epoch": 1.06, "learning_rate": 3.7532807716739082e-06, "loss": 0.1488, "step": 3267 }, { "epoch": 1.06, "learning_rate": 3.752523925892954e-06, "loss": 0.1706, "step": 3268 }, { "epoch": 1.06, "learning_rate": 3.7517669268151967e-06, "loss": 0.1775, "step": 3269 }, { "epoch": 1.06, "learning_rate": 3.751009774533285e-06, "loss": 0.1621, "step": 3270 }, { "epoch": 1.06, "learning_rate": 3.7502524691398877e-06, "loss": 0.1511, "step": 3271 }, { "epoch": 1.06, "learning_rate": 3.7494950107276917e-06, "loss": 0.1665, "step": 3272 }, { "epoch": 1.06, "learning_rate": 3.7487373993894027e-06, "loss": 0.1511, "step": 3273 }, { "epoch": 1.06, "learning_rate": 3.7479796352177445e-06, "loss": 0.1541, "step": 3274 }, { "epoch": 1.06, "learning_rate": 3.7472217183054605e-06, "loss": 0.1796, "step": 3275 }, { "epoch": 1.06, "learning_rate": 3.7464636487453122e-06, "loss": 0.1783, "step": 3276 }, { "epoch": 1.06, "learning_rate": 3.74570542663008e-06, "loss": 0.1689, "step": 3277 }, { "epoch": 1.06, "learning_rate": 3.744947052052562e-06, "loss": 0.1677, "step": 3278 }, { "epoch": 1.06, "learning_rate": 3.7441885251055774e-06, "loss": 0.1519, "step": 3279 }, { "epoch": 1.06, "learning_rate": 3.7434298458819622e-06, "loss": 0.2038, "step": 3280 }, { "epoch": 1.06, "learning_rate": 3.7426710144745717e-06, "loss": 0.1498, "step": 3281 }, { "epoch": 1.06, "learning_rate": 3.7419120309762787e-06, "loss": 0.1556, "step": 3282 }, { "epoch": 1.06, "learning_rate": 3.7411528954799752e-06, "loss": 0.1809, "step": 3283 }, { "epoch": 1.06, "learning_rate": 3.740393608078573e-06, "loss": 0.1562, "step": 3284 }, { "epoch": 1.06, "learning_rate": 3.739634168865001e-06, "loss": 0.1507, "step": 3285 }, { "epoch": 1.06, "learning_rate": 3.738874577932208e-06, "loss": 0.1542, "step": 3286 }, { "epoch": 1.07, "learning_rate": 3.738114835373159e-06, "loss": 0.1719, "step": 3287 }, { "epoch": 1.07, "learning_rate": 3.73735494128084e-06, "loss": 0.1601, "step": 3288 }, { "epoch": 1.07, "learning_rate": 3.736594895748255e-06, "loss": 0.149, "step": 3289 }, { "epoch": 1.07, "learning_rate": 3.7358346988684258e-06, "loss": 0.17, "step": 3290 }, { "epoch": 1.07, "learning_rate": 3.735074350734393e-06, "loss": 0.172, "step": 3291 }, { "epoch": 1.07, "learning_rate": 3.734313851439217e-06, "loss": 0.1663, "step": 3292 }, { "epoch": 1.07, "learning_rate": 3.7335532010759747e-06, "loss": 0.1526, "step": 3293 }, { "epoch": 1.07, "learning_rate": 3.732792399737761e-06, "loss": 0.1721, "step": 3294 }, { "epoch": 1.07, "learning_rate": 3.7320314475176933e-06, "loss": 0.1633, "step": 3295 }, { "epoch": 1.07, "learning_rate": 3.731270344508903e-06, "loss": 0.1661, "step": 3296 }, { "epoch": 1.07, "learning_rate": 3.7305090908045422e-06, "loss": 0.1557, "step": 3297 }, { "epoch": 1.07, "learning_rate": 3.7297476864977805e-06, "loss": 0.1854, "step": 3298 }, { "epoch": 1.07, "learning_rate": 3.7289861316818077e-06, "loss": 0.1748, "step": 3299 }, { "epoch": 1.07, "learning_rate": 3.728224426449829e-06, "loss": 0.1729, "step": 3300 }, { "epoch": 1.07, "learning_rate": 3.7274625708950706e-06, "loss": 0.1519, "step": 3301 }, { "epoch": 1.07, "learning_rate": 3.7267005651107763e-06, "loss": 0.174, "step": 3302 }, { "epoch": 1.07, "learning_rate": 3.7259384091902085e-06, "loss": 0.1633, "step": 3303 }, { "epoch": 1.07, "learning_rate": 3.7251761032266475e-06, "loss": 0.1517, "step": 3304 }, { "epoch": 1.07, "learning_rate": 3.7244136473133924e-06, "loss": 0.1608, "step": 3305 }, { "epoch": 1.07, "learning_rate": 3.7236510415437598e-06, "loss": 0.1547, "step": 3306 }, { "epoch": 1.07, "learning_rate": 3.7228882860110856e-06, "loss": 0.1686, "step": 3307 }, { "epoch": 1.07, "learning_rate": 3.7221253808087234e-06, "loss": 0.1629, "step": 3308 }, { "epoch": 1.07, "learning_rate": 3.721362326030046e-06, "loss": 0.1751, "step": 3309 }, { "epoch": 1.07, "learning_rate": 3.720599121768443e-06, "loss": 0.1714, "step": 3310 }, { "epoch": 1.07, "learning_rate": 3.7198357681173247e-06, "loss": 0.1629, "step": 3311 }, { "epoch": 1.07, "learning_rate": 3.7190722651701166e-06, "loss": 0.1613, "step": 3312 }, { "epoch": 1.07, "learning_rate": 3.718308613020265e-06, "loss": 0.1711, "step": 3313 }, { "epoch": 1.07, "learning_rate": 3.717544811761233e-06, "loss": 0.177, "step": 3314 }, { "epoch": 1.07, "learning_rate": 3.716780861486503e-06, "loss": 0.1678, "step": 3315 }, { "epoch": 1.07, "learning_rate": 3.716016762289576e-06, "loss": 0.1796, "step": 3316 }, { "epoch": 1.07, "learning_rate": 3.7152525142639682e-06, "loss": 0.1796, "step": 3317 }, { "epoch": 1.08, "learning_rate": 3.7144881175032178e-06, "loss": 0.1774, "step": 3318 }, { "epoch": 1.08, "learning_rate": 3.713723572100878e-06, "loss": 0.1478, "step": 3319 }, { "epoch": 1.08, "learning_rate": 3.7129588781505232e-06, "loss": 0.1482, "step": 3320 }, { "epoch": 1.08, "learning_rate": 3.7121940357457438e-06, "loss": 0.1633, "step": 3321 }, { "epoch": 1.08, "learning_rate": 3.7114290449801493e-06, "loss": 0.1597, "step": 3322 }, { "epoch": 1.08, "learning_rate": 3.7106639059473675e-06, "loss": 0.1707, "step": 3323 }, { "epoch": 1.08, "learning_rate": 3.7098986187410447e-06, "loss": 0.1644, "step": 3324 }, { "epoch": 1.08, "learning_rate": 3.7091331834548427e-06, "loss": 0.1542, "step": 3325 }, { "epoch": 1.08, "learning_rate": 3.7083676001824443e-06, "loss": 0.1525, "step": 3326 }, { "epoch": 1.08, "learning_rate": 3.70760186901755e-06, "loss": 0.1679, "step": 3327 }, { "epoch": 1.08, "learning_rate": 3.706835990053877e-06, "loss": 0.1786, "step": 3328 }, { "epoch": 1.08, "learning_rate": 3.7060699633851615e-06, "loss": 0.1754, "step": 3329 }, { "epoch": 1.08, "learning_rate": 3.7053037891051596e-06, "loss": 0.175, "step": 3330 }, { "epoch": 1.08, "learning_rate": 3.704537467307641e-06, "loss": 0.1689, "step": 3331 }, { "epoch": 1.08, "learning_rate": 3.7037709980863974e-06, "loss": 0.1658, "step": 3332 }, { "epoch": 1.08, "learning_rate": 3.703004381535237e-06, "loss": 0.163, "step": 3333 }, { "epoch": 1.08, "learning_rate": 3.7022376177479863e-06, "loss": 0.1685, "step": 3334 }, { "epoch": 1.08, "learning_rate": 3.7014707068184895e-06, "loss": 0.1671, "step": 3335 }, { "epoch": 1.08, "learning_rate": 3.70070364884061e-06, "loss": 0.1535, "step": 3336 }, { "epoch": 1.08, "learning_rate": 3.6999364439082274e-06, "loss": 0.1741, "step": 3337 }, { "epoch": 1.08, "learning_rate": 3.6991690921152407e-06, "loss": 0.1875, "step": 3338 }, { "epoch": 1.08, "learning_rate": 3.698401593555565e-06, "loss": 0.1686, "step": 3339 }, { "epoch": 1.08, "learning_rate": 3.697633948323136e-06, "loss": 0.1695, "step": 3340 }, { "epoch": 1.08, "learning_rate": 3.6968661565119062e-06, "loss": 0.1606, "step": 3341 }, { "epoch": 1.08, "learning_rate": 3.6960982182158458e-06, "loss": 0.1782, "step": 3342 }, { "epoch": 1.08, "learning_rate": 3.6953301335289415e-06, "loss": 0.162, "step": 3343 }, { "epoch": 1.08, "learning_rate": 3.6945619025452006e-06, "loss": 0.1808, "step": 3344 }, { "epoch": 1.08, "learning_rate": 3.6937935253586475e-06, "loss": 0.1655, "step": 3345 }, { "epoch": 1.08, "learning_rate": 3.6930250020633237e-06, "loss": 0.1568, "step": 3346 }, { "epoch": 1.08, "learning_rate": 3.692256332753289e-06, "loss": 0.158, "step": 3347 }, { "epoch": 1.08, "learning_rate": 3.691487517522621e-06, "loss": 0.171, "step": 3348 }, { "epoch": 1.09, "learning_rate": 3.690718556465416e-06, "loss": 0.1596, "step": 3349 }, { "epoch": 1.09, "learning_rate": 3.689949449675786e-06, "loss": 0.1745, "step": 3350 }, { "epoch": 1.09, "learning_rate": 3.689180197247863e-06, "loss": 0.1843, "step": 3351 }, { "epoch": 1.09, "learning_rate": 3.688410799275796e-06, "loss": 0.1752, "step": 3352 }, { "epoch": 1.09, "learning_rate": 3.6876412558537524e-06, "loss": 0.184, "step": 3353 }, { "epoch": 1.09, "learning_rate": 3.686871567075916e-06, "loss": 0.1506, "step": 3354 }, { "epoch": 1.09, "learning_rate": 3.6861017330364897e-06, "loss": 0.1667, "step": 3355 }, { "epoch": 1.09, "learning_rate": 3.685331753829693e-06, "loss": 0.1805, "step": 3356 }, { "epoch": 1.09, "learning_rate": 3.684561629549765e-06, "loss": 0.1761, "step": 3357 }, { "epoch": 1.09, "learning_rate": 3.6837913602909615e-06, "loss": 0.1684, "step": 3358 }, { "epoch": 1.09, "learning_rate": 3.6830209461475554e-06, "loss": 0.1606, "step": 3359 }, { "epoch": 1.09, "learning_rate": 3.6822503872138377e-06, "loss": 0.1589, "step": 3360 }, { "epoch": 1.09, "learning_rate": 3.6814796835841172e-06, "loss": 0.1568, "step": 3361 }, { "epoch": 1.09, "learning_rate": 3.6807088353527216e-06, "loss": 0.159, "step": 3362 }, { "epoch": 1.09, "learning_rate": 3.6799378426139942e-06, "loss": 0.172, "step": 3363 }, { "epoch": 1.09, "learning_rate": 3.679166705462298e-06, "loss": 0.1716, "step": 3364 }, { "epoch": 1.09, "learning_rate": 3.6783954239920118e-06, "loss": 0.159, "step": 3365 }, { "epoch": 1.09, "learning_rate": 3.677623998297534e-06, "loss": 0.1593, "step": 3366 }, { "epoch": 1.09, "learning_rate": 3.6768524284732794e-06, "loss": 0.1658, "step": 3367 }, { "epoch": 1.09, "learning_rate": 3.6760807146136796e-06, "loss": 0.1735, "step": 3368 }, { "epoch": 1.09, "learning_rate": 3.675308856813186e-06, "loss": 0.1739, "step": 3369 }, { "epoch": 1.09, "learning_rate": 3.6745368551662663e-06, "loss": 0.1601, "step": 3370 }, { "epoch": 1.09, "learning_rate": 3.6737647097674056e-06, "loss": 0.1665, "step": 3371 }, { "epoch": 1.09, "learning_rate": 3.6729924207111077e-06, "loss": 0.1654, "step": 3372 }, { "epoch": 1.09, "learning_rate": 3.6722199880918928e-06, "loss": 0.1677, "step": 3373 }, { "epoch": 1.09, "learning_rate": 3.6714474120042993e-06, "loss": 0.1645, "step": 3374 }, { "epoch": 1.09, "learning_rate": 3.6706746925428833e-06, "loss": 0.1723, "step": 3375 }, { "epoch": 1.09, "learning_rate": 3.6699018298022173e-06, "loss": 0.163, "step": 3376 }, { "epoch": 1.09, "learning_rate": 3.6691288238768928e-06, "loss": 0.1633, "step": 3377 }, { "epoch": 1.09, "learning_rate": 3.6683556748615196e-06, "loss": 0.1699, "step": 3378 }, { "epoch": 1.09, "learning_rate": 3.667582382850721e-06, "loss": 0.1866, "step": 3379 }, { "epoch": 1.1, "learning_rate": 3.6668089479391433e-06, "loss": 0.1675, "step": 3380 }, { "epoch": 1.1, "learning_rate": 3.666035370221445e-06, "loss": 0.1749, "step": 3381 }, { "epoch": 1.1, "learning_rate": 3.665261649792305e-06, "loss": 0.1563, "step": 3382 }, { "epoch": 1.1, "learning_rate": 3.66448778674642e-06, "loss": 0.1692, "step": 3383 }, { "epoch": 1.1, "learning_rate": 3.663713781178504e-06, "loss": 0.1804, "step": 3384 }, { "epoch": 1.1, "learning_rate": 3.6629396331832854e-06, "loss": 0.1529, "step": 3385 }, { "epoch": 1.1, "learning_rate": 3.6621653428555144e-06, "loss": 0.1482, "step": 3386 }, { "epoch": 1.1, "learning_rate": 3.661390910289956e-06, "loss": 0.1733, "step": 3387 }, { "epoch": 1.1, "learning_rate": 3.6606163355813935e-06, "loss": 0.1645, "step": 3388 }, { "epoch": 1.1, "learning_rate": 3.6598416188246265e-06, "loss": 0.1717, "step": 3389 }, { "epoch": 1.1, "learning_rate": 3.6590667601144748e-06, "loss": 0.169, "step": 3390 }, { "epoch": 1.1, "learning_rate": 3.6582917595457718e-06, "loss": 0.1856, "step": 3391 }, { "epoch": 1.1, "learning_rate": 3.6575166172133703e-06, "loss": 0.1647, "step": 3392 }, { "epoch": 1.1, "learning_rate": 3.6567413332121402e-06, "loss": 0.1637, "step": 3393 }, { "epoch": 1.1, "learning_rate": 3.655965907636969e-06, "loss": 0.1679, "step": 3394 }, { "epoch": 1.1, "learning_rate": 3.6551903405827615e-06, "loss": 0.1562, "step": 3395 }, { "epoch": 1.1, "learning_rate": 3.6544146321444397e-06, "loss": 0.1621, "step": 3396 }, { "epoch": 1.1, "learning_rate": 3.653638782416943e-06, "loss": 0.1732, "step": 3397 }, { "epoch": 1.1, "learning_rate": 3.6528627914952263e-06, "loss": 0.175, "step": 3398 }, { "epoch": 1.1, "learning_rate": 3.652086659474265e-06, "loss": 0.1798, "step": 3399 }, { "epoch": 1.1, "learning_rate": 3.6513103864490497e-06, "loss": 0.157, "step": 3400 }, { "epoch": 1.1, "learning_rate": 3.650533972514589e-06, "loss": 0.1715, "step": 3401 }, { "epoch": 1.1, "learning_rate": 3.6497574177659073e-06, "loss": 0.1574, "step": 3402 }, { "epoch": 1.1, "learning_rate": 3.6489807222980487e-06, "loss": 0.154, "step": 3403 }, { "epoch": 1.1, "learning_rate": 3.648203886206073e-06, "loss": 0.1757, "step": 3404 }, { "epoch": 1.1, "learning_rate": 3.6474269095850568e-06, "loss": 0.1642, "step": 3405 }, { "epoch": 1.1, "learning_rate": 3.646649792530094e-06, "loss": 0.153, "step": 3406 }, { "epoch": 1.1, "learning_rate": 3.645872535136298e-06, "loss": 0.1679, "step": 3407 }, { "epoch": 1.1, "learning_rate": 3.6450951374987958e-06, "loss": 0.1612, "step": 3408 }, { "epoch": 1.1, "learning_rate": 3.6443175997127354e-06, "loss": 0.1712, "step": 3409 }, { "epoch": 1.1, "learning_rate": 3.6435399218732776e-06, "loss": 0.1523, "step": 3410 }, { "epoch": 1.11, "learning_rate": 3.642762104075604e-06, "loss": 0.1817, "step": 3411 }, { "epoch": 1.11, "learning_rate": 3.641984146414912e-06, "loss": 0.1562, "step": 3412 }, { "epoch": 1.11, "learning_rate": 3.6412060489864155e-06, "loss": 0.1469, "step": 3413 }, { "epoch": 1.11, "learning_rate": 3.640427811885346e-06, "loss": 0.1599, "step": 3414 }, { "epoch": 1.11, "learning_rate": 3.639649435206953e-06, "loss": 0.1861, "step": 3415 }, { "epoch": 1.11, "learning_rate": 3.6388709190465018e-06, "loss": 0.1744, "step": 3416 }, { "epoch": 1.11, "learning_rate": 3.638092263499274e-06, "loss": 0.1693, "step": 3417 }, { "epoch": 1.11, "learning_rate": 3.6373134686605722e-06, "loss": 0.1531, "step": 3418 }, { "epoch": 1.11, "learning_rate": 3.6365345346257112e-06, "loss": 0.1672, "step": 3419 }, { "epoch": 1.11, "learning_rate": 3.635755461490026e-06, "loss": 0.1703, "step": 3420 }, { "epoch": 1.11, "learning_rate": 3.634976249348867e-06, "loss": 0.1723, "step": 3421 }, { "epoch": 1.11, "learning_rate": 3.6341968982976027e-06, "loss": 0.1586, "step": 3422 }, { "epoch": 1.11, "learning_rate": 3.6334174084316186e-06, "loss": 0.1719, "step": 3423 }, { "epoch": 1.11, "learning_rate": 3.632637779846315e-06, "loss": 0.1602, "step": 3424 }, { "epoch": 1.11, "learning_rate": 3.6318580126371124e-06, "loss": 0.1544, "step": 3425 }, { "epoch": 1.11, "learning_rate": 3.631078106899446e-06, "loss": 0.1562, "step": 3426 }, { "epoch": 1.11, "learning_rate": 3.630298062728769e-06, "loss": 0.1433, "step": 3427 }, { "epoch": 1.11, "learning_rate": 3.6295178802205515e-06, "loss": 0.1544, "step": 3428 }, { "epoch": 1.11, "learning_rate": 3.62873755947028e-06, "loss": 0.1564, "step": 3429 }, { "epoch": 1.11, "learning_rate": 3.6279571005734583e-06, "loss": 0.1674, "step": 3430 }, { "epoch": 1.11, "learning_rate": 3.6271765036256064e-06, "loss": 0.1684, "step": 3431 }, { "epoch": 1.11, "learning_rate": 3.6263957687222633e-06, "loss": 0.1634, "step": 3432 }, { "epoch": 1.11, "learning_rate": 3.625614895958982e-06, "loss": 0.1747, "step": 3433 }, { "epoch": 1.11, "learning_rate": 3.624833885431334e-06, "loss": 0.171, "step": 3434 }, { "epoch": 1.11, "learning_rate": 3.624052737234908e-06, "loss": 0.1606, "step": 3435 }, { "epoch": 1.11, "learning_rate": 3.6232714514653082e-06, "loss": 0.1795, "step": 3436 }, { "epoch": 1.11, "learning_rate": 3.6224900282181574e-06, "loss": 0.159, "step": 3437 }, { "epoch": 1.11, "learning_rate": 3.6217084675890935e-06, "loss": 0.1816, "step": 3438 }, { "epoch": 1.11, "learning_rate": 3.6209267696737723e-06, "loss": 0.1672, "step": 3439 }, { "epoch": 1.11, "learning_rate": 3.6201449345678657e-06, "loss": 0.1612, "step": 3440 }, { "epoch": 1.12, "learning_rate": 3.6193629623670627e-06, "loss": 0.1734, "step": 3441 }, { "epoch": 1.12, "learning_rate": 3.6185808531670695e-06, "loss": 0.1627, "step": 3442 }, { "epoch": 1.12, "learning_rate": 3.617798607063609e-06, "loss": 0.1683, "step": 3443 }, { "epoch": 1.12, "learning_rate": 3.61701622415242e-06, "loss": 0.1545, "step": 3444 }, { "epoch": 1.12, "learning_rate": 3.616233704529259e-06, "loss": 0.1642, "step": 3445 }, { "epoch": 1.12, "learning_rate": 3.6154510482898973e-06, "loss": 0.1643, "step": 3446 }, { "epoch": 1.12, "learning_rate": 3.6146682555301266e-06, "loss": 0.1768, "step": 3447 }, { "epoch": 1.12, "learning_rate": 3.613885326345752e-06, "loss": 0.1564, "step": 3448 }, { "epoch": 1.12, "learning_rate": 3.6131022608325973e-06, "loss": 0.1675, "step": 3449 }, { "epoch": 1.12, "learning_rate": 3.6123190590865e-06, "loss": 0.1885, "step": 3450 }, { "epoch": 1.12, "learning_rate": 3.6115357212033196e-06, "loss": 0.166, "step": 3451 }, { "epoch": 1.12, "learning_rate": 3.610752247278927e-06, "loss": 0.1809, "step": 3452 }, { "epoch": 1.12, "learning_rate": 3.609968637409212e-06, "loss": 0.1737, "step": 3453 }, { "epoch": 1.12, "learning_rate": 3.6091848916900816e-06, "loss": 0.172, "step": 3454 }, { "epoch": 1.12, "learning_rate": 3.6084010102174576e-06, "loss": 0.1666, "step": 3455 }, { "epoch": 1.12, "learning_rate": 3.6076169930872805e-06, "loss": 0.1732, "step": 3456 }, { "epoch": 1.12, "learning_rate": 3.606832840395506e-06, "loss": 0.1749, "step": 3457 }, { "epoch": 1.12, "learning_rate": 3.6060485522381067e-06, "loss": 0.1609, "step": 3458 }, { "epoch": 1.12, "learning_rate": 3.605264128711072e-06, "loss": 0.1595, "step": 3459 }, { "epoch": 1.12, "learning_rate": 3.6044795699104074e-06, "loss": 0.1752, "step": 3460 }, { "epoch": 1.12, "learning_rate": 3.6036948759321357e-06, "loss": 0.1748, "step": 3461 }, { "epoch": 1.12, "learning_rate": 3.6029100468722954e-06, "loss": 0.1798, "step": 3462 }, { "epoch": 1.12, "learning_rate": 3.602125082826944e-06, "loss": 0.1852, "step": 3463 }, { "epoch": 1.12, "learning_rate": 3.60133998389215e-06, "loss": 0.1651, "step": 3464 }, { "epoch": 1.12, "learning_rate": 3.600554750164005e-06, "loss": 0.1668, "step": 3465 }, { "epoch": 1.12, "learning_rate": 3.5997693817386128e-06, "loss": 0.1614, "step": 3466 }, { "epoch": 1.12, "learning_rate": 3.598983878712094e-06, "loss": 0.1737, "step": 3467 }, { "epoch": 1.12, "learning_rate": 3.598198241180588e-06, "loss": 0.1778, "step": 3468 }, { "epoch": 1.12, "learning_rate": 3.597412469240248e-06, "loss": 0.1774, "step": 3469 }, { "epoch": 1.12, "learning_rate": 3.5966265629872466e-06, "loss": 0.1642, "step": 3470 }, { "epoch": 1.12, "learning_rate": 3.595840522517769e-06, "loss": 0.1693, "step": 3471 }, { "epoch": 1.13, "learning_rate": 3.5950543479280205e-06, "loss": 0.1694, "step": 3472 }, { "epoch": 1.13, "learning_rate": 3.5942680393142203e-06, "loss": 0.1761, "step": 3473 }, { "epoch": 1.13, "learning_rate": 3.593481596772606e-06, "loss": 0.1629, "step": 3474 }, { "epoch": 1.13, "learning_rate": 3.5926950203994303e-06, "loss": 0.183, "step": 3475 }, { "epoch": 1.13, "learning_rate": 3.5919083102909615e-06, "loss": 0.1408, "step": 3476 }, { "epoch": 1.13, "learning_rate": 3.591121466543487e-06, "loss": 0.1663, "step": 3477 }, { "epoch": 1.13, "learning_rate": 3.5903344892533067e-06, "loss": 0.159, "step": 3478 }, { "epoch": 1.13, "learning_rate": 3.5895473785167407e-06, "loss": 0.1803, "step": 3479 }, { "epoch": 1.13, "learning_rate": 3.5887601344301228e-06, "loss": 0.1574, "step": 3480 }, { "epoch": 1.13, "learning_rate": 3.587972757089805e-06, "loss": 0.1799, "step": 3481 }, { "epoch": 1.13, "learning_rate": 3.587185246592154e-06, "loss": 0.1828, "step": 3482 }, { "epoch": 1.13, "learning_rate": 3.5863976030335535e-06, "loss": 0.1716, "step": 3483 }, { "epoch": 1.13, "learning_rate": 3.5856098265104033e-06, "loss": 0.1596, "step": 3484 }, { "epoch": 1.13, "learning_rate": 3.58482191711912e-06, "loss": 0.1615, "step": 3485 }, { "epoch": 1.13, "learning_rate": 3.5840338749561365e-06, "loss": 0.1614, "step": 3486 }, { "epoch": 1.13, "learning_rate": 3.5832457001179e-06, "loss": 0.1684, "step": 3487 }, { "epoch": 1.13, "learning_rate": 3.582457392700878e-06, "loss": 0.173, "step": 3488 }, { "epoch": 1.13, "learning_rate": 3.5816689528015485e-06, "loss": 0.1777, "step": 3489 }, { "epoch": 1.13, "learning_rate": 3.580880380516411e-06, "loss": 0.1843, "step": 3490 }, { "epoch": 1.13, "learning_rate": 3.5800916759419784e-06, "loss": 0.1742, "step": 3491 }, { "epoch": 1.13, "learning_rate": 3.579302839174781e-06, "loss": 0.1767, "step": 3492 }, { "epoch": 1.13, "learning_rate": 3.578513870311365e-06, "loss": 0.1623, "step": 3493 }, { "epoch": 1.13, "learning_rate": 3.577724769448292e-06, "loss": 0.1558, "step": 3494 }, { "epoch": 1.13, "learning_rate": 3.57693553668214e-06, "loss": 0.1798, "step": 3495 }, { "epoch": 1.13, "learning_rate": 3.5761461721095037e-06, "loss": 0.1666, "step": 3496 }, { "epoch": 1.13, "learning_rate": 3.575356675826995e-06, "loss": 0.1727, "step": 3497 }, { "epoch": 1.13, "learning_rate": 3.574567047931238e-06, "loss": 0.165, "step": 3498 }, { "epoch": 1.13, "learning_rate": 3.5737772885188777e-06, "loss": 0.1562, "step": 3499 }, { "epoch": 1.13, "learning_rate": 3.5729873976865726e-06, "loss": 0.1638, "step": 3500 }, { "epoch": 1.13, "learning_rate": 3.5721973755309963e-06, "loss": 0.1843, "step": 3501 }, { "epoch": 1.13, "learning_rate": 3.5714072221488414e-06, "loss": 0.1678, "step": 3502 }, { "epoch": 1.14, "learning_rate": 3.5706169376368143e-06, "loss": 0.1692, "step": 3503 }, { "epoch": 1.14, "learning_rate": 3.5698265220916388e-06, "loss": 0.1665, "step": 3504 }, { "epoch": 1.14, "learning_rate": 3.5690359756100532e-06, "loss": 0.1701, "step": 3505 }, { "epoch": 1.14, "learning_rate": 3.5682452982888143e-06, "loss": 0.1653, "step": 3506 }, { "epoch": 1.14, "learning_rate": 3.5674544902246916e-06, "loss": 0.1633, "step": 3507 }, { "epoch": 1.14, "learning_rate": 3.566663551514473e-06, "loss": 0.1738, "step": 3508 }, { "epoch": 1.14, "learning_rate": 3.5658724822549624e-06, "loss": 0.1721, "step": 3509 }, { "epoch": 1.14, "learning_rate": 3.5650812825429774e-06, "loss": 0.1576, "step": 3510 }, { "epoch": 1.14, "learning_rate": 3.5642899524753548e-06, "loss": 0.1666, "step": 3511 }, { "epoch": 1.14, "learning_rate": 3.5634984921489455e-06, "loss": 0.1779, "step": 3512 }, { "epoch": 1.14, "learning_rate": 3.562706901660616e-06, "loss": 0.1672, "step": 3513 }, { "epoch": 1.14, "learning_rate": 3.561915181107249e-06, "loss": 0.1457, "step": 3514 }, { "epoch": 1.14, "learning_rate": 3.561123330585744e-06, "loss": 0.1655, "step": 3515 }, { "epoch": 1.14, "learning_rate": 3.560331350193016e-06, "loss": 0.1798, "step": 3516 }, { "epoch": 1.14, "learning_rate": 3.5595392400259963e-06, "loss": 0.1669, "step": 3517 }, { "epoch": 1.14, "learning_rate": 3.55874700018163e-06, "loss": 0.1697, "step": 3518 }, { "epoch": 1.14, "learning_rate": 3.5579546307568807e-06, "loss": 0.1817, "step": 3519 }, { "epoch": 1.14, "learning_rate": 3.557162131848726e-06, "loss": 0.1626, "step": 3520 }, { "epoch": 1.14, "learning_rate": 3.5563695035541607e-06, "loss": 0.1773, "step": 3521 }, { "epoch": 1.14, "learning_rate": 3.5555767459701946e-06, "loss": 0.185, "step": 3522 }, { "epoch": 1.14, "learning_rate": 3.554783859193853e-06, "loss": 0.1576, "step": 3523 }, { "epoch": 1.14, "learning_rate": 3.5539908433221793e-06, "loss": 0.1742, "step": 3524 }, { "epoch": 1.14, "learning_rate": 3.553197698452229e-06, "loss": 0.1659, "step": 3525 }, { "epoch": 1.14, "learning_rate": 3.5524044246810764e-06, "loss": 0.1623, "step": 3526 }, { "epoch": 1.14, "learning_rate": 3.5516110221058096e-06, "loss": 0.18, "step": 3527 }, { "epoch": 1.14, "learning_rate": 3.550817490823535e-06, "loss": 0.156, "step": 3528 }, { "epoch": 1.14, "learning_rate": 3.5500238309313717e-06, "loss": 0.1637, "step": 3529 }, { "epoch": 1.14, "learning_rate": 3.5492300425264574e-06, "loss": 0.1572, "step": 3530 }, { "epoch": 1.14, "learning_rate": 3.5484361257059425e-06, "loss": 0.1796, "step": 3531 }, { "epoch": 1.14, "learning_rate": 3.5476420805669953e-06, "loss": 0.1717, "step": 3532 }, { "epoch": 1.14, "learning_rate": 3.5468479072067996e-06, "loss": 0.1609, "step": 3533 }, { "epoch": 1.15, "learning_rate": 3.5460536057225542e-06, "loss": 0.1486, "step": 3534 }, { "epoch": 1.15, "learning_rate": 3.545259176211474e-06, "loss": 0.1563, "step": 3535 }, { "epoch": 1.15, "learning_rate": 3.5444646187707897e-06, "loss": 0.1644, "step": 3536 }, { "epoch": 1.15, "learning_rate": 3.5436699334977476e-06, "loss": 0.176, "step": 3537 }, { "epoch": 1.15, "learning_rate": 3.5428751204896083e-06, "loss": 0.163, "step": 3538 }, { "epoch": 1.15, "learning_rate": 3.542080179843651e-06, "loss": 0.1681, "step": 3539 }, { "epoch": 1.15, "learning_rate": 3.5412851116571673e-06, "loss": 0.1763, "step": 3540 }, { "epoch": 1.15, "learning_rate": 3.5404899160274664e-06, "loss": 0.1796, "step": 3541 }, { "epoch": 1.15, "learning_rate": 3.5396945930518722e-06, "loss": 0.1546, "step": 3542 }, { "epoch": 1.15, "learning_rate": 3.538899142827726e-06, "loss": 0.1755, "step": 3543 }, { "epoch": 1.15, "learning_rate": 3.538103565452381e-06, "loss": 0.152, "step": 3544 }, { "epoch": 1.15, "learning_rate": 3.537307861023209e-06, "loss": 0.148, "step": 3545 }, { "epoch": 1.15, "learning_rate": 3.536512029637597e-06, "loss": 0.1762, "step": 3546 }, { "epoch": 1.15, "learning_rate": 3.5357160713929473e-06, "loss": 0.1528, "step": 3547 }, { "epoch": 1.15, "learning_rate": 3.534919986386676e-06, "loss": 0.1783, "step": 3548 }, { "epoch": 1.15, "learning_rate": 3.5341237747162183e-06, "loss": 0.1572, "step": 3549 }, { "epoch": 1.15, "learning_rate": 3.533327436479021e-06, "loss": 0.1717, "step": 3550 }, { "epoch": 1.15, "learning_rate": 3.53253097177255e-06, "loss": 0.1707, "step": 3551 }, { "epoch": 1.15, "learning_rate": 3.531734380694282e-06, "loss": 0.1416, "step": 3552 }, { "epoch": 1.15, "learning_rate": 3.5309376633417146e-06, "loss": 0.1927, "step": 3553 }, { "epoch": 1.15, "learning_rate": 3.530140819812357e-06, "loss": 0.1524, "step": 3554 }, { "epoch": 1.15, "learning_rate": 3.5293438502037363e-06, "loss": 0.1659, "step": 3555 }, { "epoch": 1.15, "learning_rate": 3.5285467546133926e-06, "loss": 0.1631, "step": 3556 }, { "epoch": 1.15, "learning_rate": 3.5277495331388835e-06, "loss": 0.1596, "step": 3557 }, { "epoch": 1.15, "learning_rate": 3.526952185877781e-06, "loss": 0.1697, "step": 3558 }, { "epoch": 1.15, "learning_rate": 3.526154712927672e-06, "loss": 0.16, "step": 3559 }, { "epoch": 1.15, "learning_rate": 3.525357114386161e-06, "loss": 0.1604, "step": 3560 }, { "epoch": 1.15, "learning_rate": 3.524559390350865e-06, "loss": 0.1556, "step": 3561 }, { "epoch": 1.15, "learning_rate": 3.523761540919418e-06, "loss": 0.1591, "step": 3562 }, { "epoch": 1.15, "learning_rate": 3.5229635661894696e-06, "loss": 0.1415, "step": 3563 }, { "epoch": 1.15, "learning_rate": 3.5221654662586837e-06, "loss": 0.168, "step": 3564 }, { "epoch": 1.16, "learning_rate": 3.521367241224739e-06, "loss": 0.1674, "step": 3565 }, { "epoch": 1.16, "learning_rate": 3.5205688911853326e-06, "loss": 0.162, "step": 3566 }, { "epoch": 1.16, "learning_rate": 3.5197704162381742e-06, "loss": 0.1576, "step": 3567 }, { "epoch": 1.16, "learning_rate": 3.5189718164809884e-06, "loss": 0.1603, "step": 3568 }, { "epoch": 1.16, "learning_rate": 3.5181730920115165e-06, "loss": 0.1587, "step": 3569 }, { "epoch": 1.16, "learning_rate": 3.517374242927514e-06, "loss": 0.1598, "step": 3570 }, { "epoch": 1.16, "learning_rate": 3.516575269326755e-06, "loss": 0.1725, "step": 3571 }, { "epoch": 1.16, "learning_rate": 3.515776171307023e-06, "loss": 0.1532, "step": 3572 }, { "epoch": 1.16, "learning_rate": 3.5149769489661216e-06, "loss": 0.1465, "step": 3573 }, { "epoch": 1.16, "learning_rate": 3.5141776024018676e-06, "loss": 0.1748, "step": 3574 }, { "epoch": 1.16, "learning_rate": 3.513378131712092e-06, "loss": 0.1715, "step": 3575 }, { "epoch": 1.16, "learning_rate": 3.5125785369946442e-06, "loss": 0.1781, "step": 3576 }, { "epoch": 1.16, "learning_rate": 3.5117788183473856e-06, "loss": 0.1575, "step": 3577 }, { "epoch": 1.16, "learning_rate": 3.5109789758681944e-06, "loss": 0.1458, "step": 3578 }, { "epoch": 1.16, "learning_rate": 3.5101790096549643e-06, "loss": 0.1642, "step": 3579 }, { "epoch": 1.16, "learning_rate": 3.509378919805602e-06, "loss": 0.1865, "step": 3580 }, { "epoch": 1.16, "learning_rate": 3.5085787064180317e-06, "loss": 0.1621, "step": 3581 }, { "epoch": 1.16, "learning_rate": 3.5077783695901917e-06, "loss": 0.1612, "step": 3582 }, { "epoch": 1.16, "learning_rate": 3.506977909420035e-06, "loss": 0.168, "step": 3583 }, { "epoch": 1.16, "learning_rate": 3.506177326005531e-06, "loss": 0.1583, "step": 3584 }, { "epoch": 1.16, "learning_rate": 3.5053766194446626e-06, "loss": 0.138, "step": 3585 }, { "epoch": 1.16, "learning_rate": 3.504575789835428e-06, "loss": 0.1681, "step": 3586 }, { "epoch": 1.16, "learning_rate": 3.503774837275843e-06, "loss": 0.1756, "step": 3587 }, { "epoch": 1.16, "learning_rate": 3.5029737618639344e-06, "loss": 0.1601, "step": 3588 }, { "epoch": 1.16, "learning_rate": 3.5021725636977466e-06, "loss": 0.172, "step": 3589 }, { "epoch": 1.16, "learning_rate": 3.5013712428753392e-06, "loss": 0.1629, "step": 3590 }, { "epoch": 1.16, "learning_rate": 3.500569799494786e-06, "loss": 0.1866, "step": 3591 }, { "epoch": 1.16, "learning_rate": 3.4997682336541756e-06, "loss": 0.1579, "step": 3592 }, { "epoch": 1.16, "learning_rate": 3.498966545451612e-06, "loss": 0.1951, "step": 3593 }, { "epoch": 1.16, "learning_rate": 3.4981647349852137e-06, "loss": 0.17, "step": 3594 }, { "epoch": 1.16, "learning_rate": 3.4973628023531146e-06, "loss": 0.1818, "step": 3595 }, { "epoch": 1.17, "learning_rate": 3.496560747653464e-06, "loss": 0.1877, "step": 3596 }, { "epoch": 1.17, "learning_rate": 3.4957585709844254e-06, "loss": 0.1617, "step": 3597 }, { "epoch": 1.17, "learning_rate": 3.494956272444177e-06, "loss": 0.1706, "step": 3598 }, { "epoch": 1.17, "learning_rate": 3.494153852130913e-06, "loss": 0.1656, "step": 3599 }, { "epoch": 1.17, "learning_rate": 3.4933513101428416e-06, "loss": 0.1463, "step": 3600 }, { "epoch": 1.17, "learning_rate": 3.4925486465781865e-06, "loss": 0.174, "step": 3601 }, { "epoch": 1.17, "learning_rate": 3.4917458615351853e-06, "loss": 0.1687, "step": 3602 }, { "epoch": 1.17, "learning_rate": 3.490942955112092e-06, "loss": 0.165, "step": 3603 }, { "epoch": 1.17, "learning_rate": 3.490139927407174e-06, "loss": 0.1803, "step": 3604 }, { "epoch": 1.17, "learning_rate": 3.4893367785187137e-06, "loss": 0.1688, "step": 3605 }, { "epoch": 1.17, "learning_rate": 3.4885335085450095e-06, "loss": 0.1627, "step": 3606 }, { "epoch": 1.17, "learning_rate": 3.4877301175843735e-06, "loss": 0.1685, "step": 3607 }, { "epoch": 1.17, "learning_rate": 3.486926605735133e-06, "loss": 0.161, "step": 3608 }, { "epoch": 1.17, "learning_rate": 3.486122973095631e-06, "loss": 0.1575, "step": 3609 }, { "epoch": 1.17, "learning_rate": 3.4853192197642226e-06, "loss": 0.1288, "step": 3610 }, { "epoch": 1.17, "learning_rate": 3.48451534583928e-06, "loss": 0.1716, "step": 3611 }, { "epoch": 1.17, "learning_rate": 3.4837113514191907e-06, "loss": 0.1649, "step": 3612 }, { "epoch": 1.17, "learning_rate": 3.482907236602354e-06, "loss": 0.1594, "step": 3613 }, { "epoch": 1.17, "learning_rate": 3.4821030014871886e-06, "loss": 0.167, "step": 3614 }, { "epoch": 1.17, "learning_rate": 3.481298646172122e-06, "loss": 0.164, "step": 3615 }, { "epoch": 1.17, "learning_rate": 3.480494170755602e-06, "loss": 0.1629, "step": 3616 }, { "epoch": 1.17, "learning_rate": 3.479689575336086e-06, "loss": 0.163, "step": 3617 }, { "epoch": 1.17, "learning_rate": 3.4788848600120507e-06, "loss": 0.1679, "step": 3618 }, { "epoch": 1.17, "learning_rate": 3.4780800248819847e-06, "loss": 0.1633, "step": 3619 }, { "epoch": 1.17, "learning_rate": 3.4772750700443923e-06, "loss": 0.1785, "step": 3620 }, { "epoch": 1.17, "learning_rate": 3.476469995597792e-06, "loss": 0.1558, "step": 3621 }, { "epoch": 1.17, "learning_rate": 3.4756648016407175e-06, "loss": 0.1721, "step": 3622 }, { "epoch": 1.17, "learning_rate": 3.4748594882717163e-06, "loss": 0.1511, "step": 3623 }, { "epoch": 1.17, "learning_rate": 3.474054055589351e-06, "loss": 0.1704, "step": 3624 }, { "epoch": 1.17, "learning_rate": 3.473248503692199e-06, "loss": 0.1532, "step": 3625 }, { "epoch": 1.17, "learning_rate": 3.472442832678852e-06, "loss": 0.1755, "step": 3626 }, { "epoch": 1.18, "learning_rate": 3.471637042647916e-06, "loss": 0.158, "step": 3627 }, { "epoch": 1.18, "learning_rate": 3.470831133698013e-06, "loss": 0.1539, "step": 3628 }, { "epoch": 1.18, "learning_rate": 3.470025105927777e-06, "loss": 0.1534, "step": 3629 }, { "epoch": 1.18, "learning_rate": 3.4692189594358578e-06, "loss": 0.1799, "step": 3630 }, { "epoch": 1.18, "learning_rate": 3.468412694320921e-06, "loss": 0.1696, "step": 3631 }, { "epoch": 1.18, "learning_rate": 3.467606310681646e-06, "loss": 0.1749, "step": 3632 }, { "epoch": 1.18, "learning_rate": 3.4667998086167253e-06, "loss": 0.1721, "step": 3633 }, { "epoch": 1.18, "learning_rate": 3.465993188224868e-06, "loss": 0.1717, "step": 3634 }, { "epoch": 1.18, "learning_rate": 3.4651864496047952e-06, "loss": 0.1643, "step": 3635 }, { "epoch": 1.18, "learning_rate": 3.464379592855246e-06, "loss": 0.1739, "step": 3636 }, { "epoch": 1.18, "learning_rate": 3.4635726180749698e-06, "loss": 0.1737, "step": 3637 }, { "epoch": 1.18, "learning_rate": 3.4627655253627324e-06, "loss": 0.1622, "step": 3638 }, { "epoch": 1.18, "learning_rate": 3.461958314817316e-06, "loss": 0.1805, "step": 3639 }, { "epoch": 1.18, "learning_rate": 3.4611509865375143e-06, "loss": 0.1595, "step": 3640 }, { "epoch": 1.18, "learning_rate": 3.4603435406221356e-06, "loss": 0.163, "step": 3641 }, { "epoch": 1.18, "learning_rate": 3.4595359771700055e-06, "loss": 0.1789, "step": 3642 }, { "epoch": 1.18, "learning_rate": 3.4587282962799602e-06, "loss": 0.1665, "step": 3643 }, { "epoch": 1.18, "learning_rate": 3.4579204980508525e-06, "loss": 0.164, "step": 3644 }, { "epoch": 1.18, "learning_rate": 3.45711258258155e-06, "loss": 0.1661, "step": 3645 }, { "epoch": 1.18, "learning_rate": 3.4563045499709324e-06, "loss": 0.1583, "step": 3646 }, { "epoch": 1.18, "learning_rate": 3.455496400317896e-06, "loss": 0.1824, "step": 3647 }, { "epoch": 1.18, "learning_rate": 3.45468813372135e-06, "loss": 0.169, "step": 3648 }, { "epoch": 1.18, "learning_rate": 3.453879750280218e-06, "loss": 0.159, "step": 3649 }, { "epoch": 1.18, "learning_rate": 3.4530712500934393e-06, "loss": 0.1556, "step": 3650 }, { "epoch": 1.18, "learning_rate": 3.4522626332599657e-06, "loss": 0.1739, "step": 3651 }, { "epoch": 1.18, "learning_rate": 3.451453899878765e-06, "loss": 0.1696, "step": 3652 }, { "epoch": 1.18, "learning_rate": 3.450645050048817e-06, "loss": 0.1717, "step": 3653 }, { "epoch": 1.18, "learning_rate": 3.449836083869118e-06, "loss": 0.1711, "step": 3654 }, { "epoch": 1.18, "learning_rate": 3.449027001438678e-06, "loss": 0.1519, "step": 3655 }, { "epoch": 1.18, "learning_rate": 3.44821780285652e-06, "loss": 0.1833, "step": 3656 }, { "epoch": 1.19, "learning_rate": 3.4474084882216826e-06, "loss": 0.1761, "step": 3657 }, { "epoch": 1.19, "learning_rate": 3.4465990576332177e-06, "loss": 0.167, "step": 3658 }, { "epoch": 1.19, "learning_rate": 3.445789511190192e-06, "loss": 0.1584, "step": 3659 }, { "epoch": 1.19, "learning_rate": 3.4449798489916856e-06, "loss": 0.1592, "step": 3660 }, { "epoch": 1.19, "learning_rate": 3.444170071136794e-06, "loss": 0.1669, "step": 3661 }, { "epoch": 1.19, "learning_rate": 3.4433601777246263e-06, "loss": 0.1541, "step": 3662 }, { "epoch": 1.19, "learning_rate": 3.442550168854305e-06, "loss": 0.1713, "step": 3663 }, { "epoch": 1.19, "learning_rate": 3.4417400446249684e-06, "loss": 0.1855, "step": 3664 }, { "epoch": 1.19, "learning_rate": 3.440929805135766e-06, "loss": 0.1881, "step": 3665 }, { "epoch": 1.19, "learning_rate": 3.440119450485865e-06, "loss": 0.1429, "step": 3666 }, { "epoch": 1.19, "learning_rate": 3.439308980774444e-06, "loss": 0.1753, "step": 3667 }, { "epoch": 1.19, "learning_rate": 3.438498396100697e-06, "loss": 0.167, "step": 3668 }, { "epoch": 1.19, "learning_rate": 3.4376876965638317e-06, "loss": 0.1698, "step": 3669 }, { "epoch": 1.19, "learning_rate": 3.4368768822630705e-06, "loss": 0.1699, "step": 3670 }, { "epoch": 1.19, "learning_rate": 3.4360659532976475e-06, "loss": 0.1497, "step": 3671 }, { "epoch": 1.19, "learning_rate": 3.435254909766814e-06, "loss": 0.1718, "step": 3672 }, { "epoch": 1.19, "learning_rate": 3.4344437517698336e-06, "loss": 0.1509, "step": 3673 }, { "epoch": 1.19, "learning_rate": 3.433632479405984e-06, "loss": 0.1602, "step": 3674 }, { "epoch": 1.19, "learning_rate": 3.4328210927745577e-06, "loss": 0.1759, "step": 3675 }, { "epoch": 1.19, "learning_rate": 3.4320095919748596e-06, "loss": 0.1633, "step": 3676 }, { "epoch": 1.19, "learning_rate": 3.43119797710621e-06, "loss": 0.1602, "step": 3677 }, { "epoch": 1.19, "learning_rate": 3.4303862482679435e-06, "loss": 0.1577, "step": 3678 }, { "epoch": 1.19, "learning_rate": 3.429574405559406e-06, "loss": 0.1705, "step": 3679 }, { "epoch": 1.19, "learning_rate": 3.4287624490799605e-06, "loss": 0.1736, "step": 3680 }, { "epoch": 1.19, "learning_rate": 3.4279503789289824e-06, "loss": 0.1552, "step": 3681 }, { "epoch": 1.19, "learning_rate": 3.4271381952058607e-06, "loss": 0.1748, "step": 3682 }, { "epoch": 1.19, "learning_rate": 3.42632589801e-06, "loss": 0.1577, "step": 3683 }, { "epoch": 1.19, "learning_rate": 3.425513487440817e-06, "loss": 0.1692, "step": 3684 }, { "epoch": 1.19, "learning_rate": 3.4247009635977425e-06, "loss": 0.169, "step": 3685 }, { "epoch": 1.19, "learning_rate": 3.4238883265802215e-06, "loss": 0.1686, "step": 3686 }, { "epoch": 1.19, "learning_rate": 3.4230755764877133e-06, "loss": 0.1801, "step": 3687 }, { "epoch": 1.2, "learning_rate": 3.4222627134196917e-06, "loss": 0.1687, "step": 3688 }, { "epoch": 1.2, "learning_rate": 3.4214497374756415e-06, "loss": 0.1833, "step": 3689 }, { "epoch": 1.2, "learning_rate": 3.4206366487550637e-06, "loss": 0.1809, "step": 3690 }, { "epoch": 1.2, "learning_rate": 3.419823447357472e-06, "loss": 0.171, "step": 3691 }, { "epoch": 1.2, "learning_rate": 3.4190101333823956e-06, "loss": 0.1749, "step": 3692 }, { "epoch": 1.2, "learning_rate": 3.4181967069293754e-06, "loss": 0.1825, "step": 3693 }, { "epoch": 1.2, "learning_rate": 3.417383168097967e-06, "loss": 0.1605, "step": 3694 }, { "epoch": 1.2, "learning_rate": 3.41656951698774e-06, "loss": 0.1642, "step": 3695 }, { "epoch": 1.2, "learning_rate": 3.4157557536982773e-06, "loss": 0.1641, "step": 3696 }, { "epoch": 1.2, "learning_rate": 3.414941878329175e-06, "loss": 0.1574, "step": 3697 }, { "epoch": 1.2, "learning_rate": 3.4141278909800444e-06, "loss": 0.1676, "step": 3698 }, { "epoch": 1.2, "learning_rate": 3.41331379175051e-06, "loss": 0.1741, "step": 3699 }, { "epoch": 1.2, "learning_rate": 3.4124995807402082e-06, "loss": 0.1556, "step": 3700 }, { "epoch": 1.2, "learning_rate": 3.4116852580487925e-06, "loss": 0.1736, "step": 3701 }, { "epoch": 1.2, "learning_rate": 3.4108708237759258e-06, "loss": 0.1628, "step": 3702 }, { "epoch": 1.2, "learning_rate": 3.4100562780212887e-06, "loss": 0.1638, "step": 3703 }, { "epoch": 1.2, "learning_rate": 3.4092416208845723e-06, "loss": 0.172, "step": 3704 }, { "epoch": 1.2, "learning_rate": 3.4084268524654847e-06, "loss": 0.162, "step": 3705 }, { "epoch": 1.2, "learning_rate": 3.407611972863744e-06, "loss": 0.164, "step": 3706 }, { "epoch": 1.2, "learning_rate": 3.406796982179085e-06, "loss": 0.1776, "step": 3707 }, { "epoch": 1.2, "learning_rate": 3.4059818805112534e-06, "loss": 0.1627, "step": 3708 }, { "epoch": 1.2, "learning_rate": 3.4051666679600105e-06, "loss": 0.175, "step": 3709 }, { "epoch": 1.2, "learning_rate": 3.40435134462513e-06, "loss": 0.1725, "step": 3710 }, { "epoch": 1.2, "learning_rate": 3.403535910606399e-06, "loss": 0.1707, "step": 3711 }, { "epoch": 1.2, "learning_rate": 3.4027203660036202e-06, "loss": 0.1697, "step": 3712 }, { "epoch": 1.2, "learning_rate": 3.4019047109166077e-06, "loss": 0.1646, "step": 3713 }, { "epoch": 1.2, "learning_rate": 3.401088945445189e-06, "loss": 0.1586, "step": 3714 }, { "epoch": 1.2, "learning_rate": 3.4002730696892073e-06, "loss": 0.1605, "step": 3715 }, { "epoch": 1.2, "learning_rate": 3.3994570837485163e-06, "loss": 0.1761, "step": 3716 }, { "epoch": 1.2, "learning_rate": 3.3986409877229863e-06, "loss": 0.1667, "step": 3717 }, { "epoch": 1.2, "learning_rate": 3.3978247817124986e-06, "loss": 0.1749, "step": 3718 }, { "epoch": 1.21, "learning_rate": 3.39700846581695e-06, "loss": 0.155, "step": 3719 }, { "epoch": 1.21, "learning_rate": 3.3961920401362488e-06, "loss": 0.1618, "step": 3720 }, { "epoch": 1.21, "learning_rate": 3.3953755047703174e-06, "loss": 0.1584, "step": 3721 }, { "epoch": 1.21, "learning_rate": 3.394558859819092e-06, "loss": 0.1812, "step": 3722 }, { "epoch": 1.21, "learning_rate": 3.393742105382522e-06, "loss": 0.152, "step": 3723 }, { "epoch": 1.21, "learning_rate": 3.3929252415605708e-06, "loss": 0.1722, "step": 3724 }, { "epoch": 1.21, "learning_rate": 3.3921082684532143e-06, "loss": 0.1617, "step": 3725 }, { "epoch": 1.21, "learning_rate": 3.391291186160441e-06, "loss": 0.1686, "step": 3726 }, { "epoch": 1.21, "learning_rate": 3.3904739947822556e-06, "loss": 0.1688, "step": 3727 }, { "epoch": 1.21, "learning_rate": 3.3896566944186737e-06, "loss": 0.1717, "step": 3728 }, { "epoch": 1.21, "learning_rate": 3.388839285169725e-06, "loss": 0.1671, "step": 3729 }, { "epoch": 1.21, "learning_rate": 3.3880217671354527e-06, "loss": 0.1599, "step": 3730 }, { "epoch": 1.21, "learning_rate": 3.3872041404159124e-06, "loss": 0.1598, "step": 3731 }, { "epoch": 1.21, "learning_rate": 3.3863864051111744e-06, "loss": 0.167, "step": 3732 }, { "epoch": 1.21, "learning_rate": 3.385568561321321e-06, "loss": 0.1648, "step": 3733 }, { "epoch": 1.21, "learning_rate": 3.3847506091464487e-06, "loss": 0.1662, "step": 3734 }, { "epoch": 1.21, "learning_rate": 3.383932548686667e-06, "loss": 0.1686, "step": 3735 }, { "epoch": 1.21, "learning_rate": 3.3831143800420983e-06, "loss": 0.1642, "step": 3736 }, { "epoch": 1.21, "learning_rate": 3.3822961033128793e-06, "loss": 0.154, "step": 3737 }, { "epoch": 1.21, "learning_rate": 3.3814777185991577e-06, "loss": 0.1664, "step": 3738 }, { "epoch": 1.21, "learning_rate": 3.380659226001097e-06, "loss": 0.158, "step": 3739 }, { "epoch": 1.21, "learning_rate": 3.3798406256188725e-06, "loss": 0.1569, "step": 3740 }, { "epoch": 1.21, "learning_rate": 3.3790219175526733e-06, "loss": 0.1494, "step": 3741 }, { "epoch": 1.21, "learning_rate": 3.3782031019027006e-06, "loss": 0.1559, "step": 3742 }, { "epoch": 1.21, "learning_rate": 3.3773841787691708e-06, "loss": 0.1669, "step": 3743 }, { "epoch": 1.21, "learning_rate": 3.3765651482523097e-06, "loss": 0.1599, "step": 3744 }, { "epoch": 1.21, "learning_rate": 3.375746010452361e-06, "loss": 0.1652, "step": 3745 }, { "epoch": 1.21, "learning_rate": 3.374926765469578e-06, "loss": 0.1854, "step": 3746 }, { "epoch": 1.21, "learning_rate": 3.3741074134042297e-06, "loss": 0.1648, "step": 3747 }, { "epoch": 1.21, "learning_rate": 3.3732879543565955e-06, "loss": 0.1656, "step": 3748 }, { "epoch": 1.21, "learning_rate": 3.3724683884269702e-06, "loss": 0.1668, "step": 3749 }, { "epoch": 1.22, "learning_rate": 3.37164871571566e-06, "loss": 0.1532, "step": 3750 }, { "epoch": 1.22, "learning_rate": 3.370828936322985e-06, "loss": 0.1775, "step": 3751 }, { "epoch": 1.22, "learning_rate": 3.3700090503492795e-06, "loss": 0.1838, "step": 3752 }, { "epoch": 1.22, "learning_rate": 3.3691890578948876e-06, "loss": 0.1665, "step": 3753 }, { "epoch": 1.22, "learning_rate": 3.36836895906017e-06, "loss": 0.1601, "step": 3754 }, { "epoch": 1.22, "learning_rate": 3.3675487539454972e-06, "loss": 0.1486, "step": 3755 }, { "epoch": 1.22, "learning_rate": 3.3667284426512565e-06, "loss": 0.1664, "step": 3756 }, { "epoch": 1.22, "learning_rate": 3.3659080252778446e-06, "loss": 0.1563, "step": 3757 }, { "epoch": 1.22, "learning_rate": 3.365087501925673e-06, "loss": 0.1758, "step": 3758 }, { "epoch": 1.22, "learning_rate": 3.3642668726951657e-06, "loss": 0.1616, "step": 3759 }, { "epoch": 1.22, "learning_rate": 3.36344613768676e-06, "loss": 0.1603, "step": 3760 }, { "epoch": 1.22, "learning_rate": 3.362625297000906e-06, "loss": 0.1762, "step": 3761 }, { "epoch": 1.22, "learning_rate": 3.3618043507380673e-06, "loss": 0.1644, "step": 3762 }, { "epoch": 1.22, "learning_rate": 3.3609832989987178e-06, "loss": 0.1709, "step": 3763 }, { "epoch": 1.22, "learning_rate": 3.360162141883348e-06, "loss": 0.1531, "step": 3764 }, { "epoch": 1.22, "learning_rate": 3.3593408794924585e-06, "loss": 0.1653, "step": 3765 }, { "epoch": 1.22, "learning_rate": 3.358519511926565e-06, "loss": 0.1655, "step": 3766 }, { "epoch": 1.22, "learning_rate": 3.357698039286194e-06, "loss": 0.1801, "step": 3767 }, { "epoch": 1.22, "learning_rate": 3.356876461671887e-06, "loss": 0.1676, "step": 3768 }, { "epoch": 1.22, "learning_rate": 3.3560547791841957e-06, "loss": 0.1787, "step": 3769 }, { "epoch": 1.22, "learning_rate": 3.3552329919236865e-06, "loss": 0.1549, "step": 3770 }, { "epoch": 1.22, "learning_rate": 3.3544110999909385e-06, "loss": 0.1599, "step": 3771 }, { "epoch": 1.22, "learning_rate": 3.3535891034865433e-06, "loss": 0.1572, "step": 3772 }, { "epoch": 1.22, "learning_rate": 3.3527670025111046e-06, "loss": 0.1614, "step": 3773 }, { "epoch": 1.22, "learning_rate": 3.3519447971652407e-06, "loss": 0.1614, "step": 3774 }, { "epoch": 1.22, "learning_rate": 3.351122487549582e-06, "loss": 0.172, "step": 3775 }, { "epoch": 1.22, "learning_rate": 3.3503000737647696e-06, "loss": 0.1769, "step": 3776 }, { "epoch": 1.22, "learning_rate": 3.349477555911459e-06, "loss": 0.1562, "step": 3777 }, { "epoch": 1.22, "learning_rate": 3.3486549340903196e-06, "loss": 0.1665, "step": 3778 }, { "epoch": 1.22, "learning_rate": 3.3478322084020322e-06, "loss": 0.1603, "step": 3779 }, { "epoch": 1.22, "learning_rate": 3.34700937894729e-06, "loss": 0.1649, "step": 3780 }, { "epoch": 1.23, "learning_rate": 3.3461864458267996e-06, "loss": 0.1658, "step": 3781 }, { "epoch": 1.23, "learning_rate": 3.3453634091412795e-06, "loss": 0.1876, "step": 3782 }, { "epoch": 1.23, "learning_rate": 3.344540268991462e-06, "loss": 0.1675, "step": 3783 }, { "epoch": 1.23, "learning_rate": 3.343717025478092e-06, "loss": 0.1786, "step": 3784 }, { "epoch": 1.23, "learning_rate": 3.342893678701925e-06, "loss": 0.1688, "step": 3785 }, { "epoch": 1.23, "learning_rate": 3.3420702287637325e-06, "loss": 0.1655, "step": 3786 }, { "epoch": 1.23, "learning_rate": 3.341246675764295e-06, "loss": 0.1646, "step": 3787 }, { "epoch": 1.23, "learning_rate": 3.3404230198044085e-06, "loss": 0.1613, "step": 3788 }, { "epoch": 1.23, "learning_rate": 3.3395992609848804e-06, "loss": 0.1523, "step": 3789 }, { "epoch": 1.23, "learning_rate": 3.338775399406531e-06, "loss": 0.157, "step": 3790 }, { "epoch": 1.23, "learning_rate": 3.3379514351701924e-06, "loss": 0.1493, "step": 3791 }, { "epoch": 1.23, "learning_rate": 3.3371273683767102e-06, "loss": 0.1789, "step": 3792 }, { "epoch": 1.23, "learning_rate": 3.3363031991269423e-06, "loss": 0.1768, "step": 3793 }, { "epoch": 1.23, "learning_rate": 3.3354789275217587e-06, "loss": 0.167, "step": 3794 }, { "epoch": 1.23, "learning_rate": 3.3346545536620425e-06, "loss": 0.1577, "step": 3795 }, { "epoch": 1.23, "learning_rate": 3.3338300776486886e-06, "loss": 0.1499, "step": 3796 }, { "epoch": 1.23, "learning_rate": 3.3330054995826056e-06, "loss": 0.1586, "step": 3797 }, { "epoch": 1.23, "learning_rate": 3.3321808195647144e-06, "loss": 0.1595, "step": 3798 }, { "epoch": 1.23, "learning_rate": 3.3313560376959456e-06, "loss": 0.1807, "step": 3799 }, { "epoch": 1.23, "learning_rate": 3.3305311540772467e-06, "loss": 0.1658, "step": 3800 }, { "epoch": 1.23, "learning_rate": 3.3297061688095746e-06, "loss": 0.1614, "step": 3801 }, { "epoch": 1.23, "learning_rate": 3.3288810819938995e-06, "loss": 0.1626, "step": 3802 }, { "epoch": 1.23, "learning_rate": 3.3280558937312037e-06, "loss": 0.1801, "step": 3803 }, { "epoch": 1.23, "learning_rate": 3.327230604122484e-06, "loss": 0.1626, "step": 3804 }, { "epoch": 1.23, "learning_rate": 3.326405213268745e-06, "loss": 0.1538, "step": 3805 }, { "epoch": 1.23, "learning_rate": 3.3255797212710095e-06, "loss": 0.1753, "step": 3806 }, { "epoch": 1.23, "learning_rate": 3.3247541282303082e-06, "loss": 0.1598, "step": 3807 }, { "epoch": 1.23, "learning_rate": 3.3239284342476852e-06, "loss": 0.1729, "step": 3808 }, { "epoch": 1.23, "learning_rate": 3.3231026394241983e-06, "loss": 0.1679, "step": 3809 }, { "epoch": 1.23, "learning_rate": 3.3222767438609166e-06, "loss": 0.1722, "step": 3810 }, { "epoch": 1.23, "learning_rate": 3.321450747658922e-06, "loss": 0.1877, "step": 3811 }, { "epoch": 1.24, "learning_rate": 3.3206246509193076e-06, "loss": 0.1592, "step": 3812 }, { "epoch": 1.24, "learning_rate": 3.3197984537431797e-06, "loss": 0.1697, "step": 3813 }, { "epoch": 1.24, "learning_rate": 3.3189721562316585e-06, "loss": 0.1632, "step": 3814 }, { "epoch": 1.24, "learning_rate": 3.3181457584858736e-06, "loss": 0.1629, "step": 3815 }, { "epoch": 1.24, "learning_rate": 3.3173192606069673e-06, "loss": 0.1864, "step": 3816 }, { "epoch": 1.24, "learning_rate": 3.316492662696097e-06, "loss": 0.1707, "step": 3817 }, { "epoch": 1.24, "learning_rate": 3.3156659648544276e-06, "loss": 0.1668, "step": 3818 }, { "epoch": 1.24, "learning_rate": 3.314839167183141e-06, "loss": 0.1742, "step": 3819 }, { "epoch": 1.24, "learning_rate": 3.3140122697834287e-06, "loss": 0.1416, "step": 3820 }, { "epoch": 1.24, "learning_rate": 3.3131852727564947e-06, "loss": 0.1538, "step": 3821 }, { "epoch": 1.24, "learning_rate": 3.3123581762035557e-06, "loss": 0.1514, "step": 3822 }, { "epoch": 1.24, "learning_rate": 3.31153098022584e-06, "loss": 0.1745, "step": 3823 }, { "epoch": 1.24, "learning_rate": 3.3107036849245883e-06, "loss": 0.1635, "step": 3824 }, { "epoch": 1.24, "learning_rate": 3.309876290401054e-06, "loss": 0.1661, "step": 3825 }, { "epoch": 1.24, "learning_rate": 3.309048796756503e-06, "loss": 0.1632, "step": 3826 }, { "epoch": 1.24, "learning_rate": 3.3082212040922103e-06, "loss": 0.1616, "step": 3827 }, { "epoch": 1.24, "learning_rate": 3.307393512509466e-06, "loss": 0.1605, "step": 3828 }, { "epoch": 1.24, "learning_rate": 3.3065657221095732e-06, "loss": 0.1624, "step": 3829 }, { "epoch": 1.24, "learning_rate": 3.3057378329938432e-06, "loss": 0.1667, "step": 3830 }, { "epoch": 1.24, "learning_rate": 3.304909845263603e-06, "loss": 0.1512, "step": 3831 }, { "epoch": 1.24, "learning_rate": 3.3040817590201897e-06, "loss": 0.1711, "step": 3832 }, { "epoch": 1.24, "learning_rate": 3.303253574364953e-06, "loss": 0.1631, "step": 3833 }, { "epoch": 1.24, "learning_rate": 3.3024252913992548e-06, "loss": 0.179, "step": 3834 }, { "epoch": 1.24, "learning_rate": 3.3015969102244704e-06, "loss": 0.1646, "step": 3835 }, { "epoch": 1.24, "learning_rate": 3.300768430941983e-06, "loss": 0.1807, "step": 3836 }, { "epoch": 1.24, "learning_rate": 3.299939853653192e-06, "loss": 0.1588, "step": 3837 }, { "epoch": 1.24, "learning_rate": 3.299111178459507e-06, "loss": 0.1628, "step": 3838 }, { "epoch": 1.24, "learning_rate": 3.29828240546235e-06, "loss": 0.1624, "step": 3839 }, { "epoch": 1.24, "learning_rate": 3.297453534763154e-06, "loss": 0.17, "step": 3840 }, { "epoch": 1.24, "learning_rate": 3.2966245664633654e-06, "loss": 0.1651, "step": 3841 }, { "epoch": 1.24, "learning_rate": 3.295795500664442e-06, "loss": 0.1626, "step": 3842 }, { "epoch": 1.25, "learning_rate": 3.294966337467853e-06, "loss": 0.1629, "step": 3843 }, { "epoch": 1.25, "learning_rate": 3.2941370769750804e-06, "loss": 0.1663, "step": 3844 }, { "epoch": 1.25, "learning_rate": 3.293307719287617e-06, "loss": 0.1625, "step": 3845 }, { "epoch": 1.25, "learning_rate": 3.2924782645069684e-06, "loss": 0.1639, "step": 3846 }, { "epoch": 1.25, "learning_rate": 3.291648712734653e-06, "loss": 0.151, "step": 3847 }, { "epoch": 1.25, "learning_rate": 3.290819064072198e-06, "loss": 0.1545, "step": 3848 }, { "epoch": 1.25, "learning_rate": 3.289989318621146e-06, "loss": 0.1643, "step": 3849 }, { "epoch": 1.25, "learning_rate": 3.289159476483049e-06, "loss": 0.1743, "step": 3850 }, { "epoch": 1.25, "learning_rate": 3.2883295377594716e-06, "loss": 0.1533, "step": 3851 }, { "epoch": 1.25, "learning_rate": 3.2874995025519897e-06, "loss": 0.1788, "step": 3852 }, { "epoch": 1.25, "learning_rate": 3.2866693709621933e-06, "loss": 0.1591, "step": 3853 }, { "epoch": 1.25, "learning_rate": 3.285839143091681e-06, "loss": 0.1614, "step": 3854 }, { "epoch": 1.25, "learning_rate": 3.2850088190420647e-06, "loss": 0.1689, "step": 3855 }, { "epoch": 1.25, "learning_rate": 3.284178398914969e-06, "loss": 0.1727, "step": 3856 }, { "epoch": 1.25, "learning_rate": 3.283347882812028e-06, "loss": 0.1693, "step": 3857 }, { "epoch": 1.25, "learning_rate": 3.282517270834891e-06, "loss": 0.1555, "step": 3858 }, { "epoch": 1.25, "learning_rate": 3.281686563085214e-06, "loss": 0.1551, "step": 3859 }, { "epoch": 1.25, "learning_rate": 3.28085575966467e-06, "loss": 0.1517, "step": 3860 }, { "epoch": 1.25, "learning_rate": 3.2800248606749395e-06, "loss": 0.1617, "step": 3861 }, { "epoch": 1.25, "learning_rate": 3.2791938662177174e-06, "loss": 0.165, "step": 3862 }, { "epoch": 1.25, "learning_rate": 3.278362776394709e-06, "loss": 0.1723, "step": 3863 }, { "epoch": 1.25, "learning_rate": 3.277531591307632e-06, "loss": 0.1556, "step": 3864 }, { "epoch": 1.25, "learning_rate": 3.2767003110582164e-06, "loss": 0.172, "step": 3865 }, { "epoch": 1.25, "learning_rate": 3.275868935748201e-06, "loss": 0.1727, "step": 3866 }, { "epoch": 1.25, "learning_rate": 3.2750374654793387e-06, "loss": 0.1711, "step": 3867 }, { "epoch": 1.25, "learning_rate": 3.2742059003533933e-06, "loss": 0.1552, "step": 3868 }, { "epoch": 1.25, "learning_rate": 3.2733742404721413e-06, "loss": 0.1661, "step": 3869 }, { "epoch": 1.25, "learning_rate": 3.272542485937369e-06, "loss": 0.1713, "step": 3870 }, { "epoch": 1.25, "learning_rate": 3.2717106368508755e-06, "loss": 0.1773, "step": 3871 }, { "epoch": 1.25, "learning_rate": 3.27087869331447e-06, "loss": 0.153, "step": 3872 }, { "epoch": 1.26, "learning_rate": 3.2700466554299755e-06, "loss": 0.1752, "step": 3873 }, { "epoch": 1.26, "learning_rate": 3.2692145232992244e-06, "loss": 0.1683, "step": 3874 }, { "epoch": 1.26, "learning_rate": 3.268382297024063e-06, "loss": 0.17, "step": 3875 }, { "epoch": 1.26, "learning_rate": 3.2675499767063464e-06, "loss": 0.1499, "step": 3876 }, { "epoch": 1.26, "learning_rate": 3.266717562447944e-06, "loss": 0.1627, "step": 3877 }, { "epoch": 1.26, "learning_rate": 3.2658850543507336e-06, "loss": 0.1758, "step": 3878 }, { "epoch": 1.26, "learning_rate": 3.2650524525166064e-06, "loss": 0.1572, "step": 3879 }, { "epoch": 1.26, "learning_rate": 3.2642197570474665e-06, "loss": 0.1595, "step": 3880 }, { "epoch": 1.26, "learning_rate": 3.263386968045226e-06, "loss": 0.156, "step": 3881 }, { "epoch": 1.26, "learning_rate": 3.262554085611811e-06, "loss": 0.1658, "step": 3882 }, { "epoch": 1.26, "learning_rate": 3.261721109849158e-06, "loss": 0.1672, "step": 3883 }, { "epoch": 1.26, "learning_rate": 3.2608880408592148e-06, "loss": 0.1597, "step": 3884 }, { "epoch": 1.26, "learning_rate": 3.2600548787439413e-06, "loss": 0.1625, "step": 3885 }, { "epoch": 1.26, "learning_rate": 3.2592216236053086e-06, "loss": 0.1715, "step": 3886 }, { "epoch": 1.26, "learning_rate": 3.2583882755452994e-06, "loss": 0.1679, "step": 3887 }, { "epoch": 1.26, "learning_rate": 3.257554834665907e-06, "loss": 0.1666, "step": 3888 }, { "epoch": 1.26, "learning_rate": 3.2567213010691367e-06, "loss": 0.1705, "step": 3889 }, { "epoch": 1.26, "learning_rate": 3.255887674857004e-06, "loss": 0.177, "step": 3890 }, { "epoch": 1.26, "learning_rate": 3.2550539561315385e-06, "loss": 0.1694, "step": 3891 }, { "epoch": 1.26, "learning_rate": 3.2542201449947774e-06, "loss": 0.1771, "step": 3892 }, { "epoch": 1.26, "learning_rate": 3.2533862415487723e-06, "loss": 0.152, "step": 3893 }, { "epoch": 1.26, "learning_rate": 3.2525522458955843e-06, "loss": 0.1665, "step": 3894 }, { "epoch": 1.26, "learning_rate": 3.251718158137287e-06, "loss": 0.1716, "step": 3895 }, { "epoch": 1.26, "learning_rate": 3.2508839783759642e-06, "loss": 0.171, "step": 3896 }, { "epoch": 1.26, "learning_rate": 3.2500497067137116e-06, "loss": 0.1941, "step": 3897 }, { "epoch": 1.26, "learning_rate": 3.2492153432526356e-06, "loss": 0.164, "step": 3898 }, { "epoch": 1.26, "learning_rate": 3.2483808880948552e-06, "loss": 0.1663, "step": 3899 }, { "epoch": 1.26, "learning_rate": 3.2475463413424983e-06, "loss": 0.1765, "step": 3900 }, { "epoch": 1.26, "learning_rate": 3.246711703097707e-06, "loss": 0.1676, "step": 3901 }, { "epoch": 1.26, "learning_rate": 3.2458769734626315e-06, "loss": 0.1735, "step": 3902 }, { "epoch": 1.26, "learning_rate": 3.245042152539435e-06, "loss": 0.1455, "step": 3903 }, { "epoch": 1.27, "learning_rate": 3.2442072404302917e-06, "loss": 0.1586, "step": 3904 }, { "epoch": 1.27, "learning_rate": 3.243372237237386e-06, "loss": 0.1724, "step": 3905 }, { "epoch": 1.27, "learning_rate": 3.2425371430629155e-06, "loss": 0.1628, "step": 3906 }, { "epoch": 1.27, "learning_rate": 3.241701958009087e-06, "loss": 0.159, "step": 3907 }, { "epoch": 1.27, "learning_rate": 3.2408666821781186e-06, "loss": 0.1648, "step": 3908 }, { "epoch": 1.27, "learning_rate": 3.2400313156722414e-06, "loss": 0.1646, "step": 3909 }, { "epoch": 1.27, "learning_rate": 3.2391958585936946e-06, "loss": 0.1723, "step": 3910 }, { "epoch": 1.27, "learning_rate": 3.2383603110447304e-06, "loss": 0.1688, "step": 3911 }, { "epoch": 1.27, "learning_rate": 3.2375246731276122e-06, "loss": 0.1657, "step": 3912 }, { "epoch": 1.27, "learning_rate": 3.236688944944614e-06, "loss": 0.1614, "step": 3913 }, { "epoch": 1.27, "learning_rate": 3.2358531265980207e-06, "loss": 0.1702, "step": 3914 }, { "epoch": 1.27, "learning_rate": 3.2350172181901283e-06, "loss": 0.1787, "step": 3915 }, { "epoch": 1.27, "learning_rate": 3.2341812198232437e-06, "loss": 0.1728, "step": 3916 }, { "epoch": 1.27, "learning_rate": 3.2333451315996857e-06, "loss": 0.1552, "step": 3917 }, { "epoch": 1.27, "learning_rate": 3.232508953621782e-06, "loss": 0.1702, "step": 3918 }, { "epoch": 1.27, "learning_rate": 3.231672685991874e-06, "loss": 0.1761, "step": 3919 }, { "epoch": 1.27, "learning_rate": 3.2308363288123128e-06, "loss": 0.1761, "step": 3920 }, { "epoch": 1.27, "learning_rate": 3.2299998821854593e-06, "loss": 0.1785, "step": 3921 }, { "epoch": 1.27, "learning_rate": 3.229163346213688e-06, "loss": 0.155, "step": 3922 }, { "epoch": 1.27, "learning_rate": 3.228326720999382e-06, "loss": 0.1632, "step": 3923 }, { "epoch": 1.27, "learning_rate": 3.2274900066449355e-06, "loss": 0.1785, "step": 3924 }, { "epoch": 1.27, "learning_rate": 3.2266532032527548e-06, "loss": 0.1653, "step": 3925 }, { "epoch": 1.27, "learning_rate": 3.225816310925257e-06, "loss": 0.1653, "step": 3926 }, { "epoch": 1.27, "learning_rate": 3.224979329764869e-06, "loss": 0.1798, "step": 3927 }, { "epoch": 1.27, "learning_rate": 3.224142259874029e-06, "loss": 0.174, "step": 3928 }, { "epoch": 1.27, "learning_rate": 3.223305101355187e-06, "loss": 0.1753, "step": 3929 }, { "epoch": 1.27, "learning_rate": 3.2224678543108024e-06, "loss": 0.1595, "step": 3930 }, { "epoch": 1.27, "learning_rate": 3.221630518843347e-06, "loss": 0.1704, "step": 3931 }, { "epoch": 1.27, "learning_rate": 3.2207930950553017e-06, "loss": 0.1778, "step": 3932 }, { "epoch": 1.27, "learning_rate": 3.2199555830491597e-06, "loss": 0.1527, "step": 3933 }, { "epoch": 1.27, "learning_rate": 3.2191179829274244e-06, "loss": 0.1758, "step": 3934 }, { "epoch": 1.28, "learning_rate": 3.2182802947926086e-06, "loss": 0.1559, "step": 3935 }, { "epoch": 1.28, "learning_rate": 3.2174425187472387e-06, "loss": 0.1536, "step": 3936 }, { "epoch": 1.28, "learning_rate": 3.2166046548938497e-06, "loss": 0.1821, "step": 3937 }, { "epoch": 1.28, "learning_rate": 3.215766703334988e-06, "loss": 0.143, "step": 3938 }, { "epoch": 1.28, "learning_rate": 3.214928664173211e-06, "loss": 0.1583, "step": 3939 }, { "epoch": 1.28, "learning_rate": 3.2140905375110875e-06, "loss": 0.1703, "step": 3940 }, { "epoch": 1.28, "learning_rate": 3.2132523234511943e-06, "loss": 0.1783, "step": 3941 }, { "epoch": 1.28, "learning_rate": 3.2124140220961215e-06, "loss": 0.1691, "step": 3942 }, { "epoch": 1.28, "learning_rate": 3.2115756335484694e-06, "loss": 0.1584, "step": 3943 }, { "epoch": 1.28, "learning_rate": 3.210737157910848e-06, "loss": 0.1677, "step": 3944 }, { "epoch": 1.28, "learning_rate": 3.2098985952858796e-06, "loss": 0.1714, "step": 3945 }, { "epoch": 1.28, "learning_rate": 3.209059945776195e-06, "loss": 0.16, "step": 3946 }, { "epoch": 1.28, "learning_rate": 3.2082212094844374e-06, "loss": 0.1716, "step": 3947 }, { "epoch": 1.28, "learning_rate": 3.20738238651326e-06, "loss": 0.1766, "step": 3948 }, { "epoch": 1.28, "learning_rate": 3.206543476965326e-06, "loss": 0.1657, "step": 3949 }, { "epoch": 1.28, "learning_rate": 3.2057044809433108e-06, "loss": 0.1497, "step": 3950 }, { "epoch": 1.28, "learning_rate": 3.2048653985498985e-06, "loss": 0.1764, "step": 3951 }, { "epoch": 1.28, "learning_rate": 3.204026229887785e-06, "loss": 0.1552, "step": 3952 }, { "epoch": 1.28, "learning_rate": 3.203186975059677e-06, "loss": 0.1569, "step": 3953 }, { "epoch": 1.28, "learning_rate": 3.2023476341682902e-06, "loss": 0.1758, "step": 3954 }, { "epoch": 1.28, "learning_rate": 3.2015082073163524e-06, "loss": 0.1603, "step": 3955 }, { "epoch": 1.28, "learning_rate": 3.2006686946066012e-06, "loss": 0.1755, "step": 3956 }, { "epoch": 1.28, "learning_rate": 3.1998290961417844e-06, "loss": 0.1605, "step": 3957 }, { "epoch": 1.28, "learning_rate": 3.1989894120246613e-06, "loss": 0.1497, "step": 3958 }, { "epoch": 1.28, "learning_rate": 3.1981496423580012e-06, "loss": 0.1704, "step": 3959 }, { "epoch": 1.28, "learning_rate": 3.1973097872445828e-06, "loss": 0.1432, "step": 3960 }, { "epoch": 1.28, "learning_rate": 3.1964698467871976e-06, "loss": 0.1759, "step": 3961 }, { "epoch": 1.28, "learning_rate": 3.1956298210886454e-06, "loss": 0.1665, "step": 3962 }, { "epoch": 1.28, "learning_rate": 3.1947897102517374e-06, "loss": 0.1642, "step": 3963 }, { "epoch": 1.28, "learning_rate": 3.1939495143792944e-06, "loss": 0.1684, "step": 3964 }, { "epoch": 1.28, "learning_rate": 3.1931092335741497e-06, "loss": 0.1621, "step": 3965 }, { "epoch": 1.29, "learning_rate": 3.192268867939144e-06, "loss": 0.1686, "step": 3966 }, { "epoch": 1.29, "learning_rate": 3.1914284175771303e-06, "loss": 0.1625, "step": 3967 }, { "epoch": 1.29, "learning_rate": 3.1905878825909726e-06, "loss": 0.1632, "step": 3968 }, { "epoch": 1.29, "learning_rate": 3.189747263083543e-06, "loss": 0.1709, "step": 3969 }, { "epoch": 1.29, "learning_rate": 3.188906559157725e-06, "loss": 0.175, "step": 3970 }, { "epoch": 1.29, "learning_rate": 3.1880657709164144e-06, "loss": 0.1723, "step": 3971 }, { "epoch": 1.29, "learning_rate": 3.1872248984625135e-06, "loss": 0.1903, "step": 3972 }, { "epoch": 1.29, "learning_rate": 3.1863839418989385e-06, "loss": 0.146, "step": 3973 }, { "epoch": 1.29, "learning_rate": 3.185542901328613e-06, "loss": 0.1675, "step": 3974 }, { "epoch": 1.29, "learning_rate": 3.184701776854474e-06, "loss": 0.1492, "step": 3975 }, { "epoch": 1.29, "learning_rate": 3.1838605685794665e-06, "loss": 0.1678, "step": 3976 }, { "epoch": 1.29, "learning_rate": 3.1830192766065445e-06, "loss": 0.1623, "step": 3977 }, { "epoch": 1.29, "learning_rate": 3.1821779010386755e-06, "loss": 0.1767, "step": 3978 }, { "epoch": 1.29, "learning_rate": 3.181336441978835e-06, "loss": 0.1815, "step": 3979 }, { "epoch": 1.29, "learning_rate": 3.18049489953001e-06, "loss": 0.1797, "step": 3980 }, { "epoch": 1.29, "learning_rate": 3.1796532737951975e-06, "loss": 0.1552, "step": 3981 }, { "epoch": 1.29, "learning_rate": 3.1788115648774033e-06, "loss": 0.162, "step": 3982 }, { "epoch": 1.29, "learning_rate": 3.177969772879645e-06, "loss": 0.1563, "step": 3983 }, { "epoch": 1.29, "learning_rate": 3.1771278979049496e-06, "loss": 0.1626, "step": 3984 }, { "epoch": 1.29, "learning_rate": 3.176285940056355e-06, "loss": 0.1619, "step": 3985 }, { "epoch": 1.29, "learning_rate": 3.1754438994369087e-06, "loss": 0.1725, "step": 3986 }, { "epoch": 1.29, "learning_rate": 3.174601776149668e-06, "loss": 0.166, "step": 3987 }, { "epoch": 1.29, "learning_rate": 3.1737595702976996e-06, "loss": 0.1795, "step": 3988 }, { "epoch": 1.29, "learning_rate": 3.1729172819840825e-06, "loss": 0.1741, "step": 3989 }, { "epoch": 1.29, "learning_rate": 3.1720749113119045e-06, "loss": 0.1676, "step": 3990 }, { "epoch": 1.29, "learning_rate": 3.1712324583842637e-06, "loss": 0.1871, "step": 3991 }, { "epoch": 1.29, "learning_rate": 3.1703899233042675e-06, "loss": 0.1543, "step": 3992 }, { "epoch": 1.29, "learning_rate": 3.1695473061750353e-06, "loss": 0.1641, "step": 3993 }, { "epoch": 1.29, "learning_rate": 3.1687046070996942e-06, "loss": 0.1648, "step": 3994 }, { "epoch": 1.29, "learning_rate": 3.1678618261813828e-06, "loss": 0.1634, "step": 3995 }, { "epoch": 1.29, "learning_rate": 3.167018963523249e-06, "loss": 0.1546, "step": 3996 }, { "epoch": 1.3, "learning_rate": 3.1661760192284518e-06, "loss": 0.1618, "step": 3997 }, { "epoch": 1.3, "learning_rate": 3.165332993400159e-06, "loss": 0.146, "step": 3998 }, { "epoch": 1.3, "learning_rate": 3.1644898861415484e-06, "loss": 0.1706, "step": 3999 }, { "epoch": 1.3, "learning_rate": 3.163646697555809e-06, "loss": 0.1541, "step": 4000 }, { "epoch": 1.3, "learning_rate": 3.1628034277461376e-06, "loss": 0.178, "step": 4001 }, { "epoch": 1.3, "learning_rate": 3.161960076815743e-06, "loss": 0.1756, "step": 4002 }, { "epoch": 1.3, "learning_rate": 3.1611166448678445e-06, "loss": 0.1695, "step": 4003 }, { "epoch": 1.3, "learning_rate": 3.1602731320056675e-06, "loss": 0.169, "step": 4004 }, { "epoch": 1.3, "learning_rate": 3.159429538332452e-06, "loss": 0.203, "step": 4005 }, { "epoch": 1.3, "learning_rate": 3.1585858639514444e-06, "loss": 0.1714, "step": 4006 }, { "epoch": 1.3, "learning_rate": 3.1577421089659023e-06, "loss": 0.164, "step": 4007 }, { "epoch": 1.3, "learning_rate": 3.1568982734790943e-06, "loss": 0.1758, "step": 4008 }, { "epoch": 1.3, "learning_rate": 3.1560543575942958e-06, "loss": 0.1522, "step": 4009 }, { "epoch": 1.3, "learning_rate": 3.1552103614147955e-06, "loss": 0.1442, "step": 4010 }, { "epoch": 1.3, "learning_rate": 3.1543662850438905e-06, "loss": 0.1645, "step": 4011 }, { "epoch": 1.3, "learning_rate": 3.1535221285848866e-06, "loss": 0.1843, "step": 4012 }, { "epoch": 1.3, "learning_rate": 3.1526778921411006e-06, "loss": 0.1696, "step": 4013 }, { "epoch": 1.3, "learning_rate": 3.151833575815859e-06, "loss": 0.1493, "step": 4014 }, { "epoch": 1.3, "learning_rate": 3.1509891797124977e-06, "loss": 0.1439, "step": 4015 }, { "epoch": 1.3, "learning_rate": 3.150144703934363e-06, "loss": 0.1617, "step": 4016 }, { "epoch": 1.3, "learning_rate": 3.149300148584811e-06, "loss": 0.1628, "step": 4017 }, { "epoch": 1.3, "learning_rate": 3.1484555137672063e-06, "loss": 0.166, "step": 4018 }, { "epoch": 1.3, "learning_rate": 3.147610799584924e-06, "loss": 0.1688, "step": 4019 }, { "epoch": 1.3, "learning_rate": 3.1467660061413497e-06, "loss": 0.1769, "step": 4020 }, { "epoch": 1.3, "learning_rate": 3.1459211335398765e-06, "loss": 0.1784, "step": 4021 }, { "epoch": 1.3, "learning_rate": 3.14507618188391e-06, "loss": 0.1657, "step": 4022 }, { "epoch": 1.3, "learning_rate": 3.144231151276864e-06, "loss": 0.1698, "step": 4023 }, { "epoch": 1.3, "learning_rate": 3.143386041822162e-06, "loss": 0.154, "step": 4024 }, { "epoch": 1.3, "learning_rate": 3.142540853623236e-06, "loss": 0.1614, "step": 4025 }, { "epoch": 1.3, "learning_rate": 3.14169558678353e-06, "loss": 0.1801, "step": 4026 }, { "epoch": 1.3, "learning_rate": 3.1408502414064963e-06, "loss": 0.1745, "step": 4027 }, { "epoch": 1.31, "learning_rate": 3.140004817595597e-06, "loss": 0.157, "step": 4028 }, { "epoch": 1.31, "learning_rate": 3.1391593154543043e-06, "loss": 0.1789, "step": 4029 }, { "epoch": 1.31, "learning_rate": 3.138313735086099e-06, "loss": 0.1673, "step": 4030 }, { "epoch": 1.31, "learning_rate": 3.137468076594471e-06, "loss": 0.158, "step": 4031 }, { "epoch": 1.31, "learning_rate": 3.1366223400829215e-06, "loss": 0.1709, "step": 4032 }, { "epoch": 1.31, "learning_rate": 3.135776525654961e-06, "loss": 0.1516, "step": 4033 }, { "epoch": 1.31, "learning_rate": 3.1349306334141084e-06, "loss": 0.1672, "step": 4034 }, { "epoch": 1.31, "learning_rate": 3.134084663463894e-06, "loss": 0.1544, "step": 4035 }, { "epoch": 1.31, "learning_rate": 3.1332386159078536e-06, "loss": 0.1686, "step": 4036 }, { "epoch": 1.31, "learning_rate": 3.132392490849537e-06, "loss": 0.1693, "step": 4037 }, { "epoch": 1.31, "learning_rate": 3.1315462883925026e-06, "loss": 0.1776, "step": 4038 }, { "epoch": 1.31, "learning_rate": 3.1307000086403162e-06, "loss": 0.1722, "step": 4039 }, { "epoch": 1.31, "learning_rate": 3.1298536516965537e-06, "loss": 0.1652, "step": 4040 }, { "epoch": 1.31, "learning_rate": 3.129007217664802e-06, "loss": 0.1665, "step": 4041 }, { "epoch": 1.31, "learning_rate": 3.1281607066486565e-06, "loss": 0.1832, "step": 4042 }, { "epoch": 1.31, "learning_rate": 3.127314118751721e-06, "loss": 0.1745, "step": 4043 }, { "epoch": 1.31, "learning_rate": 3.12646745407761e-06, "loss": 0.169, "step": 4044 }, { "epoch": 1.31, "learning_rate": 3.1256207127299475e-06, "loss": 0.1665, "step": 4045 }, { "epoch": 1.31, "learning_rate": 3.124773894812367e-06, "loss": 0.1563, "step": 4046 }, { "epoch": 1.31, "learning_rate": 3.123927000428509e-06, "loss": 0.1701, "step": 4047 }, { "epoch": 1.31, "learning_rate": 3.123080029682027e-06, "loss": 0.1455, "step": 4048 }, { "epoch": 1.31, "learning_rate": 3.1222329826765806e-06, "loss": 0.1619, "step": 4049 }, { "epoch": 1.31, "learning_rate": 3.121385859515842e-06, "loss": 0.1776, "step": 4050 }, { "epoch": 1.31, "learning_rate": 3.1205386603034886e-06, "loss": 0.1729, "step": 4051 }, { "epoch": 1.31, "learning_rate": 3.1196913851432108e-06, "loss": 0.1426, "step": 4052 }, { "epoch": 1.31, "learning_rate": 3.1188440341387063e-06, "loss": 0.1635, "step": 4053 }, { "epoch": 1.31, "learning_rate": 3.1179966073936837e-06, "loss": 0.1721, "step": 4054 }, { "epoch": 1.31, "learning_rate": 3.117149105011858e-06, "loss": 0.1726, "step": 4055 }, { "epoch": 1.31, "learning_rate": 3.1163015270969567e-06, "loss": 0.1567, "step": 4056 }, { "epoch": 1.31, "learning_rate": 3.115453873752714e-06, "loss": 0.155, "step": 4057 }, { "epoch": 1.31, "learning_rate": 3.114606145082876e-06, "loss": 0.175, "step": 4058 }, { "epoch": 1.32, "learning_rate": 3.1137583411911954e-06, "loss": 0.1763, "step": 4059 }, { "epoch": 1.32, "learning_rate": 3.1129104621814365e-06, "loss": 0.1582, "step": 4060 }, { "epoch": 1.32, "learning_rate": 3.1120625081573696e-06, "loss": 0.174, "step": 4061 }, { "epoch": 1.32, "learning_rate": 3.1112144792227774e-06, "loss": 0.162, "step": 4062 }, { "epoch": 1.32, "learning_rate": 3.1103663754814493e-06, "loss": 0.1616, "step": 4063 }, { "epoch": 1.32, "learning_rate": 3.109518197037186e-06, "loss": 0.1571, "step": 4064 }, { "epoch": 1.32, "learning_rate": 3.1086699439937957e-06, "loss": 0.1743, "step": 4065 }, { "epoch": 1.32, "learning_rate": 3.1078216164550966e-06, "loss": 0.1709, "step": 4066 }, { "epoch": 1.32, "learning_rate": 3.1069732145249166e-06, "loss": 0.1812, "step": 4067 }, { "epoch": 1.32, "learning_rate": 3.1061247383070905e-06, "loss": 0.1758, "step": 4068 }, { "epoch": 1.32, "learning_rate": 3.1052761879054637e-06, "loss": 0.1648, "step": 4069 }, { "epoch": 1.32, "learning_rate": 3.1044275634238913e-06, "loss": 0.166, "step": 4070 }, { "epoch": 1.32, "learning_rate": 3.103578864966237e-06, "loss": 0.1675, "step": 4071 }, { "epoch": 1.32, "learning_rate": 3.1027300926363723e-06, "loss": 0.1577, "step": 4072 }, { "epoch": 1.32, "learning_rate": 3.1018812465381796e-06, "loss": 0.1644, "step": 4073 }, { "epoch": 1.32, "learning_rate": 3.1010323267755486e-06, "loss": 0.167, "step": 4074 }, { "epoch": 1.32, "learning_rate": 3.100183333452379e-06, "loss": 0.1642, "step": 4075 }, { "epoch": 1.32, "learning_rate": 3.0993342666725803e-06, "loss": 0.1663, "step": 4076 }, { "epoch": 1.32, "learning_rate": 3.0984851265400683e-06, "loss": 0.1723, "step": 4077 }, { "epoch": 1.32, "learning_rate": 3.097635913158772e-06, "loss": 0.1766, "step": 4078 }, { "epoch": 1.32, "learning_rate": 3.096786626632624e-06, "loss": 0.1606, "step": 4079 }, { "epoch": 1.32, "learning_rate": 3.0959372670655714e-06, "loss": 0.1772, "step": 4080 }, { "epoch": 1.32, "learning_rate": 3.0950878345615654e-06, "loss": 0.167, "step": 4081 }, { "epoch": 1.32, "learning_rate": 3.0942383292245704e-06, "loss": 0.1527, "step": 4082 }, { "epoch": 1.32, "learning_rate": 3.0933887511585564e-06, "loss": 0.1673, "step": 4083 }, { "epoch": 1.32, "learning_rate": 3.0925391004675037e-06, "loss": 0.1673, "step": 4084 }, { "epoch": 1.32, "learning_rate": 3.0916893772554006e-06, "loss": 0.1586, "step": 4085 }, { "epoch": 1.32, "learning_rate": 3.0908395816262466e-06, "loss": 0.1791, "step": 4086 }, { "epoch": 1.32, "learning_rate": 3.0899897136840468e-06, "loss": 0.169, "step": 4087 }, { "epoch": 1.32, "learning_rate": 3.0891397735328176e-06, "loss": 0.1734, "step": 4088 }, { "epoch": 1.33, "learning_rate": 3.088289761276584e-06, "loss": 0.1569, "step": 4089 }, { "epoch": 1.33, "learning_rate": 3.0874396770193785e-06, "loss": 0.1596, "step": 4090 }, { "epoch": 1.33, "learning_rate": 3.0865895208652436e-06, "loss": 0.1767, "step": 4091 }, { "epoch": 1.33, "learning_rate": 3.0857392929182296e-06, "loss": 0.1531, "step": 4092 }, { "epoch": 1.33, "learning_rate": 3.084888993282397e-06, "loss": 0.1452, "step": 4093 }, { "epoch": 1.33, "learning_rate": 3.0840386220618137e-06, "loss": 0.1427, "step": 4094 }, { "epoch": 1.33, "learning_rate": 3.083188179360556e-06, "loss": 0.1758, "step": 4095 }, { "epoch": 1.33, "learning_rate": 3.0823376652827123e-06, "loss": 0.1732, "step": 4096 }, { "epoch": 1.33, "learning_rate": 3.0814870799323748e-06, "loss": 0.1648, "step": 4097 }, { "epoch": 1.33, "learning_rate": 3.080636423413649e-06, "loss": 0.1633, "step": 4098 }, { "epoch": 1.33, "learning_rate": 3.079785695830645e-06, "loss": 0.1677, "step": 4099 }, { "epoch": 1.33, "learning_rate": 3.0789348972874844e-06, "loss": 0.155, "step": 4100 }, { "epoch": 1.33, "learning_rate": 3.0780840278882974e-06, "loss": 0.1829, "step": 4101 }, { "epoch": 1.33, "learning_rate": 3.077233087737222e-06, "loss": 0.1658, "step": 4102 }, { "epoch": 1.33, "learning_rate": 3.0763820769384038e-06, "loss": 0.1673, "step": 4103 }, { "epoch": 1.33, "learning_rate": 3.0755309955960007e-06, "loss": 0.1781, "step": 4104 }, { "epoch": 1.33, "learning_rate": 3.074679843814174e-06, "loss": 0.1578, "step": 4105 }, { "epoch": 1.33, "learning_rate": 3.073828621697098e-06, "loss": 0.1528, "step": 4106 }, { "epoch": 1.33, "learning_rate": 3.072977329348954e-06, "loss": 0.1725, "step": 4107 }, { "epoch": 1.33, "learning_rate": 3.072125966873932e-06, "loss": 0.1681, "step": 4108 }, { "epoch": 1.33, "learning_rate": 3.0712745343762295e-06, "loss": 0.1659, "step": 4109 }, { "epoch": 1.33, "learning_rate": 3.0704230319600547e-06, "loss": 0.1599, "step": 4110 }, { "epoch": 1.33, "learning_rate": 3.069571459729623e-06, "loss": 0.1571, "step": 4111 }, { "epoch": 1.33, "learning_rate": 3.068719817789158e-06, "loss": 0.1752, "step": 4112 }, { "epoch": 1.33, "learning_rate": 3.067868106242894e-06, "loss": 0.1652, "step": 4113 }, { "epoch": 1.33, "learning_rate": 3.0670163251950703e-06, "loss": 0.1621, "step": 4114 }, { "epoch": 1.33, "learning_rate": 3.0661644747499385e-06, "loss": 0.1492, "step": 4115 }, { "epoch": 1.33, "learning_rate": 3.0653125550117547e-06, "loss": 0.1524, "step": 4116 }, { "epoch": 1.33, "learning_rate": 3.0644605660847875e-06, "loss": 0.1689, "step": 4117 }, { "epoch": 1.33, "learning_rate": 3.0636085080733113e-06, "loss": 0.1629, "step": 4118 }, { "epoch": 1.33, "learning_rate": 3.0627563810816097e-06, "loss": 0.1592, "step": 4119 }, { "epoch": 1.34, "learning_rate": 3.0619041852139746e-06, "loss": 0.175, "step": 4120 }, { "epoch": 1.34, "learning_rate": 3.061051920574708e-06, "loss": 0.1628, "step": 4121 }, { "epoch": 1.34, "learning_rate": 3.0601995872681167e-06, "loss": 0.1709, "step": 4122 }, { "epoch": 1.34, "learning_rate": 3.0593471853985197e-06, "loss": 0.1696, "step": 4123 }, { "epoch": 1.34, "learning_rate": 3.058494715070242e-06, "loss": 0.1675, "step": 4124 }, { "epoch": 1.34, "learning_rate": 3.0576421763876174e-06, "loss": 0.1789, "step": 4125 }, { "epoch": 1.34, "learning_rate": 3.056789569454989e-06, "loss": 0.176, "step": 4126 }, { "epoch": 1.34, "learning_rate": 3.055936894376708e-06, "loss": 0.1594, "step": 4127 }, { "epoch": 1.34, "learning_rate": 3.055084151257133e-06, "loss": 0.1687, "step": 4128 }, { "epoch": 1.34, "learning_rate": 3.054231340200631e-06, "loss": 0.1857, "step": 4129 }, { "epoch": 1.34, "learning_rate": 3.053378461311578e-06, "loss": 0.1716, "step": 4130 }, { "epoch": 1.34, "learning_rate": 3.0525255146943582e-06, "loss": 0.1692, "step": 4131 }, { "epoch": 1.34, "learning_rate": 3.0516725004533648e-06, "loss": 0.1707, "step": 4132 }, { "epoch": 1.34, "learning_rate": 3.0508194186929983e-06, "loss": 0.1788, "step": 4133 }, { "epoch": 1.34, "learning_rate": 3.0499662695176675e-06, "loss": 0.1874, "step": 4134 }, { "epoch": 1.34, "learning_rate": 3.0491130530317887e-06, "loss": 0.181, "step": 4135 }, { "epoch": 1.34, "learning_rate": 3.0482597693397887e-06, "loss": 0.1658, "step": 4136 }, { "epoch": 1.34, "learning_rate": 3.0474064185461e-06, "loss": 0.1568, "step": 4137 }, { "epoch": 1.34, "learning_rate": 3.0465530007551646e-06, "loss": 0.1782, "step": 4138 }, { "epoch": 1.34, "learning_rate": 3.0456995160714344e-06, "loss": 0.1634, "step": 4139 }, { "epoch": 1.34, "learning_rate": 3.044845964599365e-06, "loss": 0.1722, "step": 4140 }, { "epoch": 1.34, "learning_rate": 3.043992346443424e-06, "loss": 0.1575, "step": 4141 }, { "epoch": 1.34, "learning_rate": 3.043138661708086e-06, "loss": 0.1687, "step": 4142 }, { "epoch": 1.34, "learning_rate": 3.042284910497834e-06, "loss": 0.165, "step": 4143 }, { "epoch": 1.34, "learning_rate": 3.0414310929171587e-06, "loss": 0.1798, "step": 4144 }, { "epoch": 1.34, "learning_rate": 3.04057720907056e-06, "loss": 0.1612, "step": 4145 }, { "epoch": 1.34, "learning_rate": 3.039723259062543e-06, "loss": 0.1843, "step": 4146 }, { "epoch": 1.34, "learning_rate": 3.0388692429976247e-06, "loss": 0.163, "step": 4147 }, { "epoch": 1.34, "learning_rate": 3.038015160980327e-06, "loss": 0.1663, "step": 4148 }, { "epoch": 1.34, "learning_rate": 3.0371610131151823e-06, "loss": 0.1756, "step": 4149 }, { "epoch": 1.34, "learning_rate": 3.0363067995067297e-06, "loss": 0.1659, "step": 4150 }, { "epoch": 1.35, "learning_rate": 3.035452520259517e-06, "loss": 0.1644, "step": 4151 }, { "epoch": 1.35, "learning_rate": 3.034598175478099e-06, "loss": 0.1855, "step": 4152 }, { "epoch": 1.35, "learning_rate": 3.03374376526704e-06, "loss": 0.1498, "step": 4153 }, { "epoch": 1.35, "learning_rate": 3.0328892897309105e-06, "loss": 0.1472, "step": 4154 }, { "epoch": 1.35, "learning_rate": 3.0320347489742905e-06, "loss": 0.171, "step": 4155 }, { "epoch": 1.35, "learning_rate": 3.031180143101769e-06, "loss": 0.1795, "step": 4156 }, { "epoch": 1.35, "learning_rate": 3.03032547221794e-06, "loss": 0.1745, "step": 4157 }, { "epoch": 1.35, "learning_rate": 3.0294707364274066e-06, "loss": 0.1639, "step": 4158 }, { "epoch": 1.35, "learning_rate": 3.028615935834781e-06, "loss": 0.158, "step": 4159 }, { "epoch": 1.35, "learning_rate": 3.027761070544682e-06, "loss": 0.1465, "step": 4160 }, { "epoch": 1.35, "learning_rate": 3.026906140661737e-06, "loss": 0.1667, "step": 4161 }, { "epoch": 1.35, "learning_rate": 3.026051146290581e-06, "loss": 0.1747, "step": 4162 }, { "epoch": 1.35, "learning_rate": 3.025196087535858e-06, "loss": 0.1723, "step": 4163 }, { "epoch": 1.35, "learning_rate": 3.024340964502218e-06, "loss": 0.1678, "step": 4164 }, { "epoch": 1.35, "learning_rate": 3.0234857772943197e-06, "loss": 0.1527, "step": 4165 }, { "epoch": 1.35, "learning_rate": 3.0226305260168298e-06, "loss": 0.1651, "step": 4166 }, { "epoch": 1.35, "learning_rate": 3.0217752107744237e-06, "loss": 0.1519, "step": 4167 }, { "epoch": 1.35, "learning_rate": 3.0209198316717825e-06, "loss": 0.1753, "step": 4168 }, { "epoch": 1.35, "learning_rate": 3.0200643888135973e-06, "loss": 0.1663, "step": 4169 }, { "epoch": 1.35, "learning_rate": 3.019208882304565e-06, "loss": 0.1682, "step": 4170 }, { "epoch": 1.35, "learning_rate": 3.0183533122493917e-06, "loss": 0.1695, "step": 4171 }, { "epoch": 1.35, "learning_rate": 3.017497678752791e-06, "loss": 0.1689, "step": 4172 }, { "epoch": 1.35, "learning_rate": 3.016641981919485e-06, "loss": 0.1658, "step": 4173 }, { "epoch": 1.35, "learning_rate": 3.0157862218542004e-06, "loss": 0.1508, "step": 4174 }, { "epoch": 1.35, "learning_rate": 3.0149303986616772e-06, "loss": 0.1499, "step": 4175 }, { "epoch": 1.35, "learning_rate": 3.014074512446657e-06, "loss": 0.1571, "step": 4176 }, { "epoch": 1.35, "learning_rate": 3.0132185633138934e-06, "loss": 0.1675, "step": 4177 }, { "epoch": 1.35, "learning_rate": 3.0123625513681463e-06, "loss": 0.1764, "step": 4178 }, { "epoch": 1.35, "learning_rate": 3.0115064767141827e-06, "loss": 0.1639, "step": 4179 }, { "epoch": 1.35, "learning_rate": 3.0106503394567775e-06, "loss": 0.1755, "step": 4180 }, { "epoch": 1.35, "learning_rate": 3.0097941397007156e-06, "loss": 0.1752, "step": 4181 }, { "epoch": 1.36, "learning_rate": 3.008937877550785e-06, "loss": 0.1687, "step": 4182 }, { "epoch": 1.36, "learning_rate": 3.008081553111786e-06, "loss": 0.1607, "step": 4183 }, { "epoch": 1.36, "learning_rate": 3.0072251664885222e-06, "loss": 0.1701, "step": 4184 }, { "epoch": 1.36, "learning_rate": 3.006368717785809e-06, "loss": 0.1853, "step": 4185 }, { "epoch": 1.36, "learning_rate": 3.005512207108467e-06, "loss": 0.1773, "step": 4186 }, { "epoch": 1.36, "learning_rate": 3.004655634561325e-06, "loss": 0.1566, "step": 4187 }, { "epoch": 1.36, "learning_rate": 3.003799000249218e-06, "loss": 0.1861, "step": 4188 }, { "epoch": 1.36, "learning_rate": 3.002942304276991e-06, "loss": 0.1683, "step": 4189 }, { "epoch": 1.36, "learning_rate": 3.002085546749495e-06, "loss": 0.1666, "step": 4190 }, { "epoch": 1.36, "learning_rate": 3.001228727771588e-06, "loss": 0.1758, "step": 4191 }, { "epoch": 1.36, "learning_rate": 3.000371847448137e-06, "loss": 0.1675, "step": 4192 }, { "epoch": 1.36, "learning_rate": 2.9995149058840157e-06, "loss": 0.1618, "step": 4193 }, { "epoch": 1.36, "learning_rate": 2.998657903184107e-06, "loss": 0.154, "step": 4194 }, { "epoch": 1.36, "learning_rate": 2.9978008394532966e-06, "loss": 0.1775, "step": 4195 }, { "epoch": 1.36, "learning_rate": 2.996943714796483e-06, "loss": 0.1527, "step": 4196 }, { "epoch": 1.36, "learning_rate": 2.9960865293185697e-06, "loss": 0.1594, "step": 4197 }, { "epoch": 1.36, "learning_rate": 2.995229283124468e-06, "loss": 0.1385, "step": 4198 }, { "epoch": 1.36, "learning_rate": 2.994371976319096e-06, "loss": 0.1667, "step": 4199 }, { "epoch": 1.36, "learning_rate": 2.993514609007381e-06, "loss": 0.1618, "step": 4200 }, { "epoch": 1.36, "learning_rate": 2.992657181294254e-06, "loss": 0.1567, "step": 4201 }, { "epoch": 1.36, "learning_rate": 2.9917996932846572e-06, "loss": 0.1561, "step": 4202 }, { "epoch": 1.36, "learning_rate": 2.99094214508354e-06, "loss": 0.1587, "step": 4203 }, { "epoch": 1.36, "learning_rate": 2.990084536795856e-06, "loss": 0.1709, "step": 4204 }, { "epoch": 1.36, "learning_rate": 2.989226868526569e-06, "loss": 0.1625, "step": 4205 }, { "epoch": 1.36, "learning_rate": 2.98836914038065e-06, "loss": 0.1556, "step": 4206 }, { "epoch": 1.36, "learning_rate": 2.987511352463076e-06, "loss": 0.1583, "step": 4207 }, { "epoch": 1.36, "learning_rate": 2.9866535048788314e-06, "loss": 0.1751, "step": 4208 }, { "epoch": 1.36, "learning_rate": 2.9857955977329095e-06, "loss": 0.1739, "step": 4209 }, { "epoch": 1.36, "learning_rate": 2.9849376311303095e-06, "loss": 0.158, "step": 4210 }, { "epoch": 1.36, "learning_rate": 2.984079605176038e-06, "loss": 0.1865, "step": 4211 }, { "epoch": 1.36, "learning_rate": 2.9832215199751085e-06, "loss": 0.1701, "step": 4212 }, { "epoch": 1.37, "learning_rate": 2.9823633756325433e-06, "loss": 0.1485, "step": 4213 }, { "epoch": 1.37, "learning_rate": 2.9815051722533707e-06, "loss": 0.1718, "step": 4214 }, { "epoch": 1.37, "learning_rate": 2.9806469099426254e-06, "loss": 0.1604, "step": 4215 }, { "epoch": 1.37, "learning_rate": 2.9797885888053517e-06, "loss": 0.1693, "step": 4216 }, { "epoch": 1.37, "learning_rate": 2.9789302089466e-06, "loss": 0.1645, "step": 4217 }, { "epoch": 1.37, "learning_rate": 2.978071770471427e-06, "loss": 0.1887, "step": 4218 }, { "epoch": 1.37, "learning_rate": 2.9772132734848974e-06, "loss": 0.1663, "step": 4219 }, { "epoch": 1.37, "learning_rate": 2.9763547180920825e-06, "loss": 0.1872, "step": 4220 }, { "epoch": 1.37, "learning_rate": 2.9754961043980623e-06, "loss": 0.1613, "step": 4221 }, { "epoch": 1.37, "learning_rate": 2.9746374325079213e-06, "loss": 0.1702, "step": 4222 }, { "epoch": 1.37, "learning_rate": 2.973778702526754e-06, "loss": 0.1628, "step": 4223 }, { "epoch": 1.37, "learning_rate": 2.97291991455966e-06, "loss": 0.1803, "step": 4224 }, { "epoch": 1.37, "learning_rate": 2.9720610687117462e-06, "loss": 0.1639, "step": 4225 }, { "epoch": 1.37, "learning_rate": 2.971202165088128e-06, "loss": 0.1585, "step": 4226 }, { "epoch": 1.37, "learning_rate": 2.9703432037939255e-06, "loss": 0.1651, "step": 4227 }, { "epoch": 1.37, "learning_rate": 2.9694841849342688e-06, "loss": 0.1668, "step": 4228 }, { "epoch": 1.37, "learning_rate": 2.9686251086142927e-06, "loss": 0.1786, "step": 4229 }, { "epoch": 1.37, "learning_rate": 2.9677659749391404e-06, "loss": 0.1497, "step": 4230 }, { "epoch": 1.37, "learning_rate": 2.9669067840139603e-06, "loss": 0.1607, "step": 4231 }, { "epoch": 1.37, "learning_rate": 2.9660475359439113e-06, "loss": 0.165, "step": 4232 }, { "epoch": 1.37, "learning_rate": 2.965188230834154e-06, "loss": 0.1614, "step": 4233 }, { "epoch": 1.37, "learning_rate": 2.9643288687898614e-06, "loss": 0.1595, "step": 4234 }, { "epoch": 1.37, "learning_rate": 2.96346944991621e-06, "loss": 0.1564, "step": 4235 }, { "epoch": 1.37, "learning_rate": 2.962609974318385e-06, "loss": 0.1648, "step": 4236 }, { "epoch": 1.37, "learning_rate": 2.961750442101577e-06, "loss": 0.1767, "step": 4237 }, { "epoch": 1.37, "learning_rate": 2.9608908533709852e-06, "loss": 0.1583, "step": 4238 }, { "epoch": 1.37, "learning_rate": 2.9600312082318144e-06, "loss": 0.1649, "step": 4239 }, { "epoch": 1.37, "learning_rate": 2.9591715067892777e-06, "loss": 0.1581, "step": 4240 }, { "epoch": 1.37, "learning_rate": 2.958311749148594e-06, "loss": 0.1696, "step": 4241 }, { "epoch": 1.37, "learning_rate": 2.9574519354149884e-06, "loss": 0.1754, "step": 4242 }, { "epoch": 1.37, "learning_rate": 2.9565920656936947e-06, "loss": 0.1728, "step": 4243 }, { "epoch": 1.38, "learning_rate": 2.9557321400899524e-06, "loss": 0.1548, "step": 4244 }, { "epoch": 1.38, "learning_rate": 2.9548721587090075e-06, "loss": 0.1679, "step": 4245 }, { "epoch": 1.38, "learning_rate": 2.954012121656114e-06, "loss": 0.1586, "step": 4246 }, { "epoch": 1.38, "learning_rate": 2.9531520290365316e-06, "loss": 0.1658, "step": 4247 }, { "epoch": 1.38, "learning_rate": 2.952291880955529e-06, "loss": 0.1705, "step": 4248 }, { "epoch": 1.38, "learning_rate": 2.9514316775183777e-06, "loss": 0.1607, "step": 4249 }, { "epoch": 1.38, "learning_rate": 2.950571418830359e-06, "loss": 0.1475, "step": 4250 }, { "epoch": 1.38, "learning_rate": 2.949711104996761e-06, "loss": 0.1749, "step": 4251 }, { "epoch": 1.38, "learning_rate": 2.948850736122878e-06, "loss": 0.1528, "step": 4252 }, { "epoch": 1.38, "learning_rate": 2.947990312314009e-06, "loss": 0.1674, "step": 4253 }, { "epoch": 1.38, "learning_rate": 2.9471298336754633e-06, "loss": 0.1496, "step": 4254 }, { "epoch": 1.38, "learning_rate": 2.9462693003125544e-06, "loss": 0.1609, "step": 4255 }, { "epoch": 1.38, "learning_rate": 2.945408712330603e-06, "loss": 0.1578, "step": 4256 }, { "epoch": 1.38, "learning_rate": 2.944548069834937e-06, "loss": 0.1756, "step": 4257 }, { "epoch": 1.38, "learning_rate": 2.943687372930891e-06, "loss": 0.1641, "step": 4258 }, { "epoch": 1.38, "learning_rate": 2.942826621723806e-06, "loss": 0.1686, "step": 4259 }, { "epoch": 1.38, "learning_rate": 2.9419658163190295e-06, "loss": 0.1486, "step": 4260 }, { "epoch": 1.38, "learning_rate": 2.9411049568219153e-06, "loss": 0.1732, "step": 4261 }, { "epoch": 1.38, "learning_rate": 2.9402440433378247e-06, "loss": 0.177, "step": 4262 }, { "epoch": 1.38, "learning_rate": 2.939383075972125e-06, "loss": 0.1582, "step": 4263 }, { "epoch": 1.38, "learning_rate": 2.9385220548301906e-06, "loss": 0.1716, "step": 4264 }, { "epoch": 1.38, "learning_rate": 2.937660980017402e-06, "loss": 0.1567, "step": 4265 }, { "epoch": 1.38, "learning_rate": 2.936799851639146e-06, "loss": 0.1617, "step": 4266 }, { "epoch": 1.38, "learning_rate": 2.9359386698008172e-06, "loss": 0.1539, "step": 4267 }, { "epoch": 1.38, "learning_rate": 2.935077434607815e-06, "loss": 0.1768, "step": 4268 }, { "epoch": 1.38, "learning_rate": 2.9342161461655468e-06, "loss": 0.1719, "step": 4269 }, { "epoch": 1.38, "learning_rate": 2.9333548045794253e-06, "loss": 0.1662, "step": 4270 }, { "epoch": 1.38, "learning_rate": 2.9324934099548713e-06, "loss": 0.1722, "step": 4271 }, { "epoch": 1.38, "learning_rate": 2.931631962397311e-06, "loss": 0.1512, "step": 4272 }, { "epoch": 1.38, "learning_rate": 2.9307704620121775e-06, "loss": 0.1564, "step": 4273 }, { "epoch": 1.38, "learning_rate": 2.9299089089049092e-06, "loss": 0.1595, "step": 4274 }, { "epoch": 1.39, "learning_rate": 2.929047303180952e-06, "loss": 0.1713, "step": 4275 }, { "epoch": 1.39, "learning_rate": 2.9281856449457587e-06, "loss": 0.1831, "step": 4276 }, { "epoch": 1.39, "learning_rate": 2.927323934304787e-06, "loss": 0.1558, "step": 4277 }, { "epoch": 1.39, "learning_rate": 2.926462171363503e-06, "loss": 0.1453, "step": 4278 }, { "epoch": 1.39, "learning_rate": 2.9256003562273784e-06, "loss": 0.1808, "step": 4279 }, { "epoch": 1.39, "learning_rate": 2.924738489001889e-06, "loss": 0.1684, "step": 4280 }, { "epoch": 1.39, "learning_rate": 2.923876569792521e-06, "loss": 0.1532, "step": 4281 }, { "epoch": 1.39, "learning_rate": 2.923014598704764e-06, "loss": 0.1531, "step": 4282 }, { "epoch": 1.39, "learning_rate": 2.9221525758441155e-06, "loss": 0.1729, "step": 4283 }, { "epoch": 1.39, "learning_rate": 2.9212905013160784e-06, "loss": 0.163, "step": 4284 }, { "epoch": 1.39, "learning_rate": 2.920428375226163e-06, "loss": 0.153, "step": 4285 }, { "epoch": 1.39, "learning_rate": 2.9195661976798838e-06, "loss": 0.1695, "step": 4286 }, { "epoch": 1.39, "learning_rate": 2.918703968782764e-06, "loss": 0.1775, "step": 4287 }, { "epoch": 1.39, "learning_rate": 2.9178416886403318e-06, "loss": 0.1638, "step": 4288 }, { "epoch": 1.39, "learning_rate": 2.916979357358121e-06, "loss": 0.1649, "step": 4289 }, { "epoch": 1.39, "learning_rate": 2.9161169750416746e-06, "loss": 0.1446, "step": 4290 }, { "epoch": 1.39, "learning_rate": 2.915254541796539e-06, "loss": 0.1633, "step": 4291 }, { "epoch": 1.39, "learning_rate": 2.914392057728267e-06, "loss": 0.1666, "step": 4292 }, { "epoch": 1.39, "learning_rate": 2.913529522942418e-06, "loss": 0.1525, "step": 4293 }, { "epoch": 1.39, "learning_rate": 2.9126669375445595e-06, "loss": 0.1754, "step": 4294 }, { "epoch": 1.39, "learning_rate": 2.911804301640263e-06, "loss": 0.1698, "step": 4295 }, { "epoch": 1.39, "learning_rate": 2.910941615335106e-06, "loss": 0.1773, "step": 4296 }, { "epoch": 1.39, "learning_rate": 2.9100788787346746e-06, "loss": 0.1607, "step": 4297 }, { "epoch": 1.39, "learning_rate": 2.9092160919445566e-06, "loss": 0.1515, "step": 4298 }, { "epoch": 1.39, "learning_rate": 2.9083532550703515e-06, "loss": 0.1563, "step": 4299 }, { "epoch": 1.39, "learning_rate": 2.9074903682176607e-06, "loss": 0.1591, "step": 4300 }, { "epoch": 1.39, "learning_rate": 2.906627431492094e-06, "loss": 0.1513, "step": 4301 }, { "epoch": 1.39, "learning_rate": 2.9057644449992655e-06, "loss": 0.1437, "step": 4302 }, { "epoch": 1.39, "learning_rate": 2.904901408844798e-06, "loss": 0.1544, "step": 4303 }, { "epoch": 1.39, "learning_rate": 2.9040383231343173e-06, "loss": 0.159, "step": 4304 }, { "epoch": 1.4, "learning_rate": 2.903175187973457e-06, "loss": 0.1631, "step": 4305 }, { "epoch": 1.4, "learning_rate": 2.9023120034678575e-06, "loss": 0.1783, "step": 4306 }, { "epoch": 1.4, "learning_rate": 2.901448769723163e-06, "loss": 0.174, "step": 4307 }, { "epoch": 1.4, "learning_rate": 2.900585486845026e-06, "loss": 0.152, "step": 4308 }, { "epoch": 1.4, "learning_rate": 2.8997221549391025e-06, "loss": 0.1519, "step": 4309 }, { "epoch": 1.4, "learning_rate": 2.8988587741110575e-06, "loss": 0.1674, "step": 4310 }, { "epoch": 1.4, "learning_rate": 2.8979953444665585e-06, "loss": 0.1743, "step": 4311 }, { "epoch": 1.4, "learning_rate": 2.8971318661112836e-06, "loss": 0.1556, "step": 4312 }, { "epoch": 1.4, "learning_rate": 2.896268339150912e-06, "loss": 0.1551, "step": 4313 }, { "epoch": 1.4, "learning_rate": 2.895404763691132e-06, "loss": 0.1673, "step": 4314 }, { "epoch": 1.4, "learning_rate": 2.894541139837638e-06, "loss": 0.1525, "step": 4315 }, { "epoch": 1.4, "learning_rate": 2.8936774676961264e-06, "loss": 0.1519, "step": 4316 }, { "epoch": 1.4, "learning_rate": 2.892813747372305e-06, "loss": 0.1468, "step": 4317 }, { "epoch": 1.4, "learning_rate": 2.891949978971883e-06, "loss": 0.1646, "step": 4318 }, { "epoch": 1.4, "learning_rate": 2.8910861626005774e-06, "loss": 0.1679, "step": 4319 }, { "epoch": 1.4, "learning_rate": 2.890222298364112e-06, "loss": 0.1597, "step": 4320 }, { "epoch": 1.4, "learning_rate": 2.8893583863682157e-06, "loss": 0.1656, "step": 4321 }, { "epoch": 1.4, "learning_rate": 2.888494426718621e-06, "loss": 0.1694, "step": 4322 }, { "epoch": 1.4, "learning_rate": 2.8876304195210697e-06, "loss": 0.1581, "step": 4323 }, { "epoch": 1.4, "learning_rate": 2.8867663648813077e-06, "loss": 0.1716, "step": 4324 }, { "epoch": 1.4, "learning_rate": 2.885902262905087e-06, "loss": 0.1525, "step": 4325 }, { "epoch": 1.4, "learning_rate": 2.885038113698165e-06, "loss": 0.1654, "step": 4326 }, { "epoch": 1.4, "learning_rate": 2.8841739173663057e-06, "loss": 0.157, "step": 4327 }, { "epoch": 1.4, "learning_rate": 2.883309674015278e-06, "loss": 0.1736, "step": 4328 }, { "epoch": 1.4, "learning_rate": 2.8824453837508563e-06, "loss": 0.1642, "step": 4329 }, { "epoch": 1.4, "learning_rate": 2.8815810466788225e-06, "loss": 0.1672, "step": 4330 }, { "epoch": 1.4, "learning_rate": 2.8807166629049623e-06, "loss": 0.1621, "step": 4331 }, { "epoch": 1.4, "learning_rate": 2.8798522325350683e-06, "loss": 0.1738, "step": 4332 }, { "epoch": 1.4, "learning_rate": 2.8789877556749383e-06, "loss": 0.17, "step": 4333 }, { "epoch": 1.4, "learning_rate": 2.8781232324303758e-06, "loss": 0.1484, "step": 4334 }, { "epoch": 1.4, "learning_rate": 2.8772586629071902e-06, "loss": 0.1632, "step": 4335 }, { "epoch": 1.41, "learning_rate": 2.876394047211196e-06, "loss": 0.1737, "step": 4336 }, { "epoch": 1.41, "learning_rate": 2.875529385448215e-06, "loss": 0.1701, "step": 4337 }, { "epoch": 1.41, "learning_rate": 2.8746646777240724e-06, "loss": 0.169, "step": 4338 }, { "epoch": 1.41, "learning_rate": 2.8737999241446e-06, "loss": 0.1866, "step": 4339 }, { "epoch": 1.41, "learning_rate": 2.8729351248156364e-06, "loss": 0.1664, "step": 4340 }, { "epoch": 1.41, "learning_rate": 2.872070279843023e-06, "loss": 0.1718, "step": 4341 }, { "epoch": 1.41, "learning_rate": 2.8712053893326088e-06, "loss": 0.1738, "step": 4342 }, { "epoch": 1.41, "learning_rate": 2.8703404533902492e-06, "loss": 0.1694, "step": 4343 }, { "epoch": 1.41, "learning_rate": 2.8694754721218027e-06, "loss": 0.1784, "step": 4344 }, { "epoch": 1.41, "learning_rate": 2.8686104456331356e-06, "loss": 0.156, "step": 4345 }, { "epoch": 1.41, "learning_rate": 2.8677453740301185e-06, "loss": 0.1752, "step": 4346 }, { "epoch": 1.41, "learning_rate": 2.8668802574186277e-06, "loss": 0.1511, "step": 4347 }, { "epoch": 1.41, "learning_rate": 2.8660150959045456e-06, "loss": 0.1764, "step": 4348 }, { "epoch": 1.41, "learning_rate": 2.865149889593758e-06, "loss": 0.1357, "step": 4349 }, { "epoch": 1.41, "learning_rate": 2.8642846385921593e-06, "loss": 0.1725, "step": 4350 }, { "epoch": 1.41, "learning_rate": 2.863419343005647e-06, "loss": 0.1645, "step": 4351 }, { "epoch": 1.41, "learning_rate": 2.8625540029401262e-06, "loss": 0.161, "step": 4352 }, { "epoch": 1.41, "learning_rate": 2.8616886185015046e-06, "loss": 0.1678, "step": 4353 }, { "epoch": 1.41, "learning_rate": 2.860823189795697e-06, "loss": 0.1588, "step": 4354 }, { "epoch": 1.41, "learning_rate": 2.859957716928625e-06, "loss": 0.1744, "step": 4355 }, { "epoch": 1.41, "learning_rate": 2.8590922000062125e-06, "loss": 0.1688, "step": 4356 }, { "epoch": 1.41, "learning_rate": 2.858226639134391e-06, "loss": 0.1543, "step": 4357 }, { "epoch": 1.41, "learning_rate": 2.8573610344190978e-06, "loss": 0.1609, "step": 4358 }, { "epoch": 1.41, "learning_rate": 2.8564953859662725e-06, "loss": 0.1592, "step": 4359 }, { "epoch": 1.41, "learning_rate": 2.8556296938818632e-06, "loss": 0.1405, "step": 4360 }, { "epoch": 1.41, "learning_rate": 2.8547639582718223e-06, "loss": 0.1592, "step": 4361 }, { "epoch": 1.41, "learning_rate": 2.853898179242107e-06, "loss": 0.1628, "step": 4362 }, { "epoch": 1.41, "learning_rate": 2.8530323568986805e-06, "loss": 0.1595, "step": 4363 }, { "epoch": 1.41, "learning_rate": 2.8521664913475123e-06, "loss": 0.1801, "step": 4364 }, { "epoch": 1.41, "learning_rate": 2.8513005826945733e-06, "loss": 0.1596, "step": 4365 }, { "epoch": 1.41, "learning_rate": 2.8504346310458446e-06, "loss": 0.1551, "step": 4366 }, { "epoch": 1.42, "learning_rate": 2.8495686365073096e-06, "loss": 0.1735, "step": 4367 }, { "epoch": 1.42, "learning_rate": 2.848702599184957e-06, "loss": 0.1674, "step": 4368 }, { "epoch": 1.42, "learning_rate": 2.8478365191847824e-06, "loss": 0.174, "step": 4369 }, { "epoch": 1.42, "learning_rate": 2.8469703966127853e-06, "loss": 0.1888, "step": 4370 }, { "epoch": 1.42, "learning_rate": 2.8461042315749706e-06, "loss": 0.1487, "step": 4371 }, { "epoch": 1.42, "learning_rate": 2.845238024177348e-06, "loss": 0.1679, "step": 4372 }, { "epoch": 1.42, "learning_rate": 2.8443717745259335e-06, "loss": 0.1615, "step": 4373 }, { "epoch": 1.42, "learning_rate": 2.8435054827267476e-06, "loss": 0.1637, "step": 4374 }, { "epoch": 1.42, "learning_rate": 2.8426391488858163e-06, "loss": 0.1577, "step": 4375 }, { "epoch": 1.42, "learning_rate": 2.8417727731091705e-06, "loss": 0.1704, "step": 4376 }, { "epoch": 1.42, "learning_rate": 2.840906355502845e-06, "loss": 0.1532, "step": 4377 }, { "epoch": 1.42, "learning_rate": 2.840039896172882e-06, "loss": 0.1778, "step": 4378 }, { "epoch": 1.42, "learning_rate": 2.8391733952253277e-06, "loss": 0.1644, "step": 4379 }, { "epoch": 1.42, "learning_rate": 2.838306852766234e-06, "loss": 0.176, "step": 4380 }, { "epoch": 1.42, "learning_rate": 2.8374402689016557e-06, "loss": 0.1567, "step": 4381 }, { "epoch": 1.42, "learning_rate": 2.8365736437376555e-06, "loss": 0.1589, "step": 4382 }, { "epoch": 1.42, "learning_rate": 2.8357069773802996e-06, "loss": 0.1632, "step": 4383 }, { "epoch": 1.42, "learning_rate": 2.834840269935659e-06, "loss": 0.1709, "step": 4384 }, { "epoch": 1.42, "learning_rate": 2.833973521509812e-06, "loss": 0.1617, "step": 4385 }, { "epoch": 1.42, "learning_rate": 2.833106732208838e-06, "loss": 0.1739, "step": 4386 }, { "epoch": 1.42, "learning_rate": 2.8322399021388248e-06, "loss": 0.1728, "step": 4387 }, { "epoch": 1.42, "learning_rate": 2.8313730314058645e-06, "loss": 0.1602, "step": 4388 }, { "epoch": 1.42, "learning_rate": 2.830506120116053e-06, "loss": 0.1734, "step": 4389 }, { "epoch": 1.42, "learning_rate": 2.8296391683754916e-06, "loss": 0.159, "step": 4390 }, { "epoch": 1.42, "learning_rate": 2.8287721762902877e-06, "loss": 0.17, "step": 4391 }, { "epoch": 1.42, "learning_rate": 2.8279051439665516e-06, "loss": 0.1585, "step": 4392 }, { "epoch": 1.42, "learning_rate": 2.8270380715104e-06, "loss": 0.1681, "step": 4393 }, { "epoch": 1.42, "learning_rate": 2.826170959027956e-06, "loss": 0.164, "step": 4394 }, { "epoch": 1.42, "learning_rate": 2.8253038066253423e-06, "loss": 0.1779, "step": 4395 }, { "epoch": 1.42, "learning_rate": 2.8244366144086926e-06, "loss": 0.1714, "step": 4396 }, { "epoch": 1.42, "learning_rate": 2.823569382484142e-06, "loss": 0.1575, "step": 4397 }, { "epoch": 1.43, "learning_rate": 2.822702110957831e-06, "loss": 0.1515, "step": 4398 }, { "epoch": 1.43, "learning_rate": 2.8218347999359066e-06, "loss": 0.1706, "step": 4399 }, { "epoch": 1.43, "learning_rate": 2.8209674495245177e-06, "loss": 0.1597, "step": 4400 }, { "epoch": 1.43, "learning_rate": 2.82010005982982e-06, "loss": 0.1769, "step": 4401 }, { "epoch": 1.43, "learning_rate": 2.819232630957975e-06, "loss": 0.1672, "step": 4402 }, { "epoch": 1.43, "learning_rate": 2.818365163015145e-06, "loss": 0.1662, "step": 4403 }, { "epoch": 1.43, "learning_rate": 2.8174976561075013e-06, "loss": 0.1705, "step": 4404 }, { "epoch": 1.43, "learning_rate": 2.816630110341218e-06, "loss": 0.1617, "step": 4405 }, { "epoch": 1.43, "learning_rate": 2.8157625258224746e-06, "loss": 0.1664, "step": 4406 }, { "epoch": 1.43, "learning_rate": 2.814894902657456e-06, "loss": 0.1662, "step": 4407 }, { "epoch": 1.43, "learning_rate": 2.814027240952348e-06, "loss": 0.1505, "step": 4408 }, { "epoch": 1.43, "learning_rate": 2.8131595408133467e-06, "loss": 0.1602, "step": 4409 }, { "epoch": 1.43, "learning_rate": 2.8122918023466485e-06, "loss": 0.1576, "step": 4410 }, { "epoch": 1.43, "learning_rate": 2.811424025658458e-06, "loss": 0.1772, "step": 4411 }, { "epoch": 1.43, "learning_rate": 2.8105562108549807e-06, "loss": 0.1605, "step": 4412 }, { "epoch": 1.43, "learning_rate": 2.80968835804243e-06, "loss": 0.1485, "step": 4413 }, { "epoch": 1.43, "learning_rate": 2.808820467327022e-06, "loss": 0.169, "step": 4414 }, { "epoch": 1.43, "learning_rate": 2.8079525388149787e-06, "loss": 0.1623, "step": 4415 }, { "epoch": 1.43, "learning_rate": 2.8070845726125257e-06, "loss": 0.1665, "step": 4416 }, { "epoch": 1.43, "learning_rate": 2.8062165688258934e-06, "loss": 0.1627, "step": 4417 }, { "epoch": 1.43, "learning_rate": 2.8053485275613177e-06, "loss": 0.1758, "step": 4418 }, { "epoch": 1.43, "learning_rate": 2.804480448925039e-06, "loss": 0.1706, "step": 4419 }, { "epoch": 1.43, "learning_rate": 2.8036123330233e-06, "loss": 0.1618, "step": 4420 }, { "epoch": 1.43, "learning_rate": 2.802744179962351e-06, "loss": 0.1691, "step": 4421 }, { "epoch": 1.43, "learning_rate": 2.801875989848446e-06, "loss": 0.1656, "step": 4422 }, { "epoch": 1.43, "learning_rate": 2.8010077627878414e-06, "loss": 0.1598, "step": 4423 }, { "epoch": 1.43, "learning_rate": 2.8001394988868003e-06, "loss": 0.1853, "step": 4424 }, { "epoch": 1.43, "learning_rate": 2.7992711982515908e-06, "loss": 0.156, "step": 4425 }, { "epoch": 1.43, "learning_rate": 2.798402860988483e-06, "loss": 0.1711, "step": 4426 }, { "epoch": 1.43, "learning_rate": 2.797534487203755e-06, "loss": 0.1648, "step": 4427 }, { "epoch": 1.43, "learning_rate": 2.7966660770036845e-06, "loss": 0.1668, "step": 4428 }, { "epoch": 1.44, "learning_rate": 2.795797630494559e-06, "loss": 0.1509, "step": 4429 }, { "epoch": 1.44, "learning_rate": 2.7949291477826666e-06, "loss": 0.1768, "step": 4430 }, { "epoch": 1.44, "learning_rate": 2.7940606289743026e-06, "loss": 0.1749, "step": 4431 }, { "epoch": 1.44, "learning_rate": 2.793192074175764e-06, "loss": 0.1511, "step": 4432 }, { "epoch": 1.44, "learning_rate": 2.792323483493354e-06, "loss": 0.1534, "step": 4433 }, { "epoch": 1.44, "learning_rate": 2.791454857033379e-06, "loss": 0.1747, "step": 4434 }, { "epoch": 1.44, "learning_rate": 2.790586194902151e-06, "loss": 0.1598, "step": 4435 }, { "epoch": 1.44, "learning_rate": 2.789717497205986e-06, "loss": 0.1698, "step": 4436 }, { "epoch": 1.44, "learning_rate": 2.7888487640512046e-06, "loss": 0.168, "step": 4437 }, { "epoch": 1.44, "learning_rate": 2.78797999554413e-06, "loss": 0.1561, "step": 4438 }, { "epoch": 1.44, "learning_rate": 2.787111191791092e-06, "loss": 0.164, "step": 4439 }, { "epoch": 1.44, "learning_rate": 2.7862423528984233e-06, "loss": 0.1676, "step": 4440 }, { "epoch": 1.44, "learning_rate": 2.7853734789724618e-06, "loss": 0.176, "step": 4441 }, { "epoch": 1.44, "learning_rate": 2.7845045701195494e-06, "loss": 0.159, "step": 4442 }, { "epoch": 1.44, "learning_rate": 2.7836356264460316e-06, "loss": 0.1678, "step": 4443 }, { "epoch": 1.44, "learning_rate": 2.7827666480582593e-06, "loss": 0.1712, "step": 4444 }, { "epoch": 1.44, "learning_rate": 2.7818976350625864e-06, "loss": 0.1661, "step": 4445 }, { "epoch": 1.44, "learning_rate": 2.781028587565372e-06, "loss": 0.148, "step": 4446 }, { "epoch": 1.44, "learning_rate": 2.780159505672979e-06, "loss": 0.1624, "step": 4447 }, { "epoch": 1.44, "learning_rate": 2.7792903894917746e-06, "loss": 0.1699, "step": 4448 }, { "epoch": 1.44, "learning_rate": 2.7784212391281307e-06, "loss": 0.1833, "step": 4449 }, { "epoch": 1.44, "learning_rate": 2.7775520546884216e-06, "loss": 0.1735, "step": 4450 }, { "epoch": 1.44, "learning_rate": 2.7766828362790283e-06, "loss": 0.1541, "step": 4451 }, { "epoch": 1.44, "learning_rate": 2.7758135840063344e-06, "loss": 0.1641, "step": 4452 }, { "epoch": 1.44, "learning_rate": 2.7749442979767276e-06, "loss": 0.163, "step": 4453 }, { "epoch": 1.44, "learning_rate": 2.7740749782966016e-06, "loss": 0.1531, "step": 4454 }, { "epoch": 1.44, "learning_rate": 2.7732056250723505e-06, "loss": 0.1605, "step": 4455 }, { "epoch": 1.44, "learning_rate": 2.7723362384103757e-06, "loss": 0.1583, "step": 4456 }, { "epoch": 1.44, "learning_rate": 2.771466818417082e-06, "loss": 0.1557, "step": 4457 }, { "epoch": 1.44, "learning_rate": 2.7705973651988777e-06, "loss": 0.1515, "step": 4458 }, { "epoch": 1.44, "learning_rate": 2.769727878862175e-06, "loss": 0.1586, "step": 4459 }, { "epoch": 1.45, "learning_rate": 2.768858359513392e-06, "loss": 0.188, "step": 4460 }, { "epoch": 1.45, "learning_rate": 2.767988807258948e-06, "loss": 0.1668, "step": 4461 }, { "epoch": 1.45, "learning_rate": 2.7671192222052685e-06, "loss": 0.138, "step": 4462 }, { "epoch": 1.45, "learning_rate": 2.7662496044587817e-06, "loss": 0.1734, "step": 4463 }, { "epoch": 1.45, "learning_rate": 2.765379954125921e-06, "loss": 0.1707, "step": 4464 }, { "epoch": 1.45, "learning_rate": 2.764510271313123e-06, "loss": 0.1754, "step": 4465 }, { "epoch": 1.45, "learning_rate": 2.7636405561268286e-06, "loss": 0.1809, "step": 4466 }, { "epoch": 1.45, "learning_rate": 2.7627708086734827e-06, "loss": 0.1745, "step": 4467 }, { "epoch": 1.45, "learning_rate": 2.7619010290595333e-06, "loss": 0.1757, "step": 4468 }, { "epoch": 1.45, "learning_rate": 2.7610312173914334e-06, "loss": 0.1674, "step": 4469 }, { "epoch": 1.45, "learning_rate": 2.760161373775639e-06, "loss": 0.1575, "step": 4470 }, { "epoch": 1.45, "learning_rate": 2.7592914983186113e-06, "loss": 0.1552, "step": 4471 }, { "epoch": 1.45, "learning_rate": 2.758421591126814e-06, "loss": 0.1546, "step": 4472 }, { "epoch": 1.45, "learning_rate": 2.757551652306717e-06, "loss": 0.1671, "step": 4473 }, { "epoch": 1.45, "learning_rate": 2.7566816819647897e-06, "loss": 0.1771, "step": 4474 }, { "epoch": 1.45, "learning_rate": 2.7558116802075095e-06, "loss": 0.1619, "step": 4475 }, { "epoch": 1.45, "learning_rate": 2.754941647141357e-06, "loss": 0.1715, "step": 4476 }, { "epoch": 1.45, "learning_rate": 2.754071582872814e-06, "loss": 0.1537, "step": 4477 }, { "epoch": 1.45, "learning_rate": 2.753201487508369e-06, "loss": 0.1458, "step": 4478 }, { "epoch": 1.45, "learning_rate": 2.7523313611545133e-06, "loss": 0.1691, "step": 4479 }, { "epoch": 1.45, "learning_rate": 2.7514612039177422e-06, "loss": 0.163, "step": 4480 }, { "epoch": 1.45, "learning_rate": 2.7505910159045534e-06, "loss": 0.1655, "step": 4481 }, { "epoch": 1.45, "learning_rate": 2.74972079722145e-06, "loss": 0.1535, "step": 4482 }, { "epoch": 1.45, "learning_rate": 2.7488505479749395e-06, "loss": 0.1591, "step": 4483 }, { "epoch": 1.45, "learning_rate": 2.74798026827153e-06, "loss": 0.1634, "step": 4484 }, { "epoch": 1.45, "learning_rate": 2.747109958217737e-06, "loss": 0.1552, "step": 4485 }, { "epoch": 1.45, "learning_rate": 2.746239617920077e-06, "loss": 0.1673, "step": 4486 }, { "epoch": 1.45, "learning_rate": 2.745369247485072e-06, "loss": 0.1627, "step": 4487 }, { "epoch": 1.45, "learning_rate": 2.7444988470192457e-06, "loss": 0.1579, "step": 4488 }, { "epoch": 1.45, "learning_rate": 2.743628416629128e-06, "loss": 0.1516, "step": 4489 }, { "epoch": 1.45, "learning_rate": 2.7427579564212496e-06, "loss": 0.1643, "step": 4490 }, { "epoch": 1.46, "learning_rate": 2.7418874665021483e-06, "loss": 0.1554, "step": 4491 }, { "epoch": 1.46, "learning_rate": 2.7410169469783632e-06, "loss": 0.1707, "step": 4492 }, { "epoch": 1.46, "learning_rate": 2.7401463979564365e-06, "loss": 0.17, "step": 4493 }, { "epoch": 1.46, "learning_rate": 2.7392758195429153e-06, "loss": 0.1637, "step": 4494 }, { "epoch": 1.46, "learning_rate": 2.73840521184435e-06, "loss": 0.1494, "step": 4495 }, { "epoch": 1.46, "learning_rate": 2.737534574967295e-06, "loss": 0.1743, "step": 4496 }, { "epoch": 1.46, "learning_rate": 2.7366639090183076e-06, "loss": 0.1789, "step": 4497 }, { "epoch": 1.46, "learning_rate": 2.7357932141039494e-06, "loss": 0.1614, "step": 4498 }, { "epoch": 1.46, "learning_rate": 2.7349224903307836e-06, "loss": 0.1695, "step": 4499 }, { "epoch": 1.46, "learning_rate": 2.734051737805379e-06, "loss": 0.1775, "step": 4500 }, { "epoch": 1.46, "learning_rate": 2.733180956634308e-06, "loss": 0.1795, "step": 4501 }, { "epoch": 1.46, "learning_rate": 2.7323101469241454e-06, "loss": 0.1464, "step": 4502 }, { "epoch": 1.46, "learning_rate": 2.7314393087814693e-06, "loss": 0.1645, "step": 4503 }, { "epoch": 1.46, "learning_rate": 2.7305684423128633e-06, "loss": 0.1548, "step": 4504 }, { "epoch": 1.46, "learning_rate": 2.729697547624911e-06, "loss": 0.151, "step": 4505 }, { "epoch": 1.46, "learning_rate": 2.7288266248242025e-06, "loss": 0.1625, "step": 4506 }, { "epoch": 1.46, "learning_rate": 2.7279556740173306e-06, "loss": 0.1534, "step": 4507 }, { "epoch": 1.46, "learning_rate": 2.7270846953108913e-06, "loss": 0.1735, "step": 4508 }, { "epoch": 1.46, "learning_rate": 2.7262136888114833e-06, "loss": 0.1929, "step": 4509 }, { "epoch": 1.46, "learning_rate": 2.72534265462571e-06, "loss": 0.1536, "step": 4510 }, { "epoch": 1.46, "learning_rate": 2.7244715928601774e-06, "loss": 0.151, "step": 4511 }, { "epoch": 1.46, "learning_rate": 2.723600503621494e-06, "loss": 0.1593, "step": 4512 }, { "epoch": 1.46, "learning_rate": 2.7227293870162742e-06, "loss": 0.1521, "step": 4513 }, { "epoch": 1.46, "learning_rate": 2.721858243151133e-06, "loss": 0.1654, "step": 4514 }, { "epoch": 1.46, "learning_rate": 2.7209870721326915e-06, "loss": 0.1504, "step": 4515 }, { "epoch": 1.46, "learning_rate": 2.7201158740675714e-06, "loss": 0.1646, "step": 4516 }, { "epoch": 1.46, "learning_rate": 2.719244649062399e-06, "loss": 0.1665, "step": 4517 }, { "epoch": 1.46, "learning_rate": 2.718373397223804e-06, "loss": 0.1543, "step": 4518 }, { "epoch": 1.46, "learning_rate": 2.71750211865842e-06, "loss": 0.1727, "step": 4519 }, { "epoch": 1.46, "learning_rate": 2.7166308134728814e-06, "loss": 0.1595, "step": 4520 }, { "epoch": 1.47, "learning_rate": 2.715759481773828e-06, "loss": 0.1601, "step": 4521 }, { "epoch": 1.47, "learning_rate": 2.7148881236679035e-06, "loss": 0.1611, "step": 4522 }, { "epoch": 1.47, "learning_rate": 2.7140167392617527e-06, "loss": 0.1656, "step": 4523 }, { "epoch": 1.47, "learning_rate": 2.7131453286620253e-06, "loss": 0.1574, "step": 4524 }, { "epoch": 1.47, "learning_rate": 2.712273891975372e-06, "loss": 0.1623, "step": 4525 }, { "epoch": 1.47, "learning_rate": 2.7114024293084502e-06, "loss": 0.1531, "step": 4526 }, { "epoch": 1.47, "learning_rate": 2.710530940767917e-06, "loss": 0.1492, "step": 4527 }, { "epoch": 1.47, "learning_rate": 2.7096594264604357e-06, "loss": 0.1671, "step": 4528 }, { "epoch": 1.47, "learning_rate": 2.7087878864926696e-06, "loss": 0.1656, "step": 4529 }, { "epoch": 1.47, "learning_rate": 2.707916320971288e-06, "loss": 0.1737, "step": 4530 }, { "epoch": 1.47, "learning_rate": 2.7070447300029607e-06, "loss": 0.1662, "step": 4531 }, { "epoch": 1.47, "learning_rate": 2.706173113694363e-06, "loss": 0.1609, "step": 4532 }, { "epoch": 1.47, "learning_rate": 2.705301472152172e-06, "loss": 0.1679, "step": 4533 }, { "epoch": 1.47, "learning_rate": 2.7044298054830687e-06, "loss": 0.1745, "step": 4534 }, { "epoch": 1.47, "learning_rate": 2.703558113793736e-06, "loss": 0.1702, "step": 4535 }, { "epoch": 1.47, "learning_rate": 2.7026863971908607e-06, "loss": 0.1664, "step": 4536 }, { "epoch": 1.47, "learning_rate": 2.7018146557811325e-06, "loss": 0.1647, "step": 4537 }, { "epoch": 1.47, "learning_rate": 2.7009428896712443e-06, "loss": 0.1632, "step": 4538 }, { "epoch": 1.47, "learning_rate": 2.700071098967892e-06, "loss": 0.173, "step": 4539 }, { "epoch": 1.47, "learning_rate": 2.699199283777773e-06, "loss": 0.1637, "step": 4540 }, { "epoch": 1.47, "learning_rate": 2.6983274442075914e-06, "loss": 0.172, "step": 4541 }, { "epoch": 1.47, "learning_rate": 2.69745558036405e-06, "loss": 0.1612, "step": 4542 }, { "epoch": 1.47, "learning_rate": 2.6965836923538568e-06, "loss": 0.1588, "step": 4543 }, { "epoch": 1.47, "learning_rate": 2.695711780283723e-06, "loss": 0.1457, "step": 4544 }, { "epoch": 1.47, "learning_rate": 2.694839844260361e-06, "loss": 0.1753, "step": 4545 }, { "epoch": 1.47, "learning_rate": 2.6939678843904897e-06, "loss": 0.1626, "step": 4546 }, { "epoch": 1.47, "learning_rate": 2.6930959007808268e-06, "loss": 0.1571, "step": 4547 }, { "epoch": 1.47, "learning_rate": 2.6922238935380946e-06, "loss": 0.1672, "step": 4548 }, { "epoch": 1.47, "learning_rate": 2.691351862769018e-06, "loss": 0.1487, "step": 4549 }, { "epoch": 1.47, "learning_rate": 2.6904798085803276e-06, "loss": 0.1672, "step": 4550 }, { "epoch": 1.47, "learning_rate": 2.689607731078751e-06, "loss": 0.1804, "step": 4551 }, { "epoch": 1.48, "learning_rate": 2.688735630371024e-06, "loss": 0.1741, "step": 4552 }, { "epoch": 1.48, "learning_rate": 2.6878635065638843e-06, "loss": 0.1693, "step": 4553 }, { "epoch": 1.48, "learning_rate": 2.6869913597640686e-06, "loss": 0.1789, "step": 4554 }, { "epoch": 1.48, "learning_rate": 2.6861191900783213e-06, "loss": 0.16, "step": 4555 }, { "epoch": 1.48, "learning_rate": 2.685246997613386e-06, "loss": 0.1529, "step": 4556 }, { "epoch": 1.48, "learning_rate": 2.6843747824760125e-06, "loss": 0.1623, "step": 4557 }, { "epoch": 1.48, "learning_rate": 2.6835025447729495e-06, "loss": 0.1629, "step": 4558 }, { "epoch": 1.48, "learning_rate": 2.682630284610953e-06, "loss": 0.1622, "step": 4559 }, { "epoch": 1.48, "learning_rate": 2.6817580020967767e-06, "loss": 0.1613, "step": 4560 }, { "epoch": 1.48, "learning_rate": 2.680885697337181e-06, "loss": 0.1652, "step": 4561 }, { "epoch": 1.48, "learning_rate": 2.6800133704389263e-06, "loss": 0.1637, "step": 4562 }, { "epoch": 1.48, "learning_rate": 2.6791410215087783e-06, "loss": 0.1707, "step": 4563 }, { "epoch": 1.48, "learning_rate": 2.678268650653503e-06, "loss": 0.1711, "step": 4564 }, { "epoch": 1.48, "learning_rate": 2.6773962579798713e-06, "loss": 0.1852, "step": 4565 }, { "epoch": 1.48, "learning_rate": 2.6765238435946543e-06, "loss": 0.1545, "step": 4566 }, { "epoch": 1.48, "learning_rate": 2.675651407604628e-06, "loss": 0.1643, "step": 4567 }, { "epoch": 1.48, "learning_rate": 2.67477895011657e-06, "loss": 0.1722, "step": 4568 }, { "epoch": 1.48, "learning_rate": 2.6739064712372596e-06, "loss": 0.1589, "step": 4569 }, { "epoch": 1.48, "learning_rate": 2.6730339710734815e-06, "loss": 0.1556, "step": 4570 }, { "epoch": 1.48, "learning_rate": 2.672161449732021e-06, "loss": 0.161, "step": 4571 }, { "epoch": 1.48, "learning_rate": 2.671288907319666e-06, "loss": 0.1667, "step": 4572 }, { "epoch": 1.48, "learning_rate": 2.670416343943205e-06, "loss": 0.1612, "step": 4573 }, { "epoch": 1.48, "learning_rate": 2.669543759709434e-06, "loss": 0.1723, "step": 4574 }, { "epoch": 1.48, "learning_rate": 2.668671154725149e-06, "loss": 0.156, "step": 4575 }, { "epoch": 1.48, "learning_rate": 2.6677985290971464e-06, "loss": 0.1788, "step": 4576 }, { "epoch": 1.48, "learning_rate": 2.666925882932229e-06, "loss": 0.1534, "step": 4577 }, { "epoch": 1.48, "learning_rate": 2.6660532163371995e-06, "loss": 0.1814, "step": 4578 }, { "epoch": 1.48, "learning_rate": 2.665180529418863e-06, "loss": 0.1805, "step": 4579 }, { "epoch": 1.48, "learning_rate": 2.6643078222840295e-06, "loss": 0.1659, "step": 4580 }, { "epoch": 1.48, "learning_rate": 2.6634350950395096e-06, "loss": 0.1565, "step": 4581 }, { "epoch": 1.48, "learning_rate": 2.662562347792116e-06, "loss": 0.1494, "step": 4582 }, { "epoch": 1.49, "learning_rate": 2.6616895806486644e-06, "loss": 0.1705, "step": 4583 }, { "epoch": 1.49, "learning_rate": 2.6608167937159735e-06, "loss": 0.1478, "step": 4584 }, { "epoch": 1.49, "learning_rate": 2.6599439871008636e-06, "loss": 0.1571, "step": 4585 }, { "epoch": 1.49, "learning_rate": 2.659071160910158e-06, "loss": 0.1636, "step": 4586 }, { "epoch": 1.49, "learning_rate": 2.6581983152506825e-06, "loss": 0.1674, "step": 4587 }, { "epoch": 1.49, "learning_rate": 2.6573254502292644e-06, "loss": 0.1491, "step": 4588 }, { "epoch": 1.49, "learning_rate": 2.656452565952735e-06, "loss": 0.1578, "step": 4589 }, { "epoch": 1.49, "learning_rate": 2.6555796625279257e-06, "loss": 0.1516, "step": 4590 }, { "epoch": 1.49, "learning_rate": 2.6547067400616717e-06, "loss": 0.1658, "step": 4591 }, { "epoch": 1.49, "learning_rate": 2.6538337986608105e-06, "loss": 0.1742, "step": 4592 }, { "epoch": 1.49, "learning_rate": 2.6529608384321815e-06, "loss": 0.1426, "step": 4593 }, { "epoch": 1.49, "learning_rate": 2.6520878594826268e-06, "loss": 0.1475, "step": 4594 }, { "epoch": 1.49, "learning_rate": 2.651214861918991e-06, "loss": 0.1673, "step": 4595 }, { "epoch": 1.49, "learning_rate": 2.6503418458481188e-06, "loss": 0.1764, "step": 4596 }, { "epoch": 1.49, "learning_rate": 2.649468811376861e-06, "loss": 0.1774, "step": 4597 }, { "epoch": 1.49, "learning_rate": 2.6485957586120664e-06, "loss": 0.1709, "step": 4598 }, { "epoch": 1.49, "learning_rate": 2.6477226876605903e-06, "loss": 0.1658, "step": 4599 }, { "epoch": 1.49, "learning_rate": 2.646849598629287e-06, "loss": 0.174, "step": 4600 }, { "epoch": 1.49, "learning_rate": 2.645976491625015e-06, "loss": 0.1577, "step": 4601 }, { "epoch": 1.49, "learning_rate": 2.645103366754633e-06, "loss": 0.1577, "step": 4602 }, { "epoch": 1.49, "learning_rate": 2.6442302241250047e-06, "loss": 0.1718, "step": 4603 }, { "epoch": 1.49, "learning_rate": 2.6433570638429923e-06, "loss": 0.153, "step": 4604 }, { "epoch": 1.49, "learning_rate": 2.6424838860154633e-06, "loss": 0.1607, "step": 4605 }, { "epoch": 1.49, "learning_rate": 2.641610690749286e-06, "loss": 0.1757, "step": 4606 }, { "epoch": 1.49, "learning_rate": 2.640737478151331e-06, "loss": 0.1699, "step": 4607 }, { "epoch": 1.49, "learning_rate": 2.6398642483284716e-06, "loss": 0.149, "step": 4608 }, { "epoch": 1.49, "learning_rate": 2.6389910013875814e-06, "loss": 0.1705, "step": 4609 }, { "epoch": 1.49, "learning_rate": 2.638117737435538e-06, "loss": 0.1675, "step": 4610 }, { "epoch": 1.49, "learning_rate": 2.637244456579221e-06, "loss": 0.1622, "step": 4611 }, { "epoch": 1.49, "learning_rate": 2.6363711589255115e-06, "loss": 0.1858, "step": 4612 }, { "epoch": 1.49, "learning_rate": 2.6354978445812923e-06, "loss": 0.169, "step": 4613 }, { "epoch": 1.5, "learning_rate": 2.6346245136534483e-06, "loss": 0.173, "step": 4614 }, { "epoch": 1.5, "learning_rate": 2.6337511662488678e-06, "loss": 0.1541, "step": 4615 }, { "epoch": 1.5, "learning_rate": 2.6328778024744384e-06, "loss": 0.1657, "step": 4616 }, { "epoch": 1.5, "learning_rate": 2.6320044224370526e-06, "loss": 0.1583, "step": 4617 }, { "epoch": 1.5, "learning_rate": 2.6311310262436035e-06, "loss": 0.1718, "step": 4618 }, { "epoch": 1.5, "learning_rate": 2.6302576140009866e-06, "loss": 0.1793, "step": 4619 }, { "epoch": 1.5, "learning_rate": 2.6293841858160983e-06, "loss": 0.1519, "step": 4620 }, { "epoch": 1.5, "learning_rate": 2.6285107417958385e-06, "loss": 0.1535, "step": 4621 }, { "epoch": 1.5, "learning_rate": 2.6276372820471073e-06, "loss": 0.162, "step": 4622 }, { "epoch": 1.5, "learning_rate": 2.6267638066768087e-06, "loss": 0.1777, "step": 4623 }, { "epoch": 1.5, "learning_rate": 2.625890315791848e-06, "loss": 0.1604, "step": 4624 }, { "epoch": 1.5, "learning_rate": 2.625016809499131e-06, "loss": 0.1561, "step": 4625 }, { "epoch": 1.5, "learning_rate": 2.6241432879055667e-06, "loss": 0.1728, "step": 4626 }, { "epoch": 1.5, "learning_rate": 2.6232697511180654e-06, "loss": 0.155, "step": 4627 }, { "epoch": 1.5, "learning_rate": 2.6223961992435406e-06, "loss": 0.1743, "step": 4628 }, { "epoch": 1.5, "learning_rate": 2.6215226323889048e-06, "loss": 0.1807, "step": 4629 }, { "epoch": 1.5, "learning_rate": 2.620649050661076e-06, "loss": 0.1619, "step": 4630 }, { "epoch": 1.5, "learning_rate": 2.6197754541669714e-06, "loss": 0.1679, "step": 4631 }, { "epoch": 1.5, "learning_rate": 2.6189018430135106e-06, "loss": 0.179, "step": 4632 }, { "epoch": 1.5, "learning_rate": 2.6180282173076156e-06, "loss": 0.1737, "step": 4633 }, { "epoch": 1.5, "learning_rate": 2.6171545771562085e-06, "loss": 0.1758, "step": 4634 }, { "epoch": 1.5, "learning_rate": 2.6162809226662167e-06, "loss": 0.1555, "step": 4635 }, { "epoch": 1.5, "learning_rate": 2.6154072539445645e-06, "loss": 0.1637, "step": 4636 }, { "epoch": 1.5, "learning_rate": 2.6145335710981817e-06, "loss": 0.159, "step": 4637 }, { "epoch": 1.5, "learning_rate": 2.613659874233999e-06, "loss": 0.1611, "step": 4638 }, { "epoch": 1.5, "learning_rate": 2.612786163458948e-06, "loss": 0.1655, "step": 4639 }, { "epoch": 1.5, "learning_rate": 2.611912438879962e-06, "loss": 0.1716, "step": 4640 }, { "epoch": 1.5, "learning_rate": 2.611038700603977e-06, "loss": 0.1603, "step": 4641 }, { "epoch": 1.5, "learning_rate": 2.6101649487379304e-06, "loss": 0.1593, "step": 4642 }, { "epoch": 1.5, "learning_rate": 2.6092911833887602e-06, "loss": 0.1648, "step": 4643 }, { "epoch": 1.5, "learning_rate": 2.6084174046634075e-06, "loss": 0.1782, "step": 4644 }, { "epoch": 1.51, "learning_rate": 2.607543612668814e-06, "loss": 0.1728, "step": 4645 }, { "epoch": 1.51, "learning_rate": 2.6066698075119237e-06, "loss": 0.1497, "step": 4646 }, { "epoch": 1.51, "learning_rate": 2.605795989299681e-06, "loss": 0.14, "step": 4647 }, { "epoch": 1.51, "learning_rate": 2.604922158139033e-06, "loss": 0.1595, "step": 4648 }, { "epoch": 1.51, "learning_rate": 2.6040483141369293e-06, "loss": 0.1641, "step": 4649 }, { "epoch": 1.51, "learning_rate": 2.603174457400319e-06, "loss": 0.1615, "step": 4650 }, { "epoch": 1.51, "learning_rate": 2.602300588036154e-06, "loss": 0.1699, "step": 4651 }, { "epoch": 1.51, "learning_rate": 2.6014267061513875e-06, "loss": 0.1636, "step": 4652 }, { "epoch": 1.51, "learning_rate": 2.6005528118529738e-06, "loss": 0.146, "step": 4653 }, { "epoch": 1.51, "learning_rate": 2.5996789052478693e-06, "loss": 0.1657, "step": 4654 }, { "epoch": 1.51, "learning_rate": 2.5988049864430314e-06, "loss": 0.1678, "step": 4655 }, { "epoch": 1.51, "learning_rate": 2.597931055545421e-06, "loss": 0.1797, "step": 4656 }, { "epoch": 1.51, "learning_rate": 2.597057112661997e-06, "loss": 0.1705, "step": 4657 }, { "epoch": 1.51, "learning_rate": 2.5961831578997214e-06, "loss": 0.154, "step": 4658 }, { "epoch": 1.51, "learning_rate": 2.5953091913655586e-06, "loss": 0.1593, "step": 4659 }, { "epoch": 1.51, "learning_rate": 2.594435213166473e-06, "loss": 0.1645, "step": 4660 }, { "epoch": 1.51, "learning_rate": 2.593561223409432e-06, "loss": 0.1669, "step": 4661 }, { "epoch": 1.51, "learning_rate": 2.592687222201403e-06, "loss": 0.1777, "step": 4662 }, { "epoch": 1.51, "learning_rate": 2.5918132096493552e-06, "loss": 0.175, "step": 4663 }, { "epoch": 1.51, "learning_rate": 2.5909391858602596e-06, "loss": 0.182, "step": 4664 }, { "epoch": 1.51, "learning_rate": 2.5900651509410875e-06, "loss": 0.1658, "step": 4665 }, { "epoch": 1.51, "learning_rate": 2.5891911049988133e-06, "loss": 0.1542, "step": 4666 }, { "epoch": 1.51, "learning_rate": 2.5883170481404112e-06, "loss": 0.1623, "step": 4667 }, { "epoch": 1.51, "learning_rate": 2.587442980472858e-06, "loss": 0.1618, "step": 4668 }, { "epoch": 1.51, "learning_rate": 2.5865689021031292e-06, "loss": 0.1501, "step": 4669 }, { "epoch": 1.51, "learning_rate": 2.5856948131382055e-06, "loss": 0.1755, "step": 4670 }, { "epoch": 1.51, "learning_rate": 2.584820713685066e-06, "loss": 0.1625, "step": 4671 }, { "epoch": 1.51, "learning_rate": 2.5839466038506927e-06, "loss": 0.1603, "step": 4672 }, { "epoch": 1.51, "learning_rate": 2.5830724837420675e-06, "loss": 0.1734, "step": 4673 }, { "epoch": 1.51, "learning_rate": 2.582198353466175e-06, "loss": 0.1599, "step": 4674 }, { "epoch": 1.51, "learning_rate": 2.5813242131299986e-06, "loss": 0.1554, "step": 4675 }, { "epoch": 1.52, "learning_rate": 2.5804500628405265e-06, "loss": 0.1596, "step": 4676 }, { "epoch": 1.52, "learning_rate": 2.5795759027047457e-06, "loss": 0.1548, "step": 4677 }, { "epoch": 1.52, "learning_rate": 2.578701732829645e-06, "loss": 0.1581, "step": 4678 }, { "epoch": 1.52, "learning_rate": 2.5778275533222135e-06, "loss": 0.1558, "step": 4679 }, { "epoch": 1.52, "learning_rate": 2.5769533642894433e-06, "loss": 0.1642, "step": 4680 }, { "epoch": 1.52, "learning_rate": 2.576079165838326e-06, "loss": 0.1618, "step": 4681 }, { "epoch": 1.52, "learning_rate": 2.5752049580758555e-06, "loss": 0.1709, "step": 4682 }, { "epoch": 1.52, "learning_rate": 2.5743307411090255e-06, "loss": 0.1861, "step": 4683 }, { "epoch": 1.52, "learning_rate": 2.5734565150448325e-06, "loss": 0.1682, "step": 4684 }, { "epoch": 1.52, "learning_rate": 2.5725822799902738e-06, "loss": 0.1693, "step": 4685 }, { "epoch": 1.52, "learning_rate": 2.5717080360523464e-06, "loss": 0.1645, "step": 4686 }, { "epoch": 1.52, "learning_rate": 2.57083378333805e-06, "loss": 0.159, "step": 4687 }, { "epoch": 1.52, "learning_rate": 2.5699595219543838e-06, "loss": 0.1467, "step": 4688 }, { "epoch": 1.52, "learning_rate": 2.5690852520083496e-06, "loss": 0.1557, "step": 4689 }, { "epoch": 1.52, "learning_rate": 2.5682109736069492e-06, "loss": 0.1611, "step": 4690 }, { "epoch": 1.52, "learning_rate": 2.5673366868571858e-06, "loss": 0.1632, "step": 4691 }, { "epoch": 1.52, "learning_rate": 2.566462391866064e-06, "loss": 0.1622, "step": 4692 }, { "epoch": 1.52, "learning_rate": 2.5655880887405893e-06, "loss": 0.1759, "step": 4693 }, { "epoch": 1.52, "learning_rate": 2.564713777587767e-06, "loss": 0.1526, "step": 4694 }, { "epoch": 1.52, "learning_rate": 2.5638394585146044e-06, "loss": 0.148, "step": 4695 }, { "epoch": 1.52, "learning_rate": 2.56296513162811e-06, "loss": 0.1703, "step": 4696 }, { "epoch": 1.52, "learning_rate": 2.5620907970352937e-06, "loss": 0.1526, "step": 4697 }, { "epoch": 1.52, "learning_rate": 2.561216454843165e-06, "loss": 0.1727, "step": 4698 }, { "epoch": 1.52, "learning_rate": 2.5603421051587344e-06, "loss": 0.1534, "step": 4699 }, { "epoch": 1.52, "learning_rate": 2.5594677480890152e-06, "loss": 0.158, "step": 4700 }, { "epoch": 1.52, "learning_rate": 2.558593383741018e-06, "loss": 0.1466, "step": 4701 }, { "epoch": 1.52, "learning_rate": 2.5577190122217583e-06, "loss": 0.157, "step": 4702 }, { "epoch": 1.52, "learning_rate": 2.55684463363825e-06, "loss": 0.1592, "step": 4703 }, { "epoch": 1.52, "learning_rate": 2.5559702480975094e-06, "loss": 0.1738, "step": 4704 }, { "epoch": 1.52, "learning_rate": 2.5550958557065523e-06, "loss": 0.1735, "step": 4705 }, { "epoch": 1.52, "learning_rate": 2.554221456572396e-06, "loss": 0.169, "step": 4706 }, { "epoch": 1.53, "learning_rate": 2.553347050802058e-06, "loss": 0.1633, "step": 4707 }, { "epoch": 1.53, "learning_rate": 2.552472638502557e-06, "loss": 0.1628, "step": 4708 }, { "epoch": 1.53, "learning_rate": 2.5515982197809142e-06, "loss": 0.1756, "step": 4709 }, { "epoch": 1.53, "learning_rate": 2.5507237947441478e-06, "loss": 0.1657, "step": 4710 }, { "epoch": 1.53, "learning_rate": 2.5498493634992803e-06, "loss": 0.1623, "step": 4711 }, { "epoch": 1.53, "learning_rate": 2.5489749261533333e-06, "loss": 0.1521, "step": 4712 }, { "epoch": 1.53, "learning_rate": 2.548100482813329e-06, "loss": 0.1535, "step": 4713 }, { "epoch": 1.53, "learning_rate": 2.5472260335862915e-06, "loss": 0.1862, "step": 4714 }, { "epoch": 1.53, "learning_rate": 2.546351578579245e-06, "loss": 0.1536, "step": 4715 }, { "epoch": 1.53, "learning_rate": 2.545477117899213e-06, "loss": 0.1656, "step": 4716 }, { "epoch": 1.53, "learning_rate": 2.5446026516532235e-06, "loss": 0.1695, "step": 4717 }, { "epoch": 1.53, "learning_rate": 2.5437281799483005e-06, "loss": 0.1638, "step": 4718 }, { "epoch": 1.53, "learning_rate": 2.542853702891471e-06, "loss": 0.1755, "step": 4719 }, { "epoch": 1.53, "learning_rate": 2.541979220589765e-06, "loss": 0.1586, "step": 4720 }, { "epoch": 1.53, "learning_rate": 2.541104733150207e-06, "loss": 0.1636, "step": 4721 }, { "epoch": 1.53, "learning_rate": 2.540230240679828e-06, "loss": 0.152, "step": 4722 }, { "epoch": 1.53, "learning_rate": 2.5393557432856575e-06, "loss": 0.1666, "step": 4723 }, { "epoch": 1.53, "learning_rate": 2.5384812410747244e-06, "loss": 0.1517, "step": 4724 }, { "epoch": 1.53, "learning_rate": 2.53760673415406e-06, "loss": 0.1666, "step": 4725 }, { "epoch": 1.53, "learning_rate": 2.5367322226306956e-06, "loss": 0.154, "step": 4726 }, { "epoch": 1.53, "learning_rate": 2.5358577066116622e-06, "loss": 0.1508, "step": 4727 }, { "epoch": 1.53, "learning_rate": 2.534983186203993e-06, "loss": 0.1763, "step": 4728 }, { "epoch": 1.53, "learning_rate": 2.5341086615147207e-06, "loss": 0.1685, "step": 4729 }, { "epoch": 1.53, "learning_rate": 2.5332341326508786e-06, "loss": 0.1639, "step": 4730 }, { "epoch": 1.53, "learning_rate": 2.5323595997195005e-06, "loss": 0.1459, "step": 4731 }, { "epoch": 1.53, "learning_rate": 2.53148506282762e-06, "loss": 0.1502, "step": 4732 }, { "epoch": 1.53, "learning_rate": 2.530610522082273e-06, "loss": 0.1602, "step": 4733 }, { "epoch": 1.53, "learning_rate": 2.529735977590494e-06, "loss": 0.1617, "step": 4734 }, { "epoch": 1.53, "learning_rate": 2.52886142945932e-06, "loss": 0.1744, "step": 4735 }, { "epoch": 1.53, "learning_rate": 2.527986877795786e-06, "loss": 0.1589, "step": 4736 }, { "epoch": 1.53, "learning_rate": 2.527112322706929e-06, "loss": 0.1657, "step": 4737 }, { "epoch": 1.54, "learning_rate": 2.526237764299786e-06, "loss": 0.1637, "step": 4738 }, { "epoch": 1.54, "learning_rate": 2.5253632026813945e-06, "loss": 0.1683, "step": 4739 }, { "epoch": 1.54, "learning_rate": 2.524488637958793e-06, "loss": 0.1621, "step": 4740 }, { "epoch": 1.54, "learning_rate": 2.5236140702390194e-06, "loss": 0.149, "step": 4741 }, { "epoch": 1.54, "learning_rate": 2.522739499629112e-06, "loss": 0.1624, "step": 4742 }, { "epoch": 1.54, "learning_rate": 2.5218649262361104e-06, "loss": 0.1671, "step": 4743 }, { "epoch": 1.54, "learning_rate": 2.520990350167053e-06, "loss": 0.1868, "step": 4744 }, { "epoch": 1.54, "learning_rate": 2.5201157715289796e-06, "loss": 0.1773, "step": 4745 }, { "epoch": 1.54, "learning_rate": 2.519241190428931e-06, "loss": 0.1668, "step": 4746 }, { "epoch": 1.54, "learning_rate": 2.518366606973947e-06, "loss": 0.1526, "step": 4747 }, { "epoch": 1.54, "learning_rate": 2.517492021271068e-06, "loss": 0.181, "step": 4748 }, { "epoch": 1.54, "learning_rate": 2.5166174334273347e-06, "loss": 0.1629, "step": 4749 }, { "epoch": 1.54, "learning_rate": 2.5157428435497887e-06, "loss": 0.1702, "step": 4750 }, { "epoch": 1.54, "learning_rate": 2.5148682517454707e-06, "loss": 0.158, "step": 4751 }, { "epoch": 1.54, "learning_rate": 2.5139936581214235e-06, "loss": 0.1551, "step": 4752 }, { "epoch": 1.54, "learning_rate": 2.5131190627846875e-06, "loss": 0.1577, "step": 4753 }, { "epoch": 1.54, "learning_rate": 2.512244465842305e-06, "loss": 0.1593, "step": 4754 }, { "epoch": 1.54, "learning_rate": 2.5113698674013186e-06, "loss": 0.1655, "step": 4755 }, { "epoch": 1.54, "learning_rate": 2.5104952675687706e-06, "loss": 0.1719, "step": 4756 }, { "epoch": 1.54, "learning_rate": 2.509620666451703e-06, "loss": 0.1732, "step": 4757 }, { "epoch": 1.54, "learning_rate": 2.5087460641571594e-06, "loss": 0.1537, "step": 4758 }, { "epoch": 1.54, "learning_rate": 2.5078714607921825e-06, "loss": 0.161, "step": 4759 }, { "epoch": 1.54, "learning_rate": 2.506996856463814e-06, "loss": 0.1698, "step": 4760 }, { "epoch": 1.54, "learning_rate": 2.506122251279099e-06, "loss": 0.1743, "step": 4761 }, { "epoch": 1.54, "learning_rate": 2.5052476453450788e-06, "loss": 0.1561, "step": 4762 }, { "epoch": 1.54, "learning_rate": 2.504373038768799e-06, "loss": 0.1587, "step": 4763 }, { "epoch": 1.54, "learning_rate": 2.5034984316573003e-06, "loss": 0.1803, "step": 4764 }, { "epoch": 1.54, "learning_rate": 2.5026238241176283e-06, "loss": 0.1639, "step": 4765 }, { "epoch": 1.54, "learning_rate": 2.5017492162568246e-06, "loss": 0.1541, "step": 4766 }, { "epoch": 1.54, "learning_rate": 2.5008746081819345e-06, "loss": 0.1722, "step": 4767 }, { "epoch": 1.55, "learning_rate": 2.5e-06, "loss": 0.1701, "step": 4768 }, { "epoch": 1.55, "learning_rate": 2.4991253918180668e-06, "loss": 0.1647, "step": 4769 }, { "epoch": 1.55, "learning_rate": 2.498250783743176e-06, "loss": 0.166, "step": 4770 }, { "epoch": 1.55, "learning_rate": 2.4973761758823734e-06, "loss": 0.1653, "step": 4771 }, { "epoch": 1.55, "learning_rate": 2.4965015683427005e-06, "loss": 0.1926, "step": 4772 }, { "epoch": 1.55, "learning_rate": 2.4956269612312025e-06, "loss": 0.1581, "step": 4773 }, { "epoch": 1.55, "learning_rate": 2.494752354654921e-06, "loss": 0.1768, "step": 4774 }, { "epoch": 1.55, "learning_rate": 2.4938777487209022e-06, "loss": 0.1417, "step": 4775 }, { "epoch": 1.55, "learning_rate": 2.493003143536187e-06, "loss": 0.1481, "step": 4776 }, { "epoch": 1.55, "learning_rate": 2.4921285392078184e-06, "loss": 0.174, "step": 4777 }, { "epoch": 1.55, "learning_rate": 2.491253935842842e-06, "loss": 0.1627, "step": 4778 }, { "epoch": 1.55, "learning_rate": 2.490379333548297e-06, "loss": 0.1616, "step": 4779 }, { "epoch": 1.55, "learning_rate": 2.4895047324312303e-06, "loss": 0.1817, "step": 4780 }, { "epoch": 1.55, "learning_rate": 2.4886301325986827e-06, "loss": 0.1551, "step": 4781 }, { "epoch": 1.55, "learning_rate": 2.4877555341576955e-06, "loss": 0.1592, "step": 4782 }, { "epoch": 1.55, "learning_rate": 2.4868809372153137e-06, "loss": 0.159, "step": 4783 }, { "epoch": 1.55, "learning_rate": 2.4860063418785773e-06, "loss": 0.1604, "step": 4784 }, { "epoch": 1.55, "learning_rate": 2.4851317482545297e-06, "loss": 0.1629, "step": 4785 }, { "epoch": 1.55, "learning_rate": 2.4842571564502117e-06, "loss": 0.1565, "step": 4786 }, { "epoch": 1.55, "learning_rate": 2.4833825665726657e-06, "loss": 0.1481, "step": 4787 }, { "epoch": 1.55, "learning_rate": 2.482507978728933e-06, "loss": 0.1614, "step": 4788 }, { "epoch": 1.55, "learning_rate": 2.4816333930260535e-06, "loss": 0.1603, "step": 4789 }, { "epoch": 1.55, "learning_rate": 2.4807588095710696e-06, "loss": 0.1544, "step": 4790 }, { "epoch": 1.55, "learning_rate": 2.4798842284710203e-06, "loss": 0.1573, "step": 4791 }, { "epoch": 1.55, "learning_rate": 2.4790096498329477e-06, "loss": 0.1559, "step": 4792 }, { "epoch": 1.55, "learning_rate": 2.478135073763891e-06, "loss": 0.1699, "step": 4793 }, { "epoch": 1.55, "learning_rate": 2.4772605003708885e-06, "loss": 0.1583, "step": 4794 }, { "epoch": 1.55, "learning_rate": 2.476385929760981e-06, "loss": 0.1694, "step": 4795 }, { "epoch": 1.55, "learning_rate": 2.475511362041207e-06, "loss": 0.1901, "step": 4796 }, { "epoch": 1.55, "learning_rate": 2.4746367973186063e-06, "loss": 0.1557, "step": 4797 }, { "epoch": 1.55, "learning_rate": 2.473762235700214e-06, "loss": 0.1804, "step": 4798 }, { "epoch": 1.56, "learning_rate": 2.472887677293072e-06, "loss": 0.168, "step": 4799 }, { "epoch": 1.56, "learning_rate": 2.4720131222042156e-06, "loss": 0.1553, "step": 4800 }, { "epoch": 1.56, "learning_rate": 2.4711385705406805e-06, "loss": 0.1612, "step": 4801 }, { "epoch": 1.56, "learning_rate": 2.4702640224095066e-06, "loss": 0.1503, "step": 4802 }, { "epoch": 1.56, "learning_rate": 2.469389477917727e-06, "loss": 0.1605, "step": 4803 }, { "epoch": 1.56, "learning_rate": 2.4685149371723806e-06, "loss": 0.1628, "step": 4804 }, { "epoch": 1.56, "learning_rate": 2.467640400280501e-06, "loss": 0.1615, "step": 4805 }, { "epoch": 1.56, "learning_rate": 2.466765867349122e-06, "loss": 0.1479, "step": 4806 }, { "epoch": 1.56, "learning_rate": 2.46589133848528e-06, "loss": 0.1717, "step": 4807 }, { "epoch": 1.56, "learning_rate": 2.465016813796007e-06, "loss": 0.1744, "step": 4808 }, { "epoch": 1.56, "learning_rate": 2.464142293388338e-06, "loss": 0.1736, "step": 4809 }, { "epoch": 1.56, "learning_rate": 2.4632677773693048e-06, "loss": 0.1702, "step": 4810 }, { "epoch": 1.56, "learning_rate": 2.4623932658459406e-06, "loss": 0.1668, "step": 4811 }, { "epoch": 1.56, "learning_rate": 2.461518758925277e-06, "loss": 0.1594, "step": 4812 }, { "epoch": 1.56, "learning_rate": 2.4606442567143434e-06, "loss": 0.1602, "step": 4813 }, { "epoch": 1.56, "learning_rate": 2.4597697593201728e-06, "loss": 0.1434, "step": 4814 }, { "epoch": 1.56, "learning_rate": 2.4588952668497937e-06, "loss": 0.1658, "step": 4815 }, { "epoch": 1.56, "learning_rate": 2.4580207794102364e-06, "loss": 0.1601, "step": 4816 }, { "epoch": 1.56, "learning_rate": 2.4571462971085293e-06, "loss": 0.1603, "step": 4817 }, { "epoch": 1.56, "learning_rate": 2.4562718200517003e-06, "loss": 0.1623, "step": 4818 }, { "epoch": 1.56, "learning_rate": 2.4553973483467778e-06, "loss": 0.1579, "step": 4819 }, { "epoch": 1.56, "learning_rate": 2.454522882100787e-06, "loss": 0.1721, "step": 4820 }, { "epoch": 1.56, "learning_rate": 2.453648421420756e-06, "loss": 0.1679, "step": 4821 }, { "epoch": 1.56, "learning_rate": 2.4527739664137085e-06, "loss": 0.1554, "step": 4822 }, { "epoch": 1.56, "learning_rate": 2.4518995171866717e-06, "loss": 0.1652, "step": 4823 }, { "epoch": 1.56, "learning_rate": 2.451025073846668e-06, "loss": 0.1584, "step": 4824 }, { "epoch": 1.56, "learning_rate": 2.45015063650072e-06, "loss": 0.1665, "step": 4825 }, { "epoch": 1.56, "learning_rate": 2.449276205255853e-06, "loss": 0.1601, "step": 4826 }, { "epoch": 1.56, "learning_rate": 2.448401780219087e-06, "loss": 0.1592, "step": 4827 }, { "epoch": 1.56, "learning_rate": 2.4475273614974437e-06, "loss": 0.1515, "step": 4828 }, { "epoch": 1.56, "learning_rate": 2.4466529491979437e-06, "loss": 0.147, "step": 4829 }, { "epoch": 1.57, "learning_rate": 2.445778543427605e-06, "loss": 0.1642, "step": 4830 }, { "epoch": 1.57, "learning_rate": 2.4449041442934485e-06, "loss": 0.1577, "step": 4831 }, { "epoch": 1.57, "learning_rate": 2.4440297519024906e-06, "loss": 0.1763, "step": 4832 }, { "epoch": 1.57, "learning_rate": 2.4431553663617502e-06, "loss": 0.1712, "step": 4833 }, { "epoch": 1.57, "learning_rate": 2.4422809877782417e-06, "loss": 0.1512, "step": 4834 }, { "epoch": 1.57, "learning_rate": 2.4414066162589823e-06, "loss": 0.1619, "step": 4835 }, { "epoch": 1.57, "learning_rate": 2.4405322519109864e-06, "loss": 0.1561, "step": 4836 }, { "epoch": 1.57, "learning_rate": 2.4396578948412664e-06, "loss": 0.1667, "step": 4837 }, { "epoch": 1.57, "learning_rate": 2.4387835451568355e-06, "loss": 0.1603, "step": 4838 }, { "epoch": 1.57, "learning_rate": 2.4379092029647067e-06, "loss": 0.1638, "step": 4839 }, { "epoch": 1.57, "learning_rate": 2.4370348683718906e-06, "loss": 0.1643, "step": 4840 }, { "epoch": 1.57, "learning_rate": 2.436160541485396e-06, "loss": 0.1603, "step": 4841 }, { "epoch": 1.57, "learning_rate": 2.4352862224122344e-06, "loss": 0.1748, "step": 4842 }, { "epoch": 1.57, "learning_rate": 2.4344119112594124e-06, "loss": 0.1631, "step": 4843 }, { "epoch": 1.57, "learning_rate": 2.4335376081339364e-06, "loss": 0.1778, "step": 4844 }, { "epoch": 1.57, "learning_rate": 2.4326633131428147e-06, "loss": 0.1663, "step": 4845 }, { "epoch": 1.57, "learning_rate": 2.4317890263930516e-06, "loss": 0.1595, "step": 4846 }, { "epoch": 1.57, "learning_rate": 2.430914747991651e-06, "loss": 0.1574, "step": 4847 }, { "epoch": 1.57, "learning_rate": 2.430040478045617e-06, "loss": 0.1483, "step": 4848 }, { "epoch": 1.57, "learning_rate": 2.429166216661951e-06, "loss": 0.1625, "step": 4849 }, { "epoch": 1.57, "learning_rate": 2.4282919639476544e-06, "loss": 0.1615, "step": 4850 }, { "epoch": 1.57, "learning_rate": 2.4274177200097266e-06, "loss": 0.1488, "step": 4851 }, { "epoch": 1.57, "learning_rate": 2.426543484955168e-06, "loss": 0.1678, "step": 4852 }, { "epoch": 1.57, "learning_rate": 2.425669258890975e-06, "loss": 0.1727, "step": 4853 }, { "epoch": 1.57, "learning_rate": 2.4247950419241457e-06, "loss": 0.1588, "step": 4854 }, { "epoch": 1.57, "learning_rate": 2.4239208341616755e-06, "loss": 0.1537, "step": 4855 }, { "epoch": 1.57, "learning_rate": 2.4230466357105575e-06, "loss": 0.1556, "step": 4856 }, { "epoch": 1.57, "learning_rate": 2.4221724466777874e-06, "loss": 0.1512, "step": 4857 }, { "epoch": 1.57, "learning_rate": 2.421298267170356e-06, "loss": 0.1707, "step": 4858 }, { "epoch": 1.57, "learning_rate": 2.420424097295255e-06, "loss": 0.1611, "step": 4859 }, { "epoch": 1.57, "learning_rate": 2.419549937159474e-06, "loss": 0.1617, "step": 4860 }, { "epoch": 1.58, "learning_rate": 2.418675786870002e-06, "loss": 0.1764, "step": 4861 }, { "epoch": 1.58, "learning_rate": 2.4178016465338266e-06, "loss": 0.1665, "step": 4862 }, { "epoch": 1.58, "learning_rate": 2.416927516257933e-06, "loss": 0.1559, "step": 4863 }, { "epoch": 1.58, "learning_rate": 2.416053396149308e-06, "loss": 0.1612, "step": 4864 }, { "epoch": 1.58, "learning_rate": 2.415179286314934e-06, "loss": 0.1666, "step": 4865 }, { "epoch": 1.58, "learning_rate": 2.414305186861795e-06, "loss": 0.1586, "step": 4866 }, { "epoch": 1.58, "learning_rate": 2.4134310978968716e-06, "loss": 0.1737, "step": 4867 }, { "epoch": 1.58, "learning_rate": 2.412557019527143e-06, "loss": 0.1724, "step": 4868 }, { "epoch": 1.58, "learning_rate": 2.4116829518595896e-06, "loss": 0.1559, "step": 4869 }, { "epoch": 1.58, "learning_rate": 2.410808895001187e-06, "loss": 0.1657, "step": 4870 }, { "epoch": 1.58, "learning_rate": 2.409934849058913e-06, "loss": 0.159, "step": 4871 }, { "epoch": 1.58, "learning_rate": 2.4090608141397417e-06, "loss": 0.1785, "step": 4872 }, { "epoch": 1.58, "learning_rate": 2.408186790350645e-06, "loss": 0.1846, "step": 4873 }, { "epoch": 1.58, "learning_rate": 2.4073127777985982e-06, "loss": 0.1594, "step": 4874 }, { "epoch": 1.58, "learning_rate": 2.406438776590568e-06, "loss": 0.1602, "step": 4875 }, { "epoch": 1.58, "learning_rate": 2.4055647868335273e-06, "loss": 0.151, "step": 4876 }, { "epoch": 1.58, "learning_rate": 2.404690808634442e-06, "loss": 0.1593, "step": 4877 }, { "epoch": 1.58, "learning_rate": 2.4038168421002795e-06, "loss": 0.1581, "step": 4878 }, { "epoch": 1.58, "learning_rate": 2.4029428873380044e-06, "loss": 0.1624, "step": 4879 }, { "epoch": 1.58, "learning_rate": 2.4020689444545796e-06, "loss": 0.1717, "step": 4880 }, { "epoch": 1.58, "learning_rate": 2.401195013556969e-06, "loss": 0.156, "step": 4881 }, { "epoch": 1.58, "learning_rate": 2.400321094752131e-06, "loss": 0.1671, "step": 4882 }, { "epoch": 1.58, "learning_rate": 2.399447188147027e-06, "loss": 0.1537, "step": 4883 }, { "epoch": 1.58, "learning_rate": 2.3985732938486137e-06, "loss": 0.163, "step": 4884 }, { "epoch": 1.58, "learning_rate": 2.3976994119638464e-06, "loss": 0.1611, "step": 4885 }, { "epoch": 1.58, "learning_rate": 2.3968255425996817e-06, "loss": 0.1689, "step": 4886 }, { "epoch": 1.58, "learning_rate": 2.3959516858630707e-06, "loss": 0.164, "step": 4887 }, { "epoch": 1.58, "learning_rate": 2.3950778418609676e-06, "loss": 0.1685, "step": 4888 }, { "epoch": 1.58, "learning_rate": 2.39420401070032e-06, "loss": 0.1581, "step": 4889 }, { "epoch": 1.58, "learning_rate": 2.3933301924880768e-06, "loss": 0.147, "step": 4890 }, { "epoch": 1.58, "learning_rate": 2.3924563873311868e-06, "loss": 0.1636, "step": 4891 }, { "epoch": 1.59, "learning_rate": 2.391582595336593e-06, "loss": 0.1782, "step": 4892 }, { "epoch": 1.59, "learning_rate": 2.3907088166112406e-06, "loss": 0.1555, "step": 4893 }, { "epoch": 1.59, "learning_rate": 2.3898350512620696e-06, "loss": 0.1598, "step": 4894 }, { "epoch": 1.59, "learning_rate": 2.3889612993960233e-06, "loss": 0.1594, "step": 4895 }, { "epoch": 1.59, "learning_rate": 2.3880875611200387e-06, "loss": 0.1595, "step": 4896 }, { "epoch": 1.59, "learning_rate": 2.3872138365410525e-06, "loss": 0.1587, "step": 4897 }, { "epoch": 1.59, "learning_rate": 2.3863401257660016e-06, "loss": 0.1659, "step": 4898 }, { "epoch": 1.59, "learning_rate": 2.3854664289018182e-06, "loss": 0.1719, "step": 4899 }, { "epoch": 1.59, "learning_rate": 2.3845927460554363e-06, "loss": 0.1429, "step": 4900 }, { "epoch": 1.59, "learning_rate": 2.383719077333784e-06, "loss": 0.1452, "step": 4901 }, { "epoch": 1.59, "learning_rate": 2.382845422843792e-06, "loss": 0.1634, "step": 4902 }, { "epoch": 1.59, "learning_rate": 2.381971782692386e-06, "loss": 0.164, "step": 4903 }, { "epoch": 1.59, "learning_rate": 2.3810981569864898e-06, "loss": 0.1515, "step": 4904 }, { "epoch": 1.59, "learning_rate": 2.38022454583303e-06, "loss": 0.168, "step": 4905 }, { "epoch": 1.59, "learning_rate": 2.379350949338924e-06, "loss": 0.1763, "step": 4906 }, { "epoch": 1.59, "learning_rate": 2.378477367611096e-06, "loss": 0.16, "step": 4907 }, { "epoch": 1.59, "learning_rate": 2.377603800756461e-06, "loss": 0.1853, "step": 4908 }, { "epoch": 1.59, "learning_rate": 2.376730248881935e-06, "loss": 0.1569, "step": 4909 }, { "epoch": 1.59, "learning_rate": 2.3758567120944345e-06, "loss": 0.1724, "step": 4910 }, { "epoch": 1.59, "learning_rate": 2.3749831905008704e-06, "loss": 0.1718, "step": 4911 }, { "epoch": 1.59, "learning_rate": 2.374109684208153e-06, "loss": 0.1423, "step": 4912 }, { "epoch": 1.59, "learning_rate": 2.3732361933231917e-06, "loss": 0.1734, "step": 4913 }, { "epoch": 1.59, "learning_rate": 2.3723627179528935e-06, "loss": 0.183, "step": 4914 }, { "epoch": 1.59, "learning_rate": 2.371489258204163e-06, "loss": 0.1664, "step": 4915 }, { "epoch": 1.59, "learning_rate": 2.3706158141839025e-06, "loss": 0.1528, "step": 4916 }, { "epoch": 1.59, "learning_rate": 2.3697423859990147e-06, "loss": 0.1692, "step": 4917 }, { "epoch": 1.59, "learning_rate": 2.3688689737563965e-06, "loss": 0.1828, "step": 4918 }, { "epoch": 1.59, "learning_rate": 2.367995577562948e-06, "loss": 0.163, "step": 4919 }, { "epoch": 1.59, "learning_rate": 2.3671221975255616e-06, "loss": 0.1518, "step": 4920 }, { "epoch": 1.59, "learning_rate": 2.366248833751133e-06, "loss": 0.1591, "step": 4921 }, { "epoch": 1.59, "learning_rate": 2.365375486346552e-06, "loss": 0.1554, "step": 4922 }, { "epoch": 1.6, "learning_rate": 2.3645021554187086e-06, "loss": 0.1548, "step": 4923 }, { "epoch": 1.6, "learning_rate": 2.3636288410744894e-06, "loss": 0.1544, "step": 4924 }, { "epoch": 1.6, "learning_rate": 2.3627555434207787e-06, "loss": 0.1674, "step": 4925 }, { "epoch": 1.6, "learning_rate": 2.3618822625644624e-06, "loss": 0.1613, "step": 4926 }, { "epoch": 1.6, "learning_rate": 2.36100899861242e-06, "loss": 0.1753, "step": 4927 }, { "epoch": 1.6, "learning_rate": 2.3601357516715297e-06, "loss": 0.1628, "step": 4928 }, { "epoch": 1.6, "learning_rate": 2.35926252184867e-06, "loss": 0.1791, "step": 4929 }, { "epoch": 1.6, "learning_rate": 2.3583893092507144e-06, "loss": 0.1586, "step": 4930 }, { "epoch": 1.6, "learning_rate": 2.3575161139845375e-06, "loss": 0.1488, "step": 4931 }, { "epoch": 1.6, "learning_rate": 2.356642936157008e-06, "loss": 0.1473, "step": 4932 }, { "epoch": 1.6, "learning_rate": 2.3557697758749966e-06, "loss": 0.1668, "step": 4933 }, { "epoch": 1.6, "learning_rate": 2.3548966332453673e-06, "loss": 0.166, "step": 4934 }, { "epoch": 1.6, "learning_rate": 2.3540235083749853e-06, "loss": 0.1724, "step": 4935 }, { "epoch": 1.6, "learning_rate": 2.3531504013707134e-06, "loss": 0.1678, "step": 4936 }, { "epoch": 1.6, "learning_rate": 2.35227731233941e-06, "loss": 0.1641, "step": 4937 }, { "epoch": 1.6, "learning_rate": 2.3514042413879344e-06, "loss": 0.1619, "step": 4938 }, { "epoch": 1.6, "learning_rate": 2.350531188623141e-06, "loss": 0.1828, "step": 4939 }, { "epoch": 1.6, "learning_rate": 2.349658154151882e-06, "loss": 0.1607, "step": 4940 }, { "epoch": 1.6, "learning_rate": 2.3487851380810106e-06, "loss": 0.1655, "step": 4941 }, { "epoch": 1.6, "learning_rate": 2.3479121405173736e-06, "loss": 0.1686, "step": 4942 }, { "epoch": 1.6, "learning_rate": 2.347039161567819e-06, "loss": 0.1633, "step": 4943 }, { "epoch": 1.6, "learning_rate": 2.34616620133919e-06, "loss": 0.1537, "step": 4944 }, { "epoch": 1.6, "learning_rate": 2.345293259938329e-06, "loss": 0.1413, "step": 4945 }, { "epoch": 1.6, "learning_rate": 2.3444203374720755e-06, "loss": 0.1609, "step": 4946 }, { "epoch": 1.6, "learning_rate": 2.3435474340472657e-06, "loss": 0.173, "step": 4947 }, { "epoch": 1.6, "learning_rate": 2.3426745497707364e-06, "loss": 0.1788, "step": 4948 }, { "epoch": 1.6, "learning_rate": 2.341801684749318e-06, "loss": 0.1714, "step": 4949 }, { "epoch": 1.6, "learning_rate": 2.3409288390898427e-06, "loss": 0.1688, "step": 4950 }, { "epoch": 1.6, "learning_rate": 2.3400560128991377e-06, "loss": 0.1576, "step": 4951 }, { "epoch": 1.6, "learning_rate": 2.3391832062840273e-06, "loss": 0.1589, "step": 4952 }, { "epoch": 1.6, "learning_rate": 2.338310419351337e-06, "loss": 0.1566, "step": 4953 }, { "epoch": 1.61, "learning_rate": 2.3374376522078852e-06, "loss": 0.1718, "step": 4954 }, { "epoch": 1.61, "learning_rate": 2.3365649049604917e-06, "loss": 0.1652, "step": 4955 }, { "epoch": 1.61, "learning_rate": 2.3356921777159705e-06, "loss": 0.1641, "step": 4956 }, { "epoch": 1.61, "learning_rate": 2.334819470581137e-06, "loss": 0.161, "step": 4957 }, { "epoch": 1.61, "learning_rate": 2.3339467836628018e-06, "loss": 0.1565, "step": 4958 }, { "epoch": 1.61, "learning_rate": 2.3330741170677713e-06, "loss": 0.1605, "step": 4959 }, { "epoch": 1.61, "learning_rate": 2.3322014709028545e-06, "loss": 0.1614, "step": 4960 }, { "epoch": 1.61, "learning_rate": 2.3313288452748515e-06, "loss": 0.1557, "step": 4961 }, { "epoch": 1.61, "learning_rate": 2.3304562402905662e-06, "loss": 0.156, "step": 4962 }, { "epoch": 1.61, "learning_rate": 2.329583656056796e-06, "loss": 0.1589, "step": 4963 }, { "epoch": 1.61, "learning_rate": 2.3287110926803354e-06, "loss": 0.1698, "step": 4964 }, { "epoch": 1.61, "learning_rate": 2.32783855026798e-06, "loss": 0.1528, "step": 4965 }, { "epoch": 1.61, "learning_rate": 2.3269660289265184e-06, "loss": 0.1629, "step": 4966 }, { "epoch": 1.61, "learning_rate": 2.3260935287627408e-06, "loss": 0.1599, "step": 4967 }, { "epoch": 1.61, "learning_rate": 2.3252210498834306e-06, "loss": 0.1538, "step": 4968 }, { "epoch": 1.61, "learning_rate": 2.3243485923953725e-06, "loss": 0.1451, "step": 4969 }, { "epoch": 1.61, "learning_rate": 2.323476156405347e-06, "loss": 0.1535, "step": 4970 }, { "epoch": 1.61, "learning_rate": 2.3226037420201296e-06, "loss": 0.1816, "step": 4971 }, { "epoch": 1.61, "learning_rate": 2.3217313493464977e-06, "loss": 0.1508, "step": 4972 }, { "epoch": 1.61, "learning_rate": 2.320858978491222e-06, "loss": 0.1603, "step": 4973 }, { "epoch": 1.61, "learning_rate": 2.319986629561074e-06, "loss": 0.1633, "step": 4974 }, { "epoch": 1.61, "learning_rate": 2.3191143026628206e-06, "loss": 0.1683, "step": 4975 }, { "epoch": 1.61, "learning_rate": 2.318241997903224e-06, "loss": 0.1524, "step": 4976 }, { "epoch": 1.61, "learning_rate": 2.3173697153890486e-06, "loss": 0.1711, "step": 4977 }, { "epoch": 1.61, "learning_rate": 2.31649745522705e-06, "loss": 0.1665, "step": 4978 }, { "epoch": 1.61, "learning_rate": 2.3156252175239883e-06, "loss": 0.1689, "step": 4979 }, { "epoch": 1.61, "learning_rate": 2.3147530023866136e-06, "loss": 0.1761, "step": 4980 }, { "epoch": 1.61, "learning_rate": 2.3138808099216796e-06, "loss": 0.1703, "step": 4981 }, { "epoch": 1.61, "learning_rate": 2.3130086402359327e-06, "loss": 0.167, "step": 4982 }, { "epoch": 1.61, "learning_rate": 2.312136493436117e-06, "loss": 0.1625, "step": 4983 }, { "epoch": 1.62, "learning_rate": 2.311264369628976e-06, "loss": 0.1745, "step": 4984 }, { "epoch": 1.62, "learning_rate": 2.3103922689212494e-06, "loss": 0.1578, "step": 4985 }, { "epoch": 1.62, "learning_rate": 2.3095201914196732e-06, "loss": 0.1673, "step": 4986 }, { "epoch": 1.62, "learning_rate": 2.308648137230982e-06, "loss": 0.1553, "step": 4987 }, { "epoch": 1.62, "learning_rate": 2.3077761064619062e-06, "loss": 0.1553, "step": 4988 }, { "epoch": 1.62, "learning_rate": 2.3069040992191745e-06, "loss": 0.1682, "step": 4989 }, { "epoch": 1.62, "learning_rate": 2.3060321156095107e-06, "loss": 0.1522, "step": 4990 }, { "epoch": 1.62, "learning_rate": 2.3051601557396393e-06, "loss": 0.1689, "step": 4991 }, { "epoch": 1.62, "learning_rate": 2.3042882197162776e-06, "loss": 0.1561, "step": 4992 }, { "epoch": 1.62, "learning_rate": 2.303416307646144e-06, "loss": 0.1591, "step": 4993 }, { "epoch": 1.62, "learning_rate": 2.3025444196359513e-06, "loss": 0.1672, "step": 4994 }, { "epoch": 1.62, "learning_rate": 2.3016725557924095e-06, "loss": 0.1726, "step": 4995 }, { "epoch": 1.62, "learning_rate": 2.3008007162222273e-06, "loss": 0.1789, "step": 4996 }, { "epoch": 1.62, "learning_rate": 2.2999289010321092e-06, "loss": 0.1713, "step": 4997 }, { "epoch": 1.62, "learning_rate": 2.299057110328757e-06, "loss": 0.1508, "step": 4998 }, { "epoch": 1.62, "learning_rate": 2.298185344218868e-06, "loss": 0.1754, "step": 4999 }, { "epoch": 1.62, "learning_rate": 2.29731360280914e-06, "loss": 0.1635, "step": 5000 }, { "epoch": 1.62, "learning_rate": 2.2964418862062655e-06, "loss": 0.1696, "step": 5001 }, { "epoch": 1.62, "learning_rate": 2.2955701945169317e-06, "loss": 0.1637, "step": 5002 }, { "epoch": 1.62, "learning_rate": 2.294698527847829e-06, "loss": 0.1654, "step": 5003 }, { "epoch": 1.62, "learning_rate": 2.2938268863056373e-06, "loss": 0.1543, "step": 5004 }, { "epoch": 1.62, "learning_rate": 2.29295526999704e-06, "loss": 0.1682, "step": 5005 }, { "epoch": 1.62, "learning_rate": 2.2920836790287134e-06, "loss": 0.1741, "step": 5006 }, { "epoch": 1.62, "learning_rate": 2.291212113507331e-06, "loss": 0.1696, "step": 5007 }, { "epoch": 1.62, "learning_rate": 2.290340573539565e-06, "loss": 0.1467, "step": 5008 }, { "epoch": 1.62, "learning_rate": 2.2894690592320827e-06, "loss": 0.1644, "step": 5009 }, { "epoch": 1.62, "learning_rate": 2.2885975706915506e-06, "loss": 0.163, "step": 5010 }, { "epoch": 1.62, "learning_rate": 2.287726108024628e-06, "loss": 0.1653, "step": 5011 }, { "epoch": 1.62, "learning_rate": 2.2868546713379755e-06, "loss": 0.1605, "step": 5012 }, { "epoch": 1.62, "learning_rate": 2.285983260738248e-06, "loss": 0.1615, "step": 5013 }, { "epoch": 1.62, "learning_rate": 2.285111876332097e-06, "loss": 0.1607, "step": 5014 }, { "epoch": 1.63, "learning_rate": 2.2842405182261725e-06, "loss": 0.1568, "step": 5015 }, { "epoch": 1.63, "learning_rate": 2.283369186527119e-06, "loss": 0.1613, "step": 5016 }, { "epoch": 1.63, "learning_rate": 2.282497881341581e-06, "loss": 0.1615, "step": 5017 }, { "epoch": 1.63, "learning_rate": 2.2816266027761965e-06, "loss": 0.1618, "step": 5018 }, { "epoch": 1.63, "learning_rate": 2.280755350937602e-06, "loss": 0.1638, "step": 5019 }, { "epoch": 1.63, "learning_rate": 2.27988412593243e-06, "loss": 0.1759, "step": 5020 }, { "epoch": 1.63, "learning_rate": 2.279012927867309e-06, "loss": 0.1638, "step": 5021 }, { "epoch": 1.63, "learning_rate": 2.2781417568488677e-06, "loss": 0.1373, "step": 5022 }, { "epoch": 1.63, "learning_rate": 2.277270612983726e-06, "loss": 0.154, "step": 5023 }, { "epoch": 1.63, "learning_rate": 2.2763994963785066e-06, "loss": 0.1608, "step": 5024 }, { "epoch": 1.63, "learning_rate": 2.2755284071398243e-06, "loss": 0.1579, "step": 5025 }, { "epoch": 1.63, "learning_rate": 2.2746573453742905e-06, "loss": 0.1609, "step": 5026 }, { "epoch": 1.63, "learning_rate": 2.2737863111885175e-06, "loss": 0.1543, "step": 5027 }, { "epoch": 1.63, "learning_rate": 2.2729153046891095e-06, "loss": 0.1855, "step": 5028 }, { "epoch": 1.63, "learning_rate": 2.2720443259826702e-06, "loss": 0.1619, "step": 5029 }, { "epoch": 1.63, "learning_rate": 2.2711733751757983e-06, "loss": 0.1691, "step": 5030 }, { "epoch": 1.63, "learning_rate": 2.27030245237509e-06, "loss": 0.1578, "step": 5031 }, { "epoch": 1.63, "learning_rate": 2.2694315576871384e-06, "loss": 0.1589, "step": 5032 }, { "epoch": 1.63, "learning_rate": 2.268560691218531e-06, "loss": 0.1734, "step": 5033 }, { "epoch": 1.63, "learning_rate": 2.2676898530758554e-06, "loss": 0.1835, "step": 5034 }, { "epoch": 1.63, "learning_rate": 2.266819043365692e-06, "loss": 0.159, "step": 5035 }, { "epoch": 1.63, "learning_rate": 2.265948262194621e-06, "loss": 0.1618, "step": 5036 }, { "epoch": 1.63, "learning_rate": 2.2650775096692176e-06, "loss": 0.166, "step": 5037 }, { "epoch": 1.63, "learning_rate": 2.2642067858960514e-06, "loss": 0.1733, "step": 5038 }, { "epoch": 1.63, "learning_rate": 2.263336090981693e-06, "loss": 0.1535, "step": 5039 }, { "epoch": 1.63, "learning_rate": 2.2624654250327054e-06, "loss": 0.1676, "step": 5040 }, { "epoch": 1.63, "learning_rate": 2.2615947881556506e-06, "loss": 0.1563, "step": 5041 }, { "epoch": 1.63, "learning_rate": 2.2607241804570864e-06, "loss": 0.1533, "step": 5042 }, { "epoch": 1.63, "learning_rate": 2.2598536020435644e-06, "loss": 0.1635, "step": 5043 }, { "epoch": 1.63, "learning_rate": 2.258983053021638e-06, "loss": 0.1702, "step": 5044 }, { "epoch": 1.63, "learning_rate": 2.2581125334978517e-06, "loss": 0.1547, "step": 5045 }, { "epoch": 1.64, "learning_rate": 2.257242043578751e-06, "loss": 0.1619, "step": 5046 }, { "epoch": 1.64, "learning_rate": 2.2563715833708726e-06, "loss": 0.1676, "step": 5047 }, { "epoch": 1.64, "learning_rate": 2.255501152980755e-06, "loss": 0.1523, "step": 5048 }, { "epoch": 1.64, "learning_rate": 2.2546307525149293e-06, "loss": 0.1563, "step": 5049 }, { "epoch": 1.64, "learning_rate": 2.253760382079924e-06, "loss": 0.1685, "step": 5050 }, { "epoch": 1.64, "learning_rate": 2.2528900417822636e-06, "loss": 0.1436, "step": 5051 }, { "epoch": 1.64, "learning_rate": 2.2520197317284702e-06, "loss": 0.1556, "step": 5052 }, { "epoch": 1.64, "learning_rate": 2.2511494520250613e-06, "loss": 0.1476, "step": 5053 }, { "epoch": 1.64, "learning_rate": 2.2502792027785508e-06, "loss": 0.1688, "step": 5054 }, { "epoch": 1.64, "learning_rate": 2.249408984095447e-06, "loss": 0.1744, "step": 5055 }, { "epoch": 1.64, "learning_rate": 2.248538796082259e-06, "loss": 0.1663, "step": 5056 }, { "epoch": 1.64, "learning_rate": 2.2476686388454867e-06, "loss": 0.1696, "step": 5057 }, { "epoch": 1.64, "learning_rate": 2.2467985124916314e-06, "loss": 0.1569, "step": 5058 }, { "epoch": 1.64, "learning_rate": 2.2459284171271863e-06, "loss": 0.1557, "step": 5059 }, { "epoch": 1.64, "learning_rate": 2.2450583528586437e-06, "loss": 0.1511, "step": 5060 }, { "epoch": 1.64, "learning_rate": 2.244188319792491e-06, "loss": 0.1581, "step": 5061 }, { "epoch": 1.64, "learning_rate": 2.243318318035211e-06, "loss": 0.1555, "step": 5062 }, { "epoch": 1.64, "learning_rate": 2.2424483476932847e-06, "loss": 0.1649, "step": 5063 }, { "epoch": 1.64, "learning_rate": 2.241578408873186e-06, "loss": 0.1529, "step": 5064 }, { "epoch": 1.64, "learning_rate": 2.2407085016813895e-06, "loss": 0.1779, "step": 5065 }, { "epoch": 1.64, "learning_rate": 2.239838626224361e-06, "loss": 0.1627, "step": 5066 }, { "epoch": 1.64, "learning_rate": 2.2389687826085675e-06, "loss": 0.1704, "step": 5067 }, { "epoch": 1.64, "learning_rate": 2.238098970940468e-06, "loss": 0.154, "step": 5068 }, { "epoch": 1.64, "learning_rate": 2.2372291913265177e-06, "loss": 0.1515, "step": 5069 }, { "epoch": 1.64, "learning_rate": 2.236359443873172e-06, "loss": 0.1539, "step": 5070 }, { "epoch": 1.64, "learning_rate": 2.2354897286868773e-06, "loss": 0.1704, "step": 5071 }, { "epoch": 1.64, "learning_rate": 2.23462004587408e-06, "loss": 0.1668, "step": 5072 }, { "epoch": 1.64, "learning_rate": 2.233750395541219e-06, "loss": 0.1701, "step": 5073 }, { "epoch": 1.64, "learning_rate": 2.2328807777947323e-06, "loss": 0.1597, "step": 5074 }, { "epoch": 1.64, "learning_rate": 2.232011192741053e-06, "loss": 0.1719, "step": 5075 }, { "epoch": 1.64, "learning_rate": 2.2311416404866085e-06, "loss": 0.1652, "step": 5076 }, { "epoch": 1.65, "learning_rate": 2.2302721211378254e-06, "loss": 0.1629, "step": 5077 }, { "epoch": 1.65, "learning_rate": 2.2294026348011223e-06, "loss": 0.16, "step": 5078 }, { "epoch": 1.65, "learning_rate": 2.2285331815829187e-06, "loss": 0.1573, "step": 5079 }, { "epoch": 1.65, "learning_rate": 2.227663761589625e-06, "loss": 0.1465, "step": 5080 }, { "epoch": 1.65, "learning_rate": 2.2267943749276503e-06, "loss": 0.1888, "step": 5081 }, { "epoch": 1.65, "learning_rate": 2.225925021703399e-06, "loss": 0.1613, "step": 5082 }, { "epoch": 1.65, "learning_rate": 2.2250557020232724e-06, "loss": 0.1598, "step": 5083 }, { "epoch": 1.65, "learning_rate": 2.2241864159936664e-06, "loss": 0.1748, "step": 5084 }, { "epoch": 1.65, "learning_rate": 2.223317163720973e-06, "loss": 0.1539, "step": 5085 }, { "epoch": 1.65, "learning_rate": 2.222447945311579e-06, "loss": 0.1543, "step": 5086 }, { "epoch": 1.65, "learning_rate": 2.2215787608718706e-06, "loss": 0.1725, "step": 5087 }, { "epoch": 1.65, "learning_rate": 2.220709610508226e-06, "loss": 0.1663, "step": 5088 }, { "epoch": 1.65, "learning_rate": 2.2198404943270217e-06, "loss": 0.1765, "step": 5089 }, { "epoch": 1.65, "learning_rate": 2.218971412434628e-06, "loss": 0.1623, "step": 5090 }, { "epoch": 1.65, "learning_rate": 2.218102364937414e-06, "loss": 0.1573, "step": 5091 }, { "epoch": 1.65, "learning_rate": 2.2172333519417415e-06, "loss": 0.1653, "step": 5092 }, { "epoch": 1.65, "learning_rate": 2.2163643735539688e-06, "loss": 0.158, "step": 5093 }, { "epoch": 1.65, "learning_rate": 2.2154954298804514e-06, "loss": 0.1649, "step": 5094 }, { "epoch": 1.65, "learning_rate": 2.214626521027538e-06, "loss": 0.1612, "step": 5095 }, { "epoch": 1.65, "learning_rate": 2.213757647101577e-06, "loss": 0.1649, "step": 5096 }, { "epoch": 1.65, "learning_rate": 2.2128888082089093e-06, "loss": 0.1507, "step": 5097 }, { "epoch": 1.65, "learning_rate": 2.2120200044558705e-06, "loss": 0.1642, "step": 5098 }, { "epoch": 1.65, "learning_rate": 2.2111512359487967e-06, "loss": 0.1665, "step": 5099 }, { "epoch": 1.65, "learning_rate": 2.2102825027940143e-06, "loss": 0.154, "step": 5100 }, { "epoch": 1.65, "learning_rate": 2.2094138050978496e-06, "loss": 0.1594, "step": 5101 }, { "epoch": 1.65, "learning_rate": 2.2085451429666215e-06, "loss": 0.156, "step": 5102 }, { "epoch": 1.65, "learning_rate": 2.207676516506647e-06, "loss": 0.1441, "step": 5103 }, { "epoch": 1.65, "learning_rate": 2.206807925824237e-06, "loss": 0.166, "step": 5104 }, { "epoch": 1.65, "learning_rate": 2.205939371025698e-06, "loss": 0.1577, "step": 5105 }, { "epoch": 1.65, "learning_rate": 2.205070852217334e-06, "loss": 0.1644, "step": 5106 }, { "epoch": 1.65, "learning_rate": 2.204202369505441e-06, "loss": 0.1651, "step": 5107 }, { "epoch": 1.66, "learning_rate": 2.203333922996316e-06, "loss": 0.1564, "step": 5108 }, { "epoch": 1.66, "learning_rate": 2.202465512796247e-06, "loss": 0.1675, "step": 5109 }, { "epoch": 1.66, "learning_rate": 2.2015971390115172e-06, "loss": 0.1585, "step": 5110 }, { "epoch": 1.66, "learning_rate": 2.2007288017484105e-06, "loss": 0.1804, "step": 5111 }, { "epoch": 1.66, "learning_rate": 2.1998605011131997e-06, "loss": 0.1611, "step": 5112 }, { "epoch": 1.66, "learning_rate": 2.19899223721216e-06, "loss": 0.1588, "step": 5113 }, { "epoch": 1.66, "learning_rate": 2.1981240101515548e-06, "loss": 0.1347, "step": 5114 }, { "epoch": 1.66, "learning_rate": 2.1972558200376497e-06, "loss": 0.1623, "step": 5115 }, { "epoch": 1.66, "learning_rate": 2.1963876669767008e-06, "loss": 0.1475, "step": 5116 }, { "epoch": 1.66, "learning_rate": 2.1955195510749614e-06, "loss": 0.1542, "step": 5117 }, { "epoch": 1.66, "learning_rate": 2.1946514724386827e-06, "loss": 0.1653, "step": 5118 }, { "epoch": 1.66, "learning_rate": 2.1937834311741066e-06, "loss": 0.1606, "step": 5119 }, { "epoch": 1.66, "learning_rate": 2.192915427387475e-06, "loss": 0.1707, "step": 5120 }, { "epoch": 1.66, "learning_rate": 2.1920474611850225e-06, "loss": 0.1548, "step": 5121 }, { "epoch": 1.66, "learning_rate": 2.1911795326729784e-06, "loss": 0.1659, "step": 5122 }, { "epoch": 1.66, "learning_rate": 2.190311641957571e-06, "loss": 0.1778, "step": 5123 }, { "epoch": 1.66, "learning_rate": 2.18944378914502e-06, "loss": 0.1725, "step": 5124 }, { "epoch": 1.66, "learning_rate": 2.188575974341543e-06, "loss": 0.156, "step": 5125 }, { "epoch": 1.66, "learning_rate": 2.1877081976533515e-06, "loss": 0.1521, "step": 5126 }, { "epoch": 1.66, "learning_rate": 2.186840459186654e-06, "loss": 0.1541, "step": 5127 }, { "epoch": 1.66, "learning_rate": 2.185972759047653e-06, "loss": 0.1664, "step": 5128 }, { "epoch": 1.66, "learning_rate": 2.1851050973425454e-06, "loss": 0.1662, "step": 5129 }, { "epoch": 1.66, "learning_rate": 2.1842374741775262e-06, "loss": 0.1689, "step": 5130 }, { "epoch": 1.66, "learning_rate": 2.1833698896587816e-06, "loss": 0.1684, "step": 5131 }, { "epoch": 1.66, "learning_rate": 2.1825023438924995e-06, "loss": 0.1434, "step": 5132 }, { "epoch": 1.66, "learning_rate": 2.1816348369848555e-06, "loss": 0.1615, "step": 5133 }, { "epoch": 1.66, "learning_rate": 2.180767369042026e-06, "loss": 0.1634, "step": 5134 }, { "epoch": 1.66, "learning_rate": 2.1798999401701802e-06, "loss": 0.1601, "step": 5135 }, { "epoch": 1.66, "learning_rate": 2.1790325504754827e-06, "loss": 0.1565, "step": 5136 }, { "epoch": 1.66, "learning_rate": 2.1781652000640947e-06, "loss": 0.1746, "step": 5137 }, { "epoch": 1.66, "learning_rate": 2.177297889042169e-06, "loss": 0.1653, "step": 5138 }, { "epoch": 1.67, "learning_rate": 2.1764306175158588e-06, "loss": 0.1614, "step": 5139 }, { "epoch": 1.67, "learning_rate": 2.1755633855913086e-06, "loss": 0.1793, "step": 5140 }, { "epoch": 1.67, "learning_rate": 2.174696193374658e-06, "loss": 0.1624, "step": 5141 }, { "epoch": 1.67, "learning_rate": 2.173829040972046e-06, "loss": 0.15, "step": 5142 }, { "epoch": 1.67, "learning_rate": 2.1729619284896e-06, "loss": 0.165, "step": 5143 }, { "epoch": 1.67, "learning_rate": 2.1720948560334492e-06, "loss": 0.1579, "step": 5144 }, { "epoch": 1.67, "learning_rate": 2.171227823709713e-06, "loss": 0.1558, "step": 5145 }, { "epoch": 1.67, "learning_rate": 2.1703608316245092e-06, "loss": 0.1659, "step": 5146 }, { "epoch": 1.67, "learning_rate": 2.169493879883948e-06, "loss": 0.1603, "step": 5147 }, { "epoch": 1.67, "learning_rate": 2.168626968594136e-06, "loss": 0.1598, "step": 5148 }, { "epoch": 1.67, "learning_rate": 2.167760097861176e-06, "loss": 0.1709, "step": 5149 }, { "epoch": 1.67, "learning_rate": 2.1668932677911624e-06, "loss": 0.1388, "step": 5150 }, { "epoch": 1.67, "learning_rate": 2.166026478490189e-06, "loss": 0.1604, "step": 5151 }, { "epoch": 1.67, "learning_rate": 2.1651597300643418e-06, "loss": 0.1579, "step": 5152 }, { "epoch": 1.67, "learning_rate": 2.1642930226197012e-06, "loss": 0.1688, "step": 5153 }, { "epoch": 1.67, "learning_rate": 2.1634263562623454e-06, "loss": 0.1803, "step": 5154 }, { "epoch": 1.67, "learning_rate": 2.162559731098345e-06, "loss": 0.1702, "step": 5155 }, { "epoch": 1.67, "learning_rate": 2.161693147233767e-06, "loss": 0.1636, "step": 5156 }, { "epoch": 1.67, "learning_rate": 2.1608266047746723e-06, "loss": 0.1662, "step": 5157 }, { "epoch": 1.67, "learning_rate": 2.1599601038271186e-06, "loss": 0.1554, "step": 5158 }, { "epoch": 1.67, "learning_rate": 2.1590936444971563e-06, "loss": 0.1455, "step": 5159 }, { "epoch": 1.67, "learning_rate": 2.1582272268908307e-06, "loss": 0.1377, "step": 5160 }, { "epoch": 1.67, "learning_rate": 2.1573608511141845e-06, "loss": 0.1647, "step": 5161 }, { "epoch": 1.67, "learning_rate": 2.1564945172732523e-06, "loss": 0.1651, "step": 5162 }, { "epoch": 1.67, "learning_rate": 2.155628225474067e-06, "loss": 0.1678, "step": 5163 }, { "epoch": 1.67, "learning_rate": 2.154761975822653e-06, "loss": 0.1672, "step": 5164 }, { "epoch": 1.67, "learning_rate": 2.1538957684250303e-06, "loss": 0.1589, "step": 5165 }, { "epoch": 1.67, "learning_rate": 2.1530296033872155e-06, "loss": 0.1513, "step": 5166 }, { "epoch": 1.67, "learning_rate": 2.152163480815218e-06, "loss": 0.1546, "step": 5167 }, { "epoch": 1.67, "learning_rate": 2.151297400815044e-06, "loss": 0.1745, "step": 5168 }, { "epoch": 1.67, "learning_rate": 2.150431363492691e-06, "loss": 0.171, "step": 5169 }, { "epoch": 1.68, "learning_rate": 2.1495653689541562e-06, "loss": 0.1719, "step": 5170 }, { "epoch": 1.68, "learning_rate": 2.1486994173054276e-06, "loss": 0.1585, "step": 5171 }, { "epoch": 1.68, "learning_rate": 2.1478335086524885e-06, "loss": 0.1547, "step": 5172 }, { "epoch": 1.68, "learning_rate": 2.14696764310132e-06, "loss": 0.1657, "step": 5173 }, { "epoch": 1.68, "learning_rate": 2.1461018207578932e-06, "loss": 0.1533, "step": 5174 }, { "epoch": 1.68, "learning_rate": 2.1452360417281786e-06, "loss": 0.1514, "step": 5175 }, { "epoch": 1.68, "learning_rate": 2.144370306118138e-06, "loss": 0.1616, "step": 5176 }, { "epoch": 1.68, "learning_rate": 2.143504614033728e-06, "loss": 0.1586, "step": 5177 }, { "epoch": 1.68, "learning_rate": 2.142638965580903e-06, "loss": 0.168, "step": 5178 }, { "epoch": 1.68, "learning_rate": 2.141773360865609e-06, "loss": 0.1825, "step": 5179 }, { "epoch": 1.68, "learning_rate": 2.1409077999937883e-06, "loss": 0.1733, "step": 5180 }, { "epoch": 1.68, "learning_rate": 2.1400422830713752e-06, "loss": 0.1444, "step": 5181 }, { "epoch": 1.68, "learning_rate": 2.1391768102043032e-06, "loss": 0.1613, "step": 5182 }, { "epoch": 1.68, "learning_rate": 2.1383113814984967e-06, "loss": 0.1494, "step": 5183 }, { "epoch": 1.68, "learning_rate": 2.137445997059874e-06, "loss": 0.1656, "step": 5184 }, { "epoch": 1.68, "learning_rate": 2.1365806569943533e-06, "loss": 0.1499, "step": 5185 }, { "epoch": 1.68, "learning_rate": 2.1357153614078407e-06, "loss": 0.1689, "step": 5186 }, { "epoch": 1.68, "learning_rate": 2.1348501104062423e-06, "loss": 0.1362, "step": 5187 }, { "epoch": 1.68, "learning_rate": 2.1339849040954556e-06, "loss": 0.1528, "step": 5188 }, { "epoch": 1.68, "learning_rate": 2.133119742581373e-06, "loss": 0.1543, "step": 5189 }, { "epoch": 1.68, "learning_rate": 2.1322546259698823e-06, "loss": 0.1607, "step": 5190 }, { "epoch": 1.68, "learning_rate": 2.1313895543668644e-06, "loss": 0.1689, "step": 5191 }, { "epoch": 1.68, "learning_rate": 2.1305245278781977e-06, "loss": 0.1485, "step": 5192 }, { "epoch": 1.68, "learning_rate": 2.129659546609751e-06, "loss": 0.1562, "step": 5193 }, { "epoch": 1.68, "learning_rate": 2.1287946106673916e-06, "loss": 0.1573, "step": 5194 }, { "epoch": 1.68, "learning_rate": 2.1279297201569787e-06, "loss": 0.1642, "step": 5195 }, { "epoch": 1.68, "learning_rate": 2.127064875184365e-06, "loss": 0.1661, "step": 5196 }, { "epoch": 1.68, "learning_rate": 2.126200075855401e-06, "loss": 0.1583, "step": 5197 }, { "epoch": 1.68, "learning_rate": 2.125335322275928e-06, "loss": 0.1532, "step": 5198 }, { "epoch": 1.68, "learning_rate": 2.1244706145517853e-06, "loss": 0.1486, "step": 5199 }, { "epoch": 1.69, "learning_rate": 2.1236059527888044e-06, "loss": 0.1612, "step": 5200 }, { "epoch": 1.69, "learning_rate": 2.1227413370928106e-06, "loss": 0.1813, "step": 5201 }, { "epoch": 1.69, "learning_rate": 2.1218767675696255e-06, "loss": 0.1785, "step": 5202 }, { "epoch": 1.69, "learning_rate": 2.1210122443250625e-06, "loss": 0.1628, "step": 5203 }, { "epoch": 1.69, "learning_rate": 2.1201477674649326e-06, "loss": 0.1601, "step": 5204 }, { "epoch": 1.69, "learning_rate": 2.119283337095038e-06, "loss": 0.1607, "step": 5205 }, { "epoch": 1.69, "learning_rate": 2.1184189533211783e-06, "loss": 0.1737, "step": 5206 }, { "epoch": 1.69, "learning_rate": 2.117554616249145e-06, "loss": 0.1495, "step": 5207 }, { "epoch": 1.69, "learning_rate": 2.1166903259847228e-06, "loss": 0.1671, "step": 5208 }, { "epoch": 1.69, "learning_rate": 2.115826082633695e-06, "loss": 0.151, "step": 5209 }, { "epoch": 1.69, "learning_rate": 2.114961886301835e-06, "loss": 0.1556, "step": 5210 }, { "epoch": 1.69, "learning_rate": 2.114097737094914e-06, "loss": 0.1565, "step": 5211 }, { "epoch": 1.69, "learning_rate": 2.1132336351186923e-06, "loss": 0.1599, "step": 5212 }, { "epoch": 1.69, "learning_rate": 2.1123695804789307e-06, "loss": 0.1561, "step": 5213 }, { "epoch": 1.69, "learning_rate": 2.11150557328138e-06, "loss": 0.1487, "step": 5214 }, { "epoch": 1.69, "learning_rate": 2.110641613631785e-06, "loss": 0.1967, "step": 5215 }, { "epoch": 1.69, "learning_rate": 2.109777701635889e-06, "loss": 0.1455, "step": 5216 }, { "epoch": 1.69, "learning_rate": 2.1089138373994226e-06, "loss": 0.1615, "step": 5217 }, { "epoch": 1.69, "learning_rate": 2.108050021028118e-06, "loss": 0.1604, "step": 5218 }, { "epoch": 1.69, "learning_rate": 2.1071862526276963e-06, "loss": 0.1664, "step": 5219 }, { "epoch": 1.69, "learning_rate": 2.1063225323038744e-06, "loss": 0.1781, "step": 5220 }, { "epoch": 1.69, "learning_rate": 2.1054588601623634e-06, "loss": 0.145, "step": 5221 }, { "epoch": 1.69, "learning_rate": 2.104595236308868e-06, "loss": 0.168, "step": 5222 }, { "epoch": 1.69, "learning_rate": 2.1037316608490886e-06, "loss": 0.1814, "step": 5223 }, { "epoch": 1.69, "learning_rate": 2.1028681338887164e-06, "loss": 0.1631, "step": 5224 }, { "epoch": 1.69, "learning_rate": 2.102004655533442e-06, "loss": 0.1706, "step": 5225 }, { "epoch": 1.69, "learning_rate": 2.101141225888944e-06, "loss": 0.151, "step": 5226 }, { "epoch": 1.69, "learning_rate": 2.100277845060898e-06, "loss": 0.1622, "step": 5227 }, { "epoch": 1.69, "learning_rate": 2.0994145131549755e-06, "loss": 0.1552, "step": 5228 }, { "epoch": 1.69, "learning_rate": 2.0985512302768366e-06, "loss": 0.1849, "step": 5229 }, { "epoch": 1.69, "learning_rate": 2.097687996532143e-06, "loss": 0.1595, "step": 5230 }, { "epoch": 1.7, "learning_rate": 2.0968248120265433e-06, "loss": 0.1717, "step": 5231 }, { "epoch": 1.7, "learning_rate": 2.095961676865683e-06, "loss": 0.1695, "step": 5232 }, { "epoch": 1.7, "learning_rate": 2.095098591155203e-06, "loss": 0.1617, "step": 5233 }, { "epoch": 1.7, "learning_rate": 2.094235555000734e-06, "loss": 0.1404, "step": 5234 }, { "epoch": 1.7, "learning_rate": 2.093372568507907e-06, "loss": 0.1501, "step": 5235 }, { "epoch": 1.7, "learning_rate": 2.0925096317823393e-06, "loss": 0.17, "step": 5236 }, { "epoch": 1.7, "learning_rate": 2.091646744929649e-06, "loss": 0.1806, "step": 5237 }, { "epoch": 1.7, "learning_rate": 2.0907839080554443e-06, "loss": 0.1818, "step": 5238 }, { "epoch": 1.7, "learning_rate": 2.0899211212653262e-06, "loss": 0.1622, "step": 5239 }, { "epoch": 1.7, "learning_rate": 2.0890583846648945e-06, "loss": 0.1555, "step": 5240 }, { "epoch": 1.7, "learning_rate": 2.0881956983597375e-06, "loss": 0.1658, "step": 5241 }, { "epoch": 1.7, "learning_rate": 2.087333062455441e-06, "loss": 0.1509, "step": 5242 }, { "epoch": 1.7, "learning_rate": 2.0864704770575824e-06, "loss": 0.1586, "step": 5243 }, { "epoch": 1.7, "learning_rate": 2.085607942271734e-06, "loss": 0.1614, "step": 5244 }, { "epoch": 1.7, "learning_rate": 2.0847454582034625e-06, "loss": 0.1783, "step": 5245 }, { "epoch": 1.7, "learning_rate": 2.0838830249583254e-06, "loss": 0.1719, "step": 5246 }, { "epoch": 1.7, "learning_rate": 2.0830206426418794e-06, "loss": 0.1606, "step": 5247 }, { "epoch": 1.7, "learning_rate": 2.0821583113596686e-06, "loss": 0.1561, "step": 5248 }, { "epoch": 1.7, "learning_rate": 2.081296031217237e-06, "loss": 0.1712, "step": 5249 }, { "epoch": 1.7, "learning_rate": 2.080433802320117e-06, "loss": 0.1561, "step": 5250 }, { "epoch": 1.7, "learning_rate": 2.0795716247738374e-06, "loss": 0.1568, "step": 5251 }, { "epoch": 1.7, "learning_rate": 2.078709498683922e-06, "loss": 0.1572, "step": 5252 }, { "epoch": 1.7, "learning_rate": 2.0778474241558845e-06, "loss": 0.1553, "step": 5253 }, { "epoch": 1.7, "learning_rate": 2.0769854012952368e-06, "loss": 0.166, "step": 5254 }, { "epoch": 1.7, "learning_rate": 2.0761234302074803e-06, "loss": 0.1645, "step": 5255 }, { "epoch": 1.7, "learning_rate": 2.0752615109981116e-06, "loss": 0.1721, "step": 5256 }, { "epoch": 1.7, "learning_rate": 2.0743996437726233e-06, "loss": 0.1608, "step": 5257 }, { "epoch": 1.7, "learning_rate": 2.073537828636497e-06, "loss": 0.1589, "step": 5258 }, { "epoch": 1.7, "learning_rate": 2.0726760656952137e-06, "loss": 0.1551, "step": 5259 }, { "epoch": 1.7, "learning_rate": 2.0718143550542418e-06, "loss": 0.1613, "step": 5260 }, { "epoch": 1.7, "learning_rate": 2.0709526968190483e-06, "loss": 0.15, "step": 5261 }, { "epoch": 1.71, "learning_rate": 2.070091091095092e-06, "loss": 0.1664, "step": 5262 }, { "epoch": 1.71, "learning_rate": 2.0692295379878237e-06, "loss": 0.17, "step": 5263 }, { "epoch": 1.71, "learning_rate": 2.0683680376026897e-06, "loss": 0.1611, "step": 5264 }, { "epoch": 1.71, "learning_rate": 2.0675065900451287e-06, "loss": 0.1623, "step": 5265 }, { "epoch": 1.71, "learning_rate": 2.066645195420575e-06, "loss": 0.1483, "step": 5266 }, { "epoch": 1.71, "learning_rate": 2.0657838538344545e-06, "loss": 0.1585, "step": 5267 }, { "epoch": 1.71, "learning_rate": 2.0649225653921855e-06, "loss": 0.1472, "step": 5268 }, { "epoch": 1.71, "learning_rate": 2.064061330199184e-06, "loss": 0.1623, "step": 5269 }, { "epoch": 1.71, "learning_rate": 2.0632001483608544e-06, "loss": 0.1435, "step": 5270 }, { "epoch": 1.71, "learning_rate": 2.062339019982599e-06, "loss": 0.161, "step": 5271 }, { "epoch": 1.71, "learning_rate": 2.06147794516981e-06, "loss": 0.1679, "step": 5272 }, { "epoch": 1.71, "learning_rate": 2.0606169240278752e-06, "loss": 0.1706, "step": 5273 }, { "epoch": 1.71, "learning_rate": 2.059755956662176e-06, "loss": 0.1481, "step": 5274 }, { "epoch": 1.71, "learning_rate": 2.058895043178085e-06, "loss": 0.1691, "step": 5275 }, { "epoch": 1.71, "learning_rate": 2.0580341836809718e-06, "loss": 0.1491, "step": 5276 }, { "epoch": 1.71, "learning_rate": 2.0571733782761943e-06, "loss": 0.158, "step": 5277 }, { "epoch": 1.71, "learning_rate": 2.0563126270691097e-06, "loss": 0.1561, "step": 5278 }, { "epoch": 1.71, "learning_rate": 2.055451930165063e-06, "loss": 0.1545, "step": 5279 }, { "epoch": 1.71, "learning_rate": 2.054591287669398e-06, "loss": 0.1685, "step": 5280 }, { "epoch": 1.71, "learning_rate": 2.053730699687447e-06, "loss": 0.1489, "step": 5281 }, { "epoch": 1.71, "learning_rate": 2.052870166324537e-06, "loss": 0.1499, "step": 5282 }, { "epoch": 1.71, "learning_rate": 2.0520096876859918e-06, "loss": 0.161, "step": 5283 }, { "epoch": 1.71, "learning_rate": 2.051149263877123e-06, "loss": 0.1484, "step": 5284 }, { "epoch": 1.71, "learning_rate": 2.0502888950032396e-06, "loss": 0.1552, "step": 5285 }, { "epoch": 1.71, "learning_rate": 2.0494285811696417e-06, "loss": 0.1555, "step": 5286 }, { "epoch": 1.71, "learning_rate": 2.048568322481623e-06, "loss": 0.1511, "step": 5287 }, { "epoch": 1.71, "learning_rate": 2.0477081190444724e-06, "loss": 0.1572, "step": 5288 }, { "epoch": 1.71, "learning_rate": 2.046847970963468e-06, "loss": 0.169, "step": 5289 }, { "epoch": 1.71, "learning_rate": 2.0459878783438867e-06, "loss": 0.154, "step": 5290 }, { "epoch": 1.71, "learning_rate": 2.045127841290993e-06, "loss": 0.1581, "step": 5291 }, { "epoch": 1.71, "learning_rate": 2.0442678599100484e-06, "loss": 0.1519, "step": 5292 }, { "epoch": 1.72, "learning_rate": 2.043407934306306e-06, "loss": 0.1605, "step": 5293 }, { "epoch": 1.72, "learning_rate": 2.0425480645850124e-06, "loss": 0.1675, "step": 5294 }, { "epoch": 1.72, "learning_rate": 2.041688250851407e-06, "loss": 0.1636, "step": 5295 }, { "epoch": 1.72, "learning_rate": 2.0408284932107227e-06, "loss": 0.1769, "step": 5296 }, { "epoch": 1.72, "learning_rate": 2.039968791768186e-06, "loss": 0.1513, "step": 5297 }, { "epoch": 1.72, "learning_rate": 2.039109146629016e-06, "loss": 0.1728, "step": 5298 }, { "epoch": 1.72, "learning_rate": 2.0382495578984236e-06, "loss": 0.1642, "step": 5299 }, { "epoch": 1.72, "learning_rate": 2.0373900256816166e-06, "loss": 0.1531, "step": 5300 }, { "epoch": 1.72, "learning_rate": 2.0365305500837906e-06, "loss": 0.1694, "step": 5301 }, { "epoch": 1.72, "learning_rate": 2.0356711312101394e-06, "loss": 0.1577, "step": 5302 }, { "epoch": 1.72, "learning_rate": 2.0348117691658463e-06, "loss": 0.1571, "step": 5303 }, { "epoch": 1.72, "learning_rate": 2.03395246405609e-06, "loss": 0.1646, "step": 5304 }, { "epoch": 1.72, "learning_rate": 2.03309321598604e-06, "loss": 0.1785, "step": 5305 }, { "epoch": 1.72, "learning_rate": 2.03223402506086e-06, "loss": 0.1654, "step": 5306 }, { "epoch": 1.72, "learning_rate": 2.031374891385708e-06, "loss": 0.1717, "step": 5307 }, { "epoch": 1.72, "learning_rate": 2.0305158150657316e-06, "loss": 0.1608, "step": 5308 }, { "epoch": 1.72, "learning_rate": 2.0296567962060753e-06, "loss": 0.1567, "step": 5309 }, { "epoch": 1.72, "learning_rate": 2.0287978349118737e-06, "loss": 0.1673, "step": 5310 }, { "epoch": 1.72, "learning_rate": 2.0279389312882546e-06, "loss": 0.1635, "step": 5311 }, { "epoch": 1.72, "learning_rate": 2.027080085440341e-06, "loss": 0.172, "step": 5312 }, { "epoch": 1.72, "learning_rate": 2.0262212974732465e-06, "loss": 0.1606, "step": 5313 }, { "epoch": 1.72, "learning_rate": 2.0253625674920795e-06, "loss": 0.1484, "step": 5314 }, { "epoch": 1.72, "learning_rate": 2.0245038956019386e-06, "loss": 0.1666, "step": 5315 }, { "epoch": 1.72, "learning_rate": 2.0236452819079183e-06, "loss": 0.1522, "step": 5316 }, { "epoch": 1.72, "learning_rate": 2.0227867265151035e-06, "loss": 0.1507, "step": 5317 }, { "epoch": 1.72, "learning_rate": 2.0219282295285734e-06, "loss": 0.1708, "step": 5318 }, { "epoch": 1.72, "learning_rate": 2.021069791053401e-06, "loss": 0.1606, "step": 5319 }, { "epoch": 1.72, "learning_rate": 2.0202114111946483e-06, "loss": 0.1551, "step": 5320 }, { "epoch": 1.72, "learning_rate": 2.019353090057375e-06, "loss": 0.1635, "step": 5321 }, { "epoch": 1.72, "learning_rate": 2.018494827746631e-06, "loss": 0.1623, "step": 5322 }, { "epoch": 1.72, "learning_rate": 2.0176366243674575e-06, "loss": 0.1616, "step": 5323 }, { "epoch": 1.73, "learning_rate": 2.0167784800248924e-06, "loss": 0.156, "step": 5324 }, { "epoch": 1.73, "learning_rate": 2.0159203948239624e-06, "loss": 0.1428, "step": 5325 }, { "epoch": 1.73, "learning_rate": 2.015062368869691e-06, "loss": 0.156, "step": 5326 }, { "epoch": 1.73, "learning_rate": 2.0142044022670905e-06, "loss": 0.17, "step": 5327 }, { "epoch": 1.73, "learning_rate": 2.013346495121169e-06, "loss": 0.1643, "step": 5328 }, { "epoch": 1.73, "learning_rate": 2.012488647536925e-06, "loss": 0.1737, "step": 5329 }, { "epoch": 1.73, "learning_rate": 2.0116308596193502e-06, "loss": 0.1782, "step": 5330 }, { "epoch": 1.73, "learning_rate": 2.0107731314734316e-06, "loss": 0.1711, "step": 5331 }, { "epoch": 1.73, "learning_rate": 2.0099154632041446e-06, "loss": 0.1659, "step": 5332 }, { "epoch": 1.73, "learning_rate": 2.0090578549164614e-06, "loss": 0.1565, "step": 5333 }, { "epoch": 1.73, "learning_rate": 2.0082003067153436e-06, "loss": 0.1637, "step": 5334 }, { "epoch": 1.73, "learning_rate": 2.007342818705747e-06, "loss": 0.1699, "step": 5335 }, { "epoch": 1.73, "learning_rate": 2.006485390992621e-06, "loss": 0.1623, "step": 5336 }, { "epoch": 1.73, "learning_rate": 2.0056280236809044e-06, "loss": 0.1456, "step": 5337 }, { "epoch": 1.73, "learning_rate": 2.004770716875533e-06, "loss": 0.1558, "step": 5338 }, { "epoch": 1.73, "learning_rate": 2.0039134706814303e-06, "loss": 0.1558, "step": 5339 }, { "epoch": 1.73, "learning_rate": 2.0030562852035175e-06, "loss": 0.1725, "step": 5340 }, { "epoch": 1.73, "learning_rate": 2.0021991605467043e-06, "loss": 0.1589, "step": 5341 }, { "epoch": 1.73, "learning_rate": 2.0013420968158944e-06, "loss": 0.1524, "step": 5342 }, { "epoch": 1.73, "learning_rate": 2.0004850941159847e-06, "loss": 0.1493, "step": 5343 }, { "epoch": 1.73, "learning_rate": 1.999628152551863e-06, "loss": 0.1498, "step": 5344 }, { "epoch": 1.73, "learning_rate": 1.9987712722284132e-06, "loss": 0.1648, "step": 5345 }, { "epoch": 1.73, "learning_rate": 1.9979144532505064e-06, "loss": 0.1628, "step": 5346 }, { "epoch": 1.73, "learning_rate": 1.9970576957230094e-06, "loss": 0.1897, "step": 5347 }, { "epoch": 1.73, "learning_rate": 1.996200999750783e-06, "loss": 0.157, "step": 5348 }, { "epoch": 1.73, "learning_rate": 1.995344365438676e-06, "loss": 0.1691, "step": 5349 }, { "epoch": 1.73, "learning_rate": 1.994487792891534e-06, "loss": 0.1534, "step": 5350 }, { "epoch": 1.73, "learning_rate": 1.993631282214191e-06, "loss": 0.148, "step": 5351 }, { "epoch": 1.73, "learning_rate": 1.992774833511478e-06, "loss": 0.1816, "step": 5352 }, { "epoch": 1.73, "learning_rate": 1.991918446888216e-06, "loss": 0.1624, "step": 5353 }, { "epoch": 1.73, "learning_rate": 1.9910621224492154e-06, "loss": 0.148, "step": 5354 }, { "epoch": 1.74, "learning_rate": 1.9902058602992856e-06, "loss": 0.1757, "step": 5355 }, { "epoch": 1.74, "learning_rate": 1.989349660543222e-06, "loss": 0.1673, "step": 5356 }, { "epoch": 1.74, "learning_rate": 1.988493523285818e-06, "loss": 0.1773, "step": 5357 }, { "epoch": 1.74, "learning_rate": 1.9876374486318545e-06, "loss": 0.1672, "step": 5358 }, { "epoch": 1.74, "learning_rate": 1.9867814366861075e-06, "loss": 0.153, "step": 5359 }, { "epoch": 1.74, "learning_rate": 1.9859254875533435e-06, "loss": 0.152, "step": 5360 }, { "epoch": 1.74, "learning_rate": 1.9850696013383236e-06, "loss": 0.1564, "step": 5361 }, { "epoch": 1.74, "learning_rate": 1.9842137781458e-06, "loss": 0.1539, "step": 5362 }, { "epoch": 1.74, "learning_rate": 1.9833580180805155e-06, "loss": 0.1528, "step": 5363 }, { "epoch": 1.74, "learning_rate": 1.9825023212472095e-06, "loss": 0.1634, "step": 5364 }, { "epoch": 1.74, "learning_rate": 1.9816466877506095e-06, "loss": 0.1501, "step": 5365 }, { "epoch": 1.74, "learning_rate": 1.9807911176954357e-06, "loss": 0.1467, "step": 5366 }, { "epoch": 1.74, "learning_rate": 1.9799356111864036e-06, "loss": 0.1763, "step": 5367 }, { "epoch": 1.74, "learning_rate": 1.979080168328218e-06, "loss": 0.1487, "step": 5368 }, { "epoch": 1.74, "learning_rate": 1.9782247892255767e-06, "loss": 0.16, "step": 5369 }, { "epoch": 1.74, "learning_rate": 1.9773694739831702e-06, "loss": 0.1605, "step": 5370 }, { "epoch": 1.74, "learning_rate": 1.976514222705681e-06, "loss": 0.1696, "step": 5371 }, { "epoch": 1.74, "learning_rate": 1.975659035497783e-06, "loss": 0.1584, "step": 5372 }, { "epoch": 1.74, "learning_rate": 1.9748039124641426e-06, "loss": 0.16, "step": 5373 }, { "epoch": 1.74, "learning_rate": 1.9739488537094197e-06, "loss": 0.1608, "step": 5374 }, { "epoch": 1.74, "learning_rate": 1.973093859338263e-06, "loss": 0.1446, "step": 5375 }, { "epoch": 1.74, "learning_rate": 1.9722389294553188e-06, "loss": 0.1699, "step": 5376 }, { "epoch": 1.74, "learning_rate": 1.9713840641652206e-06, "loss": 0.1735, "step": 5377 }, { "epoch": 1.74, "learning_rate": 1.970529263572594e-06, "loss": 0.155, "step": 5378 }, { "epoch": 1.74, "learning_rate": 1.9696745277820613e-06, "loss": 0.1548, "step": 5379 }, { "epoch": 1.74, "learning_rate": 1.9688198568982316e-06, "loss": 0.1893, "step": 5380 }, { "epoch": 1.74, "learning_rate": 1.96796525102571e-06, "loss": 0.1665, "step": 5381 }, { "epoch": 1.74, "learning_rate": 1.96711071026909e-06, "loss": 0.164, "step": 5382 }, { "epoch": 1.74, "learning_rate": 1.9662562347329613e-06, "loss": 0.1672, "step": 5383 }, { "epoch": 1.74, "learning_rate": 1.9654018245219024e-06, "loss": 0.1678, "step": 5384 }, { "epoch": 1.74, "learning_rate": 1.9645474797404838e-06, "loss": 0.1795, "step": 5385 }, { "epoch": 1.75, "learning_rate": 1.963693200493271e-06, "loss": 0.1423, "step": 5386 }, { "epoch": 1.75, "learning_rate": 1.962838986884818e-06, "loss": 0.1699, "step": 5387 }, { "epoch": 1.75, "learning_rate": 1.9619848390196734e-06, "loss": 0.1556, "step": 5388 }, { "epoch": 1.75, "learning_rate": 1.9611307570023766e-06, "loss": 0.1642, "step": 5389 }, { "epoch": 1.75, "learning_rate": 1.960276740937458e-06, "loss": 0.1567, "step": 5390 }, { "epoch": 1.75, "learning_rate": 1.959422790929441e-06, "loss": 0.1514, "step": 5391 }, { "epoch": 1.75, "learning_rate": 1.9585689070828413e-06, "loss": 0.1522, "step": 5392 }, { "epoch": 1.75, "learning_rate": 1.9577150895021664e-06, "loss": 0.1585, "step": 5393 }, { "epoch": 1.75, "learning_rate": 1.9568613382919142e-06, "loss": 0.1524, "step": 5394 }, { "epoch": 1.75, "learning_rate": 1.9560076535565766e-06, "loss": 0.162, "step": 5395 }, { "epoch": 1.75, "learning_rate": 1.9551540354006366e-06, "loss": 0.1706, "step": 5396 }, { "epoch": 1.75, "learning_rate": 1.954300483928567e-06, "loss": 0.1366, "step": 5397 }, { "epoch": 1.75, "learning_rate": 1.953446999244836e-06, "loss": 0.1726, "step": 5398 }, { "epoch": 1.75, "learning_rate": 1.9525935814539e-06, "loss": 0.1657, "step": 5399 }, { "epoch": 1.75, "learning_rate": 1.951740230660212e-06, "loss": 0.1765, "step": 5400 }, { "epoch": 1.75, "learning_rate": 1.950886946968212e-06, "loss": 0.1703, "step": 5401 }, { "epoch": 1.75, "learning_rate": 1.9500337304823333e-06, "loss": 0.1556, "step": 5402 }, { "epoch": 1.75, "learning_rate": 1.9491805813070025e-06, "loss": 0.1489, "step": 5403 }, { "epoch": 1.75, "learning_rate": 1.948327499546635e-06, "loss": 0.1419, "step": 5404 }, { "epoch": 1.75, "learning_rate": 1.947474485305642e-06, "loss": 0.1597, "step": 5405 }, { "epoch": 1.75, "learning_rate": 1.9466215386884223e-06, "loss": 0.1565, "step": 5406 }, { "epoch": 1.75, "learning_rate": 1.9457686597993704e-06, "loss": 0.1544, "step": 5407 }, { "epoch": 1.75, "learning_rate": 1.9449158487428688e-06, "loss": 0.1513, "step": 5408 }, { "epoch": 1.75, "learning_rate": 1.9440631056232926e-06, "loss": 0.1681, "step": 5409 }, { "epoch": 1.75, "learning_rate": 1.9432104305450117e-06, "loss": 0.18, "step": 5410 }, { "epoch": 1.75, "learning_rate": 1.942357823612383e-06, "loss": 0.1493, "step": 5411 }, { "epoch": 1.75, "learning_rate": 1.9415052849297585e-06, "loss": 0.1554, "step": 5412 }, { "epoch": 1.75, "learning_rate": 1.9406528146014815e-06, "loss": 0.1477, "step": 5413 }, { "epoch": 1.75, "learning_rate": 1.939800412731884e-06, "loss": 0.1536, "step": 5414 }, { "epoch": 1.75, "learning_rate": 1.9389480794252933e-06, "loss": 0.1529, "step": 5415 }, { "epoch": 1.76, "learning_rate": 1.9380958147860254e-06, "loss": 0.1409, "step": 5416 }, { "epoch": 1.76, "learning_rate": 1.937243618918391e-06, "loss": 0.1604, "step": 5417 }, { "epoch": 1.76, "learning_rate": 1.936391491926689e-06, "loss": 0.164, "step": 5418 }, { "epoch": 1.76, "learning_rate": 1.9355394339152133e-06, "loss": 0.1746, "step": 5419 }, { "epoch": 1.76, "learning_rate": 1.9346874449882465e-06, "loss": 0.1687, "step": 5420 }, { "epoch": 1.76, "learning_rate": 1.9338355252500624e-06, "loss": 0.1539, "step": 5421 }, { "epoch": 1.76, "learning_rate": 1.93298367480493e-06, "loss": 0.1563, "step": 5422 }, { "epoch": 1.76, "learning_rate": 1.932131893757107e-06, "loss": 0.1451, "step": 5423 }, { "epoch": 1.76, "learning_rate": 1.9312801822108425e-06, "loss": 0.1581, "step": 5424 }, { "epoch": 1.76, "learning_rate": 1.9304285402703775e-06, "loss": 0.172, "step": 5425 }, { "epoch": 1.76, "learning_rate": 1.929576968039946e-06, "loss": 0.1562, "step": 5426 }, { "epoch": 1.76, "learning_rate": 1.928725465623772e-06, "loss": 0.1714, "step": 5427 }, { "epoch": 1.76, "learning_rate": 1.927874033126069e-06, "loss": 0.1623, "step": 5428 }, { "epoch": 1.76, "learning_rate": 1.927022670651047e-06, "loss": 0.1475, "step": 5429 }, { "epoch": 1.76, "learning_rate": 1.9261713783029024e-06, "loss": 0.1579, "step": 5430 }, { "epoch": 1.76, "learning_rate": 1.9253201561858266e-06, "loss": 0.1417, "step": 5431 }, { "epoch": 1.76, "learning_rate": 1.924469004404001e-06, "loss": 0.1605, "step": 5432 }, { "epoch": 1.76, "learning_rate": 1.9236179230615967e-06, "loss": 0.1546, "step": 5433 }, { "epoch": 1.76, "learning_rate": 1.922766912262779e-06, "loss": 0.1565, "step": 5434 }, { "epoch": 1.76, "learning_rate": 1.921915972111703e-06, "loss": 0.1553, "step": 5435 }, { "epoch": 1.76, "learning_rate": 1.9210651027125164e-06, "loss": 0.1502, "step": 5436 }, { "epoch": 1.76, "learning_rate": 1.9202143041693554e-06, "loss": 0.1607, "step": 5437 }, { "epoch": 1.76, "learning_rate": 1.919363576586352e-06, "loss": 0.1595, "step": 5438 }, { "epoch": 1.76, "learning_rate": 1.918512920067626e-06, "loss": 0.1549, "step": 5439 }, { "epoch": 1.76, "learning_rate": 1.9176623347172885e-06, "loss": 0.1506, "step": 5440 }, { "epoch": 1.76, "learning_rate": 1.9168118206394443e-06, "loss": 0.164, "step": 5441 }, { "epoch": 1.76, "learning_rate": 1.915961377938187e-06, "loss": 0.1554, "step": 5442 }, { "epoch": 1.76, "learning_rate": 1.9151110067176038e-06, "loss": 0.1904, "step": 5443 }, { "epoch": 1.76, "learning_rate": 1.914260707081771e-06, "loss": 0.1777, "step": 5444 }, { "epoch": 1.76, "learning_rate": 1.913410479134757e-06, "loss": 0.1565, "step": 5445 }, { "epoch": 1.76, "learning_rate": 1.9125603229806223e-06, "loss": 0.1568, "step": 5446 }, { "epoch": 1.77, "learning_rate": 1.9117102387234165e-06, "loss": 0.1538, "step": 5447 }, { "epoch": 1.77, "learning_rate": 1.910860226467183e-06, "loss": 0.1801, "step": 5448 }, { "epoch": 1.77, "learning_rate": 1.910010286315953e-06, "loss": 0.1492, "step": 5449 }, { "epoch": 1.77, "learning_rate": 1.9091604183737546e-06, "loss": 0.1643, "step": 5450 }, { "epoch": 1.77, "learning_rate": 1.9083106227446e-06, "loss": 0.1666, "step": 5451 }, { "epoch": 1.77, "learning_rate": 1.907460899532497e-06, "loss": 0.1618, "step": 5452 }, { "epoch": 1.77, "learning_rate": 1.9066112488414445e-06, "loss": 0.1504, "step": 5453 }, { "epoch": 1.77, "learning_rate": 1.90576167077543e-06, "loss": 0.1713, "step": 5454 }, { "epoch": 1.77, "learning_rate": 1.904912165438435e-06, "loss": 0.1601, "step": 5455 }, { "epoch": 1.77, "learning_rate": 1.9040627329344296e-06, "loss": 0.1635, "step": 5456 }, { "epoch": 1.77, "learning_rate": 1.9032133733673764e-06, "loss": 0.1638, "step": 5457 }, { "epoch": 1.77, "learning_rate": 1.9023640868412297e-06, "loss": 0.1568, "step": 5458 }, { "epoch": 1.77, "learning_rate": 1.9015148734599317e-06, "loss": 0.151, "step": 5459 }, { "epoch": 1.77, "learning_rate": 1.900665733327421e-06, "loss": 0.1602, "step": 5460 }, { "epoch": 1.77, "learning_rate": 1.899816666547621e-06, "loss": 0.1578, "step": 5461 }, { "epoch": 1.77, "learning_rate": 1.8989676732244522e-06, "loss": 0.1583, "step": 5462 }, { "epoch": 1.77, "learning_rate": 1.8981187534618217e-06, "loss": 0.1676, "step": 5463 }, { "epoch": 1.77, "learning_rate": 1.8972699073636283e-06, "loss": 0.1494, "step": 5464 }, { "epoch": 1.77, "learning_rate": 1.8964211350337637e-06, "loss": 0.1655, "step": 5465 }, { "epoch": 1.77, "learning_rate": 1.895572436576109e-06, "loss": 0.1442, "step": 5466 }, { "epoch": 1.77, "learning_rate": 1.8947238120945372e-06, "loss": 0.1777, "step": 5467 }, { "epoch": 1.77, "learning_rate": 1.8938752616929112e-06, "loss": 0.1505, "step": 5468 }, { "epoch": 1.77, "learning_rate": 1.8930267854750845e-06, "loss": 0.1638, "step": 5469 }, { "epoch": 1.77, "learning_rate": 1.8921783835449042e-06, "loss": 0.1514, "step": 5470 }, { "epoch": 1.77, "learning_rate": 1.8913300560062047e-06, "loss": 0.1715, "step": 5471 }, { "epoch": 1.77, "learning_rate": 1.890481802962815e-06, "loss": 0.1431, "step": 5472 }, { "epoch": 1.77, "learning_rate": 1.889633624518551e-06, "loss": 0.166, "step": 5473 }, { "epoch": 1.77, "learning_rate": 1.8887855207772235e-06, "loss": 0.172, "step": 5474 }, { "epoch": 1.77, "learning_rate": 1.8879374918426312e-06, "loss": 0.1509, "step": 5475 }, { "epoch": 1.77, "learning_rate": 1.8870895378185643e-06, "loss": 0.16, "step": 5476 }, { "epoch": 1.77, "learning_rate": 1.886241658808805e-06, "loss": 0.1584, "step": 5477 }, { "epoch": 1.78, "learning_rate": 1.8853938549171242e-06, "loss": 0.1583, "step": 5478 }, { "epoch": 1.78, "learning_rate": 1.8845461262472863e-06, "loss": 0.1488, "step": 5479 }, { "epoch": 1.78, "learning_rate": 1.883698472903045e-06, "loss": 0.1614, "step": 5480 }, { "epoch": 1.78, "learning_rate": 1.882850894988143e-06, "loss": 0.1444, "step": 5481 }, { "epoch": 1.78, "learning_rate": 1.882003392606318e-06, "loss": 0.162, "step": 5482 }, { "epoch": 1.78, "learning_rate": 1.8811559658612941e-06, "loss": 0.1592, "step": 5483 }, { "epoch": 1.78, "learning_rate": 1.88030861485679e-06, "loss": 0.1702, "step": 5484 }, { "epoch": 1.78, "learning_rate": 1.879461339696512e-06, "loss": 0.1724, "step": 5485 }, { "epoch": 1.78, "learning_rate": 1.8786141404841587e-06, "loss": 0.1508, "step": 5486 }, { "epoch": 1.78, "learning_rate": 1.8777670173234198e-06, "loss": 0.1725, "step": 5487 }, { "epoch": 1.78, "learning_rate": 1.8769199703179736e-06, "loss": 0.1698, "step": 5488 }, { "epoch": 1.78, "learning_rate": 1.8760729995714916e-06, "loss": 0.153, "step": 5489 }, { "epoch": 1.78, "learning_rate": 1.8752261051876337e-06, "loss": 0.1448, "step": 5490 }, { "epoch": 1.78, "learning_rate": 1.8743792872700529e-06, "loss": 0.1577, "step": 5491 }, { "epoch": 1.78, "learning_rate": 1.873532545922391e-06, "loss": 0.1562, "step": 5492 }, { "epoch": 1.78, "learning_rate": 1.8726858812482798e-06, "loss": 0.1697, "step": 5493 }, { "epoch": 1.78, "learning_rate": 1.871839293351345e-06, "loss": 0.1625, "step": 5494 }, { "epoch": 1.78, "learning_rate": 1.870992782335198e-06, "loss": 0.1551, "step": 5495 }, { "epoch": 1.78, "learning_rate": 1.8701463483034471e-06, "loss": 0.1659, "step": 5496 }, { "epoch": 1.78, "learning_rate": 1.8692999913596846e-06, "loss": 0.1516, "step": 5497 }, { "epoch": 1.78, "learning_rate": 1.8684537116074983e-06, "loss": 0.1492, "step": 5498 }, { "epoch": 1.78, "learning_rate": 1.8676075091504637e-06, "loss": 0.1452, "step": 5499 }, { "epoch": 1.78, "learning_rate": 1.866761384092147e-06, "loss": 0.1469, "step": 5500 }, { "epoch": 1.78, "learning_rate": 1.8659153365361076e-06, "loss": 0.1626, "step": 5501 }, { "epoch": 1.78, "learning_rate": 1.8650693665858916e-06, "loss": 0.1651, "step": 5502 }, { "epoch": 1.78, "learning_rate": 1.8642234743450394e-06, "loss": 0.1906, "step": 5503 }, { "epoch": 1.78, "learning_rate": 1.8633776599170783e-06, "loss": 0.1701, "step": 5504 }, { "epoch": 1.78, "learning_rate": 1.86253192340553e-06, "loss": 0.1576, "step": 5505 }, { "epoch": 1.78, "learning_rate": 1.8616862649139024e-06, "loss": 0.1723, "step": 5506 }, { "epoch": 1.78, "learning_rate": 1.8608406845456968e-06, "loss": 0.1748, "step": 5507 }, { "epoch": 1.78, "learning_rate": 1.8599951824044033e-06, "loss": 0.1741, "step": 5508 }, { "epoch": 1.79, "learning_rate": 1.8591497585935041e-06, "loss": 0.1505, "step": 5509 }, { "epoch": 1.79, "learning_rate": 1.858304413216471e-06, "loss": 0.1553, "step": 5510 }, { "epoch": 1.79, "learning_rate": 1.8574591463767656e-06, "loss": 0.1395, "step": 5511 }, { "epoch": 1.79, "learning_rate": 1.8566139581778392e-06, "loss": 0.1577, "step": 5512 }, { "epoch": 1.79, "learning_rate": 1.855768848723137e-06, "loss": 0.1537, "step": 5513 }, { "epoch": 1.79, "learning_rate": 1.85492381811609e-06, "loss": 0.1576, "step": 5514 }, { "epoch": 1.79, "learning_rate": 1.854078866460124e-06, "loss": 0.1578, "step": 5515 }, { "epoch": 1.79, "learning_rate": 1.8532339938586513e-06, "loss": 0.1575, "step": 5516 }, { "epoch": 1.79, "learning_rate": 1.8523892004150765e-06, "loss": 0.1597, "step": 5517 }, { "epoch": 1.79, "learning_rate": 1.8515444862327947e-06, "loss": 0.1675, "step": 5518 }, { "epoch": 1.79, "learning_rate": 1.8506998514151896e-06, "loss": 0.1454, "step": 5519 }, { "epoch": 1.79, "learning_rate": 1.8498552960656378e-06, "loss": 0.1756, "step": 5520 }, { "epoch": 1.79, "learning_rate": 1.8490108202875023e-06, "loss": 0.191, "step": 5521 }, { "epoch": 1.79, "learning_rate": 1.848166424184142e-06, "loss": 0.1647, "step": 5522 }, { "epoch": 1.79, "learning_rate": 1.8473221078589006e-06, "loss": 0.1769, "step": 5523 }, { "epoch": 1.79, "learning_rate": 1.846477871415114e-06, "loss": 0.1603, "step": 5524 }, { "epoch": 1.79, "learning_rate": 1.8456337149561105e-06, "loss": 0.1372, "step": 5525 }, { "epoch": 1.79, "learning_rate": 1.8447896385852043e-06, "loss": 0.1647, "step": 5526 }, { "epoch": 1.79, "learning_rate": 1.8439456424057044e-06, "loss": 0.1515, "step": 5527 }, { "epoch": 1.79, "learning_rate": 1.8431017265209067e-06, "loss": 0.1548, "step": 5528 }, { "epoch": 1.79, "learning_rate": 1.8422578910340985e-06, "loss": 0.1544, "step": 5529 }, { "epoch": 1.79, "learning_rate": 1.8414141360485565e-06, "loss": 0.161, "step": 5530 }, { "epoch": 1.79, "learning_rate": 1.840570461667549e-06, "loss": 0.1525, "step": 5531 }, { "epoch": 1.79, "learning_rate": 1.8397268679943333e-06, "loss": 0.1626, "step": 5532 }, { "epoch": 1.79, "learning_rate": 1.8388833551321562e-06, "loss": 0.1844, "step": 5533 }, { "epoch": 1.79, "learning_rate": 1.838039923184257e-06, "loss": 0.1677, "step": 5534 }, { "epoch": 1.79, "learning_rate": 1.8371965722538636e-06, "loss": 0.1692, "step": 5535 }, { "epoch": 1.79, "learning_rate": 1.836353302444192e-06, "loss": 0.1543, "step": 5536 }, { "epoch": 1.79, "learning_rate": 1.8355101138584524e-06, "loss": 0.149, "step": 5537 }, { "epoch": 1.79, "learning_rate": 1.8346670065998411e-06, "loss": 0.1577, "step": 5538 }, { "epoch": 1.79, "learning_rate": 1.8338239807715486e-06, "loss": 0.1668, "step": 5539 }, { "epoch": 1.8, "learning_rate": 1.8329810364767511e-06, "loss": 0.1688, "step": 5540 }, { "epoch": 1.8, "learning_rate": 1.8321381738186178e-06, "loss": 0.1614, "step": 5541 }, { "epoch": 1.8, "learning_rate": 1.8312953929003068e-06, "loss": 0.1549, "step": 5542 }, { "epoch": 1.8, "learning_rate": 1.8304526938249653e-06, "loss": 0.1749, "step": 5543 }, { "epoch": 1.8, "learning_rate": 1.8296100766957331e-06, "loss": 0.1753, "step": 5544 }, { "epoch": 1.8, "learning_rate": 1.828767541615737e-06, "loss": 0.1706, "step": 5545 }, { "epoch": 1.8, "learning_rate": 1.8279250886880962e-06, "loss": 0.1714, "step": 5546 }, { "epoch": 1.8, "learning_rate": 1.827082718015919e-06, "loss": 0.1567, "step": 5547 }, { "epoch": 1.8, "learning_rate": 1.8262404297023013e-06, "loss": 0.1524, "step": 5548 }, { "epoch": 1.8, "learning_rate": 1.8253982238503338e-06, "loss": 0.1634, "step": 5549 }, { "epoch": 1.8, "learning_rate": 1.8245561005630921e-06, "loss": 0.161, "step": 5550 }, { "epoch": 1.8, "learning_rate": 1.823714059943646e-06, "loss": 0.1585, "step": 5551 }, { "epoch": 1.8, "learning_rate": 1.8228721020950504e-06, "loss": 0.1526, "step": 5552 }, { "epoch": 1.8, "learning_rate": 1.8220302271203557e-06, "loss": 0.1377, "step": 5553 }, { "epoch": 1.8, "learning_rate": 1.8211884351225978e-06, "loss": 0.1472, "step": 5554 }, { "epoch": 1.8, "learning_rate": 1.8203467262048033e-06, "loss": 0.1384, "step": 5555 }, { "epoch": 1.8, "learning_rate": 1.819505100469991e-06, "loss": 0.1379, "step": 5556 }, { "epoch": 1.8, "learning_rate": 1.8186635580211654e-06, "loss": 0.1685, "step": 5557 }, { "epoch": 1.8, "learning_rate": 1.8178220989613255e-06, "loss": 0.1646, "step": 5558 }, { "epoch": 1.8, "learning_rate": 1.8169807233934567e-06, "loss": 0.1573, "step": 5559 }, { "epoch": 1.8, "learning_rate": 1.8161394314205343e-06, "loss": 0.1573, "step": 5560 }, { "epoch": 1.8, "learning_rate": 1.8152982231455262e-06, "loss": 0.1596, "step": 5561 }, { "epoch": 1.8, "learning_rate": 1.8144570986713867e-06, "loss": 0.1525, "step": 5562 }, { "epoch": 1.8, "learning_rate": 1.8136160581010624e-06, "loss": 0.1671, "step": 5563 }, { "epoch": 1.8, "learning_rate": 1.8127751015374865e-06, "loss": 0.1569, "step": 5564 }, { "epoch": 1.8, "learning_rate": 1.8119342290835864e-06, "loss": 0.1468, "step": 5565 }, { "epoch": 1.8, "learning_rate": 1.8110934408422758e-06, "loss": 0.1443, "step": 5566 }, { "epoch": 1.8, "learning_rate": 1.810252736916458e-06, "loss": 0.155, "step": 5567 }, { "epoch": 1.8, "learning_rate": 1.8094121174090288e-06, "loss": 0.1641, "step": 5568 }, { "epoch": 1.8, "learning_rate": 1.80857158242287e-06, "loss": 0.1396, "step": 5569 }, { "epoch": 1.8, "learning_rate": 1.8077311320608571e-06, "loss": 0.157, "step": 5570 }, { "epoch": 1.81, "learning_rate": 1.806890766425851e-06, "loss": 0.1766, "step": 5571 }, { "epoch": 1.81, "learning_rate": 1.8060504856207062e-06, "loss": 0.1684, "step": 5572 }, { "epoch": 1.81, "learning_rate": 1.8052102897482643e-06, "loss": 0.1614, "step": 5573 }, { "epoch": 1.81, "learning_rate": 1.8043701789113552e-06, "loss": 0.1679, "step": 5574 }, { "epoch": 1.81, "learning_rate": 1.8035301532128032e-06, "loss": 0.1725, "step": 5575 }, { "epoch": 1.81, "learning_rate": 1.8026902127554172e-06, "loss": 0.1455, "step": 5576 }, { "epoch": 1.81, "learning_rate": 1.8018503576419996e-06, "loss": 0.1603, "step": 5577 }, { "epoch": 1.81, "learning_rate": 1.8010105879753398e-06, "loss": 0.1774, "step": 5578 }, { "epoch": 1.81, "learning_rate": 1.800170903858216e-06, "loss": 0.1412, "step": 5579 }, { "epoch": 1.81, "learning_rate": 1.7993313053933998e-06, "loss": 0.1621, "step": 5580 }, { "epoch": 1.81, "learning_rate": 1.7984917926836484e-06, "loss": 0.1605, "step": 5581 }, { "epoch": 1.81, "learning_rate": 1.7976523658317104e-06, "loss": 0.1707, "step": 5582 }, { "epoch": 1.81, "learning_rate": 1.7968130249403238e-06, "loss": 0.1548, "step": 5583 }, { "epoch": 1.81, "learning_rate": 1.7959737701122157e-06, "loss": 0.1535, "step": 5584 }, { "epoch": 1.81, "learning_rate": 1.7951346014501027e-06, "loss": 0.157, "step": 5585 }, { "epoch": 1.81, "learning_rate": 1.7942955190566899e-06, "loss": 0.1628, "step": 5586 }, { "epoch": 1.81, "learning_rate": 1.7934565230346752e-06, "loss": 0.1789, "step": 5587 }, { "epoch": 1.81, "learning_rate": 1.7926176134867408e-06, "loss": 0.166, "step": 5588 }, { "epoch": 1.81, "learning_rate": 1.7917787905155634e-06, "loss": 0.1568, "step": 5589 }, { "epoch": 1.81, "learning_rate": 1.790940054223806e-06, "loss": 0.1581, "step": 5590 }, { "epoch": 1.81, "learning_rate": 1.7901014047141208e-06, "loss": 0.1639, "step": 5591 }, { "epoch": 1.81, "learning_rate": 1.7892628420891526e-06, "loss": 0.168, "step": 5592 }, { "epoch": 1.81, "learning_rate": 1.788424366451531e-06, "loss": 0.1705, "step": 5593 }, { "epoch": 1.81, "learning_rate": 1.7875859779038796e-06, "loss": 0.1649, "step": 5594 }, { "epoch": 1.81, "learning_rate": 1.7867476765488061e-06, "loss": 0.1553, "step": 5595 }, { "epoch": 1.81, "learning_rate": 1.7859094624889135e-06, "loss": 0.1574, "step": 5596 }, { "epoch": 1.81, "learning_rate": 1.7850713358267897e-06, "loss": 0.1552, "step": 5597 }, { "epoch": 1.81, "learning_rate": 1.7842332966650122e-06, "loss": 0.1592, "step": 5598 }, { "epoch": 1.81, "learning_rate": 1.7833953451061513e-06, "loss": 0.1616, "step": 5599 }, { "epoch": 1.81, "learning_rate": 1.7825574812527617e-06, "loss": 0.1655, "step": 5600 }, { "epoch": 1.81, "learning_rate": 1.781719705207392e-06, "loss": 0.1642, "step": 5601 }, { "epoch": 1.82, "learning_rate": 1.7808820170725772e-06, "loss": 0.1545, "step": 5602 }, { "epoch": 1.82, "learning_rate": 1.7800444169508414e-06, "loss": 0.175, "step": 5603 }, { "epoch": 1.82, "learning_rate": 1.7792069049446987e-06, "loss": 0.1692, "step": 5604 }, { "epoch": 1.82, "learning_rate": 1.7783694811566534e-06, "loss": 0.1423, "step": 5605 }, { "epoch": 1.82, "learning_rate": 1.777532145689198e-06, "loss": 0.1549, "step": 5606 }, { "epoch": 1.82, "learning_rate": 1.7766948986448131e-06, "loss": 0.1611, "step": 5607 }, { "epoch": 1.82, "learning_rate": 1.7758577401259716e-06, "loss": 0.1664, "step": 5608 }, { "epoch": 1.82, "learning_rate": 1.7750206702351325e-06, "loss": 0.1508, "step": 5609 }, { "epoch": 1.82, "learning_rate": 1.7741836890747438e-06, "loss": 0.1593, "step": 5610 }, { "epoch": 1.82, "learning_rate": 1.7733467967472459e-06, "loss": 0.1472, "step": 5611 }, { "epoch": 1.82, "learning_rate": 1.7725099933550649e-06, "loss": 0.1539, "step": 5612 }, { "epoch": 1.82, "learning_rate": 1.7716732790006188e-06, "loss": 0.1677, "step": 5613 }, { "epoch": 1.82, "learning_rate": 1.7708366537863129e-06, "loss": 0.1817, "step": 5614 }, { "epoch": 1.82, "learning_rate": 1.7700001178145409e-06, "loss": 0.1671, "step": 5615 }, { "epoch": 1.82, "learning_rate": 1.7691636711876883e-06, "loss": 0.1521, "step": 5616 }, { "epoch": 1.82, "learning_rate": 1.768327314008126e-06, "loss": 0.16, "step": 5617 }, { "epoch": 1.82, "learning_rate": 1.7674910463782186e-06, "loss": 0.1637, "step": 5618 }, { "epoch": 1.82, "learning_rate": 1.766654868400315e-06, "loss": 0.1538, "step": 5619 }, { "epoch": 1.82, "learning_rate": 1.7658187801767568e-06, "loss": 0.1734, "step": 5620 }, { "epoch": 1.82, "learning_rate": 1.7649827818098727e-06, "loss": 0.1629, "step": 5621 }, { "epoch": 1.82, "learning_rate": 1.7641468734019795e-06, "loss": 0.1651, "step": 5622 }, { "epoch": 1.82, "learning_rate": 1.7633110550553867e-06, "loss": 0.1744, "step": 5623 }, { "epoch": 1.82, "learning_rate": 1.7624753268723882e-06, "loss": 0.1618, "step": 5624 }, { "epoch": 1.82, "learning_rate": 1.7616396889552706e-06, "loss": 0.1615, "step": 5625 }, { "epoch": 1.82, "learning_rate": 1.7608041414063065e-06, "loss": 0.1624, "step": 5626 }, { "epoch": 1.82, "learning_rate": 1.7599686843277596e-06, "loss": 0.1845, "step": 5627 }, { "epoch": 1.82, "learning_rate": 1.7591333178218823e-06, "loss": 0.16, "step": 5628 }, { "epoch": 1.82, "learning_rate": 1.7582980419909135e-06, "loss": 0.1648, "step": 5629 }, { "epoch": 1.82, "learning_rate": 1.7574628569370855e-06, "loss": 0.1722, "step": 5630 }, { "epoch": 1.82, "learning_rate": 1.756627762762614e-06, "loss": 0.1613, "step": 5631 }, { "epoch": 1.83, "learning_rate": 1.7557927595697094e-06, "loss": 0.1798, "step": 5632 }, { "epoch": 1.83, "learning_rate": 1.7549578474605661e-06, "loss": 0.1598, "step": 5633 }, { "epoch": 1.83, "learning_rate": 1.754123026537369e-06, "loss": 0.1605, "step": 5634 }, { "epoch": 1.83, "learning_rate": 1.7532882969022941e-06, "loss": 0.1492, "step": 5635 }, { "epoch": 1.83, "learning_rate": 1.752453658657502e-06, "loss": 0.1642, "step": 5636 }, { "epoch": 1.83, "learning_rate": 1.7516191119051456e-06, "loss": 0.1545, "step": 5637 }, { "epoch": 1.83, "learning_rate": 1.7507846567473643e-06, "loss": 0.1669, "step": 5638 }, { "epoch": 1.83, "learning_rate": 1.749950293286289e-06, "loss": 0.1515, "step": 5639 }, { "epoch": 1.83, "learning_rate": 1.7491160216240368e-06, "loss": 0.1703, "step": 5640 }, { "epoch": 1.83, "learning_rate": 1.7482818418627134e-06, "loss": 0.18, "step": 5641 }, { "epoch": 1.83, "learning_rate": 1.7474477541044165e-06, "loss": 0.1712, "step": 5642 }, { "epoch": 1.83, "learning_rate": 1.746613758451228e-06, "loss": 0.1586, "step": 5643 }, { "epoch": 1.83, "learning_rate": 1.7457798550052232e-06, "loss": 0.1533, "step": 5644 }, { "epoch": 1.83, "learning_rate": 1.744946043868463e-06, "loss": 0.1612, "step": 5645 }, { "epoch": 1.83, "learning_rate": 1.7441123251429968e-06, "loss": 0.1618, "step": 5646 }, { "epoch": 1.83, "learning_rate": 1.7432786989308648e-06, "loss": 0.1643, "step": 5647 }, { "epoch": 1.83, "learning_rate": 1.7424451653340934e-06, "loss": 0.1653, "step": 5648 }, { "epoch": 1.83, "learning_rate": 1.7416117244547014e-06, "loss": 0.1659, "step": 5649 }, { "epoch": 1.83, "learning_rate": 1.7407783763946911e-06, "loss": 0.1506, "step": 5650 }, { "epoch": 1.83, "learning_rate": 1.7399451212560593e-06, "loss": 0.1616, "step": 5651 }, { "epoch": 1.83, "learning_rate": 1.7391119591407863e-06, "loss": 0.1561, "step": 5652 }, { "epoch": 1.83, "learning_rate": 1.7382788901508426e-06, "loss": 0.1633, "step": 5653 }, { "epoch": 1.83, "learning_rate": 1.7374459143881899e-06, "loss": 0.1547, "step": 5654 }, { "epoch": 1.83, "learning_rate": 1.7366130319547747e-06, "loss": 0.1409, "step": 5655 }, { "epoch": 1.83, "learning_rate": 1.735780242952534e-06, "loss": 0.1501, "step": 5656 }, { "epoch": 1.83, "learning_rate": 1.7349475474833938e-06, "loss": 0.1616, "step": 5657 }, { "epoch": 1.83, "learning_rate": 1.7341149456492672e-06, "loss": 0.1651, "step": 5658 }, { "epoch": 1.83, "learning_rate": 1.7332824375520574e-06, "loss": 0.1489, "step": 5659 }, { "epoch": 1.83, "learning_rate": 1.7324500232936536e-06, "loss": 0.1612, "step": 5660 }, { "epoch": 1.83, "learning_rate": 1.731617702975938e-06, "loss": 0.1563, "step": 5661 }, { "epoch": 1.83, "learning_rate": 1.7307854767007756e-06, "loss": 0.1649, "step": 5662 }, { "epoch": 1.84, "learning_rate": 1.7299533445700253e-06, "loss": 0.1612, "step": 5663 }, { "epoch": 1.84, "learning_rate": 1.7291213066855312e-06, "loss": 0.1655, "step": 5664 }, { "epoch": 1.84, "learning_rate": 1.7282893631491253e-06, "loss": 0.1475, "step": 5665 }, { "epoch": 1.84, "learning_rate": 1.7274575140626318e-06, "loss": 0.1667, "step": 5666 }, { "epoch": 1.84, "learning_rate": 1.7266257595278591e-06, "loss": 0.1531, "step": 5667 }, { "epoch": 1.84, "learning_rate": 1.725794099646607e-06, "loss": 0.1627, "step": 5668 }, { "epoch": 1.84, "learning_rate": 1.7249625345206623e-06, "loss": 0.1569, "step": 5669 }, { "epoch": 1.84, "learning_rate": 1.7241310642517998e-06, "loss": 0.149, "step": 5670 }, { "epoch": 1.84, "learning_rate": 1.7232996889417846e-06, "loss": 0.142, "step": 5671 }, { "epoch": 1.84, "learning_rate": 1.7224684086923677e-06, "loss": 0.1611, "step": 5672 }, { "epoch": 1.84, "learning_rate": 1.7216372236052914e-06, "loss": 0.1572, "step": 5673 }, { "epoch": 1.84, "learning_rate": 1.7208061337822828e-06, "loss": 0.1615, "step": 5674 }, { "epoch": 1.84, "learning_rate": 1.7199751393250614e-06, "loss": 0.1536, "step": 5675 }, { "epoch": 1.84, "learning_rate": 1.7191442403353314e-06, "loss": 0.1573, "step": 5676 }, { "epoch": 1.84, "learning_rate": 1.7183134369147866e-06, "loss": 0.181, "step": 5677 }, { "epoch": 1.84, "learning_rate": 1.71748272916511e-06, "loss": 0.1761, "step": 5678 }, { "epoch": 1.84, "learning_rate": 1.716652117187972e-06, "loss": 0.1399, "step": 5679 }, { "epoch": 1.84, "learning_rate": 1.7158216010850318e-06, "loss": 0.1553, "step": 5680 }, { "epoch": 1.84, "learning_rate": 1.7149911809579361e-06, "loss": 0.1612, "step": 5681 }, { "epoch": 1.84, "learning_rate": 1.7141608569083195e-06, "loss": 0.1567, "step": 5682 }, { "epoch": 1.84, "learning_rate": 1.7133306290378077e-06, "loss": 0.1681, "step": 5683 }, { "epoch": 1.84, "learning_rate": 1.7125004974480102e-06, "loss": 0.175, "step": 5684 }, { "epoch": 1.84, "learning_rate": 1.7116704622405295e-06, "loss": 0.1603, "step": 5685 }, { "epoch": 1.84, "learning_rate": 1.7108405235169511e-06, "loss": 0.1542, "step": 5686 }, { "epoch": 1.84, "learning_rate": 1.7100106813788544e-06, "loss": 0.1502, "step": 5687 }, { "epoch": 1.84, "learning_rate": 1.7091809359278025e-06, "loss": 0.1575, "step": 5688 }, { "epoch": 1.84, "learning_rate": 1.7083512872653477e-06, "loss": 0.1488, "step": 5689 }, { "epoch": 1.84, "learning_rate": 1.7075217354930324e-06, "loss": 0.1606, "step": 5690 }, { "epoch": 1.84, "learning_rate": 1.7066922807123834e-06, "loss": 0.1431, "step": 5691 }, { "epoch": 1.84, "learning_rate": 1.7058629230249207e-06, "loss": 0.1555, "step": 5692 }, { "epoch": 1.84, "learning_rate": 1.7050336625321484e-06, "loss": 0.1708, "step": 5693 }, { "epoch": 1.85, "learning_rate": 1.704204499335559e-06, "loss": 0.1682, "step": 5694 }, { "epoch": 1.85, "learning_rate": 1.7033754335366356e-06, "loss": 0.1603, "step": 5695 }, { "epoch": 1.85, "learning_rate": 1.7025464652368464e-06, "loss": 0.1649, "step": 5696 }, { "epoch": 1.85, "learning_rate": 1.701717594537651e-06, "loss": 0.17, "step": 5697 }, { "epoch": 1.85, "learning_rate": 1.7008888215404933e-06, "loss": 0.1714, "step": 5698 }, { "epoch": 1.85, "learning_rate": 1.7000601463468088e-06, "loss": 0.1557, "step": 5699 }, { "epoch": 1.85, "learning_rate": 1.6992315690580178e-06, "loss": 0.167, "step": 5700 }, { "epoch": 1.85, "learning_rate": 1.6984030897755304e-06, "loss": 0.1516, "step": 5701 }, { "epoch": 1.85, "learning_rate": 1.6975747086007454e-06, "loss": 0.1663, "step": 5702 }, { "epoch": 1.85, "learning_rate": 1.6967464256350468e-06, "loss": 0.1622, "step": 5703 }, { "epoch": 1.85, "learning_rate": 1.6959182409798111e-06, "loss": 0.1751, "step": 5704 }, { "epoch": 1.85, "learning_rate": 1.695090154736398e-06, "loss": 0.1616, "step": 5705 }, { "epoch": 1.85, "learning_rate": 1.6942621670061574e-06, "loss": 0.1497, "step": 5706 }, { "epoch": 1.85, "learning_rate": 1.693434277890428e-06, "loss": 0.1593, "step": 5707 }, { "epoch": 1.85, "learning_rate": 1.692606487490534e-06, "loss": 0.1587, "step": 5708 }, { "epoch": 1.85, "learning_rate": 1.6917787959077907e-06, "loss": 0.1688, "step": 5709 }, { "epoch": 1.85, "learning_rate": 1.6909512032434984e-06, "loss": 0.1694, "step": 5710 }, { "epoch": 1.85, "learning_rate": 1.6901237095989464e-06, "loss": 0.1574, "step": 5711 }, { "epoch": 1.85, "learning_rate": 1.6892963150754128e-06, "loss": 0.1524, "step": 5712 }, { "epoch": 1.85, "learning_rate": 1.6884690197741608e-06, "loss": 0.1799, "step": 5713 }, { "epoch": 1.85, "learning_rate": 1.6876418237964453e-06, "loss": 0.1574, "step": 5714 }, { "epoch": 1.85, "learning_rate": 1.6868147272435057e-06, "loss": 0.1574, "step": 5715 }, { "epoch": 1.85, "learning_rate": 1.6859877302165723e-06, "loss": 0.1604, "step": 5716 }, { "epoch": 1.85, "learning_rate": 1.6851608328168589e-06, "loss": 0.1669, "step": 5717 }, { "epoch": 1.85, "learning_rate": 1.6843340351455728e-06, "loss": 0.1729, "step": 5718 }, { "epoch": 1.85, "learning_rate": 1.6835073373039045e-06, "loss": 0.1609, "step": 5719 }, { "epoch": 1.85, "learning_rate": 1.6826807393930334e-06, "loss": 0.1658, "step": 5720 }, { "epoch": 1.85, "learning_rate": 1.6818542415141273e-06, "loss": 0.1558, "step": 5721 }, { "epoch": 1.85, "learning_rate": 1.6810278437683419e-06, "loss": 0.1506, "step": 5722 }, { "epoch": 1.85, "learning_rate": 1.6802015462568205e-06, "loss": 0.1676, "step": 5723 }, { "epoch": 1.85, "learning_rate": 1.6793753490806939e-06, "loss": 0.1594, "step": 5724 }, { "epoch": 1.86, "learning_rate": 1.678549252341079e-06, "loss": 0.1532, "step": 5725 }, { "epoch": 1.86, "learning_rate": 1.6777232561390844e-06, "loss": 0.1634, "step": 5726 }, { "epoch": 1.86, "learning_rate": 1.6768973605758021e-06, "loss": 0.1683, "step": 5727 }, { "epoch": 1.86, "learning_rate": 1.6760715657523158e-06, "loss": 0.1523, "step": 5728 }, { "epoch": 1.86, "learning_rate": 1.6752458717696928e-06, "loss": 0.151, "step": 5729 }, { "epoch": 1.86, "learning_rate": 1.674420278728991e-06, "loss": 0.1451, "step": 5730 }, { "epoch": 1.86, "learning_rate": 1.6735947867312553e-06, "loss": 0.1658, "step": 5731 }, { "epoch": 1.86, "learning_rate": 1.6727693958775172e-06, "loss": 0.1715, "step": 5732 }, { "epoch": 1.86, "learning_rate": 1.671944106268797e-06, "loss": 0.1463, "step": 5733 }, { "epoch": 1.86, "learning_rate": 1.671118918006101e-06, "loss": 0.1566, "step": 5734 }, { "epoch": 1.86, "learning_rate": 1.6702938311904262e-06, "loss": 0.1692, "step": 5735 }, { "epoch": 1.86, "learning_rate": 1.6694688459227545e-06, "loss": 0.1707, "step": 5736 }, { "epoch": 1.86, "learning_rate": 1.6686439623040548e-06, "loss": 0.1689, "step": 5737 }, { "epoch": 1.86, "learning_rate": 1.6678191804352873e-06, "loss": 0.1634, "step": 5738 }, { "epoch": 1.86, "learning_rate": 1.6669945004173944e-06, "loss": 0.1753, "step": 5739 }, { "epoch": 1.86, "learning_rate": 1.6661699223513118e-06, "loss": 0.1635, "step": 5740 }, { "epoch": 1.86, "learning_rate": 1.6653454463379582e-06, "loss": 0.1437, "step": 5741 }, { "epoch": 1.86, "learning_rate": 1.6645210724782423e-06, "loss": 0.1615, "step": 5742 }, { "epoch": 1.86, "learning_rate": 1.6636968008730586e-06, "loss": 0.1594, "step": 5743 }, { "epoch": 1.86, "learning_rate": 1.6628726316232902e-06, "loss": 0.1541, "step": 5744 }, { "epoch": 1.86, "learning_rate": 1.6620485648298084e-06, "loss": 0.1564, "step": 5745 }, { "epoch": 1.86, "learning_rate": 1.6612246005934694e-06, "loss": 0.181, "step": 5746 }, { "epoch": 1.86, "learning_rate": 1.66040073901512e-06, "loss": 0.1641, "step": 5747 }, { "epoch": 1.86, "learning_rate": 1.6595769801955925e-06, "loss": 0.1657, "step": 5748 }, { "epoch": 1.86, "learning_rate": 1.6587533242357053e-06, "loss": 0.1627, "step": 5749 }, { "epoch": 1.86, "learning_rate": 1.6579297712362686e-06, "loss": 0.1503, "step": 5750 }, { "epoch": 1.86, "learning_rate": 1.6571063212980753e-06, "loss": 0.1549, "step": 5751 }, { "epoch": 1.86, "learning_rate": 1.6562829745219089e-06, "loss": 0.1368, "step": 5752 }, { "epoch": 1.86, "learning_rate": 1.6554597310085383e-06, "loss": 0.1527, "step": 5753 }, { "epoch": 1.86, "learning_rate": 1.6546365908587213e-06, "loss": 0.1503, "step": 5754 }, { "epoch": 1.86, "learning_rate": 1.653813554173202e-06, "loss": 0.1554, "step": 5755 }, { "epoch": 1.87, "learning_rate": 1.6529906210527107e-06, "loss": 0.149, "step": 5756 }, { "epoch": 1.87, "learning_rate": 1.6521677915979688e-06, "loss": 0.1613, "step": 5757 }, { "epoch": 1.87, "learning_rate": 1.6513450659096804e-06, "loss": 0.1677, "step": 5758 }, { "epoch": 1.87, "learning_rate": 1.6505224440885414e-06, "loss": 0.157, "step": 5759 }, { "epoch": 1.87, "learning_rate": 1.649699926235232e-06, "loss": 0.1566, "step": 5760 }, { "epoch": 1.87, "learning_rate": 1.6488775124504188e-06, "loss": 0.158, "step": 5761 }, { "epoch": 1.87, "learning_rate": 1.6480552028347597e-06, "loss": 0.1537, "step": 5762 }, { "epoch": 1.87, "learning_rate": 1.6472329974888956e-06, "loss": 0.1614, "step": 5763 }, { "epoch": 1.87, "learning_rate": 1.6464108965134578e-06, "loss": 0.1806, "step": 5764 }, { "epoch": 1.87, "learning_rate": 1.645588900009062e-06, "loss": 0.1642, "step": 5765 }, { "epoch": 1.87, "learning_rate": 1.6447670080763146e-06, "loss": 0.1572, "step": 5766 }, { "epoch": 1.87, "learning_rate": 1.6439452208158058e-06, "loss": 0.153, "step": 5767 }, { "epoch": 1.87, "learning_rate": 1.6431235383281135e-06, "loss": 0.1597, "step": 5768 }, { "epoch": 1.87, "learning_rate": 1.6423019607138064e-06, "loss": 0.1525, "step": 5769 }, { "epoch": 1.87, "learning_rate": 1.641480488073435e-06, "loss": 0.1519, "step": 5770 }, { "epoch": 1.87, "learning_rate": 1.6406591205075417e-06, "loss": 0.1541, "step": 5771 }, { "epoch": 1.87, "learning_rate": 1.639837858116653e-06, "loss": 0.1597, "step": 5772 }, { "epoch": 1.87, "learning_rate": 1.6390167010012824e-06, "loss": 0.1564, "step": 5773 }, { "epoch": 1.87, "learning_rate": 1.638195649261934e-06, "loss": 0.1505, "step": 5774 }, { "epoch": 1.87, "learning_rate": 1.6373747029990943e-06, "loss": 0.1704, "step": 5775 }, { "epoch": 1.87, "learning_rate": 1.6365538623132405e-06, "loss": 0.1686, "step": 5776 }, { "epoch": 1.87, "learning_rate": 1.6357331273048343e-06, "loss": 0.1427, "step": 5777 }, { "epoch": 1.87, "learning_rate": 1.6349124980743278e-06, "loss": 0.1598, "step": 5778 }, { "epoch": 1.87, "learning_rate": 1.6340919747221568e-06, "loss": 0.1529, "step": 5779 }, { "epoch": 1.87, "learning_rate": 1.633271557348744e-06, "loss": 0.1563, "step": 5780 }, { "epoch": 1.87, "learning_rate": 1.6324512460545034e-06, "loss": 0.15, "step": 5781 }, { "epoch": 1.87, "learning_rate": 1.6316310409398306e-06, "loss": 0.1725, "step": 5782 }, { "epoch": 1.87, "learning_rate": 1.6308109421051132e-06, "loss": 0.1498, "step": 5783 }, { "epoch": 1.87, "learning_rate": 1.6299909496507214e-06, "loss": 0.1687, "step": 5784 }, { "epoch": 1.87, "learning_rate": 1.6291710636770152e-06, "loss": 0.1594, "step": 5785 }, { "epoch": 1.87, "learning_rate": 1.628351284284341e-06, "loss": 0.1561, "step": 5786 }, { "epoch": 1.88, "learning_rate": 1.6275316115730302e-06, "loss": 0.1661, "step": 5787 }, { "epoch": 1.88, "learning_rate": 1.626712045643405e-06, "loss": 0.1703, "step": 5788 }, { "epoch": 1.88, "learning_rate": 1.6258925865957703e-06, "loss": 0.1737, "step": 5789 }, { "epoch": 1.88, "learning_rate": 1.625073234530422e-06, "loss": 0.1747, "step": 5790 }, { "epoch": 1.88, "learning_rate": 1.62425398954764e-06, "loss": 0.1533, "step": 5791 }, { "epoch": 1.88, "learning_rate": 1.6234348517476905e-06, "loss": 0.149, "step": 5792 }, { "epoch": 1.88, "learning_rate": 1.6226158212308307e-06, "loss": 0.1731, "step": 5793 }, { "epoch": 1.88, "learning_rate": 1.6217968980972998e-06, "loss": 0.1579, "step": 5794 }, { "epoch": 1.88, "learning_rate": 1.620978082447327e-06, "loss": 0.1593, "step": 5795 }, { "epoch": 1.88, "learning_rate": 1.6201593743811275e-06, "loss": 0.1524, "step": 5796 }, { "epoch": 1.88, "learning_rate": 1.6193407739989037e-06, "loss": 0.1588, "step": 5797 }, { "epoch": 1.88, "learning_rate": 1.6185222814008434e-06, "loss": 0.15, "step": 5798 }, { "epoch": 1.88, "learning_rate": 1.6177038966871213e-06, "loss": 0.1605, "step": 5799 }, { "epoch": 1.88, "learning_rate": 1.6168856199579025e-06, "loss": 0.1513, "step": 5800 }, { "epoch": 1.88, "learning_rate": 1.6160674513133332e-06, "loss": 0.1542, "step": 5801 }, { "epoch": 1.88, "learning_rate": 1.615249390853552e-06, "loss": 0.1588, "step": 5802 }, { "epoch": 1.88, "learning_rate": 1.61443143867868e-06, "loss": 0.1623, "step": 5803 }, { "epoch": 1.88, "learning_rate": 1.613613594888826e-06, "loss": 0.1615, "step": 5804 }, { "epoch": 1.88, "learning_rate": 1.612795859584088e-06, "loss": 0.1793, "step": 5805 }, { "epoch": 1.88, "learning_rate": 1.611978232864548e-06, "loss": 0.1508, "step": 5806 }, { "epoch": 1.88, "learning_rate": 1.6111607148302758e-06, "loss": 0.1651, "step": 5807 }, { "epoch": 1.88, "learning_rate": 1.6103433055813265e-06, "loss": 0.1645, "step": 5808 }, { "epoch": 1.88, "learning_rate": 1.6095260052177446e-06, "loss": 0.159, "step": 5809 }, { "epoch": 1.88, "learning_rate": 1.6087088138395598e-06, "loss": 0.1704, "step": 5810 }, { "epoch": 1.88, "learning_rate": 1.6078917315467867e-06, "loss": 0.1888, "step": 5811 }, { "epoch": 1.88, "learning_rate": 1.6070747584394303e-06, "loss": 0.1436, "step": 5812 }, { "epoch": 1.88, "learning_rate": 1.6062578946174785e-06, "loss": 0.1511, "step": 5813 }, { "epoch": 1.88, "learning_rate": 1.605441140180909e-06, "loss": 0.1764, "step": 5814 }, { "epoch": 1.88, "learning_rate": 1.6046244952296839e-06, "loss": 0.1534, "step": 5815 }, { "epoch": 1.88, "learning_rate": 1.6038079598637523e-06, "loss": 0.1518, "step": 5816 }, { "epoch": 1.88, "learning_rate": 1.6029915341830503e-06, "loss": 0.1599, "step": 5817 }, { "epoch": 1.89, "learning_rate": 1.6021752182875012e-06, "loss": 0.1505, "step": 5818 }, { "epoch": 1.89, "learning_rate": 1.6013590122770143e-06, "loss": 0.149, "step": 5819 }, { "epoch": 1.89, "learning_rate": 1.6005429162514834e-06, "loss": 0.1634, "step": 5820 }, { "epoch": 1.89, "learning_rate": 1.5997269303107937e-06, "loss": 0.1627, "step": 5821 }, { "epoch": 1.89, "learning_rate": 1.598911054554812e-06, "loss": 0.1626, "step": 5822 }, { "epoch": 1.89, "learning_rate": 1.5980952890833929e-06, "loss": 0.1636, "step": 5823 }, { "epoch": 1.89, "learning_rate": 1.5972796339963806e-06, "loss": 0.1529, "step": 5824 }, { "epoch": 1.89, "learning_rate": 1.5964640893936015e-06, "loss": 0.1498, "step": 5825 }, { "epoch": 1.89, "learning_rate": 1.595648655374871e-06, "loss": 0.1548, "step": 5826 }, { "epoch": 1.89, "learning_rate": 1.5948333320399905e-06, "loss": 0.1595, "step": 5827 }, { "epoch": 1.89, "learning_rate": 1.5940181194887472e-06, "loss": 0.1504, "step": 5828 }, { "epoch": 1.89, "learning_rate": 1.5932030178209163e-06, "loss": 0.1689, "step": 5829 }, { "epoch": 1.89, "learning_rate": 1.592388027136256e-06, "loss": 0.1522, "step": 5830 }, { "epoch": 1.89, "learning_rate": 1.591573147534516e-06, "loss": 0.1453, "step": 5831 }, { "epoch": 1.89, "learning_rate": 1.5907583791154275e-06, "loss": 0.175, "step": 5832 }, { "epoch": 1.89, "learning_rate": 1.5899437219787124e-06, "loss": 0.1684, "step": 5833 }, { "epoch": 1.89, "learning_rate": 1.5891291762240757e-06, "loss": 0.18, "step": 5834 }, { "epoch": 1.89, "learning_rate": 1.5883147419512086e-06, "loss": 0.1569, "step": 5835 }, { "epoch": 1.89, "learning_rate": 1.5875004192597926e-06, "loss": 0.158, "step": 5836 }, { "epoch": 1.89, "learning_rate": 1.5866862082494907e-06, "loss": 0.153, "step": 5837 }, { "epoch": 1.89, "learning_rate": 1.5858721090199564e-06, "loss": 0.1628, "step": 5838 }, { "epoch": 1.89, "learning_rate": 1.5850581216708254e-06, "loss": 0.1475, "step": 5839 }, { "epoch": 1.89, "learning_rate": 1.5842442463017235e-06, "loss": 0.1517, "step": 5840 }, { "epoch": 1.89, "learning_rate": 1.583430483012261e-06, "loss": 0.1469, "step": 5841 }, { "epoch": 1.89, "learning_rate": 1.5826168319020332e-06, "loss": 0.1415, "step": 5842 }, { "epoch": 1.89, "learning_rate": 1.5818032930706254e-06, "loss": 0.164, "step": 5843 }, { "epoch": 1.89, "learning_rate": 1.5809898666176044e-06, "loss": 0.1507, "step": 5844 }, { "epoch": 1.89, "learning_rate": 1.5801765526425283e-06, "loss": 0.1419, "step": 5845 }, { "epoch": 1.89, "learning_rate": 1.5793633512449374e-06, "loss": 0.1669, "step": 5846 }, { "epoch": 1.89, "learning_rate": 1.578550262524359e-06, "loss": 0.1703, "step": 5847 }, { "epoch": 1.9, "learning_rate": 1.5777372865803091e-06, "loss": 0.1564, "step": 5848 }, { "epoch": 1.9, "learning_rate": 1.5769244235122867e-06, "loss": 0.1529, "step": 5849 }, { "epoch": 1.9, "learning_rate": 1.576111673419779e-06, "loss": 0.1836, "step": 5850 }, { "epoch": 1.9, "learning_rate": 1.5752990364022588e-06, "loss": 0.144, "step": 5851 }, { "epoch": 1.9, "learning_rate": 1.5744865125591837e-06, "loss": 0.1546, "step": 5852 }, { "epoch": 1.9, "learning_rate": 1.573674101990001e-06, "loss": 0.161, "step": 5853 }, { "epoch": 1.9, "learning_rate": 1.5728618047941393e-06, "loss": 0.1459, "step": 5854 }, { "epoch": 1.9, "learning_rate": 1.5720496210710185e-06, "loss": 0.1423, "step": 5855 }, { "epoch": 1.9, "learning_rate": 1.5712375509200397e-06, "loss": 0.1565, "step": 5856 }, { "epoch": 1.9, "learning_rate": 1.5704255944405947e-06, "loss": 0.1606, "step": 5857 }, { "epoch": 1.9, "learning_rate": 1.5696137517320582e-06, "loss": 0.1481, "step": 5858 }, { "epoch": 1.9, "learning_rate": 1.5688020228937905e-06, "loss": 0.1603, "step": 5859 }, { "epoch": 1.9, "learning_rate": 1.5679904080251414e-06, "loss": 0.1647, "step": 5860 }, { "epoch": 1.9, "learning_rate": 1.567178907225443e-06, "loss": 0.1584, "step": 5861 }, { "epoch": 1.9, "learning_rate": 1.5663675205940164e-06, "loss": 0.1639, "step": 5862 }, { "epoch": 1.9, "learning_rate": 1.5655562482301664e-06, "loss": 0.1722, "step": 5863 }, { "epoch": 1.9, "learning_rate": 1.5647450902331866e-06, "loss": 0.1581, "step": 5864 }, { "epoch": 1.9, "learning_rate": 1.5639340467023534e-06, "loss": 0.1636, "step": 5865 }, { "epoch": 1.9, "learning_rate": 1.5631231177369305e-06, "loss": 0.1542, "step": 5866 }, { "epoch": 1.9, "learning_rate": 1.562312303436169e-06, "loss": 0.1531, "step": 5867 }, { "epoch": 1.9, "learning_rate": 1.5615016038993036e-06, "loss": 0.1624, "step": 5868 }, { "epoch": 1.9, "learning_rate": 1.5606910192255565e-06, "loss": 0.161, "step": 5869 }, { "epoch": 1.9, "learning_rate": 1.5598805495141362e-06, "loss": 0.1774, "step": 5870 }, { "epoch": 1.9, "learning_rate": 1.5590701948642348e-06, "loss": 0.1598, "step": 5871 }, { "epoch": 1.9, "learning_rate": 1.5582599553750332e-06, "loss": 0.1579, "step": 5872 }, { "epoch": 1.9, "learning_rate": 1.5574498311456953e-06, "loss": 0.1558, "step": 5873 }, { "epoch": 1.9, "learning_rate": 1.5566398222753745e-06, "loss": 0.1633, "step": 5874 }, { "epoch": 1.9, "learning_rate": 1.5558299288632061e-06, "loss": 0.1555, "step": 5875 }, { "epoch": 1.9, "learning_rate": 1.555020151008315e-06, "loss": 0.1591, "step": 5876 }, { "epoch": 1.9, "learning_rate": 1.5542104888098093e-06, "loss": 0.155, "step": 5877 }, { "epoch": 1.9, "learning_rate": 1.553400942366783e-06, "loss": 0.1671, "step": 5878 }, { "epoch": 1.91, "learning_rate": 1.5525915117783182e-06, "loss": 0.1541, "step": 5879 }, { "epoch": 1.91, "learning_rate": 1.5517821971434804e-06, "loss": 0.1596, "step": 5880 }, { "epoch": 1.91, "learning_rate": 1.5509729985613232e-06, "loss": 0.1518, "step": 5881 }, { "epoch": 1.91, "learning_rate": 1.5501639161308829e-06, "loss": 0.1776, "step": 5882 }, { "epoch": 1.91, "learning_rate": 1.5493549499511834e-06, "loss": 0.1599, "step": 5883 }, { "epoch": 1.91, "learning_rate": 1.5485461001212365e-06, "loss": 0.156, "step": 5884 }, { "epoch": 1.91, "learning_rate": 1.5477373667400347e-06, "loss": 0.1526, "step": 5885 }, { "epoch": 1.91, "learning_rate": 1.5469287499065615e-06, "loss": 0.1777, "step": 5886 }, { "epoch": 1.91, "learning_rate": 1.5461202497197821e-06, "loss": 0.1448, "step": 5887 }, { "epoch": 1.91, "learning_rate": 1.5453118662786509e-06, "loss": 0.1493, "step": 5888 }, { "epoch": 1.91, "learning_rate": 1.544503599682105e-06, "loss": 0.1573, "step": 5889 }, { "epoch": 1.91, "learning_rate": 1.5436954500290684e-06, "loss": 0.1632, "step": 5890 }, { "epoch": 1.91, "learning_rate": 1.5428874174184509e-06, "loss": 0.1517, "step": 5891 }, { "epoch": 1.91, "learning_rate": 1.5420795019491475e-06, "loss": 0.1542, "step": 5892 }, { "epoch": 1.91, "learning_rate": 1.5412717037200406e-06, "loss": 0.1578, "step": 5893 }, { "epoch": 1.91, "learning_rate": 1.540464022829996e-06, "loss": 0.1597, "step": 5894 }, { "epoch": 1.91, "learning_rate": 1.5396564593778646e-06, "loss": 0.1639, "step": 5895 }, { "epoch": 1.91, "learning_rate": 1.538849013462487e-06, "loss": 0.1706, "step": 5896 }, { "epoch": 1.91, "learning_rate": 1.5380416851826845e-06, "loss": 0.1601, "step": 5897 }, { "epoch": 1.91, "learning_rate": 1.537234474637268e-06, "loss": 0.1545, "step": 5898 }, { "epoch": 1.91, "learning_rate": 1.5364273819250308e-06, "loss": 0.1636, "step": 5899 }, { "epoch": 1.91, "learning_rate": 1.535620407144755e-06, "loss": 0.1702, "step": 5900 }, { "epoch": 1.91, "learning_rate": 1.534813550395205e-06, "loss": 0.1623, "step": 5901 }, { "epoch": 1.91, "learning_rate": 1.5340068117751329e-06, "loss": 0.1688, "step": 5902 }, { "epoch": 1.91, "learning_rate": 1.5332001913832754e-06, "loss": 0.1409, "step": 5903 }, { "epoch": 1.91, "learning_rate": 1.5323936893183542e-06, "loss": 0.1558, "step": 5904 }, { "epoch": 1.91, "learning_rate": 1.5315873056790791e-06, "loss": 0.1574, "step": 5905 }, { "epoch": 1.91, "learning_rate": 1.5307810405641433e-06, "loss": 0.1629, "step": 5906 }, { "epoch": 1.91, "learning_rate": 1.5299748940722241e-06, "loss": 0.151, "step": 5907 }, { "epoch": 1.91, "learning_rate": 1.5291688663019885e-06, "loss": 0.1685, "step": 5908 }, { "epoch": 1.91, "learning_rate": 1.5283629573520841e-06, "loss": 0.1613, "step": 5909 }, { "epoch": 1.92, "learning_rate": 1.5275571673211487e-06, "loss": 0.1597, "step": 5910 }, { "epoch": 1.92, "learning_rate": 1.5267514963078014e-06, "loss": 0.162, "step": 5911 }, { "epoch": 1.92, "learning_rate": 1.5259459444106497e-06, "loss": 0.1547, "step": 5912 }, { "epoch": 1.92, "learning_rate": 1.5251405117282843e-06, "loss": 0.1486, "step": 5913 }, { "epoch": 1.92, "learning_rate": 1.524335198359283e-06, "loss": 0.1757, "step": 5914 }, { "epoch": 1.92, "learning_rate": 1.5235300044022088e-06, "loss": 0.1795, "step": 5915 }, { "epoch": 1.92, "learning_rate": 1.522724929955608e-06, "loss": 0.1664, "step": 5916 }, { "epoch": 1.92, "learning_rate": 1.5219199751180162e-06, "loss": 0.1497, "step": 5917 }, { "epoch": 1.92, "learning_rate": 1.5211151399879505e-06, "loss": 0.1586, "step": 5918 }, { "epoch": 1.92, "learning_rate": 1.5203104246639144e-06, "loss": 0.1561, "step": 5919 }, { "epoch": 1.92, "learning_rate": 1.5195058292443996e-06, "loss": 0.1635, "step": 5920 }, { "epoch": 1.92, "learning_rate": 1.518701353827878e-06, "loss": 0.1783, "step": 5921 }, { "epoch": 1.92, "learning_rate": 1.5178969985128122e-06, "loss": 0.1543, "step": 5922 }, { "epoch": 1.92, "learning_rate": 1.5170927633976457e-06, "loss": 0.1644, "step": 5923 }, { "epoch": 1.92, "learning_rate": 1.5162886485808102e-06, "loss": 0.1603, "step": 5924 }, { "epoch": 1.92, "learning_rate": 1.515484654160721e-06, "loss": 0.1663, "step": 5925 }, { "epoch": 1.92, "learning_rate": 1.5146807802357782e-06, "loss": 0.1493, "step": 5926 }, { "epoch": 1.92, "learning_rate": 1.5138770269043704e-06, "loss": 0.1515, "step": 5927 }, { "epoch": 1.92, "learning_rate": 1.513073394264867e-06, "loss": 0.1669, "step": 5928 }, { "epoch": 1.92, "learning_rate": 1.5122698824156271e-06, "loss": 0.1453, "step": 5929 }, { "epoch": 1.92, "learning_rate": 1.5114664914549903e-06, "loss": 0.1537, "step": 5930 }, { "epoch": 1.92, "learning_rate": 1.5106632214812865e-06, "loss": 0.157, "step": 5931 }, { "epoch": 1.92, "learning_rate": 1.5098600725928269e-06, "loss": 0.1644, "step": 5932 }, { "epoch": 1.92, "learning_rate": 1.5090570448879088e-06, "loss": 0.1645, "step": 5933 }, { "epoch": 1.92, "learning_rate": 1.5082541384648154e-06, "loss": 0.1702, "step": 5934 }, { "epoch": 1.92, "learning_rate": 1.5074513534218137e-06, "loss": 0.1469, "step": 5935 }, { "epoch": 1.92, "learning_rate": 1.5066486898571588e-06, "loss": 0.1733, "step": 5936 }, { "epoch": 1.92, "learning_rate": 1.5058461478690878e-06, "loss": 0.1542, "step": 5937 }, { "epoch": 1.92, "learning_rate": 1.5050437275558233e-06, "loss": 0.1645, "step": 5938 }, { "epoch": 1.92, "learning_rate": 1.5042414290155754e-06, "loss": 0.1486, "step": 5939 }, { "epoch": 1.92, "learning_rate": 1.5034392523465364e-06, "loss": 0.1502, "step": 5940 }, { "epoch": 1.93, "learning_rate": 1.502637197646886e-06, "loss": 0.1586, "step": 5941 }, { "epoch": 1.93, "learning_rate": 1.5018352650147872e-06, "loss": 0.1479, "step": 5942 }, { "epoch": 1.93, "learning_rate": 1.5010334545483885e-06, "loss": 0.1551, "step": 5943 }, { "epoch": 1.93, "learning_rate": 1.500231766345825e-06, "loss": 0.1696, "step": 5944 }, { "epoch": 1.93, "learning_rate": 1.4994302005052141e-06, "loss": 0.1374, "step": 5945 }, { "epoch": 1.93, "learning_rate": 1.4986287571246614e-06, "loss": 0.1661, "step": 5946 }, { "epoch": 1.93, "learning_rate": 1.4978274363022532e-06, "loss": 0.1468, "step": 5947 }, { "epoch": 1.93, "learning_rate": 1.4970262381360664e-06, "loss": 0.1601, "step": 5948 }, { "epoch": 1.93, "learning_rate": 1.4962251627241583e-06, "loss": 0.1645, "step": 5949 }, { "epoch": 1.93, "learning_rate": 1.4954242101645722e-06, "loss": 0.1607, "step": 5950 }, { "epoch": 1.93, "learning_rate": 1.4946233805553387e-06, "loss": 0.1585, "step": 5951 }, { "epoch": 1.93, "learning_rate": 1.4938226739944694e-06, "loss": 0.1724, "step": 5952 }, { "epoch": 1.93, "learning_rate": 1.4930220905799652e-06, "loss": 0.1657, "step": 5953 }, { "epoch": 1.93, "learning_rate": 1.4922216304098085e-06, "loss": 0.1504, "step": 5954 }, { "epoch": 1.93, "learning_rate": 1.4914212935819689e-06, "loss": 0.1571, "step": 5955 }, { "epoch": 1.93, "learning_rate": 1.4906210801943985e-06, "loss": 0.1408, "step": 5956 }, { "epoch": 1.93, "learning_rate": 1.4898209903450361e-06, "loss": 0.1663, "step": 5957 }, { "epoch": 1.93, "learning_rate": 1.489021024131806e-06, "loss": 0.1634, "step": 5958 }, { "epoch": 1.93, "learning_rate": 1.4882211816526144e-06, "loss": 0.1658, "step": 5959 }, { "epoch": 1.93, "learning_rate": 1.4874214630053562e-06, "loss": 0.1517, "step": 5960 }, { "epoch": 1.93, "learning_rate": 1.4866218682879088e-06, "loss": 0.1652, "step": 5961 }, { "epoch": 1.93, "learning_rate": 1.4858223975981334e-06, "loss": 0.1608, "step": 5962 }, { "epoch": 1.93, "learning_rate": 1.4850230510338792e-06, "loss": 0.1569, "step": 5963 }, { "epoch": 1.93, "learning_rate": 1.4842238286929777e-06, "loss": 0.1511, "step": 5964 }, { "epoch": 1.93, "learning_rate": 1.4834247306732457e-06, "loss": 0.1545, "step": 5965 }, { "epoch": 1.93, "learning_rate": 1.4826257570724856e-06, "loss": 0.1557, "step": 5966 }, { "epoch": 1.93, "learning_rate": 1.4818269079884845e-06, "loss": 0.1573, "step": 5967 }, { "epoch": 1.93, "learning_rate": 1.4810281835190132e-06, "loss": 0.156, "step": 5968 }, { "epoch": 1.93, "learning_rate": 1.4802295837618268e-06, "loss": 0.1541, "step": 5969 }, { "epoch": 1.93, "learning_rate": 1.479431108814668e-06, "loss": 0.183, "step": 5970 }, { "epoch": 1.93, "learning_rate": 1.4786327587752608e-06, "loss": 0.1537, "step": 5971 }, { "epoch": 1.94, "learning_rate": 1.4778345337413174e-06, "loss": 0.1568, "step": 5972 }, { "epoch": 1.94, "learning_rate": 1.4770364338105315e-06, "loss": 0.1582, "step": 5973 }, { "epoch": 1.94, "learning_rate": 1.4762384590805823e-06, "loss": 0.1655, "step": 5974 }, { "epoch": 1.94, "learning_rate": 1.475440609649136e-06, "loss": 0.1499, "step": 5975 }, { "epoch": 1.94, "learning_rate": 1.4746428856138395e-06, "loss": 0.1539, "step": 5976 }, { "epoch": 1.94, "learning_rate": 1.4738452870723286e-06, "loss": 0.1771, "step": 5977 }, { "epoch": 1.94, "learning_rate": 1.4730478141222194e-06, "loss": 0.1474, "step": 5978 }, { "epoch": 1.94, "learning_rate": 1.4722504668611172e-06, "loss": 0.1667, "step": 5979 }, { "epoch": 1.94, "learning_rate": 1.4714532453866084e-06, "loss": 0.1714, "step": 5980 }, { "epoch": 1.94, "learning_rate": 1.4706561497962644e-06, "loss": 0.1653, "step": 5981 }, { "epoch": 1.94, "learning_rate": 1.4698591801876435e-06, "loss": 0.1827, "step": 5982 }, { "epoch": 1.94, "learning_rate": 1.4690623366582856e-06, "loss": 0.1711, "step": 5983 }, { "epoch": 1.94, "learning_rate": 1.4682656193057189e-06, "loss": 0.1797, "step": 5984 }, { "epoch": 1.94, "learning_rate": 1.4674690282274517e-06, "loss": 0.1564, "step": 5985 }, { "epoch": 1.94, "learning_rate": 1.4666725635209794e-06, "loss": 0.1773, "step": 5986 }, { "epoch": 1.94, "learning_rate": 1.4658762252837821e-06, "loss": 0.1571, "step": 5987 }, { "epoch": 1.94, "learning_rate": 1.4650800136133238e-06, "loss": 0.1617, "step": 5988 }, { "epoch": 1.94, "learning_rate": 1.4642839286070537e-06, "loss": 0.1915, "step": 5989 }, { "epoch": 1.94, "learning_rate": 1.4634879703624027e-06, "loss": 0.1422, "step": 5990 }, { "epoch": 1.94, "learning_rate": 1.4626921389767915e-06, "loss": 0.1514, "step": 5991 }, { "epoch": 1.94, "learning_rate": 1.4618964345476203e-06, "loss": 0.1608, "step": 5992 }, { "epoch": 1.94, "learning_rate": 1.4611008571722748e-06, "loss": 0.175, "step": 5993 }, { "epoch": 1.94, "learning_rate": 1.4603054069481282e-06, "loss": 0.1527, "step": 5994 }, { "epoch": 1.94, "learning_rate": 1.4595100839725338e-06, "loss": 0.1632, "step": 5995 }, { "epoch": 1.94, "learning_rate": 1.4587148883428337e-06, "loss": 0.1536, "step": 5996 }, { "epoch": 1.94, "learning_rate": 1.45791982015635e-06, "loss": 0.1513, "step": 5997 }, { "epoch": 1.94, "learning_rate": 1.4571248795103921e-06, "loss": 0.1543, "step": 5998 }, { "epoch": 1.94, "learning_rate": 1.4563300665022534e-06, "loss": 0.16, "step": 5999 }, { "epoch": 1.94, "learning_rate": 1.4555353812292105e-06, "loss": 0.1715, "step": 6000 }, { "epoch": 1.94, "learning_rate": 1.4547408237885262e-06, "loss": 0.1539, "step": 6001 }, { "epoch": 1.94, "learning_rate": 1.4539463942774462e-06, "loss": 0.1725, "step": 6002 }, { "epoch": 1.95, "learning_rate": 1.4531520927932017e-06, "loss": 0.1507, "step": 6003 }, { "epoch": 1.95, "learning_rate": 1.452357919433006e-06, "loss": 0.1551, "step": 6004 }, { "epoch": 1.95, "learning_rate": 1.4515638742940585e-06, "loss": 0.1557, "step": 6005 }, { "epoch": 1.95, "learning_rate": 1.4507699574735436e-06, "loss": 0.1648, "step": 6006 }, { "epoch": 1.95, "learning_rate": 1.4499761690686287e-06, "loss": 0.1649, "step": 6007 }, { "epoch": 1.95, "learning_rate": 1.4491825091764656e-06, "loss": 0.1592, "step": 6008 }, { "epoch": 1.95, "learning_rate": 1.4483889778941904e-06, "loss": 0.162, "step": 6009 }, { "epoch": 1.95, "learning_rate": 1.447595575318924e-06, "loss": 0.1667, "step": 6010 }, { "epoch": 1.95, "learning_rate": 1.4468023015477722e-06, "loss": 0.1646, "step": 6011 }, { "epoch": 1.95, "learning_rate": 1.446009156677822e-06, "loss": 0.1498, "step": 6012 }, { "epoch": 1.95, "learning_rate": 1.4452161408061478e-06, "loss": 0.1748, "step": 6013 }, { "epoch": 1.95, "learning_rate": 1.4444232540298064e-06, "loss": 0.1479, "step": 6014 }, { "epoch": 1.95, "learning_rate": 1.44363049644584e-06, "loss": 0.1604, "step": 6015 }, { "epoch": 1.95, "learning_rate": 1.4428378681512755e-06, "loss": 0.1481, "step": 6016 }, { "epoch": 1.95, "learning_rate": 1.4420453692431197e-06, "loss": 0.1557, "step": 6017 }, { "epoch": 1.95, "learning_rate": 1.441252999818371e-06, "loss": 0.1459, "step": 6018 }, { "epoch": 1.95, "learning_rate": 1.440460759974004e-06, "loss": 0.1662, "step": 6019 }, { "epoch": 1.95, "learning_rate": 1.4396686498069844e-06, "loss": 0.1514, "step": 6020 }, { "epoch": 1.95, "learning_rate": 1.4388766694142553e-06, "loss": 0.142, "step": 6021 }, { "epoch": 1.95, "learning_rate": 1.4380848188927516e-06, "loss": 0.1656, "step": 6022 }, { "epoch": 1.95, "learning_rate": 1.4372930983393849e-06, "loss": 0.1706, "step": 6023 }, { "epoch": 1.95, "learning_rate": 1.4365015078510553e-06, "loss": 0.173, "step": 6024 }, { "epoch": 1.95, "learning_rate": 1.4357100475246463e-06, "loss": 0.1467, "step": 6025 }, { "epoch": 1.95, "learning_rate": 1.4349187174570226e-06, "loss": 0.1648, "step": 6026 }, { "epoch": 1.95, "learning_rate": 1.4341275177450389e-06, "loss": 0.1379, "step": 6027 }, { "epoch": 1.95, "learning_rate": 1.4333364484855277e-06, "loss": 0.1528, "step": 6028 }, { "epoch": 1.95, "learning_rate": 1.432545509775309e-06, "loss": 0.1738, "step": 6029 }, { "epoch": 1.95, "learning_rate": 1.4317547017111865e-06, "loss": 0.1643, "step": 6030 }, { "epoch": 1.95, "learning_rate": 1.4309640243899467e-06, "loss": 0.1612, "step": 6031 }, { "epoch": 1.95, "learning_rate": 1.4301734779083614e-06, "loss": 0.1575, "step": 6032 }, { "epoch": 1.95, "learning_rate": 1.4293830623631857e-06, "loss": 0.1404, "step": 6033 }, { "epoch": 1.96, "learning_rate": 1.4285927778511598e-06, "loss": 0.1709, "step": 6034 }, { "epoch": 1.96, "learning_rate": 1.4278026244690046e-06, "loss": 0.1824, "step": 6035 }, { "epoch": 1.96, "learning_rate": 1.427012602313429e-06, "loss": 0.1542, "step": 6036 }, { "epoch": 1.96, "learning_rate": 1.4262227114811233e-06, "loss": 0.155, "step": 6037 }, { "epoch": 1.96, "learning_rate": 1.4254329520687626e-06, "loss": 0.145, "step": 6038 }, { "epoch": 1.96, "learning_rate": 1.4246433241730062e-06, "loss": 0.1619, "step": 6039 }, { "epoch": 1.96, "learning_rate": 1.4238538278904973e-06, "loss": 0.1501, "step": 6040 }, { "epoch": 1.96, "learning_rate": 1.4230644633178603e-06, "loss": 0.1648, "step": 6041 }, { "epoch": 1.96, "learning_rate": 1.4222752305517093e-06, "loss": 0.1536, "step": 6042 }, { "epoch": 1.96, "learning_rate": 1.421486129688635e-06, "loss": 0.1724, "step": 6043 }, { "epoch": 1.96, "learning_rate": 1.4206971608252196e-06, "loss": 0.1686, "step": 6044 }, { "epoch": 1.96, "learning_rate": 1.4199083240580218e-06, "loss": 0.1477, "step": 6045 }, { "epoch": 1.96, "learning_rate": 1.41911961948359e-06, "loss": 0.1614, "step": 6046 }, { "epoch": 1.96, "learning_rate": 1.4183310471984532e-06, "loss": 0.1656, "step": 6047 }, { "epoch": 1.96, "learning_rate": 1.4175426072991234e-06, "loss": 0.1555, "step": 6048 }, { "epoch": 1.96, "learning_rate": 1.416754299882101e-06, "loss": 0.1546, "step": 6049 }, { "epoch": 1.96, "learning_rate": 1.415966125043864e-06, "loss": 0.1469, "step": 6050 }, { "epoch": 1.96, "learning_rate": 1.415178082880881e-06, "loss": 0.1509, "step": 6051 }, { "epoch": 1.96, "learning_rate": 1.4143901734895973e-06, "loss": 0.1572, "step": 6052 }, { "epoch": 1.96, "learning_rate": 1.4136023969664471e-06, "loss": 0.1595, "step": 6053 }, { "epoch": 1.96, "learning_rate": 1.4128147534078469e-06, "loss": 0.1769, "step": 6054 }, { "epoch": 1.96, "learning_rate": 1.4120272429101955e-06, "loss": 0.1626, "step": 6055 }, { "epoch": 1.96, "learning_rate": 1.4112398655698772e-06, "loss": 0.1578, "step": 6056 }, { "epoch": 1.96, "learning_rate": 1.4104526214832595e-06, "loss": 0.1607, "step": 6057 }, { "epoch": 1.96, "learning_rate": 1.4096655107466943e-06, "loss": 0.173, "step": 6058 }, { "epoch": 1.96, "learning_rate": 1.4088785334565145e-06, "loss": 0.1686, "step": 6059 }, { "epoch": 1.96, "learning_rate": 1.4080916897090391e-06, "loss": 0.1624, "step": 6060 }, { "epoch": 1.96, "learning_rate": 1.4073049796005705e-06, "loss": 0.1519, "step": 6061 }, { "epoch": 1.96, "learning_rate": 1.4065184032273942e-06, "loss": 0.1581, "step": 6062 }, { "epoch": 1.96, "learning_rate": 1.4057319606857795e-06, "loss": 0.1564, "step": 6063 }, { "epoch": 1.97, "learning_rate": 1.4049456520719805e-06, "loss": 0.1536, "step": 6064 }, { "epoch": 1.97, "learning_rate": 1.404159477482231e-06, "loss": 0.158, "step": 6065 }, { "epoch": 1.97, "learning_rate": 1.403373437012755e-06, "loss": 0.1523, "step": 6066 }, { "epoch": 1.97, "learning_rate": 1.4025875307597528e-06, "loss": 0.1644, "step": 6067 }, { "epoch": 1.97, "learning_rate": 1.4018017588194132e-06, "loss": 0.177, "step": 6068 }, { "epoch": 1.97, "learning_rate": 1.401016121287907e-06, "loss": 0.1556, "step": 6069 }, { "epoch": 1.97, "learning_rate": 1.4002306182613885e-06, "loss": 0.1552, "step": 6070 }, { "epoch": 1.97, "learning_rate": 1.3994452498359963e-06, "loss": 0.1555, "step": 6071 }, { "epoch": 1.97, "learning_rate": 1.39866001610785e-06, "loss": 0.1675, "step": 6072 }, { "epoch": 1.97, "learning_rate": 1.3978749171730577e-06, "loss": 0.1745, "step": 6073 }, { "epoch": 1.97, "learning_rate": 1.397089953127704e-06, "loss": 0.1628, "step": 6074 }, { "epoch": 1.97, "learning_rate": 1.3963051240678652e-06, "loss": 0.1657, "step": 6075 }, { "epoch": 1.97, "learning_rate": 1.3955204300895937e-06, "loss": 0.1517, "step": 6076 }, { "epoch": 1.97, "learning_rate": 1.3947358712889292e-06, "loss": 0.1509, "step": 6077 }, { "epoch": 1.97, "learning_rate": 1.3939514477618944e-06, "loss": 0.1787, "step": 6078 }, { "epoch": 1.97, "learning_rate": 1.3931671596044946e-06, "loss": 0.1651, "step": 6079 }, { "epoch": 1.97, "learning_rate": 1.392383006912721e-06, "loss": 0.1492, "step": 6080 }, { "epoch": 1.97, "learning_rate": 1.3915989897825424e-06, "loss": 0.1424, "step": 6081 }, { "epoch": 1.97, "learning_rate": 1.3908151083099195e-06, "loss": 0.1299, "step": 6082 }, { "epoch": 1.97, "learning_rate": 1.3900313625907886e-06, "loss": 0.1535, "step": 6083 }, { "epoch": 1.97, "learning_rate": 1.3892477527210734e-06, "loss": 0.1544, "step": 6084 }, { "epoch": 1.97, "learning_rate": 1.3884642787966806e-06, "loss": 0.1564, "step": 6085 }, { "epoch": 1.97, "learning_rate": 1.3876809409134994e-06, "loss": 0.1523, "step": 6086 }, { "epoch": 1.97, "learning_rate": 1.3868977391674033e-06, "loss": 0.1815, "step": 6087 }, { "epoch": 1.97, "learning_rate": 1.386114673654248e-06, "loss": 0.1533, "step": 6088 }, { "epoch": 1.97, "learning_rate": 1.3853317444698744e-06, "loss": 0.1545, "step": 6089 }, { "epoch": 1.97, "learning_rate": 1.3845489517101036e-06, "loss": 0.1581, "step": 6090 }, { "epoch": 1.97, "learning_rate": 1.3837662954707426e-06, "loss": 0.164, "step": 6091 }, { "epoch": 1.97, "learning_rate": 1.3829837758475808e-06, "loss": 0.1606, "step": 6092 }, { "epoch": 1.97, "learning_rate": 1.3822013929363914e-06, "loss": 0.1848, "step": 6093 }, { "epoch": 1.97, "learning_rate": 1.3814191468329307e-06, "loss": 0.1674, "step": 6094 }, { "epoch": 1.98, "learning_rate": 1.3806370376329388e-06, "loss": 0.1645, "step": 6095 }, { "epoch": 1.98, "learning_rate": 1.3798550654321347e-06, "loss": 0.1505, "step": 6096 }, { "epoch": 1.98, "learning_rate": 1.379073230326229e-06, "loss": 0.1499, "step": 6097 }, { "epoch": 1.98, "learning_rate": 1.3782915324109075e-06, "loss": 0.1554, "step": 6098 }, { "epoch": 1.98, "learning_rate": 1.3775099717818432e-06, "loss": 0.1516, "step": 6099 }, { "epoch": 1.98, "learning_rate": 1.376728548534692e-06, "loss": 0.1535, "step": 6100 }, { "epoch": 1.98, "learning_rate": 1.3759472627650926e-06, "loss": 0.1714, "step": 6101 }, { "epoch": 1.98, "learning_rate": 1.3751661145686673e-06, "loss": 0.1568, "step": 6102 }, { "epoch": 1.98, "learning_rate": 1.3743851040410183e-06, "loss": 0.1513, "step": 6103 }, { "epoch": 1.98, "learning_rate": 1.3736042312777381e-06, "loss": 0.1462, "step": 6104 }, { "epoch": 1.98, "learning_rate": 1.3728234963743931e-06, "loss": 0.1643, "step": 6105 }, { "epoch": 1.98, "learning_rate": 1.3720428994265427e-06, "loss": 0.174, "step": 6106 }, { "epoch": 1.98, "learning_rate": 1.3712624405297209e-06, "loss": 0.163, "step": 6107 }, { "epoch": 1.98, "learning_rate": 1.3704821197794491e-06, "loss": 0.1539, "step": 6108 }, { "epoch": 1.98, "learning_rate": 1.369701937271231e-06, "loss": 0.1577, "step": 6109 }, { "epoch": 1.98, "learning_rate": 1.3689218931005543e-06, "loss": 0.1302, "step": 6110 }, { "epoch": 1.98, "learning_rate": 1.368141987362889e-06, "loss": 0.1672, "step": 6111 }, { "epoch": 1.98, "learning_rate": 1.3673622201536852e-06, "loss": 0.1523, "step": 6112 }, { "epoch": 1.98, "learning_rate": 1.3665825915683829e-06, "loss": 0.1612, "step": 6113 }, { "epoch": 1.98, "learning_rate": 1.3658031017023977e-06, "loss": 0.1515, "step": 6114 }, { "epoch": 1.98, "learning_rate": 1.3650237506511333e-06, "loss": 0.1678, "step": 6115 }, { "epoch": 1.98, "learning_rate": 1.3642445385099746e-06, "loss": 0.1655, "step": 6116 }, { "epoch": 1.98, "learning_rate": 1.363465465374289e-06, "loss": 0.1645, "step": 6117 }, { "epoch": 1.98, "learning_rate": 1.362686531339428e-06, "loss": 0.1784, "step": 6118 }, { "epoch": 1.98, "learning_rate": 1.3619077365007266e-06, "loss": 0.1565, "step": 6119 }, { "epoch": 1.98, "learning_rate": 1.3611290809534997e-06, "loss": 0.1575, "step": 6120 }, { "epoch": 1.98, "learning_rate": 1.3603505647930481e-06, "loss": 0.167, "step": 6121 }, { "epoch": 1.98, "learning_rate": 1.3595721881146548e-06, "loss": 0.147, "step": 6122 }, { "epoch": 1.98, "learning_rate": 1.3587939510135856e-06, "loss": 0.1612, "step": 6123 }, { "epoch": 1.98, "learning_rate": 1.3580158535850884e-06, "loss": 0.1586, "step": 6124 }, { "epoch": 1.98, "learning_rate": 1.357237895924396e-06, "loss": 0.1752, "step": 6125 }, { "epoch": 1.99, "learning_rate": 1.3564600781267234e-06, "loss": 0.1577, "step": 6126 }, { "epoch": 1.99, "learning_rate": 1.3556824002872648e-06, "loss": 0.1564, "step": 6127 }, { "epoch": 1.99, "learning_rate": 1.3549048625012046e-06, "loss": 0.1576, "step": 6128 }, { "epoch": 1.99, "learning_rate": 1.354127464863703e-06, "loss": 0.1529, "step": 6129 }, { "epoch": 1.99, "learning_rate": 1.3533502074699065e-06, "loss": 0.1661, "step": 6130 }, { "epoch": 1.99, "learning_rate": 1.3525730904149443e-06, "loss": 0.1606, "step": 6131 }, { "epoch": 1.99, "learning_rate": 1.351796113793928e-06, "loss": 0.1548, "step": 6132 }, { "epoch": 1.99, "learning_rate": 1.3510192777019527e-06, "loss": 0.1546, "step": 6133 }, { "epoch": 1.99, "learning_rate": 1.3502425822340925e-06, "loss": 0.1621, "step": 6134 }, { "epoch": 1.99, "learning_rate": 1.3494660274854122e-06, "loss": 0.1522, "step": 6135 }, { "epoch": 1.99, "learning_rate": 1.3486896135509503e-06, "loss": 0.155, "step": 6136 }, { "epoch": 1.99, "learning_rate": 1.3479133405257355e-06, "loss": 0.1513, "step": 6137 }, { "epoch": 1.99, "learning_rate": 1.3471372085047743e-06, "loss": 0.156, "step": 6138 }, { "epoch": 1.99, "learning_rate": 1.3463612175830578e-06, "loss": 0.1691, "step": 6139 }, { "epoch": 1.99, "learning_rate": 1.3455853678555605e-06, "loss": 0.1584, "step": 6140 }, { "epoch": 1.99, "learning_rate": 1.3448096594172383e-06, "loss": 0.1525, "step": 6141 }, { "epoch": 1.99, "learning_rate": 1.344034092363032e-06, "loss": 0.1837, "step": 6142 }, { "epoch": 1.99, "learning_rate": 1.343258666787861e-06, "loss": 0.162, "step": 6143 }, { "epoch": 1.99, "learning_rate": 1.3424833827866312e-06, "loss": 0.1534, "step": 6144 }, { "epoch": 1.99, "learning_rate": 1.3417082404542295e-06, "loss": 0.1459, "step": 6145 }, { "epoch": 1.99, "learning_rate": 1.3409332398855263e-06, "loss": 0.159, "step": 6146 }, { "epoch": 1.99, "learning_rate": 1.3401583811753735e-06, "loss": 0.1668, "step": 6147 }, { "epoch": 1.99, "learning_rate": 1.339383664418607e-06, "loss": 0.1616, "step": 6148 }, { "epoch": 1.99, "learning_rate": 1.3386090897100442e-06, "loss": 0.1593, "step": 6149 }, { "epoch": 1.99, "learning_rate": 1.3378346571444866e-06, "loss": 0.1619, "step": 6150 }, { "epoch": 1.99, "learning_rate": 1.3370603668167156e-06, "loss": 0.1651, "step": 6151 }, { "epoch": 1.99, "learning_rate": 1.3362862188214977e-06, "loss": 0.168, "step": 6152 }, { "epoch": 1.99, "learning_rate": 1.3355122132535806e-06, "loss": 0.1504, "step": 6153 }, { "epoch": 1.99, "learning_rate": 1.3347383502076955e-06, "loss": 0.1567, "step": 6154 }, { "epoch": 1.99, "learning_rate": 1.333964629778556e-06, "loss": 0.1617, "step": 6155 }, { "epoch": 1.99, "learning_rate": 1.3331910520608576e-06, "loss": 0.1501, "step": 6156 }, { "epoch": 2.0, "learning_rate": 1.3324176171492798e-06, "loss": 0.162, "step": 6157 }, { "epoch": 2.0, "learning_rate": 1.3316443251384808e-06, "loss": 0.156, "step": 6158 }, { "epoch": 2.0, "learning_rate": 1.3308711761231074e-06, "loss": 0.1568, "step": 6159 }, { "epoch": 2.0, "learning_rate": 1.3300981701977834e-06, "loss": 0.1635, "step": 6160 }, { "epoch": 2.0, "learning_rate": 1.3293253074571178e-06, "loss": 0.148, "step": 6161 }, { "epoch": 2.0, "learning_rate": 1.3285525879957011e-06, "loss": 0.1676, "step": 6162 }, { "epoch": 2.0, "learning_rate": 1.3277800119081077e-06, "loss": 0.1594, "step": 6163 }, { "epoch": 2.0, "learning_rate": 1.3270075792888937e-06, "loss": 0.1434, "step": 6164 }, { "epoch": 2.0, "learning_rate": 1.3262352902325944e-06, "loss": 0.1364, "step": 6165 }, { "epoch": 2.0, "learning_rate": 1.325463144833735e-06, "loss": 0.1476, "step": 6166 }, { "epoch": 2.0, "learning_rate": 1.324691143186814e-06, "loss": 0.1691, "step": 6167 }, { "epoch": 2.0, "learning_rate": 1.323919285386321e-06, "loss": 0.1537, "step": 6168 }, { "epoch": 2.0, "learning_rate": 1.3231475715267217e-06, "loss": 0.1501, "step": 6169 }, { "epoch": 2.0, "learning_rate": 1.3223760017024661e-06, "loss": 0.1541, "step": 6170 }, { "epoch": 2.0, "learning_rate": 1.3216045760079882e-06, "loss": 0.157, "step": 6171 }, { "epoch": 2.0, "learning_rate": 1.3208332945377022e-06, "loss": 0.1624, "step": 6172 }, { "epoch": 2.0, "learning_rate": 1.3200621573860068e-06, "loss": 0.1562, "step": 6173 }, { "epoch": 2.0, "learning_rate": 1.3192911646472796e-06, "loss": 0.1588, "step": 6174 }, { "epoch": 2.0, "learning_rate": 1.3185203164158838e-06, "loss": 0.1513, "step": 6175 }, { "epoch": 2.0, "learning_rate": 1.3177496127861635e-06, "loss": 0.1439, "step": 6176 }, { "epoch": 2.0, "learning_rate": 1.3169790538524457e-06, "loss": 0.1568, "step": 6177 }, { "epoch": 2.0, "learning_rate": 1.316208639709039e-06, "loss": 0.157, "step": 6178 }, { "epoch": 2.0, "learning_rate": 1.3154383704502349e-06, "loss": 0.1446, "step": 6179 }, { "epoch": 2.0, "learning_rate": 1.3146682461703069e-06, "loss": 0.1452, "step": 6180 }, { "epoch": 2.0, "learning_rate": 1.3138982669635117e-06, "loss": 0.1289, "step": 6181 }, { "epoch": 2.0, "learning_rate": 1.313128432924084e-06, "loss": 0.1431, "step": 6182 }, { "epoch": 2.0, "learning_rate": 1.3123587441462487e-06, "loss": 0.1526, "step": 6183 }, { "epoch": 2.0, "learning_rate": 1.3115892007242046e-06, "loss": 0.1397, "step": 6184 }, { "epoch": 2.0, "learning_rate": 1.3108198027521374e-06, "loss": 0.1515, "step": 6185 }, { "epoch": 2.0, "learning_rate": 1.3100505503242156e-06, "loss": 0.1337, "step": 6186 }, { "epoch": 2.0, "learning_rate": 1.3092814435345845e-06, "loss": 0.1392, "step": 6187 }, { "epoch": 2.01, "learning_rate": 1.3085124824773797e-06, "loss": 0.1584, "step": 6188 }, { "epoch": 2.01, "learning_rate": 1.307743667246711e-06, "loss": 0.1529, "step": 6189 }, { "epoch": 2.01, "learning_rate": 1.306974997936677e-06, "loss": 0.1402, "step": 6190 }, { "epoch": 2.01, "learning_rate": 1.3062064746413522e-06, "loss": 0.1351, "step": 6191 }, { "epoch": 2.01, "learning_rate": 1.3054380974547998e-06, "loss": 0.1427, "step": 6192 }, { "epoch": 2.01, "learning_rate": 1.3046698664710595e-06, "loss": 0.1511, "step": 6193 }, { "epoch": 2.01, "learning_rate": 1.3039017817841553e-06, "loss": 0.1412, "step": 6194 }, { "epoch": 2.01, "learning_rate": 1.3031338434880952e-06, "loss": 0.144, "step": 6195 }, { "epoch": 2.01, "learning_rate": 1.3023660516768638e-06, "loss": 0.1402, "step": 6196 }, { "epoch": 2.01, "learning_rate": 1.301598406444436e-06, "loss": 0.1431, "step": 6197 }, { "epoch": 2.01, "learning_rate": 1.3008309078847605e-06, "loss": 0.1515, "step": 6198 }, { "epoch": 2.01, "learning_rate": 1.3000635560917735e-06, "loss": 0.1449, "step": 6199 }, { "epoch": 2.01, "learning_rate": 1.2992963511593904e-06, "loss": 0.1422, "step": 6200 }, { "epoch": 2.01, "learning_rate": 1.2985292931815105e-06, "loss": 0.1576, "step": 6201 }, { "epoch": 2.01, "learning_rate": 1.2977623822520141e-06, "loss": 0.1466, "step": 6202 }, { "epoch": 2.01, "learning_rate": 1.296995618464763e-06, "loss": 0.1524, "step": 6203 }, { "epoch": 2.01, "learning_rate": 1.2962290019136028e-06, "loss": 0.1456, "step": 6204 }, { "epoch": 2.01, "learning_rate": 1.2954625326923602e-06, "loss": 0.1401, "step": 6205 }, { "epoch": 2.01, "learning_rate": 1.294696210894842e-06, "loss": 0.1367, "step": 6206 }, { "epoch": 2.01, "learning_rate": 1.2939300366148389e-06, "loss": 0.1542, "step": 6207 }, { "epoch": 2.01, "learning_rate": 1.2931640099461237e-06, "loss": 0.1591, "step": 6208 }, { "epoch": 2.01, "learning_rate": 1.2923981309824507e-06, "loss": 0.147, "step": 6209 }, { "epoch": 2.01, "learning_rate": 1.291632399817557e-06, "loss": 0.1414, "step": 6210 }, { "epoch": 2.01, "learning_rate": 1.2908668165451577e-06, "loss": 0.1455, "step": 6211 }, { "epoch": 2.01, "learning_rate": 1.290101381258957e-06, "loss": 0.1419, "step": 6212 }, { "epoch": 2.01, "learning_rate": 1.289336094052632e-06, "loss": 0.1538, "step": 6213 }, { "epoch": 2.01, "learning_rate": 1.288570955019851e-06, "loss": 0.1537, "step": 6214 }, { "epoch": 2.01, "learning_rate": 1.2878059642542566e-06, "loss": 0.1471, "step": 6215 }, { "epoch": 2.01, "learning_rate": 1.2870411218494778e-06, "loss": 0.1499, "step": 6216 }, { "epoch": 2.01, "learning_rate": 1.2862764278991236e-06, "loss": 0.1647, "step": 6217 }, { "epoch": 2.01, "learning_rate": 1.2855118824967833e-06, "loss": 0.1375, "step": 6218 }, { "epoch": 2.02, "learning_rate": 1.2847474857360332e-06, "loss": 0.1382, "step": 6219 }, { "epoch": 2.02, "learning_rate": 1.2839832377104245e-06, "loss": 0.1301, "step": 6220 }, { "epoch": 2.02, "learning_rate": 1.2832191385134972e-06, "loss": 0.1696, "step": 6221 }, { "epoch": 2.02, "learning_rate": 1.2824551882387664e-06, "loss": 0.1496, "step": 6222 }, { "epoch": 2.02, "learning_rate": 1.2816913869797353e-06, "loss": 0.1545, "step": 6223 }, { "epoch": 2.02, "learning_rate": 1.2809277348298838e-06, "loss": 0.148, "step": 6224 }, { "epoch": 2.02, "learning_rate": 1.2801642318826759e-06, "loss": 0.1691, "step": 6225 }, { "epoch": 2.02, "learning_rate": 1.279400878231557e-06, "loss": 0.1434, "step": 6226 }, { "epoch": 2.02, "learning_rate": 1.2786376739699547e-06, "loss": 0.157, "step": 6227 }, { "epoch": 2.02, "learning_rate": 1.2778746191912778e-06, "loss": 0.1528, "step": 6228 }, { "epoch": 2.02, "learning_rate": 1.2771117139889155e-06, "loss": 0.1557, "step": 6229 }, { "epoch": 2.02, "learning_rate": 1.276348958456241e-06, "loss": 0.1334, "step": 6230 }, { "epoch": 2.02, "learning_rate": 1.2755863526866087e-06, "loss": 0.1532, "step": 6231 }, { "epoch": 2.02, "learning_rate": 1.2748238967733529e-06, "loss": 0.1351, "step": 6232 }, { "epoch": 2.02, "learning_rate": 1.2740615908097915e-06, "loss": 0.1485, "step": 6233 }, { "epoch": 2.02, "learning_rate": 1.2732994348892237e-06, "loss": 0.1424, "step": 6234 }, { "epoch": 2.02, "learning_rate": 1.2725374291049296e-06, "loss": 0.1454, "step": 6235 }, { "epoch": 2.02, "learning_rate": 1.2717755735501725e-06, "loss": 0.1377, "step": 6236 }, { "epoch": 2.02, "learning_rate": 1.2710138683181937e-06, "loss": 0.1609, "step": 6237 }, { "epoch": 2.02, "learning_rate": 1.2702523135022205e-06, "loss": 0.1599, "step": 6238 }, { "epoch": 2.02, "learning_rate": 1.2694909091954588e-06, "loss": 0.1564, "step": 6239 }, { "epoch": 2.02, "learning_rate": 1.2687296554910978e-06, "loss": 0.1506, "step": 6240 }, { "epoch": 2.02, "learning_rate": 1.2679685524823082e-06, "loss": 0.1396, "step": 6241 }, { "epoch": 2.02, "learning_rate": 1.2672076002622386e-06, "loss": 0.1425, "step": 6242 }, { "epoch": 2.02, "learning_rate": 1.2664467989240265e-06, "loss": 0.1677, "step": 6243 }, { "epoch": 2.02, "learning_rate": 1.2656861485607828e-06, "loss": 0.1359, "step": 6244 }, { "epoch": 2.02, "learning_rate": 1.264925649265607e-06, "loss": 0.1378, "step": 6245 }, { "epoch": 2.02, "learning_rate": 1.2641653011315746e-06, "loss": 0.1406, "step": 6246 }, { "epoch": 2.02, "learning_rate": 1.2634051042517453e-06, "loss": 0.1492, "step": 6247 }, { "epoch": 2.02, "learning_rate": 1.2626450587191602e-06, "loss": 0.1519, "step": 6248 }, { "epoch": 2.02, "learning_rate": 1.2618851646268416e-06, "loss": 0.156, "step": 6249 }, { "epoch": 2.03, "learning_rate": 1.2611254220677937e-06, "loss": 0.1599, "step": 6250 }, { "epoch": 2.03, "learning_rate": 1.260365831134999e-06, "loss": 0.1525, "step": 6251 }, { "epoch": 2.03, "learning_rate": 1.259606391921428e-06, "loss": 0.1632, "step": 6252 }, { "epoch": 2.03, "learning_rate": 1.2588471045200256e-06, "loss": 0.1589, "step": 6253 }, { "epoch": 2.03, "learning_rate": 1.2580879690237224e-06, "loss": 0.1602, "step": 6254 }, { "epoch": 2.03, "learning_rate": 1.257328985525429e-06, "loss": 0.1431, "step": 6255 }, { "epoch": 2.03, "learning_rate": 1.256570154118038e-06, "loss": 0.1401, "step": 6256 }, { "epoch": 2.03, "learning_rate": 1.2558114748944226e-06, "loss": 0.1467, "step": 6257 }, { "epoch": 2.03, "learning_rate": 1.2550529479474383e-06, "loss": 0.1474, "step": 6258 }, { "epoch": 2.03, "learning_rate": 1.2542945733699216e-06, "loss": 0.1417, "step": 6259 }, { "epoch": 2.03, "learning_rate": 1.2535363512546892e-06, "loss": 0.153, "step": 6260 }, { "epoch": 2.03, "learning_rate": 1.2527782816945405e-06, "loss": 0.1467, "step": 6261 }, { "epoch": 2.03, "learning_rate": 1.2520203647822563e-06, "loss": 0.1456, "step": 6262 }, { "epoch": 2.03, "learning_rate": 1.2512626006105977e-06, "loss": 0.1598, "step": 6263 }, { "epoch": 2.03, "learning_rate": 1.2505049892723083e-06, "loss": 0.1495, "step": 6264 }, { "epoch": 2.03, "learning_rate": 1.2497475308601134e-06, "loss": 0.1424, "step": 6265 }, { "epoch": 2.03, "learning_rate": 1.248990225466715e-06, "loss": 0.1473, "step": 6266 }, { "epoch": 2.03, "learning_rate": 1.2482330731848044e-06, "loss": 0.1382, "step": 6267 }, { "epoch": 2.03, "learning_rate": 1.2474760741070465e-06, "loss": 0.1446, "step": 6268 }, { "epoch": 2.03, "learning_rate": 1.246719228326092e-06, "loss": 0.1555, "step": 6269 }, { "epoch": 2.03, "learning_rate": 1.2459625359345712e-06, "loss": 0.1551, "step": 6270 }, { "epoch": 2.03, "learning_rate": 1.2452059970250957e-06, "loss": 0.1461, "step": 6271 }, { "epoch": 2.03, "learning_rate": 1.2444496116902602e-06, "loss": 0.1355, "step": 6272 }, { "epoch": 2.03, "learning_rate": 1.2436933800226352e-06, "loss": 0.1575, "step": 6273 }, { "epoch": 2.03, "learning_rate": 1.2429373021147808e-06, "loss": 0.1401, "step": 6274 }, { "epoch": 2.03, "learning_rate": 1.2421813780592294e-06, "loss": 0.1409, "step": 6275 }, { "epoch": 2.03, "learning_rate": 1.2414256079485021e-06, "loss": 0.1457, "step": 6276 }, { "epoch": 2.03, "learning_rate": 1.240669991875096e-06, "loss": 0.1458, "step": 6277 }, { "epoch": 2.03, "learning_rate": 1.2399145299314913e-06, "loss": 0.1471, "step": 6278 }, { "epoch": 2.03, "learning_rate": 1.2391592222101497e-06, "loss": 0.1363, "step": 6279 }, { "epoch": 2.03, "learning_rate": 1.2384040688035135e-06, "loss": 0.1489, "step": 6280 }, { "epoch": 2.04, "learning_rate": 1.2376490698040069e-06, "loss": 0.1427, "step": 6281 }, { "epoch": 2.04, "learning_rate": 1.236894225304032e-06, "loss": 0.1381, "step": 6282 }, { "epoch": 2.04, "learning_rate": 1.2361395353959776e-06, "loss": 0.1378, "step": 6283 }, { "epoch": 2.04, "learning_rate": 1.2353850001722084e-06, "loss": 0.1421, "step": 6284 }, { "epoch": 2.04, "learning_rate": 1.2346306197250727e-06, "loss": 0.1629, "step": 6285 }, { "epoch": 2.04, "learning_rate": 1.2338763941468993e-06, "loss": 0.1441, "step": 6286 }, { "epoch": 2.04, "learning_rate": 1.2331223235299983e-06, "loss": 0.1519, "step": 6287 }, { "epoch": 2.04, "learning_rate": 1.2323684079666604e-06, "loss": 0.1473, "step": 6288 }, { "epoch": 2.04, "learning_rate": 1.2316146475491578e-06, "loss": 0.1435, "step": 6289 }, { "epoch": 2.04, "learning_rate": 1.2308610423697446e-06, "loss": 0.1396, "step": 6290 }, { "epoch": 2.04, "learning_rate": 1.2301075925206524e-06, "loss": 0.1685, "step": 6291 }, { "epoch": 2.04, "learning_rate": 1.2293542980940974e-06, "loss": 0.1394, "step": 6292 }, { "epoch": 2.04, "learning_rate": 1.2286011591822756e-06, "loss": 0.1531, "step": 6293 }, { "epoch": 2.04, "learning_rate": 1.2278481758773636e-06, "loss": 0.1556, "step": 6294 }, { "epoch": 2.04, "learning_rate": 1.2270953482715197e-06, "loss": 0.1636, "step": 6295 }, { "epoch": 2.04, "learning_rate": 1.2263426764568835e-06, "loss": 0.148, "step": 6296 }, { "epoch": 2.04, "learning_rate": 1.2255901605255715e-06, "loss": 0.1434, "step": 6297 }, { "epoch": 2.04, "learning_rate": 1.224837800569689e-06, "loss": 0.1643, "step": 6298 }, { "epoch": 2.04, "learning_rate": 1.224085596681314e-06, "loss": 0.1493, "step": 6299 }, { "epoch": 2.04, "learning_rate": 1.22333354895251e-06, "loss": 0.1493, "step": 6300 }, { "epoch": 2.04, "learning_rate": 1.2225816574753208e-06, "loss": 0.1575, "step": 6301 }, { "epoch": 2.04, "learning_rate": 1.2218299223417702e-06, "loss": 0.1607, "step": 6302 }, { "epoch": 2.04, "learning_rate": 1.2210783436438644e-06, "loss": 0.153, "step": 6303 }, { "epoch": 2.04, "learning_rate": 1.2203269214735866e-06, "loss": 0.1408, "step": 6304 }, { "epoch": 2.04, "learning_rate": 1.2195756559229072e-06, "loss": 0.1478, "step": 6305 }, { "epoch": 2.04, "learning_rate": 1.2188245470837702e-06, "loss": 0.1467, "step": 6306 }, { "epoch": 2.04, "learning_rate": 1.218073595048108e-06, "loss": 0.1523, "step": 6307 }, { "epoch": 2.04, "learning_rate": 1.2173227999078264e-06, "loss": 0.1624, "step": 6308 }, { "epoch": 2.04, "learning_rate": 1.2165721617548172e-06, "loss": 0.1502, "step": 6309 }, { "epoch": 2.04, "learning_rate": 1.2158216806809505e-06, "loss": 0.1383, "step": 6310 }, { "epoch": 2.05, "learning_rate": 1.2150713567780786e-06, "loss": 0.1479, "step": 6311 }, { "epoch": 2.05, "learning_rate": 1.2143211901380341e-06, "loss": 0.1466, "step": 6312 }, { "epoch": 2.05, "learning_rate": 1.2135711808526282e-06, "loss": 0.145, "step": 6313 }, { "epoch": 2.05, "learning_rate": 1.2128213290136578e-06, "loss": 0.1498, "step": 6314 }, { "epoch": 2.05, "learning_rate": 1.212071634712895e-06, "loss": 0.1433, "step": 6315 }, { "epoch": 2.05, "learning_rate": 1.211322098042096e-06, "loss": 0.1528, "step": 6316 }, { "epoch": 2.05, "learning_rate": 1.2105727190929967e-06, "loss": 0.1346, "step": 6317 }, { "epoch": 2.05, "learning_rate": 1.209823497957314e-06, "loss": 0.1335, "step": 6318 }, { "epoch": 2.05, "learning_rate": 1.2090744347267452e-06, "loss": 0.1436, "step": 6319 }, { "epoch": 2.05, "learning_rate": 1.2083255294929697e-06, "loss": 0.1469, "step": 6320 }, { "epoch": 2.05, "learning_rate": 1.2075767823476439e-06, "loss": 0.1502, "step": 6321 }, { "epoch": 2.05, "learning_rate": 1.2068281933824084e-06, "loss": 0.1475, "step": 6322 }, { "epoch": 2.05, "learning_rate": 1.2060797626888828e-06, "loss": 0.1477, "step": 6323 }, { "epoch": 2.05, "learning_rate": 1.2053314903586685e-06, "loss": 0.1373, "step": 6324 }, { "epoch": 2.05, "learning_rate": 1.2045833764833461e-06, "loss": 0.14, "step": 6325 }, { "epoch": 2.05, "learning_rate": 1.2038354211544781e-06, "loss": 0.1526, "step": 6326 }, { "epoch": 2.05, "learning_rate": 1.2030876244636078e-06, "loss": 0.1568, "step": 6327 }, { "epoch": 2.05, "learning_rate": 1.202339986502255e-06, "loss": 0.1407, "step": 6328 }, { "epoch": 2.05, "learning_rate": 1.2015925073619275e-06, "loss": 0.1533, "step": 6329 }, { "epoch": 2.05, "learning_rate": 1.2008451871341056e-06, "loss": 0.1487, "step": 6330 }, { "epoch": 2.05, "learning_rate": 1.200098025910258e-06, "loss": 0.138, "step": 6331 }, { "epoch": 2.05, "learning_rate": 1.1993510237818269e-06, "loss": 0.1617, "step": 6332 }, { "epoch": 2.05, "learning_rate": 1.1986041808402393e-06, "loss": 0.1462, "step": 6333 }, { "epoch": 2.05, "learning_rate": 1.1978574971769025e-06, "loss": 0.1519, "step": 6334 }, { "epoch": 2.05, "learning_rate": 1.1971109728832003e-06, "loss": 0.1536, "step": 6335 }, { "epoch": 2.05, "learning_rate": 1.196364608050504e-06, "loss": 0.1391, "step": 6336 }, { "epoch": 2.05, "learning_rate": 1.1956184027701576e-06, "loss": 0.147, "step": 6337 }, { "epoch": 2.05, "learning_rate": 1.1948723571334932e-06, "loss": 0.1587, "step": 6338 }, { "epoch": 2.05, "learning_rate": 1.1941264712318167e-06, "loss": 0.1427, "step": 6339 }, { "epoch": 2.05, "learning_rate": 1.1933807451564186e-06, "loss": 0.1337, "step": 6340 }, { "epoch": 2.05, "learning_rate": 1.192635178998568e-06, "loss": 0.1418, "step": 6341 }, { "epoch": 2.06, "learning_rate": 1.191889772849515e-06, "loss": 0.1462, "step": 6342 }, { "epoch": 2.06, "learning_rate": 1.1911445268004917e-06, "loss": 0.1608, "step": 6343 }, { "epoch": 2.06, "learning_rate": 1.1903994409427063e-06, "loss": 0.1532, "step": 6344 }, { "epoch": 2.06, "learning_rate": 1.1896545153673517e-06, "loss": 0.1648, "step": 6345 }, { "epoch": 2.06, "learning_rate": 1.1889097501655991e-06, "loss": 0.1386, "step": 6346 }, { "epoch": 2.06, "learning_rate": 1.1881651454286008e-06, "loss": 0.1532, "step": 6347 }, { "epoch": 2.06, "learning_rate": 1.1874207012474891e-06, "loss": 0.1466, "step": 6348 }, { "epoch": 2.06, "learning_rate": 1.186676417713377e-06, "loss": 0.1497, "step": 6349 }, { "epoch": 2.06, "learning_rate": 1.1859322949173572e-06, "loss": 0.1554, "step": 6350 }, { "epoch": 2.06, "learning_rate": 1.1851883329505043e-06, "loss": 0.1396, "step": 6351 }, { "epoch": 2.06, "learning_rate": 1.1844445319038694e-06, "loss": 0.1429, "step": 6352 }, { "epoch": 2.06, "learning_rate": 1.18370089186849e-06, "loss": 0.1439, "step": 6353 }, { "epoch": 2.06, "learning_rate": 1.1829574129353777e-06, "loss": 0.142, "step": 6354 }, { "epoch": 2.06, "learning_rate": 1.182214095195528e-06, "loss": 0.1417, "step": 6355 }, { "epoch": 2.06, "learning_rate": 1.181470938739917e-06, "loss": 0.1409, "step": 6356 }, { "epoch": 2.06, "learning_rate": 1.1807279436594967e-06, "loss": 0.1404, "step": 6357 }, { "epoch": 2.06, "learning_rate": 1.1799851100452067e-06, "loss": 0.1445, "step": 6358 }, { "epoch": 2.06, "learning_rate": 1.1792424379879582e-06, "loss": 0.1572, "step": 6359 }, { "epoch": 2.06, "learning_rate": 1.1784999275786515e-06, "loss": 0.1365, "step": 6360 }, { "epoch": 2.06, "learning_rate": 1.177757578908159e-06, "loss": 0.1522, "step": 6361 }, { "epoch": 2.06, "learning_rate": 1.17701539206734e-06, "loss": 0.1444, "step": 6362 }, { "epoch": 2.06, "learning_rate": 1.1762733671470285e-06, "loss": 0.1459, "step": 6363 }, { "epoch": 2.06, "learning_rate": 1.1755315042380425e-06, "loss": 0.1441, "step": 6364 }, { "epoch": 2.06, "learning_rate": 1.1747898034311782e-06, "loss": 0.1607, "step": 6365 }, { "epoch": 2.06, "learning_rate": 1.1740482648172132e-06, "loss": 0.1527, "step": 6366 }, { "epoch": 2.06, "learning_rate": 1.1733068884869053e-06, "loss": 0.1493, "step": 6367 }, { "epoch": 2.06, "learning_rate": 1.172565674530989e-06, "loss": 0.1297, "step": 6368 }, { "epoch": 2.06, "learning_rate": 1.1718246230401856e-06, "loss": 0.1437, "step": 6369 }, { "epoch": 2.06, "learning_rate": 1.1710837341051892e-06, "loss": 0.1375, "step": 6370 }, { "epoch": 2.06, "learning_rate": 1.1703430078166792e-06, "loss": 0.149, "step": 6371 }, { "epoch": 2.06, "learning_rate": 1.169602444265313e-06, "loss": 0.1464, "step": 6372 }, { "epoch": 2.07, "learning_rate": 1.168862043541728e-06, "loss": 0.1383, "step": 6373 }, { "epoch": 2.07, "learning_rate": 1.1681218057365429e-06, "loss": 0.1414, "step": 6374 }, { "epoch": 2.07, "learning_rate": 1.167381730940356e-06, "loss": 0.1464, "step": 6375 }, { "epoch": 2.07, "learning_rate": 1.1666418192437434e-06, "loss": 0.1619, "step": 6376 }, { "epoch": 2.07, "learning_rate": 1.1659020707372643e-06, "loss": 0.1565, "step": 6377 }, { "epoch": 2.07, "learning_rate": 1.1651624855114565e-06, "loss": 0.152, "step": 6378 }, { "epoch": 2.07, "learning_rate": 1.1644230636568384e-06, "loss": 0.1607, "step": 6379 }, { "epoch": 2.07, "learning_rate": 1.1636838052639081e-06, "loss": 0.15, "step": 6380 }, { "epoch": 2.07, "learning_rate": 1.1629447104231435e-06, "loss": 0.1322, "step": 6381 }, { "epoch": 2.07, "learning_rate": 1.1622057792250033e-06, "loss": 0.157, "step": 6382 }, { "epoch": 2.07, "learning_rate": 1.1614670117599231e-06, "loss": 0.1564, "step": 6383 }, { "epoch": 2.07, "learning_rate": 1.1607284081183245e-06, "loss": 0.1459, "step": 6384 }, { "epoch": 2.07, "learning_rate": 1.1599899683906026e-06, "loss": 0.1355, "step": 6385 }, { "epoch": 2.07, "learning_rate": 1.1592516926671367e-06, "loss": 0.1527, "step": 6386 }, { "epoch": 2.07, "learning_rate": 1.1585135810382836e-06, "loss": 0.1452, "step": 6387 }, { "epoch": 2.07, "learning_rate": 1.1577756335943818e-06, "loss": 0.1404, "step": 6388 }, { "epoch": 2.07, "learning_rate": 1.1570378504257499e-06, "loss": 0.1578, "step": 6389 }, { "epoch": 2.07, "learning_rate": 1.156300231622682e-06, "loss": 0.1409, "step": 6390 }, { "epoch": 2.07, "learning_rate": 1.1555627772754595e-06, "loss": 0.1535, "step": 6391 }, { "epoch": 2.07, "learning_rate": 1.1548254874743365e-06, "loss": 0.142, "step": 6392 }, { "epoch": 2.07, "learning_rate": 1.154088362309553e-06, "loss": 0.1407, "step": 6393 }, { "epoch": 2.07, "learning_rate": 1.1533514018713238e-06, "loss": 0.154, "step": 6394 }, { "epoch": 2.07, "learning_rate": 1.1526146062498464e-06, "loss": 0.1652, "step": 6395 }, { "epoch": 2.07, "learning_rate": 1.1518779755352977e-06, "loss": 0.1342, "step": 6396 }, { "epoch": 2.07, "learning_rate": 1.1511415098178336e-06, "loss": 0.1314, "step": 6397 }, { "epoch": 2.07, "learning_rate": 1.1504052091875917e-06, "loss": 0.1504, "step": 6398 }, { "epoch": 2.07, "learning_rate": 1.1496690737346864e-06, "loss": 0.1335, "step": 6399 }, { "epoch": 2.07, "learning_rate": 1.148933103549214e-06, "loss": 0.1433, "step": 6400 }, { "epoch": 2.07, "learning_rate": 1.1481972987212505e-06, "loss": 0.1524, "step": 6401 }, { "epoch": 2.07, "learning_rate": 1.1474616593408513e-06, "loss": 0.1444, "step": 6402 }, { "epoch": 2.07, "learning_rate": 1.1467261854980513e-06, "loss": 0.1377, "step": 6403 }, { "epoch": 2.08, "learning_rate": 1.1459908772828658e-06, "loss": 0.1616, "step": 6404 }, { "epoch": 2.08, "learning_rate": 1.1452557347852885e-06, "loss": 0.1341, "step": 6405 }, { "epoch": 2.08, "learning_rate": 1.1445207580952956e-06, "loss": 0.1476, "step": 6406 }, { "epoch": 2.08, "learning_rate": 1.143785947302839e-06, "loss": 0.1527, "step": 6407 }, { "epoch": 2.08, "learning_rate": 1.143051302497853e-06, "loss": 0.1573, "step": 6408 }, { "epoch": 2.08, "learning_rate": 1.1423168237702515e-06, "loss": 0.1525, "step": 6409 }, { "epoch": 2.08, "learning_rate": 1.1415825112099274e-06, "loss": 0.1523, "step": 6410 }, { "epoch": 2.08, "learning_rate": 1.1408483649067541e-06, "loss": 0.1669, "step": 6411 }, { "epoch": 2.08, "learning_rate": 1.1401143849505816e-06, "loss": 0.147, "step": 6412 }, { "epoch": 2.08, "learning_rate": 1.1393805714312456e-06, "loss": 0.1594, "step": 6413 }, { "epoch": 2.08, "learning_rate": 1.138646924438554e-06, "loss": 0.1471, "step": 6414 }, { "epoch": 2.08, "learning_rate": 1.1379134440623018e-06, "loss": 0.1611, "step": 6415 }, { "epoch": 2.08, "learning_rate": 1.137180130392257e-06, "loss": 0.1534, "step": 6416 }, { "epoch": 2.08, "learning_rate": 1.1364469835181712e-06, "loss": 0.155, "step": 6417 }, { "epoch": 2.08, "learning_rate": 1.1357140035297745e-06, "loss": 0.1437, "step": 6418 }, { "epoch": 2.08, "learning_rate": 1.1349811905167762e-06, "loss": 0.1531, "step": 6419 }, { "epoch": 2.08, "learning_rate": 1.134248544568867e-06, "loss": 0.1577, "step": 6420 }, { "epoch": 2.08, "learning_rate": 1.1335160657757121e-06, "loss": 0.1279, "step": 6421 }, { "epoch": 2.08, "learning_rate": 1.1327837542269645e-06, "loss": 0.1492, "step": 6422 }, { "epoch": 2.08, "learning_rate": 1.1320516100122487e-06, "loss": 0.1464, "step": 6423 }, { "epoch": 2.08, "learning_rate": 1.1313196332211728e-06, "loss": 0.1604, "step": 6424 }, { "epoch": 2.08, "learning_rate": 1.130587823943324e-06, "loss": 0.1541, "step": 6425 }, { "epoch": 2.08, "learning_rate": 1.1298561822682687e-06, "loss": 0.1325, "step": 6426 }, { "epoch": 2.08, "learning_rate": 1.1291247082855528e-06, "loss": 0.1569, "step": 6427 }, { "epoch": 2.08, "learning_rate": 1.1283934020847015e-06, "loss": 0.1505, "step": 6428 }, { "epoch": 2.08, "learning_rate": 1.1276622637552203e-06, "loss": 0.1627, "step": 6429 }, { "epoch": 2.08, "learning_rate": 1.126931293386592e-06, "loss": 0.1419, "step": 6430 }, { "epoch": 2.08, "learning_rate": 1.1262004910682811e-06, "loss": 0.1316, "step": 6431 }, { "epoch": 2.08, "learning_rate": 1.1254698568897308e-06, "loss": 0.1361, "step": 6432 }, { "epoch": 2.08, "learning_rate": 1.124739390940363e-06, "loss": 0.1538, "step": 6433 }, { "epoch": 2.08, "learning_rate": 1.1240090933095806e-06, "loss": 0.1498, "step": 6434 }, { "epoch": 2.09, "learning_rate": 1.1232789640867644e-06, "loss": 0.1444, "step": 6435 }, { "epoch": 2.09, "learning_rate": 1.1225490033612755e-06, "loss": 0.1523, "step": 6436 }, { "epoch": 2.09, "learning_rate": 1.1218192112224547e-06, "loss": 0.1382, "step": 6437 }, { "epoch": 2.09, "learning_rate": 1.1210895877596195e-06, "loss": 0.156, "step": 6438 }, { "epoch": 2.09, "learning_rate": 1.12036013306207e-06, "loss": 0.1624, "step": 6439 }, { "epoch": 2.09, "learning_rate": 1.1196308472190845e-06, "loss": 0.1569, "step": 6440 }, { "epoch": 2.09, "learning_rate": 1.1189017303199198e-06, "loss": 0.1563, "step": 6441 }, { "epoch": 2.09, "learning_rate": 1.1181727824538147e-06, "loss": 0.1533, "step": 6442 }, { "epoch": 2.09, "learning_rate": 1.1174440037099815e-06, "loss": 0.1388, "step": 6443 }, { "epoch": 2.09, "learning_rate": 1.1167153941776205e-06, "loss": 0.1501, "step": 6444 }, { "epoch": 2.09, "learning_rate": 1.1159869539459018e-06, "loss": 0.1552, "step": 6445 }, { "epoch": 2.09, "learning_rate": 1.1152586831039835e-06, "loss": 0.1417, "step": 6446 }, { "epoch": 2.09, "learning_rate": 1.1145305817409962e-06, "loss": 0.1519, "step": 6447 }, { "epoch": 2.09, "learning_rate": 1.1138026499460532e-06, "loss": 0.1464, "step": 6448 }, { "epoch": 2.09, "learning_rate": 1.1130748878082467e-06, "loss": 0.1411, "step": 6449 }, { "epoch": 2.09, "learning_rate": 1.1123472954166473e-06, "loss": 0.1478, "step": 6450 }, { "epoch": 2.09, "learning_rate": 1.1116198728603061e-06, "loss": 0.1405, "step": 6451 }, { "epoch": 2.09, "learning_rate": 1.1108926202282505e-06, "loss": 0.1364, "step": 6452 }, { "epoch": 2.09, "learning_rate": 1.110165537609492e-06, "loss": 0.1756, "step": 6453 }, { "epoch": 2.09, "learning_rate": 1.1094386250930164e-06, "loss": 0.1546, "step": 6454 }, { "epoch": 2.09, "learning_rate": 1.1087118827677915e-06, "loss": 0.1448, "step": 6455 }, { "epoch": 2.09, "learning_rate": 1.1079853107227634e-06, "loss": 0.1406, "step": 6456 }, { "epoch": 2.09, "learning_rate": 1.1072589090468571e-06, "loss": 0.1462, "step": 6457 }, { "epoch": 2.09, "learning_rate": 1.1065326778289782e-06, "loss": 0.1455, "step": 6458 }, { "epoch": 2.09, "learning_rate": 1.1058066171580092e-06, "loss": 0.1694, "step": 6459 }, { "epoch": 2.09, "learning_rate": 1.1050807271228146e-06, "loss": 0.1481, "step": 6460 }, { "epoch": 2.09, "learning_rate": 1.1043550078122342e-06, "loss": 0.1502, "step": 6461 }, { "epoch": 2.09, "learning_rate": 1.1036294593150898e-06, "loss": 0.14, "step": 6462 }, { "epoch": 2.09, "learning_rate": 1.1029040817201819e-06, "loss": 0.1524, "step": 6463 }, { "epoch": 2.09, "learning_rate": 1.1021788751162893e-06, "loss": 0.1475, "step": 6464 }, { "epoch": 2.09, "learning_rate": 1.1014538395921704e-06, "loss": 0.1323, "step": 6465 }, { "epoch": 2.1, "learning_rate": 1.1007289752365635e-06, "loss": 0.1425, "step": 6466 }, { "epoch": 2.1, "learning_rate": 1.1000042821381823e-06, "loss": 0.1493, "step": 6467 }, { "epoch": 2.1, "learning_rate": 1.0992797603857257e-06, "loss": 0.1608, "step": 6468 }, { "epoch": 2.1, "learning_rate": 1.0985554100678647e-06, "loss": 0.1437, "step": 6469 }, { "epoch": 2.1, "learning_rate": 1.0978312312732562e-06, "loss": 0.1505, "step": 6470 }, { "epoch": 2.1, "learning_rate": 1.09710722409053e-06, "loss": 0.1492, "step": 6471 }, { "epoch": 2.1, "learning_rate": 1.0963833886082987e-06, "loss": 0.1499, "step": 6472 }, { "epoch": 2.1, "learning_rate": 1.0956597249151532e-06, "loss": 0.1502, "step": 6473 }, { "epoch": 2.1, "learning_rate": 1.0949362330996605e-06, "loss": 0.1608, "step": 6474 }, { "epoch": 2.1, "learning_rate": 1.094212913250373e-06, "loss": 0.139, "step": 6475 }, { "epoch": 2.1, "learning_rate": 1.0934897654558134e-06, "loss": 0.1554, "step": 6476 }, { "epoch": 2.1, "learning_rate": 1.0927667898044927e-06, "loss": 0.1438, "step": 6477 }, { "epoch": 2.1, "learning_rate": 1.092043986384893e-06, "loss": 0.1576, "step": 6478 }, { "epoch": 2.1, "learning_rate": 1.091321355285479e-06, "loss": 0.148, "step": 6479 }, { "epoch": 2.1, "learning_rate": 1.0905988965946942e-06, "loss": 0.1487, "step": 6480 }, { "epoch": 2.1, "learning_rate": 1.0898766104009606e-06, "loss": 0.1456, "step": 6481 }, { "epoch": 2.1, "learning_rate": 1.0891544967926795e-06, "loss": 0.147, "step": 6482 }, { "epoch": 2.1, "learning_rate": 1.0884325558582283e-06, "loss": 0.138, "step": 6483 }, { "epoch": 2.1, "learning_rate": 1.0877107876859688e-06, "loss": 0.1469, "step": 6484 }, { "epoch": 2.1, "learning_rate": 1.086989192364236e-06, "loss": 0.153, "step": 6485 }, { "epoch": 2.1, "learning_rate": 1.0862677699813471e-06, "loss": 0.1572, "step": 6486 }, { "epoch": 2.1, "learning_rate": 1.0855465206255972e-06, "loss": 0.1507, "step": 6487 }, { "epoch": 2.1, "learning_rate": 1.0848254443852602e-06, "loss": 0.1521, "step": 6488 }, { "epoch": 2.1, "learning_rate": 1.084104541348589e-06, "loss": 0.1513, "step": 6489 }, { "epoch": 2.1, "learning_rate": 1.0833838116038156e-06, "loss": 0.1499, "step": 6490 }, { "epoch": 2.1, "learning_rate": 1.0826632552391484e-06, "loss": 0.1511, "step": 6491 }, { "epoch": 2.1, "learning_rate": 1.081942872342779e-06, "loss": 0.1497, "step": 6492 }, { "epoch": 2.1, "learning_rate": 1.0812226630028738e-06, "loss": 0.1556, "step": 6493 }, { "epoch": 2.1, "learning_rate": 1.0805026273075797e-06, "loss": 0.1445, "step": 6494 }, { "epoch": 2.1, "learning_rate": 1.0797827653450222e-06, "loss": 0.1546, "step": 6495 }, { "epoch": 2.1, "learning_rate": 1.0790630772033057e-06, "loss": 0.1602, "step": 6496 }, { "epoch": 2.11, "learning_rate": 1.0783435629705134e-06, "loss": 0.1472, "step": 6497 }, { "epoch": 2.11, "learning_rate": 1.0776242227347044e-06, "loss": 0.1632, "step": 6498 }, { "epoch": 2.11, "learning_rate": 1.0769050565839228e-06, "loss": 0.1691, "step": 6499 }, { "epoch": 2.11, "learning_rate": 1.0761860646061838e-06, "loss": 0.1406, "step": 6500 }, { "epoch": 2.11, "learning_rate": 1.0754672468894889e-06, "loss": 0.1562, "step": 6501 }, { "epoch": 2.11, "learning_rate": 1.0747486035218116e-06, "loss": 0.1468, "step": 6502 }, { "epoch": 2.11, "learning_rate": 1.0740301345911075e-06, "loss": 0.1674, "step": 6503 }, { "epoch": 2.11, "learning_rate": 1.0733118401853112e-06, "loss": 0.163, "step": 6504 }, { "epoch": 2.11, "learning_rate": 1.0725937203923327e-06, "loss": 0.153, "step": 6505 }, { "epoch": 2.11, "learning_rate": 1.0718757753000665e-06, "loss": 0.1565, "step": 6506 }, { "epoch": 2.11, "learning_rate": 1.071158004996378e-06, "loss": 0.1493, "step": 6507 }, { "epoch": 2.11, "learning_rate": 1.070440409569119e-06, "loss": 0.1465, "step": 6508 }, { "epoch": 2.11, "learning_rate": 1.0697229891061141e-06, "loss": 0.1446, "step": 6509 }, { "epoch": 2.11, "learning_rate": 1.0690057436951689e-06, "loss": 0.1398, "step": 6510 }, { "epoch": 2.11, "learning_rate": 1.068288673424068e-06, "loss": 0.1436, "step": 6511 }, { "epoch": 2.11, "learning_rate": 1.067571778380573e-06, "loss": 0.1444, "step": 6512 }, { "epoch": 2.11, "learning_rate": 1.0668550586524256e-06, "loss": 0.1368, "step": 6513 }, { "epoch": 2.11, "learning_rate": 1.066138514327345e-06, "loss": 0.1554, "step": 6514 }, { "epoch": 2.11, "learning_rate": 1.0654221454930305e-06, "loss": 0.148, "step": 6515 }, { "epoch": 2.11, "learning_rate": 1.0647059522371565e-06, "loss": 0.1548, "step": 6516 }, { "epoch": 2.11, "learning_rate": 1.0639899346473792e-06, "loss": 0.154, "step": 6517 }, { "epoch": 2.11, "learning_rate": 1.0632740928113323e-06, "loss": 0.1655, "step": 6518 }, { "epoch": 2.11, "learning_rate": 1.0625584268166278e-06, "loss": 0.141, "step": 6519 }, { "epoch": 2.11, "learning_rate": 1.0618429367508564e-06, "loss": 0.1584, "step": 6520 }, { "epoch": 2.11, "learning_rate": 1.061127622701588e-06, "loss": 0.1561, "step": 6521 }, { "epoch": 2.11, "learning_rate": 1.0604124847563674e-06, "loss": 0.1476, "step": 6522 }, { "epoch": 2.11, "learning_rate": 1.0596975230027243e-06, "loss": 0.1468, "step": 6523 }, { "epoch": 2.11, "learning_rate": 1.05898273752816e-06, "loss": 0.1549, "step": 6524 }, { "epoch": 2.11, "learning_rate": 1.0582681284201587e-06, "loss": 0.1521, "step": 6525 }, { "epoch": 2.11, "learning_rate": 1.0575536957661814e-06, "loss": 0.1416, "step": 6526 }, { "epoch": 2.12, "learning_rate": 1.056839439653668e-06, "loss": 0.1507, "step": 6527 }, { "epoch": 2.12, "learning_rate": 1.056125360170037e-06, "loss": 0.164, "step": 6528 }, { "epoch": 2.12, "learning_rate": 1.0554114574026823e-06, "loss": 0.1535, "step": 6529 }, { "epoch": 2.12, "learning_rate": 1.0546977314389822e-06, "loss": 0.1597, "step": 6530 }, { "epoch": 2.12, "learning_rate": 1.0539841823662867e-06, "loss": 0.1549, "step": 6531 }, { "epoch": 2.12, "learning_rate": 1.0532708102719303e-06, "loss": 0.1489, "step": 6532 }, { "epoch": 2.12, "learning_rate": 1.0525576152432204e-06, "loss": 0.1508, "step": 6533 }, { "epoch": 2.12, "learning_rate": 1.051844597367446e-06, "loss": 0.1623, "step": 6534 }, { "epoch": 2.12, "learning_rate": 1.0511317567318737e-06, "loss": 0.1416, "step": 6535 }, { "epoch": 2.12, "learning_rate": 1.0504190934237484e-06, "loss": 0.156, "step": 6536 }, { "epoch": 2.12, "learning_rate": 1.0497066075302939e-06, "loss": 0.1469, "step": 6537 }, { "epoch": 2.12, "learning_rate": 1.0489942991387088e-06, "loss": 0.1381, "step": 6538 }, { "epoch": 2.12, "learning_rate": 1.0482821683361767e-06, "loss": 0.1478, "step": 6539 }, { "epoch": 2.12, "learning_rate": 1.0475702152098522e-06, "loss": 0.1562, "step": 6540 }, { "epoch": 2.12, "learning_rate": 1.0468584398468729e-06, "loss": 0.1455, "step": 6541 }, { "epoch": 2.12, "learning_rate": 1.0461468423343532e-06, "loss": 0.1432, "step": 6542 }, { "epoch": 2.12, "learning_rate": 1.0454354227593855e-06, "loss": 0.1844, "step": 6543 }, { "epoch": 2.12, "learning_rate": 1.0447241812090408e-06, "loss": 0.1599, "step": 6544 }, { "epoch": 2.12, "learning_rate": 1.0440131177703692e-06, "loss": 0.159, "step": 6545 }, { "epoch": 2.12, "learning_rate": 1.0433022325303956e-06, "loss": 0.1414, "step": 6546 }, { "epoch": 2.12, "learning_rate": 1.042591525576127e-06, "loss": 0.1538, "step": 6547 }, { "epoch": 2.12, "learning_rate": 1.041880996994547e-06, "loss": 0.1476, "step": 6548 }, { "epoch": 2.12, "learning_rate": 1.0411706468726173e-06, "loss": 0.1406, "step": 6549 }, { "epoch": 2.12, "learning_rate": 1.040460475297278e-06, "loss": 0.1401, "step": 6550 }, { "epoch": 2.12, "learning_rate": 1.039750482355447e-06, "loss": 0.1304, "step": 6551 }, { "epoch": 2.12, "learning_rate": 1.0390406681340212e-06, "loss": 0.1497, "step": 6552 }, { "epoch": 2.12, "learning_rate": 1.0383310327198728e-06, "loss": 0.1549, "step": 6553 }, { "epoch": 2.12, "learning_rate": 1.0376215761998578e-06, "loss": 0.1524, "step": 6554 }, { "epoch": 2.12, "learning_rate": 1.0369122986608044e-06, "loss": 0.1428, "step": 6555 }, { "epoch": 2.12, "learning_rate": 1.0362032001895214e-06, "loss": 0.1584, "step": 6556 }, { "epoch": 2.12, "learning_rate": 1.0354942808727962e-06, "loss": 0.1565, "step": 6557 }, { "epoch": 2.13, "learning_rate": 1.0347855407973933e-06, "loss": 0.1527, "step": 6558 }, { "epoch": 2.13, "learning_rate": 1.034076980050057e-06, "loss": 0.1405, "step": 6559 }, { "epoch": 2.13, "learning_rate": 1.0333685987175052e-06, "loss": 0.1456, "step": 6560 }, { "epoch": 2.13, "learning_rate": 1.0326603968864407e-06, "loss": 0.1378, "step": 6561 }, { "epoch": 2.13, "learning_rate": 1.0319523746435367e-06, "loss": 0.1601, "step": 6562 }, { "epoch": 2.13, "learning_rate": 1.0312445320754522e-06, "loss": 0.1536, "step": 6563 }, { "epoch": 2.13, "learning_rate": 1.0305368692688175e-06, "loss": 0.1585, "step": 6564 }, { "epoch": 2.13, "learning_rate": 1.0298293863102444e-06, "loss": 0.1433, "step": 6565 }, { "epoch": 2.13, "learning_rate": 1.0291220832863219e-06, "loss": 0.162, "step": 6566 }, { "epoch": 2.13, "learning_rate": 1.0284149602836174e-06, "loss": 0.1653, "step": 6567 }, { "epoch": 2.13, "learning_rate": 1.0277080173886766e-06, "loss": 0.1671, "step": 6568 }, { "epoch": 2.13, "learning_rate": 1.0270012546880207e-06, "loss": 0.1433, "step": 6569 }, { "epoch": 2.13, "learning_rate": 1.0262946722681513e-06, "loss": 0.1489, "step": 6570 }, { "epoch": 2.13, "learning_rate": 1.0255882702155476e-06, "loss": 0.141, "step": 6571 }, { "epoch": 2.13, "learning_rate": 1.024882048616666e-06, "loss": 0.1526, "step": 6572 }, { "epoch": 2.13, "learning_rate": 1.0241760075579418e-06, "loss": 0.148, "step": 6573 }, { "epoch": 2.13, "learning_rate": 1.0234701471257868e-06, "loss": 0.14, "step": 6574 }, { "epoch": 2.13, "learning_rate": 1.0227644674065923e-06, "loss": 0.1558, "step": 6575 }, { "epoch": 2.13, "learning_rate": 1.0220589684867269e-06, "loss": 0.1565, "step": 6576 }, { "epoch": 2.13, "learning_rate": 1.021353650452535e-06, "loss": 0.157, "step": 6577 }, { "epoch": 2.13, "learning_rate": 1.0206485133903424e-06, "loss": 0.1406, "step": 6578 }, { "epoch": 2.13, "learning_rate": 1.0199435573864502e-06, "loss": 0.1411, "step": 6579 }, { "epoch": 2.13, "learning_rate": 1.0192387825271384e-06, "loss": 0.1363, "step": 6580 }, { "epoch": 2.13, "learning_rate": 1.018534188898665e-06, "loss": 0.1599, "step": 6581 }, { "epoch": 2.13, "learning_rate": 1.0178297765872651e-06, "loss": 0.1583, "step": 6582 }, { "epoch": 2.13, "learning_rate": 1.0171255456791531e-06, "loss": 0.1552, "step": 6583 }, { "epoch": 2.13, "learning_rate": 1.016421496260517e-06, "loss": 0.1381, "step": 6584 }, { "epoch": 2.13, "learning_rate": 1.0157176284175293e-06, "loss": 0.1487, "step": 6585 }, { "epoch": 2.13, "learning_rate": 1.0150139422363342e-06, "loss": 0.15, "step": 6586 }, { "epoch": 2.13, "learning_rate": 1.0143104378030565e-06, "loss": 0.1419, "step": 6587 }, { "epoch": 2.13, "learning_rate": 1.013607115203799e-06, "loss": 0.1637, "step": 6588 }, { "epoch": 2.14, "learning_rate": 1.012903974524641e-06, "loss": 0.1346, "step": 6589 }, { "epoch": 2.14, "learning_rate": 1.0122010158516412e-06, "loss": 0.1601, "step": 6590 }, { "epoch": 2.14, "learning_rate": 1.0114982392708325e-06, "loss": 0.1445, "step": 6591 }, { "epoch": 2.14, "learning_rate": 1.010795644868231e-06, "loss": 0.1429, "step": 6592 }, { "epoch": 2.14, "learning_rate": 1.0100932327298244e-06, "loss": 0.1349, "step": 6593 }, { "epoch": 2.14, "learning_rate": 1.0093910029415843e-06, "loss": 0.1561, "step": 6594 }, { "epoch": 2.14, "learning_rate": 1.0086889555894545e-06, "loss": 0.1486, "step": 6595 }, { "epoch": 2.14, "learning_rate": 1.0079870907593592e-06, "loss": 0.1508, "step": 6596 }, { "epoch": 2.14, "learning_rate": 1.0072854085372005e-06, "loss": 0.1534, "step": 6597 }, { "epoch": 2.14, "learning_rate": 1.0065839090088572e-06, "loss": 0.1569, "step": 6598 }, { "epoch": 2.14, "learning_rate": 1.0058825922601866e-06, "loss": 0.133, "step": 6599 }, { "epoch": 2.14, "learning_rate": 1.005181458377022e-06, "loss": 0.1564, "step": 6600 }, { "epoch": 2.14, "learning_rate": 1.0044805074451757e-06, "loss": 0.1544, "step": 6601 }, { "epoch": 2.14, "learning_rate": 1.003779739550438e-06, "loss": 0.1373, "step": 6602 }, { "epoch": 2.14, "learning_rate": 1.003079154778575e-06, "loss": 0.1697, "step": 6603 }, { "epoch": 2.14, "learning_rate": 1.0023787532153325e-06, "loss": 0.1687, "step": 6604 }, { "epoch": 2.14, "learning_rate": 1.0016785349464326e-06, "loss": 0.1699, "step": 6605 }, { "epoch": 2.14, "learning_rate": 1.0009785000575747e-06, "loss": 0.1638, "step": 6606 }, { "epoch": 2.14, "learning_rate": 1.0002786486344379e-06, "loss": 0.1587, "step": 6607 }, { "epoch": 2.14, "learning_rate": 9.995789807626754e-07, "loss": 0.1499, "step": 6608 }, { "epoch": 2.14, "learning_rate": 9.988794965279203e-07, "loss": 0.167, "step": 6609 }, { "epoch": 2.14, "learning_rate": 9.981801960157827e-07, "loss": 0.1545, "step": 6610 }, { "epoch": 2.14, "learning_rate": 9.974810793118505e-07, "loss": 0.1509, "step": 6611 }, { "epoch": 2.14, "learning_rate": 9.967821465016893e-07, "loss": 0.1508, "step": 6612 }, { "epoch": 2.14, "learning_rate": 9.960833976708398e-07, "loss": 0.141, "step": 6613 }, { "epoch": 2.14, "learning_rate": 9.953848329048248e-07, "loss": 0.1463, "step": 6614 }, { "epoch": 2.14, "learning_rate": 9.94686452289139e-07, "loss": 0.1406, "step": 6615 }, { "epoch": 2.14, "learning_rate": 9.939882559092604e-07, "loss": 0.1505, "step": 6616 }, { "epoch": 2.14, "learning_rate": 9.93290243850638e-07, "loss": 0.1459, "step": 6617 }, { "epoch": 2.14, "learning_rate": 9.925924161987057e-07, "loss": 0.1429, "step": 6618 }, { "epoch": 2.14, "learning_rate": 9.918947730388682e-07, "loss": 0.1437, "step": 6619 }, { "epoch": 2.15, "learning_rate": 9.911973144565105e-07, "loss": 0.1584, "step": 6620 }, { "epoch": 2.15, "learning_rate": 9.90500040536996e-07, "loss": 0.1533, "step": 6621 }, { "epoch": 2.15, "learning_rate": 9.898029513656618e-07, "loss": 0.1395, "step": 6622 }, { "epoch": 2.15, "learning_rate": 9.891060470278286e-07, "loss": 0.1385, "step": 6623 }, { "epoch": 2.15, "learning_rate": 9.884093276087871e-07, "loss": 0.1463, "step": 6624 }, { "epoch": 2.15, "learning_rate": 9.877127931938111e-07, "loss": 0.1379, "step": 6625 }, { "epoch": 2.15, "learning_rate": 9.87016443868149e-07, "loss": 0.1427, "step": 6626 }, { "epoch": 2.15, "learning_rate": 9.863202797170273e-07, "loss": 0.1546, "step": 6627 }, { "epoch": 2.15, "learning_rate": 9.8562430082565e-07, "loss": 0.1487, "step": 6628 }, { "epoch": 2.15, "learning_rate": 9.849285072791978e-07, "loss": 0.1379, "step": 6629 }, { "epoch": 2.15, "learning_rate": 9.8423289916283e-07, "loss": 0.1528, "step": 6630 }, { "epoch": 2.15, "learning_rate": 9.835374765616809e-07, "loss": 0.1317, "step": 6631 }, { "epoch": 2.15, "learning_rate": 9.82842239560864e-07, "loss": 0.1441, "step": 6632 }, { "epoch": 2.15, "learning_rate": 9.821471882454703e-07, "loss": 0.1633, "step": 6633 }, { "epoch": 2.15, "learning_rate": 9.814523227005662e-07, "loss": 0.149, "step": 6634 }, { "epoch": 2.15, "learning_rate": 9.807576430111975e-07, "loss": 0.1365, "step": 6635 }, { "epoch": 2.15, "learning_rate": 9.800631492623867e-07, "loss": 0.1483, "step": 6636 }, { "epoch": 2.15, "learning_rate": 9.793688415391304e-07, "loss": 0.1561, "step": 6637 }, { "epoch": 2.15, "learning_rate": 9.786747199264088e-07, "loss": 0.1539, "step": 6638 }, { "epoch": 2.15, "learning_rate": 9.779807845091722e-07, "loss": 0.1497, "step": 6639 }, { "epoch": 2.15, "learning_rate": 9.77287035372355e-07, "loss": 0.1444, "step": 6640 }, { "epoch": 2.15, "learning_rate": 9.76593472600863e-07, "loss": 0.1541, "step": 6641 }, { "epoch": 2.15, "learning_rate": 9.75900096279582e-07, "loss": 0.1439, "step": 6642 }, { "epoch": 2.15, "learning_rate": 9.752069064933758e-07, "loss": 0.1474, "step": 6643 }, { "epoch": 2.15, "learning_rate": 9.745139033270812e-07, "loss": 0.1444, "step": 6644 }, { "epoch": 2.15, "learning_rate": 9.738210868655187e-07, "loss": 0.1614, "step": 6645 }, { "epoch": 2.15, "learning_rate": 9.73128457193479e-07, "loss": 0.1514, "step": 6646 }, { "epoch": 2.15, "learning_rate": 9.724360143957367e-07, "loss": 0.1505, "step": 6647 }, { "epoch": 2.15, "learning_rate": 9.717437585570375e-07, "loss": 0.1527, "step": 6648 }, { "epoch": 2.15, "learning_rate": 9.710516897621072e-07, "loss": 0.1477, "step": 6649 }, { "epoch": 2.15, "learning_rate": 9.703598080956488e-07, "loss": 0.1577, "step": 6650 }, { "epoch": 2.16, "learning_rate": 9.696681136423422e-07, "loss": 0.1422, "step": 6651 }, { "epoch": 2.16, "learning_rate": 9.689766064868434e-07, "loss": 0.1512, "step": 6652 }, { "epoch": 2.16, "learning_rate": 9.682852867137865e-07, "loss": 0.1452, "step": 6653 }, { "epoch": 2.16, "learning_rate": 9.675941544077833e-07, "loss": 0.156, "step": 6654 }, { "epoch": 2.16, "learning_rate": 9.6690320965342e-07, "loss": 0.1406, "step": 6655 }, { "epoch": 2.16, "learning_rate": 9.66212452535262e-07, "loss": 0.1473, "step": 6656 }, { "epoch": 2.16, "learning_rate": 9.655218831378518e-07, "loss": 0.1552, "step": 6657 }, { "epoch": 2.16, "learning_rate": 9.648315015457083e-07, "loss": 0.1591, "step": 6658 }, { "epoch": 2.16, "learning_rate": 9.641413078433274e-07, "loss": 0.1454, "step": 6659 }, { "epoch": 2.16, "learning_rate": 9.63451302115182e-07, "loss": 0.1393, "step": 6660 }, { "epoch": 2.16, "learning_rate": 9.627614844457222e-07, "loss": 0.1412, "step": 6661 }, { "epoch": 2.16, "learning_rate": 9.620718549193764e-07, "loss": 0.1553, "step": 6662 }, { "epoch": 2.16, "learning_rate": 9.61382413620546e-07, "loss": 0.142, "step": 6663 }, { "epoch": 2.16, "learning_rate": 9.606931606336134e-07, "loss": 0.1383, "step": 6664 }, { "epoch": 2.16, "learning_rate": 9.60004096042936e-07, "loss": 0.1426, "step": 6665 }, { "epoch": 2.16, "learning_rate": 9.593152199328494e-07, "loss": 0.1477, "step": 6666 }, { "epoch": 2.16, "learning_rate": 9.586265323876653e-07, "loss": 0.1509, "step": 6667 }, { "epoch": 2.16, "learning_rate": 9.579380334916704e-07, "loss": 0.1672, "step": 6668 }, { "epoch": 2.16, "learning_rate": 9.572497233291337e-07, "loss": 0.1391, "step": 6669 }, { "epoch": 2.16, "learning_rate": 9.56561601984294e-07, "loss": 0.1555, "step": 6670 }, { "epoch": 2.16, "learning_rate": 9.558736695413745e-07, "loss": 0.1493, "step": 6671 }, { "epoch": 2.16, "learning_rate": 9.551859260845686e-07, "loss": 0.1526, "step": 6672 }, { "epoch": 2.16, "learning_rate": 9.544983716980505e-07, "loss": 0.154, "step": 6673 }, { "epoch": 2.16, "learning_rate": 9.5381100646597e-07, "loss": 0.1557, "step": 6674 }, { "epoch": 2.16, "learning_rate": 9.531238304724538e-07, "loss": 0.1595, "step": 6675 }, { "epoch": 2.16, "learning_rate": 9.524368438016071e-07, "loss": 0.1382, "step": 6676 }, { "epoch": 2.16, "learning_rate": 9.517500465375071e-07, "loss": 0.1304, "step": 6677 }, { "epoch": 2.16, "learning_rate": 9.510634387642151e-07, "loss": 0.1503, "step": 6678 }, { "epoch": 2.16, "learning_rate": 9.503770205657625e-07, "loss": 0.1514, "step": 6679 }, { "epoch": 2.16, "learning_rate": 9.496907920261609e-07, "loss": 0.145, "step": 6680 }, { "epoch": 2.16, "learning_rate": 9.490047532293984e-07, "loss": 0.1494, "step": 6681 }, { "epoch": 2.17, "learning_rate": 9.48318904259439e-07, "loss": 0.1512, "step": 6682 }, { "epoch": 2.17, "learning_rate": 9.476332452002245e-07, "loss": 0.1519, "step": 6683 }, { "epoch": 2.17, "learning_rate": 9.469477761356727e-07, "loss": 0.1435, "step": 6684 }, { "epoch": 2.17, "learning_rate": 9.462624971496793e-07, "loss": 0.1668, "step": 6685 }, { "epoch": 2.17, "learning_rate": 9.455774083261138e-07, "loss": 0.1409, "step": 6686 }, { "epoch": 2.17, "learning_rate": 9.448925097488257e-07, "loss": 0.1606, "step": 6687 }, { "epoch": 2.17, "learning_rate": 9.442078015016398e-07, "loss": 0.1553, "step": 6688 }, { "epoch": 2.17, "learning_rate": 9.435232836683577e-07, "loss": 0.155, "step": 6689 }, { "epoch": 2.17, "learning_rate": 9.42838956332758e-07, "loss": 0.149, "step": 6690 }, { "epoch": 2.17, "learning_rate": 9.421548195785962e-07, "loss": 0.1503, "step": 6691 }, { "epoch": 2.17, "learning_rate": 9.414708734896019e-07, "loss": 0.1565, "step": 6692 }, { "epoch": 2.17, "learning_rate": 9.407871181494865e-07, "loss": 0.149, "step": 6693 }, { "epoch": 2.17, "learning_rate": 9.401035536419326e-07, "loss": 0.1364, "step": 6694 }, { "epoch": 2.17, "learning_rate": 9.394201800506028e-07, "loss": 0.1672, "step": 6695 }, { "epoch": 2.17, "learning_rate": 9.387369974591353e-07, "loss": 0.1336, "step": 6696 }, { "epoch": 2.17, "learning_rate": 9.380540059511453e-07, "loss": 0.1481, "step": 6697 }, { "epoch": 2.17, "learning_rate": 9.373712056102249e-07, "loss": 0.1436, "step": 6698 }, { "epoch": 2.17, "learning_rate": 9.366885965199398e-07, "loss": 0.1408, "step": 6699 }, { "epoch": 2.17, "learning_rate": 9.360061787638383e-07, "loss": 0.1639, "step": 6700 }, { "epoch": 2.17, "learning_rate": 9.353239524254382e-07, "loss": 0.1572, "step": 6701 }, { "epoch": 2.17, "learning_rate": 9.346419175882407e-07, "loss": 0.1348, "step": 6702 }, { "epoch": 2.17, "learning_rate": 9.339600743357177e-07, "loss": 0.1435, "step": 6703 }, { "epoch": 2.17, "learning_rate": 9.332784227513212e-07, "loss": 0.1552, "step": 6704 }, { "epoch": 2.17, "learning_rate": 9.325969629184789e-07, "loss": 0.1431, "step": 6705 }, { "epoch": 2.17, "learning_rate": 9.319156949205943e-07, "loss": 0.1608, "step": 6706 }, { "epoch": 2.17, "learning_rate": 9.312346188410496e-07, "loss": 0.1481, "step": 6707 }, { "epoch": 2.17, "learning_rate": 9.30553734763199e-07, "loss": 0.1519, "step": 6708 }, { "epoch": 2.17, "learning_rate": 9.298730427703795e-07, "loss": 0.1361, "step": 6709 }, { "epoch": 2.17, "learning_rate": 9.291925429458987e-07, "loss": 0.1589, "step": 6710 }, { "epoch": 2.17, "learning_rate": 9.285122353730439e-07, "loss": 0.1548, "step": 6711 }, { "epoch": 2.17, "learning_rate": 9.278321201350784e-07, "loss": 0.1583, "step": 6712 }, { "epoch": 2.18, "learning_rate": 9.271521973152418e-07, "loss": 0.159, "step": 6713 }, { "epoch": 2.18, "learning_rate": 9.264724669967498e-07, "loss": 0.1583, "step": 6714 }, { "epoch": 2.18, "learning_rate": 9.257929292627956e-07, "loss": 0.1513, "step": 6715 }, { "epoch": 2.18, "learning_rate": 9.251135841965467e-07, "loss": 0.1304, "step": 6716 }, { "epoch": 2.18, "learning_rate": 9.244344318811491e-07, "loss": 0.1501, "step": 6717 }, { "epoch": 2.18, "learning_rate": 9.237554723997242e-07, "loss": 0.132, "step": 6718 }, { "epoch": 2.18, "learning_rate": 9.230767058353701e-07, "loss": 0.137, "step": 6719 }, { "epoch": 2.18, "learning_rate": 9.223981322711617e-07, "loss": 0.1603, "step": 6720 }, { "epoch": 2.18, "learning_rate": 9.217197517901494e-07, "loss": 0.1372, "step": 6721 }, { "epoch": 2.18, "learning_rate": 9.210415644753615e-07, "loss": 0.1618, "step": 6722 }, { "epoch": 2.18, "learning_rate": 9.203635704097988e-07, "loss": 0.1668, "step": 6723 }, { "epoch": 2.18, "learning_rate": 9.196857696764446e-07, "loss": 0.1477, "step": 6724 }, { "epoch": 2.18, "learning_rate": 9.190081623582531e-07, "loss": 0.1585, "step": 6725 }, { "epoch": 2.18, "learning_rate": 9.183307485381571e-07, "loss": 0.1627, "step": 6726 }, { "epoch": 2.18, "learning_rate": 9.176535282990656e-07, "loss": 0.1462, "step": 6727 }, { "epoch": 2.18, "learning_rate": 9.169765017238641e-07, "loss": 0.1663, "step": 6728 }, { "epoch": 2.18, "learning_rate": 9.162996688954148e-07, "loss": 0.1314, "step": 6729 }, { "epoch": 2.18, "learning_rate": 9.156230298965529e-07, "loss": 0.1662, "step": 6730 }, { "epoch": 2.18, "learning_rate": 9.149465848100958e-07, "loss": 0.1675, "step": 6731 }, { "epoch": 2.18, "learning_rate": 9.142703337188305e-07, "loss": 0.1524, "step": 6732 }, { "epoch": 2.18, "learning_rate": 9.135942767055272e-07, "loss": 0.1475, "step": 6733 }, { "epoch": 2.18, "learning_rate": 9.129184138529259e-07, "loss": 0.1541, "step": 6734 }, { "epoch": 2.18, "learning_rate": 9.122427452437465e-07, "loss": 0.1351, "step": 6735 }, { "epoch": 2.18, "learning_rate": 9.115672709606846e-07, "loss": 0.1417, "step": 6736 }, { "epoch": 2.18, "learning_rate": 9.108919910864111e-07, "loss": 0.149, "step": 6737 }, { "epoch": 2.18, "learning_rate": 9.102169057035753e-07, "loss": 0.1581, "step": 6738 }, { "epoch": 2.18, "learning_rate": 9.095420148947984e-07, "loss": 0.1461, "step": 6739 }, { "epoch": 2.18, "learning_rate": 9.088673187426836e-07, "loss": 0.1537, "step": 6740 }, { "epoch": 2.18, "learning_rate": 9.081928173298046e-07, "loss": 0.145, "step": 6741 }, { "epoch": 2.18, "learning_rate": 9.075185107387149e-07, "loss": 0.1552, "step": 6742 }, { "epoch": 2.19, "learning_rate": 9.068443990519432e-07, "loss": 0.1665, "step": 6743 }, { "epoch": 2.19, "learning_rate": 9.061704823519943e-07, "loss": 0.1368, "step": 6744 }, { "epoch": 2.19, "learning_rate": 9.054967607213486e-07, "loss": 0.1546, "step": 6745 }, { "epoch": 2.19, "learning_rate": 9.048232342424642e-07, "loss": 0.1371, "step": 6746 }, { "epoch": 2.19, "learning_rate": 9.04149902997773e-07, "loss": 0.1583, "step": 6747 }, { "epoch": 2.19, "learning_rate": 9.034767670696842e-07, "loss": 0.1633, "step": 6748 }, { "epoch": 2.19, "learning_rate": 9.028038265405836e-07, "loss": 0.1556, "step": 6749 }, { "epoch": 2.19, "learning_rate": 9.021310814928328e-07, "loss": 0.1481, "step": 6750 }, { "epoch": 2.19, "learning_rate": 9.01458532008769e-07, "loss": 0.147, "step": 6751 }, { "epoch": 2.19, "learning_rate": 9.007861781707056e-07, "loss": 0.1414, "step": 6752 }, { "epoch": 2.19, "learning_rate": 9.001140200609334e-07, "loss": 0.1455, "step": 6753 }, { "epoch": 2.19, "learning_rate": 8.994420577617155e-07, "loss": 0.1478, "step": 6754 }, { "epoch": 2.19, "learning_rate": 8.987702913552964e-07, "loss": 0.159, "step": 6755 }, { "epoch": 2.19, "learning_rate": 8.980987209238922e-07, "loss": 0.1518, "step": 6756 }, { "epoch": 2.19, "learning_rate": 8.974273465496966e-07, "loss": 0.1283, "step": 6757 }, { "epoch": 2.19, "learning_rate": 8.967561683148798e-07, "loss": 0.1629, "step": 6758 }, { "epoch": 2.19, "learning_rate": 8.960851863015874e-07, "loss": 0.1546, "step": 6759 }, { "epoch": 2.19, "learning_rate": 8.954144005919422e-07, "loss": 0.1489, "step": 6760 }, { "epoch": 2.19, "learning_rate": 8.947438112680387e-07, "loss": 0.1465, "step": 6761 }, { "epoch": 2.19, "learning_rate": 8.940734184119542e-07, "loss": 0.1554, "step": 6762 }, { "epoch": 2.19, "learning_rate": 8.934032221057354e-07, "loss": 0.1524, "step": 6763 }, { "epoch": 2.19, "learning_rate": 8.927332224314106e-07, "loss": 0.1413, "step": 6764 }, { "epoch": 2.19, "learning_rate": 8.92063419470979e-07, "loss": 0.1418, "step": 6765 }, { "epoch": 2.19, "learning_rate": 8.91393813306419e-07, "loss": 0.1474, "step": 6766 }, { "epoch": 2.19, "learning_rate": 8.907244040196836e-07, "loss": 0.148, "step": 6767 }, { "epoch": 2.19, "learning_rate": 8.900551916927022e-07, "loss": 0.138, "step": 6768 }, { "epoch": 2.19, "learning_rate": 8.893861764073808e-07, "loss": 0.144, "step": 6769 }, { "epoch": 2.19, "learning_rate": 8.887173582455985e-07, "loss": 0.1415, "step": 6770 }, { "epoch": 2.19, "learning_rate": 8.88048737289213e-07, "loss": 0.1427, "step": 6771 }, { "epoch": 2.19, "learning_rate": 8.873803136200574e-07, "loss": 0.1478, "step": 6772 }, { "epoch": 2.19, "learning_rate": 8.8671208731994e-07, "loss": 0.144, "step": 6773 }, { "epoch": 2.2, "learning_rate": 8.860440584706451e-07, "loss": 0.1574, "step": 6774 }, { "epoch": 2.2, "learning_rate": 8.853762271539332e-07, "loss": 0.1525, "step": 6775 }, { "epoch": 2.2, "learning_rate": 8.847085934515404e-07, "loss": 0.159, "step": 6776 }, { "epoch": 2.2, "learning_rate": 8.840411574451793e-07, "loss": 0.1645, "step": 6777 }, { "epoch": 2.2, "learning_rate": 8.833739192165352e-07, "loss": 0.1432, "step": 6778 }, { "epoch": 2.2, "learning_rate": 8.827068788472751e-07, "loss": 0.148, "step": 6779 }, { "epoch": 2.2, "learning_rate": 8.820400364190351e-07, "loss": 0.1453, "step": 6780 }, { "epoch": 2.2, "learning_rate": 8.813733920134321e-07, "loss": 0.1694, "step": 6781 }, { "epoch": 2.2, "learning_rate": 8.807069457120571e-07, "loss": 0.1561, "step": 6782 }, { "epoch": 2.2, "learning_rate": 8.80040697596474e-07, "loss": 0.156, "step": 6783 }, { "epoch": 2.2, "learning_rate": 8.79374647748229e-07, "loss": 0.1485, "step": 6784 }, { "epoch": 2.2, "learning_rate": 8.787087962488367e-07, "loss": 0.1422, "step": 6785 }, { "epoch": 2.2, "learning_rate": 8.780431431797937e-07, "loss": 0.1371, "step": 6786 }, { "epoch": 2.2, "learning_rate": 8.773776886225668e-07, "loss": 0.1628, "step": 6787 }, { "epoch": 2.2, "learning_rate": 8.767124326586043e-07, "loss": 0.1583, "step": 6788 }, { "epoch": 2.2, "learning_rate": 8.760473753693243e-07, "loss": 0.157, "step": 6789 }, { "epoch": 2.2, "learning_rate": 8.753825168361249e-07, "loss": 0.157, "step": 6790 }, { "epoch": 2.2, "learning_rate": 8.747178571403786e-07, "loss": 0.1567, "step": 6791 }, { "epoch": 2.2, "learning_rate": 8.74053396363431e-07, "loss": 0.1611, "step": 6792 }, { "epoch": 2.2, "learning_rate": 8.733891345866088e-07, "loss": 0.1618, "step": 6793 }, { "epoch": 2.2, "learning_rate": 8.727250718912089e-07, "loss": 0.1559, "step": 6794 }, { "epoch": 2.2, "learning_rate": 8.72061208358507e-07, "loss": 0.1366, "step": 6795 }, { "epoch": 2.2, "learning_rate": 8.713975440697536e-07, "loss": 0.1576, "step": 6796 }, { "epoch": 2.2, "learning_rate": 8.707340791061747e-07, "loss": 0.1457, "step": 6797 }, { "epoch": 2.2, "learning_rate": 8.700708135489722e-07, "loss": 0.1438, "step": 6798 }, { "epoch": 2.2, "learning_rate": 8.694077474793227e-07, "loss": 0.1551, "step": 6799 }, { "epoch": 2.2, "learning_rate": 8.687448809783799e-07, "loss": 0.1427, "step": 6800 }, { "epoch": 2.2, "learning_rate": 8.680822141272727e-07, "loss": 0.1515, "step": 6801 }, { "epoch": 2.2, "learning_rate": 8.674197470071033e-07, "loss": 0.1518, "step": 6802 }, { "epoch": 2.2, "learning_rate": 8.667574796989526e-07, "loss": 0.1466, "step": 6803 }, { "epoch": 2.2, "learning_rate": 8.66095412283875e-07, "loss": 0.1425, "step": 6804 }, { "epoch": 2.21, "learning_rate": 8.654335448429016e-07, "loss": 0.1492, "step": 6805 }, { "epoch": 2.21, "learning_rate": 8.647718774570385e-07, "loss": 0.1383, "step": 6806 }, { "epoch": 2.21, "learning_rate": 8.641104102072676e-07, "loss": 0.1417, "step": 6807 }, { "epoch": 2.21, "learning_rate": 8.634491431745465e-07, "loss": 0.1504, "step": 6808 }, { "epoch": 2.21, "learning_rate": 8.627880764398055e-07, "loss": 0.1329, "step": 6809 }, { "epoch": 2.21, "learning_rate": 8.621272100839562e-07, "loss": 0.1619, "step": 6810 }, { "epoch": 2.21, "learning_rate": 8.614665441878798e-07, "loss": 0.1618, "step": 6811 }, { "epoch": 2.21, "learning_rate": 8.60806078832436e-07, "loss": 0.1455, "step": 6812 }, { "epoch": 2.21, "learning_rate": 8.601458140984606e-07, "loss": 0.1652, "step": 6813 }, { "epoch": 2.21, "learning_rate": 8.594857500667606e-07, "loss": 0.1523, "step": 6814 }, { "epoch": 2.21, "learning_rate": 8.588258868181251e-07, "loss": 0.1405, "step": 6815 }, { "epoch": 2.21, "learning_rate": 8.581662244333116e-07, "loss": 0.1549, "step": 6816 }, { "epoch": 2.21, "learning_rate": 8.575067629930601e-07, "loss": 0.1601, "step": 6817 }, { "epoch": 2.21, "learning_rate": 8.568475025780781e-07, "loss": 0.1514, "step": 6818 }, { "epoch": 2.21, "learning_rate": 8.561884432690568e-07, "loss": 0.1455, "step": 6819 }, { "epoch": 2.21, "learning_rate": 8.555295851466556e-07, "loss": 0.1483, "step": 6820 }, { "epoch": 2.21, "learning_rate": 8.548709282915135e-07, "loss": 0.131, "step": 6821 }, { "epoch": 2.21, "learning_rate": 8.542124727842438e-07, "loss": 0.1429, "step": 6822 }, { "epoch": 2.21, "learning_rate": 8.535542187054352e-07, "loss": 0.1497, "step": 6823 }, { "epoch": 2.21, "learning_rate": 8.528961661356519e-07, "loss": 0.1547, "step": 6824 }, { "epoch": 2.21, "learning_rate": 8.52238315155432e-07, "loss": 0.1355, "step": 6825 }, { "epoch": 2.21, "learning_rate": 8.515806658452908e-07, "loss": 0.1485, "step": 6826 }, { "epoch": 2.21, "learning_rate": 8.50923218285718e-07, "loss": 0.1384, "step": 6827 }, { "epoch": 2.21, "learning_rate": 8.502659725571791e-07, "loss": 0.1618, "step": 6828 }, { "epoch": 2.21, "learning_rate": 8.496089287401144e-07, "loss": 0.1395, "step": 6829 }, { "epoch": 2.21, "learning_rate": 8.489520869149398e-07, "loss": 0.1501, "step": 6830 }, { "epoch": 2.21, "learning_rate": 8.482954471620464e-07, "loss": 0.1327, "step": 6831 }, { "epoch": 2.21, "learning_rate": 8.476390095618015e-07, "loss": 0.1472, "step": 6832 }, { "epoch": 2.21, "learning_rate": 8.469827741945447e-07, "loss": 0.156, "step": 6833 }, { "epoch": 2.21, "learning_rate": 8.46326741140594e-07, "loss": 0.1452, "step": 6834 }, { "epoch": 2.21, "learning_rate": 8.456709104802413e-07, "loss": 0.1813, "step": 6835 }, { "epoch": 2.22, "learning_rate": 8.450152822937541e-07, "loss": 0.1759, "step": 6836 }, { "epoch": 2.22, "learning_rate": 8.443598566613756e-07, "loss": 0.1561, "step": 6837 }, { "epoch": 2.22, "learning_rate": 8.437046336633212e-07, "loss": 0.1587, "step": 6838 }, { "epoch": 2.22, "learning_rate": 8.430496133797872e-07, "loss": 0.1456, "step": 6839 }, { "epoch": 2.22, "learning_rate": 8.423947958909381e-07, "loss": 0.1286, "step": 6840 }, { "epoch": 2.22, "learning_rate": 8.41740181276921e-07, "loss": 0.1383, "step": 6841 }, { "epoch": 2.22, "learning_rate": 8.410857696178518e-07, "loss": 0.1567, "step": 6842 }, { "epoch": 2.22, "learning_rate": 8.404315609938246e-07, "loss": 0.1489, "step": 6843 }, { "epoch": 2.22, "learning_rate": 8.397775554849086e-07, "loss": 0.1465, "step": 6844 }, { "epoch": 2.22, "learning_rate": 8.391237531711474e-07, "loss": 0.1488, "step": 6845 }, { "epoch": 2.22, "learning_rate": 8.384701541325612e-07, "loss": 0.1516, "step": 6846 }, { "epoch": 2.22, "learning_rate": 8.378167584491417e-07, "loss": 0.1626, "step": 6847 }, { "epoch": 2.22, "learning_rate": 8.371635662008615e-07, "loss": 0.1472, "step": 6848 }, { "epoch": 2.22, "learning_rate": 8.365105774676624e-07, "loss": 0.1456, "step": 6849 }, { "epoch": 2.22, "learning_rate": 8.358577923294647e-07, "loss": 0.1464, "step": 6850 }, { "epoch": 2.22, "learning_rate": 8.352052108661634e-07, "loss": 0.1517, "step": 6851 }, { "epoch": 2.22, "learning_rate": 8.345528331576275e-07, "loss": 0.1521, "step": 6852 }, { "epoch": 2.22, "learning_rate": 8.339006592837021e-07, "loss": 0.1538, "step": 6853 }, { "epoch": 2.22, "learning_rate": 8.33248689324207e-07, "loss": 0.1423, "step": 6854 }, { "epoch": 2.22, "learning_rate": 8.325969233589376e-07, "loss": 0.1532, "step": 6855 }, { "epoch": 2.22, "learning_rate": 8.319453614676626e-07, "loss": 0.1481, "step": 6856 }, { "epoch": 2.22, "learning_rate": 8.31294003730127e-07, "loss": 0.1714, "step": 6857 }, { "epoch": 2.22, "learning_rate": 8.306428502260511e-07, "loss": 0.1414, "step": 6858 }, { "epoch": 2.22, "learning_rate": 8.299919010351296e-07, "loss": 0.1713, "step": 6859 }, { "epoch": 2.22, "learning_rate": 8.293411562370327e-07, "loss": 0.1456, "step": 6860 }, { "epoch": 2.22, "learning_rate": 8.286906159114058e-07, "loss": 0.1391, "step": 6861 }, { "epoch": 2.22, "learning_rate": 8.280402801378662e-07, "loss": 0.1587, "step": 6862 }, { "epoch": 2.22, "learning_rate": 8.27390148996012e-07, "loss": 0.1515, "step": 6863 }, { "epoch": 2.22, "learning_rate": 8.267402225654112e-07, "loss": 0.1507, "step": 6864 }, { "epoch": 2.22, "learning_rate": 8.260905009256081e-07, "loss": 0.1383, "step": 6865 }, { "epoch": 2.22, "learning_rate": 8.254409841561234e-07, "loss": 0.1587, "step": 6866 }, { "epoch": 2.23, "learning_rate": 8.24791672336451e-07, "loss": 0.1375, "step": 6867 }, { "epoch": 2.23, "learning_rate": 8.241425655460616e-07, "loss": 0.1379, "step": 6868 }, { "epoch": 2.23, "learning_rate": 8.23493663864397e-07, "loss": 0.1373, "step": 6869 }, { "epoch": 2.23, "learning_rate": 8.228449673708797e-07, "loss": 0.1589, "step": 6870 }, { "epoch": 2.23, "learning_rate": 8.221964761449008e-07, "loss": 0.1482, "step": 6871 }, { "epoch": 2.23, "learning_rate": 8.215481902658323e-07, "loss": 0.1543, "step": 6872 }, { "epoch": 2.23, "learning_rate": 8.209001098130157e-07, "loss": 0.1502, "step": 6873 }, { "epoch": 2.23, "learning_rate": 8.20252234865771e-07, "loss": 0.1568, "step": 6874 }, { "epoch": 2.23, "learning_rate": 8.196045655033913e-07, "loss": 0.1495, "step": 6875 }, { "epoch": 2.23, "learning_rate": 8.189571018051454e-07, "loss": 0.131, "step": 6876 }, { "epoch": 2.23, "learning_rate": 8.183098438502771e-07, "loss": 0.151, "step": 6877 }, { "epoch": 2.23, "learning_rate": 8.176627917180025e-07, "loss": 0.1563, "step": 6878 }, { "epoch": 2.23, "learning_rate": 8.170159454875173e-07, "loss": 0.1622, "step": 6879 }, { "epoch": 2.23, "learning_rate": 8.163693052379873e-07, "loss": 0.1538, "step": 6880 }, { "epoch": 2.23, "learning_rate": 8.157228710485554e-07, "loss": 0.1572, "step": 6881 }, { "epoch": 2.23, "learning_rate": 8.15076642998339e-07, "loss": 0.1663, "step": 6882 }, { "epoch": 2.23, "learning_rate": 8.144306211664302e-07, "loss": 0.1372, "step": 6883 }, { "epoch": 2.23, "learning_rate": 8.137848056318959e-07, "loss": 0.1419, "step": 6884 }, { "epoch": 2.23, "learning_rate": 8.131391964737773e-07, "loss": 0.1533, "step": 6885 }, { "epoch": 2.23, "learning_rate": 8.12493793771092e-07, "loss": 0.1437, "step": 6886 }, { "epoch": 2.23, "learning_rate": 8.118485976028292e-07, "loss": 0.1472, "step": 6887 }, { "epoch": 2.23, "learning_rate": 8.112036080479554e-07, "loss": 0.1497, "step": 6888 }, { "epoch": 2.23, "learning_rate": 8.10558825185411e-07, "loss": 0.1476, "step": 6889 }, { "epoch": 2.23, "learning_rate": 8.099142490941117e-07, "loss": 0.1595, "step": 6890 }, { "epoch": 2.23, "learning_rate": 8.09269879852947e-07, "loss": 0.1542, "step": 6891 }, { "epoch": 2.23, "learning_rate": 8.086257175407819e-07, "loss": 0.1431, "step": 6892 }, { "epoch": 2.23, "learning_rate": 8.079817622364539e-07, "loss": 0.1418, "step": 6893 }, { "epoch": 2.23, "learning_rate": 8.073380140187795e-07, "loss": 0.1546, "step": 6894 }, { "epoch": 2.23, "learning_rate": 8.066944729665455e-07, "loss": 0.1436, "step": 6895 }, { "epoch": 2.23, "learning_rate": 8.060511391585152e-07, "loss": 0.1478, "step": 6896 }, { "epoch": 2.23, "learning_rate": 8.054080126734271e-07, "loss": 0.1607, "step": 6897 }, { "epoch": 2.24, "learning_rate": 8.047650935899931e-07, "loss": 0.1481, "step": 6898 }, { "epoch": 2.24, "learning_rate": 8.041223819869015e-07, "loss": 0.1543, "step": 6899 }, { "epoch": 2.24, "learning_rate": 8.034798779428113e-07, "loss": 0.1445, "step": 6900 }, { "epoch": 2.24, "learning_rate": 8.02837581536362e-07, "loss": 0.1594, "step": 6901 }, { "epoch": 2.24, "learning_rate": 8.021954928461611e-07, "loss": 0.154, "step": 6902 }, { "epoch": 2.24, "learning_rate": 8.015536119507977e-07, "loss": 0.156, "step": 6903 }, { "epoch": 2.24, "learning_rate": 8.009119389288292e-07, "loss": 0.147, "step": 6904 }, { "epoch": 2.24, "learning_rate": 8.002704738587911e-07, "loss": 0.1391, "step": 6905 }, { "epoch": 2.24, "learning_rate": 7.996292168191919e-07, "loss": 0.1578, "step": 6906 }, { "epoch": 2.24, "learning_rate": 7.989881678885158e-07, "loss": 0.1462, "step": 6907 }, { "epoch": 2.24, "learning_rate": 7.983473271452219e-07, "loss": 0.1408, "step": 6908 }, { "epoch": 2.24, "learning_rate": 7.977066946677404e-07, "loss": 0.144, "step": 6909 }, { "epoch": 2.24, "learning_rate": 7.970662705344812e-07, "loss": 0.1503, "step": 6910 }, { "epoch": 2.24, "learning_rate": 7.964260548238242e-07, "loss": 0.1476, "step": 6911 }, { "epoch": 2.24, "learning_rate": 7.957860476141261e-07, "loss": 0.1352, "step": 6912 }, { "epoch": 2.24, "learning_rate": 7.951462489837178e-07, "loss": 0.1487, "step": 6913 }, { "epoch": 2.24, "learning_rate": 7.945066590109044e-07, "loss": 0.1531, "step": 6914 }, { "epoch": 2.24, "learning_rate": 7.938672777739654e-07, "loss": 0.1433, "step": 6915 }, { "epoch": 2.24, "learning_rate": 7.932281053511559e-07, "loss": 0.1365, "step": 6916 }, { "epoch": 2.24, "learning_rate": 7.925891418207024e-07, "loss": 0.1467, "step": 6917 }, { "epoch": 2.24, "learning_rate": 7.919503872608092e-07, "loss": 0.1362, "step": 6918 }, { "epoch": 2.24, "learning_rate": 7.913118417496532e-07, "loss": 0.1572, "step": 6919 }, { "epoch": 2.24, "learning_rate": 7.906735053653866e-07, "loss": 0.1505, "step": 6920 }, { "epoch": 2.24, "learning_rate": 7.900353781861353e-07, "loss": 0.1481, "step": 6921 }, { "epoch": 2.24, "learning_rate": 7.893974602899998e-07, "loss": 0.1287, "step": 6922 }, { "epoch": 2.24, "learning_rate": 7.887597517550564e-07, "loss": 0.1667, "step": 6923 }, { "epoch": 2.24, "learning_rate": 7.881222526593513e-07, "loss": 0.1437, "step": 6924 }, { "epoch": 2.24, "learning_rate": 7.87484963080912e-07, "loss": 0.1594, "step": 6925 }, { "epoch": 2.24, "learning_rate": 7.868478830977331e-07, "loss": 0.1522, "step": 6926 }, { "epoch": 2.24, "learning_rate": 7.862110127877903e-07, "loss": 0.1892, "step": 6927 }, { "epoch": 2.24, "learning_rate": 7.855743522290283e-07, "loss": 0.1399, "step": 6928 }, { "epoch": 2.25, "learning_rate": 7.849379014993683e-07, "loss": 0.1581, "step": 6929 }, { "epoch": 2.25, "learning_rate": 7.84301660676707e-07, "loss": 0.1447, "step": 6930 }, { "epoch": 2.25, "learning_rate": 7.836656298389114e-07, "loss": 0.1432, "step": 6931 }, { "epoch": 2.25, "learning_rate": 7.830298090638291e-07, "loss": 0.1493, "step": 6932 }, { "epoch": 2.25, "learning_rate": 7.823941984292752e-07, "loss": 0.1474, "step": 6933 }, { "epoch": 2.25, "learning_rate": 7.817587980130451e-07, "loss": 0.1484, "step": 6934 }, { "epoch": 2.25, "learning_rate": 7.811236078929033e-07, "loss": 0.1513, "step": 6935 }, { "epoch": 2.25, "learning_rate": 7.80488628146592e-07, "loss": 0.1481, "step": 6936 }, { "epoch": 2.25, "learning_rate": 7.798538588518265e-07, "loss": 0.1408, "step": 6937 }, { "epoch": 2.25, "learning_rate": 7.792193000862964e-07, "loss": 0.1596, "step": 6938 }, { "epoch": 2.25, "learning_rate": 7.785849519276661e-07, "loss": 0.1517, "step": 6939 }, { "epoch": 2.25, "learning_rate": 7.779508144535725e-07, "loss": 0.1512, "step": 6940 }, { "epoch": 2.25, "learning_rate": 7.773168877416285e-07, "loss": 0.1479, "step": 6941 }, { "epoch": 2.25, "learning_rate": 7.766831718694204e-07, "loss": 0.1444, "step": 6942 }, { "epoch": 2.25, "learning_rate": 7.760496669145093e-07, "loss": 0.1543, "step": 6943 }, { "epoch": 2.25, "learning_rate": 7.754163729544297e-07, "loss": 0.1653, "step": 6944 }, { "epoch": 2.25, "learning_rate": 7.747832900666907e-07, "loss": 0.1359, "step": 6945 }, { "epoch": 2.25, "learning_rate": 7.741504183287757e-07, "loss": 0.1414, "step": 6946 }, { "epoch": 2.25, "learning_rate": 7.73517757818143e-07, "loss": 0.1489, "step": 6947 }, { "epoch": 2.25, "learning_rate": 7.728853086122212e-07, "loss": 0.1469, "step": 6948 }, { "epoch": 2.25, "learning_rate": 7.722530707884196e-07, "loss": 0.1539, "step": 6949 }, { "epoch": 2.25, "learning_rate": 7.716210444241154e-07, "loss": 0.1534, "step": 6950 }, { "epoch": 2.25, "learning_rate": 7.709892295966634e-07, "loss": 0.1401, "step": 6951 }, { "epoch": 2.25, "learning_rate": 7.703576263833915e-07, "loss": 0.1434, "step": 6952 }, { "epoch": 2.25, "learning_rate": 7.697262348616019e-07, "loss": 0.1691, "step": 6953 }, { "epoch": 2.25, "learning_rate": 7.690950551085716e-07, "loss": 0.1602, "step": 6954 }, { "epoch": 2.25, "learning_rate": 7.684640872015484e-07, "loss": 0.1574, "step": 6955 }, { "epoch": 2.25, "learning_rate": 7.678333312177602e-07, "loss": 0.1457, "step": 6956 }, { "epoch": 2.25, "learning_rate": 7.672027872344017e-07, "loss": 0.1326, "step": 6957 }, { "epoch": 2.25, "learning_rate": 7.665724553286491e-07, "loss": 0.1498, "step": 6958 }, { "epoch": 2.26, "learning_rate": 7.659423355776463e-07, "loss": 0.1678, "step": 6959 }, { "epoch": 2.26, "learning_rate": 7.653124280585145e-07, "loss": 0.1312, "step": 6960 }, { "epoch": 2.26, "learning_rate": 7.646827328483486e-07, "loss": 0.1758, "step": 6961 }, { "epoch": 2.26, "learning_rate": 7.64053250024217e-07, "loss": 0.1511, "step": 6962 }, { "epoch": 2.26, "learning_rate": 7.634239796631629e-07, "loss": 0.1434, "step": 6963 }, { "epoch": 2.26, "learning_rate": 7.62794921842201e-07, "loss": 0.1676, "step": 6964 }, { "epoch": 2.26, "learning_rate": 7.621660766383246e-07, "loss": 0.1463, "step": 6965 }, { "epoch": 2.26, "learning_rate": 7.615374441284962e-07, "loss": 0.1561, "step": 6966 }, { "epoch": 2.26, "learning_rate": 7.60909024389655e-07, "loss": 0.1622, "step": 6967 }, { "epoch": 2.26, "learning_rate": 7.602808174987137e-07, "loss": 0.1353, "step": 6968 }, { "epoch": 2.26, "learning_rate": 7.596528235325582e-07, "loss": 0.16, "step": 6969 }, { "epoch": 2.26, "learning_rate": 7.590250425680496e-07, "loss": 0.1566, "step": 6970 }, { "epoch": 2.26, "learning_rate": 7.583974746820222e-07, "loss": 0.1533, "step": 6971 }, { "epoch": 2.26, "learning_rate": 7.577701199512835e-07, "loss": 0.1467, "step": 6972 }, { "epoch": 2.26, "learning_rate": 7.571429784526157e-07, "loss": 0.1558, "step": 6973 }, { "epoch": 2.26, "learning_rate": 7.565160502627752e-07, "loss": 0.1493, "step": 6974 }, { "epoch": 2.26, "learning_rate": 7.558893354584923e-07, "loss": 0.1454, "step": 6975 }, { "epoch": 2.26, "learning_rate": 7.5526283411647e-07, "loss": 0.1319, "step": 6976 }, { "epoch": 2.26, "learning_rate": 7.546365463133867e-07, "loss": 0.1709, "step": 6977 }, { "epoch": 2.26, "learning_rate": 7.540104721258945e-07, "loss": 0.1554, "step": 6978 }, { "epoch": 2.26, "learning_rate": 7.533846116306162e-07, "loss": 0.1424, "step": 6979 }, { "epoch": 2.26, "learning_rate": 7.527589649041548e-07, "loss": 0.1494, "step": 6980 }, { "epoch": 2.26, "learning_rate": 7.521335320230804e-07, "loss": 0.1349, "step": 6981 }, { "epoch": 2.26, "learning_rate": 7.515083130639411e-07, "loss": 0.1375, "step": 6982 }, { "epoch": 2.26, "learning_rate": 7.508833081032577e-07, "loss": 0.148, "step": 6983 }, { "epoch": 2.26, "learning_rate": 7.502585172175244e-07, "loss": 0.1358, "step": 6984 }, { "epoch": 2.26, "learning_rate": 7.496339404832109e-07, "loss": 0.1534, "step": 6985 }, { "epoch": 2.26, "learning_rate": 7.490095779767564e-07, "loss": 0.1494, "step": 6986 }, { "epoch": 2.26, "learning_rate": 7.483854297745805e-07, "loss": 0.134, "step": 6987 }, { "epoch": 2.26, "learning_rate": 7.47761495953069e-07, "loss": 0.1639, "step": 6988 }, { "epoch": 2.26, "learning_rate": 7.471377765885893e-07, "loss": 0.1559, "step": 6989 }, { "epoch": 2.27, "learning_rate": 7.465142717574761e-07, "loss": 0.1336, "step": 6990 }, { "epoch": 2.27, "learning_rate": 7.458909815360407e-07, "loss": 0.1623, "step": 6991 }, { "epoch": 2.27, "learning_rate": 7.45267906000568e-07, "loss": 0.1538, "step": 6992 }, { "epoch": 2.27, "learning_rate": 7.446450452273168e-07, "loss": 0.1548, "step": 6993 }, { "epoch": 2.27, "learning_rate": 7.440223992925194e-07, "loss": 0.156, "step": 6994 }, { "epoch": 2.27, "learning_rate": 7.433999682723805e-07, "loss": 0.1453, "step": 6995 }, { "epoch": 2.27, "learning_rate": 7.427777522430804e-07, "loss": 0.14, "step": 6996 }, { "epoch": 2.27, "learning_rate": 7.42155751280772e-07, "loss": 0.1532, "step": 6997 }, { "epoch": 2.27, "learning_rate": 7.415339654615824e-07, "loss": 0.1487, "step": 6998 }, { "epoch": 2.27, "learning_rate": 7.409123948616123e-07, "loss": 0.1384, "step": 6999 }, { "epoch": 2.27, "learning_rate": 7.402910395569357e-07, "loss": 0.1685, "step": 7000 }, { "epoch": 2.27, "learning_rate": 7.396698996236004e-07, "loss": 0.1455, "step": 7001 }, { "epoch": 2.27, "learning_rate": 7.39048975137629e-07, "loss": 0.1619, "step": 7002 }, { "epoch": 2.27, "learning_rate": 7.38428266175015e-07, "loss": 0.1512, "step": 7003 }, { "epoch": 2.27, "learning_rate": 7.378077728117277e-07, "loss": 0.1589, "step": 7004 }, { "epoch": 2.27, "learning_rate": 7.371874951237099e-07, "loss": 0.1415, "step": 7005 }, { "epoch": 2.27, "learning_rate": 7.365674331868772e-07, "loss": 0.1545, "step": 7006 }, { "epoch": 2.27, "learning_rate": 7.359475870771202e-07, "loss": 0.1623, "step": 7007 }, { "epoch": 2.27, "learning_rate": 7.353279568702995e-07, "loss": 0.1577, "step": 7008 }, { "epoch": 2.27, "learning_rate": 7.347085426422551e-07, "loss": 0.1453, "step": 7009 }, { "epoch": 2.27, "learning_rate": 7.340893444687944e-07, "loss": 0.1452, "step": 7010 }, { "epoch": 2.27, "learning_rate": 7.334703624257039e-07, "loss": 0.1451, "step": 7011 }, { "epoch": 2.27, "learning_rate": 7.328515965887389e-07, "loss": 0.1622, "step": 7012 }, { "epoch": 2.27, "learning_rate": 7.322330470336314e-07, "loss": 0.1469, "step": 7013 }, { "epoch": 2.27, "learning_rate": 7.316147138360855e-07, "loss": 0.151, "step": 7014 }, { "epoch": 2.27, "learning_rate": 7.309965970717795e-07, "loss": 0.1589, "step": 7015 }, { "epoch": 2.27, "learning_rate": 7.303786968163651e-07, "loss": 0.1477, "step": 7016 }, { "epoch": 2.27, "learning_rate": 7.297610131454657e-07, "loss": 0.1518, "step": 7017 }, { "epoch": 2.27, "learning_rate": 7.291435461346827e-07, "loss": 0.1474, "step": 7018 }, { "epoch": 2.27, "learning_rate": 7.285262958595846e-07, "loss": 0.1551, "step": 7019 }, { "epoch": 2.27, "learning_rate": 7.279092623957204e-07, "loss": 0.1541, "step": 7020 }, { "epoch": 2.28, "learning_rate": 7.272924458186064e-07, "loss": 0.1418, "step": 7021 }, { "epoch": 2.28, "learning_rate": 7.26675846203736e-07, "loss": 0.149, "step": 7022 }, { "epoch": 2.28, "learning_rate": 7.26059463626575e-07, "loss": 0.1422, "step": 7023 }, { "epoch": 2.28, "learning_rate": 7.254432981625626e-07, "loss": 0.1478, "step": 7024 }, { "epoch": 2.28, "learning_rate": 7.248273498871119e-07, "loss": 0.1516, "step": 7025 }, { "epoch": 2.28, "learning_rate": 7.242116188756082e-07, "loss": 0.1538, "step": 7026 }, { "epoch": 2.28, "learning_rate": 7.235961052034113e-07, "loss": 0.1473, "step": 7027 }, { "epoch": 2.28, "learning_rate": 7.22980808945854e-07, "loss": 0.1456, "step": 7028 }, { "epoch": 2.28, "learning_rate": 7.22365730178243e-07, "loss": 0.15, "step": 7029 }, { "epoch": 2.28, "learning_rate": 7.217508689758576e-07, "loss": 0.1457, "step": 7030 }, { "epoch": 2.28, "learning_rate": 7.211362254139512e-07, "loss": 0.1355, "step": 7031 }, { "epoch": 2.28, "learning_rate": 7.205217995677502e-07, "loss": 0.1628, "step": 7032 }, { "epoch": 2.28, "learning_rate": 7.199075915124548e-07, "loss": 0.1457, "step": 7033 }, { "epoch": 2.28, "learning_rate": 7.192936013232368e-07, "loss": 0.166, "step": 7034 }, { "epoch": 2.28, "learning_rate": 7.186798290752436e-07, "loss": 0.1467, "step": 7035 }, { "epoch": 2.28, "learning_rate": 7.180662748435946e-07, "loss": 0.1459, "step": 7036 }, { "epoch": 2.28, "learning_rate": 7.174529387033832e-07, "loss": 0.148, "step": 7037 }, { "epoch": 2.28, "learning_rate": 7.168398207296764e-07, "loss": 0.1515, "step": 7038 }, { "epoch": 2.28, "learning_rate": 7.162269209975117e-07, "loss": 0.1537, "step": 7039 }, { "epoch": 2.28, "learning_rate": 7.156142395819055e-07, "loss": 0.1382, "step": 7040 }, { "epoch": 2.28, "learning_rate": 7.150017765578401e-07, "loss": 0.1674, "step": 7041 }, { "epoch": 2.28, "learning_rate": 7.143895320002789e-07, "loss": 0.152, "step": 7042 }, { "epoch": 2.28, "learning_rate": 7.137775059841523e-07, "loss": 0.1472, "step": 7043 }, { "epoch": 2.28, "learning_rate": 7.131656985843669e-07, "loss": 0.1516, "step": 7044 }, { "epoch": 2.28, "learning_rate": 7.125541098758021e-07, "loss": 0.1474, "step": 7045 }, { "epoch": 2.28, "learning_rate": 7.119427399333104e-07, "loss": 0.1439, "step": 7046 }, { "epoch": 2.28, "learning_rate": 7.113315888317182e-07, "loss": 0.1528, "step": 7047 }, { "epoch": 2.28, "learning_rate": 7.107206566458225e-07, "loss": 0.1325, "step": 7048 }, { "epoch": 2.28, "learning_rate": 7.101099434503986e-07, "loss": 0.1509, "step": 7049 }, { "epoch": 2.28, "learning_rate": 7.09499449320189e-07, "loss": 0.1429, "step": 7050 }, { "epoch": 2.28, "learning_rate": 7.088891743299136e-07, "loss": 0.171, "step": 7051 }, { "epoch": 2.29, "learning_rate": 7.08279118554264e-07, "loss": 0.1519, "step": 7052 }, { "epoch": 2.29, "learning_rate": 7.076692820679051e-07, "loss": 0.1651, "step": 7053 }, { "epoch": 2.29, "learning_rate": 7.070596649454748e-07, "loss": 0.1461, "step": 7054 }, { "epoch": 2.29, "learning_rate": 7.064502672615847e-07, "loss": 0.1533, "step": 7055 }, { "epoch": 2.29, "learning_rate": 7.058410890908196e-07, "loss": 0.1381, "step": 7056 }, { "epoch": 2.29, "learning_rate": 7.052321305077356e-07, "loss": 0.1433, "step": 7057 }, { "epoch": 2.29, "learning_rate": 7.046233915868642e-07, "loss": 0.1339, "step": 7058 }, { "epoch": 2.29, "learning_rate": 7.04014872402709e-07, "loss": 0.166, "step": 7059 }, { "epoch": 2.29, "learning_rate": 7.034065730297471e-07, "loss": 0.1587, "step": 7060 }, { "epoch": 2.29, "learning_rate": 7.027984935424284e-07, "loss": 0.1539, "step": 7061 }, { "epoch": 2.29, "learning_rate": 7.021906340151763e-07, "loss": 0.1383, "step": 7062 }, { "epoch": 2.29, "learning_rate": 7.015829945223851e-07, "loss": 0.1456, "step": 7063 }, { "epoch": 2.29, "learning_rate": 7.009755751384267e-07, "loss": 0.1472, "step": 7064 }, { "epoch": 2.29, "learning_rate": 7.003683759376415e-07, "loss": 0.1511, "step": 7065 }, { "epoch": 2.29, "learning_rate": 6.997613969943451e-07, "loss": 0.1509, "step": 7066 }, { "epoch": 2.29, "learning_rate": 6.99154638382826e-07, "loss": 0.1575, "step": 7067 }, { "epoch": 2.29, "learning_rate": 6.985481001773456e-07, "loss": 0.1532, "step": 7068 }, { "epoch": 2.29, "learning_rate": 6.979417824521393e-07, "loss": 0.1457, "step": 7069 }, { "epoch": 2.29, "learning_rate": 6.97335685281412e-07, "loss": 0.155, "step": 7070 }, { "epoch": 2.29, "learning_rate": 6.967298087393471e-07, "loss": 0.1348, "step": 7071 }, { "epoch": 2.29, "learning_rate": 6.96124152900095e-07, "loss": 0.1757, "step": 7072 }, { "epoch": 2.29, "learning_rate": 6.955187178377853e-07, "loss": 0.1514, "step": 7073 }, { "epoch": 2.29, "learning_rate": 6.949135036265153e-07, "loss": 0.1541, "step": 7074 }, { "epoch": 2.29, "learning_rate": 6.943085103403577e-07, "loss": 0.1431, "step": 7075 }, { "epoch": 2.29, "learning_rate": 6.937037380533579e-07, "loss": 0.1409, "step": 7076 }, { "epoch": 2.29, "learning_rate": 6.930991868395343e-07, "loss": 0.1275, "step": 7077 }, { "epoch": 2.29, "learning_rate": 6.924948567728787e-07, "loss": 0.1548, "step": 7078 }, { "epoch": 2.29, "learning_rate": 6.918907479273535e-07, "loss": 0.1568, "step": 7079 }, { "epoch": 2.29, "learning_rate": 6.912868603768979e-07, "loss": 0.1546, "step": 7080 }, { "epoch": 2.29, "learning_rate": 6.906831941954206e-07, "loss": 0.1606, "step": 7081 }, { "epoch": 2.29, "learning_rate": 6.900797494568045e-07, "loss": 0.1447, "step": 7082 }, { "epoch": 2.3, "learning_rate": 6.894765262349056e-07, "loss": 0.1564, "step": 7083 }, { "epoch": 2.3, "learning_rate": 6.88873524603553e-07, "loss": 0.1698, "step": 7084 }, { "epoch": 2.3, "learning_rate": 6.882707446365477e-07, "loss": 0.1506, "step": 7085 }, { "epoch": 2.3, "learning_rate": 6.876681864076646e-07, "loss": 0.1413, "step": 7086 }, { "epoch": 2.3, "learning_rate": 6.870658499906505e-07, "loss": 0.1541, "step": 7087 }, { "epoch": 2.3, "learning_rate": 6.864637354592266e-07, "loss": 0.145, "step": 7088 }, { "epoch": 2.3, "learning_rate": 6.858618428870842e-07, "loss": 0.1568, "step": 7089 }, { "epoch": 2.3, "learning_rate": 6.852601723478902e-07, "loss": 0.1483, "step": 7090 }, { "epoch": 2.3, "learning_rate": 6.84658723915283e-07, "loss": 0.1544, "step": 7091 }, { "epoch": 2.3, "learning_rate": 6.840574976628741e-07, "loss": 0.1494, "step": 7092 }, { "epoch": 2.3, "learning_rate": 6.834564936642488e-07, "loss": 0.1524, "step": 7093 }, { "epoch": 2.3, "learning_rate": 6.828557119929613e-07, "loss": 0.1456, "step": 7094 }, { "epoch": 2.3, "learning_rate": 6.822551527225452e-07, "loss": 0.1684, "step": 7095 }, { "epoch": 2.3, "learning_rate": 6.816548159264993e-07, "loss": 0.1304, "step": 7096 }, { "epoch": 2.3, "learning_rate": 6.810547016783029e-07, "loss": 0.1469, "step": 7097 }, { "epoch": 2.3, "learning_rate": 6.804548100514013e-07, "loss": 0.1357, "step": 7098 }, { "epoch": 2.3, "learning_rate": 6.798551411192165e-07, "loss": 0.1411, "step": 7099 }, { "epoch": 2.3, "learning_rate": 6.792556949551426e-07, "loss": 0.1525, "step": 7100 }, { "epoch": 2.3, "learning_rate": 6.786564716325441e-07, "loss": 0.1666, "step": 7101 }, { "epoch": 2.3, "learning_rate": 6.780574712247632e-07, "loss": 0.1708, "step": 7102 }, { "epoch": 2.3, "learning_rate": 6.774586938051084e-07, "loss": 0.1483, "step": 7103 }, { "epoch": 2.3, "learning_rate": 6.768601394468674e-07, "loss": 0.1455, "step": 7104 }, { "epoch": 2.3, "learning_rate": 6.762618082232952e-07, "loss": 0.1522, "step": 7105 }, { "epoch": 2.3, "learning_rate": 6.756637002076225e-07, "loss": 0.1637, "step": 7106 }, { "epoch": 2.3, "learning_rate": 6.750658154730522e-07, "loss": 0.135, "step": 7107 }, { "epoch": 2.3, "learning_rate": 6.744681540927588e-07, "loss": 0.147, "step": 7108 }, { "epoch": 2.3, "learning_rate": 6.738707161398914e-07, "loss": 0.1528, "step": 7109 }, { "epoch": 2.3, "learning_rate": 6.732735016875697e-07, "loss": 0.1511, "step": 7110 }, { "epoch": 2.3, "learning_rate": 6.726765108088881e-07, "loss": 0.1494, "step": 7111 }, { "epoch": 2.3, "learning_rate": 6.720797435769111e-07, "loss": 0.1671, "step": 7112 }, { "epoch": 2.3, "learning_rate": 6.714832000646778e-07, "loss": 0.1504, "step": 7113 }, { "epoch": 2.31, "learning_rate": 6.708868803451992e-07, "loss": 0.147, "step": 7114 }, { "epoch": 2.31, "learning_rate": 6.702907844914597e-07, "loss": 0.1611, "step": 7115 }, { "epoch": 2.31, "learning_rate": 6.696949125764149e-07, "loss": 0.1548, "step": 7116 }, { "epoch": 2.31, "learning_rate": 6.690992646729949e-07, "loss": 0.1391, "step": 7117 }, { "epoch": 2.31, "learning_rate": 6.685038408540989e-07, "loss": 0.1563, "step": 7118 }, { "epoch": 2.31, "learning_rate": 6.679086411926039e-07, "loss": 0.1704, "step": 7119 }, { "epoch": 2.31, "learning_rate": 6.673136657613547e-07, "loss": 0.1625, "step": 7120 }, { "epoch": 2.31, "learning_rate": 6.667189146331707e-07, "loss": 0.1606, "step": 7121 }, { "epoch": 2.31, "learning_rate": 6.661243878808443e-07, "loss": 0.1591, "step": 7122 }, { "epoch": 2.31, "learning_rate": 6.655300855771393e-07, "loss": 0.1474, "step": 7123 }, { "epoch": 2.31, "learning_rate": 6.649360077947939e-07, "loss": 0.1588, "step": 7124 }, { "epoch": 2.31, "learning_rate": 6.643421546065146e-07, "loss": 0.1534, "step": 7125 }, { "epoch": 2.31, "learning_rate": 6.637485260849866e-07, "loss": 0.1527, "step": 7126 }, { "epoch": 2.31, "learning_rate": 6.63155122302861e-07, "loss": 0.1427, "step": 7127 }, { "epoch": 2.31, "learning_rate": 6.625619433327681e-07, "loss": 0.1489, "step": 7128 }, { "epoch": 2.31, "learning_rate": 6.619689892473046e-07, "loss": 0.157, "step": 7129 }, { "epoch": 2.31, "learning_rate": 6.613762601190435e-07, "loss": 0.1543, "step": 7130 }, { "epoch": 2.31, "learning_rate": 6.60783756020529e-07, "loss": 0.1396, "step": 7131 }, { "epoch": 2.31, "learning_rate": 6.601914770242776e-07, "loss": 0.1552, "step": 7132 }, { "epoch": 2.31, "learning_rate": 6.595994232027794e-07, "loss": 0.1459, "step": 7133 }, { "epoch": 2.31, "learning_rate": 6.590075946284941e-07, "loss": 0.1598, "step": 7134 }, { "epoch": 2.31, "learning_rate": 6.584159913738583e-07, "loss": 0.1457, "step": 7135 }, { "epoch": 2.31, "learning_rate": 6.578246135112765e-07, "loss": 0.1663, "step": 7136 }, { "epoch": 2.31, "learning_rate": 6.572334611131284e-07, "loss": 0.1721, "step": 7137 }, { "epoch": 2.31, "learning_rate": 6.566425342517652e-07, "loss": 0.1487, "step": 7138 }, { "epoch": 2.31, "learning_rate": 6.560518329995108e-07, "loss": 0.151, "step": 7139 }, { "epoch": 2.31, "learning_rate": 6.554613574286614e-07, "loss": 0.1548, "step": 7140 }, { "epoch": 2.31, "learning_rate": 6.548711076114858e-07, "loss": 0.1646, "step": 7141 }, { "epoch": 2.31, "learning_rate": 6.542810836202237e-07, "loss": 0.1384, "step": 7142 }, { "epoch": 2.31, "learning_rate": 6.536912855270894e-07, "loss": 0.1643, "step": 7143 }, { "epoch": 2.31, "learning_rate": 6.531017134042678e-07, "loss": 0.1586, "step": 7144 }, { "epoch": 2.32, "learning_rate": 6.52512367323917e-07, "loss": 0.1624, "step": 7145 }, { "epoch": 2.32, "learning_rate": 6.519232473581675e-07, "loss": 0.1562, "step": 7146 }, { "epoch": 2.32, "learning_rate": 6.513343535791216e-07, "loss": 0.1408, "step": 7147 }, { "epoch": 2.32, "learning_rate": 6.507456860588554e-07, "loss": 0.1509, "step": 7148 }, { "epoch": 2.32, "learning_rate": 6.501572448694135e-07, "loss": 0.1425, "step": 7149 }, { "epoch": 2.32, "learning_rate": 6.495690300828183e-07, "loss": 0.1522, "step": 7150 }, { "epoch": 2.32, "learning_rate": 6.489810417710596e-07, "loss": 0.1636, "step": 7151 }, { "epoch": 2.32, "learning_rate": 6.483932800061021e-07, "loss": 0.1508, "step": 7152 }, { "epoch": 2.32, "learning_rate": 6.478057448598821e-07, "loss": 0.168, "step": 7153 }, { "epoch": 2.32, "learning_rate": 6.472184364043085e-07, "loss": 0.1371, "step": 7154 }, { "epoch": 2.32, "learning_rate": 6.466313547112627e-07, "loss": 0.1357, "step": 7155 }, { "epoch": 2.32, "learning_rate": 6.460444998525953e-07, "loss": 0.1612, "step": 7156 }, { "epoch": 2.32, "learning_rate": 6.454578719001353e-07, "loss": 0.1527, "step": 7157 }, { "epoch": 2.32, "learning_rate": 6.448714709256768e-07, "loss": 0.142, "step": 7158 }, { "epoch": 2.32, "learning_rate": 6.442852970009925e-07, "loss": 0.1541, "step": 7159 }, { "epoch": 2.32, "learning_rate": 6.436993501978226e-07, "loss": 0.1397, "step": 7160 }, { "epoch": 2.32, "learning_rate": 6.431136305878819e-07, "loss": 0.1533, "step": 7161 }, { "epoch": 2.32, "learning_rate": 6.425281382428566e-07, "loss": 0.1587, "step": 7162 }, { "epoch": 2.32, "learning_rate": 6.419428732344055e-07, "loss": 0.1424, "step": 7163 }, { "epoch": 2.32, "learning_rate": 6.413578356341602e-07, "loss": 0.1541, "step": 7164 }, { "epoch": 2.32, "learning_rate": 6.407730255137212e-07, "loss": 0.1484, "step": 7165 }, { "epoch": 2.32, "learning_rate": 6.401884429446667e-07, "loss": 0.1532, "step": 7166 }, { "epoch": 2.32, "learning_rate": 6.396040879985416e-07, "loss": 0.1485, "step": 7167 }, { "epoch": 2.32, "learning_rate": 6.390199607468661e-07, "loss": 0.1415, "step": 7168 }, { "epoch": 2.32, "learning_rate": 6.384360612611317e-07, "loss": 0.1425, "step": 7169 }, { "epoch": 2.32, "learning_rate": 6.378523896128022e-07, "loss": 0.1691, "step": 7170 }, { "epoch": 2.32, "learning_rate": 6.37268945873313e-07, "loss": 0.1558, "step": 7171 }, { "epoch": 2.32, "learning_rate": 6.36685730114073e-07, "loss": 0.1414, "step": 7172 }, { "epoch": 2.32, "learning_rate": 6.361027424064609e-07, "loss": 0.1406, "step": 7173 }, { "epoch": 2.32, "learning_rate": 6.355199828218289e-07, "loss": 0.152, "step": 7174 }, { "epoch": 2.33, "learning_rate": 6.349374514315015e-07, "loss": 0.1362, "step": 7175 }, { "epoch": 2.33, "learning_rate": 6.343551483067751e-07, "loss": 0.156, "step": 7176 }, { "epoch": 2.33, "learning_rate": 6.337730735189174e-07, "loss": 0.1399, "step": 7177 }, { "epoch": 2.33, "learning_rate": 6.331912271391688e-07, "loss": 0.1426, "step": 7178 }, { "epoch": 2.33, "learning_rate": 6.326096092387429e-07, "loss": 0.1573, "step": 7179 }, { "epoch": 2.33, "learning_rate": 6.320282198888217e-07, "loss": 0.1531, "step": 7180 }, { "epoch": 2.33, "learning_rate": 6.314470591605646e-07, "loss": 0.1373, "step": 7181 }, { "epoch": 2.33, "learning_rate": 6.308661271250974e-07, "loss": 0.1445, "step": 7182 }, { "epoch": 2.33, "learning_rate": 6.302854238535219e-07, "loss": 0.1437, "step": 7183 }, { "epoch": 2.33, "learning_rate": 6.2970494941691e-07, "loss": 0.1538, "step": 7184 }, { "epoch": 2.33, "learning_rate": 6.291247038863066e-07, "loss": 0.1387, "step": 7185 }, { "epoch": 2.33, "learning_rate": 6.285446873327289e-07, "loss": 0.1504, "step": 7186 }, { "epoch": 2.33, "learning_rate": 6.279648998271626e-07, "loss": 0.139, "step": 7187 }, { "epoch": 2.33, "learning_rate": 6.273853414405715e-07, "loss": 0.146, "step": 7188 }, { "epoch": 2.33, "learning_rate": 6.268060122438846e-07, "loss": 0.1552, "step": 7189 }, { "epoch": 2.33, "learning_rate": 6.262269123080095e-07, "loss": 0.1476, "step": 7190 }, { "epoch": 2.33, "learning_rate": 6.256480417038202e-07, "loss": 0.1462, "step": 7191 }, { "epoch": 2.33, "learning_rate": 6.250694005021651e-07, "loss": 0.1497, "step": 7192 }, { "epoch": 2.33, "learning_rate": 6.244909887738651e-07, "loss": 0.156, "step": 7193 }, { "epoch": 2.33, "learning_rate": 6.239128065897113e-07, "loss": 0.138, "step": 7194 }, { "epoch": 2.33, "learning_rate": 6.233348540204689e-07, "loss": 0.1291, "step": 7195 }, { "epoch": 2.33, "learning_rate": 6.227571311368724e-07, "loss": 0.1626, "step": 7196 }, { "epoch": 2.33, "learning_rate": 6.221796380096298e-07, "loss": 0.1503, "step": 7197 }, { "epoch": 2.33, "learning_rate": 6.216023747094207e-07, "loss": 0.1508, "step": 7198 }, { "epoch": 2.33, "learning_rate": 6.210253413068964e-07, "loss": 0.1562, "step": 7199 }, { "epoch": 2.33, "learning_rate": 6.20448537872681e-07, "loss": 0.1365, "step": 7200 }, { "epoch": 2.33, "learning_rate": 6.198719644773687e-07, "loss": 0.1489, "step": 7201 }, { "epoch": 2.33, "learning_rate": 6.192956211915269e-07, "loss": 0.1409, "step": 7202 }, { "epoch": 2.33, "learning_rate": 6.187195080856953e-07, "loss": 0.1584, "step": 7203 }, { "epoch": 2.33, "learning_rate": 6.181436252303829e-07, "loss": 0.1416, "step": 7204 }, { "epoch": 2.33, "learning_rate": 6.175679726960731e-07, "loss": 0.1372, "step": 7205 }, { "epoch": 2.34, "learning_rate": 6.169925505532201e-07, "loss": 0.1569, "step": 7206 }, { "epoch": 2.34, "learning_rate": 6.164173588722497e-07, "loss": 0.1399, "step": 7207 }, { "epoch": 2.34, "learning_rate": 6.158423977235611e-07, "loss": 0.1388, "step": 7208 }, { "epoch": 2.34, "learning_rate": 6.152676671775215e-07, "loss": 0.1426, "step": 7209 }, { "epoch": 2.34, "learning_rate": 6.146931673044751e-07, "loss": 0.1461, "step": 7210 }, { "epoch": 2.34, "learning_rate": 6.141188981747323e-07, "loss": 0.1552, "step": 7211 }, { "epoch": 2.34, "learning_rate": 6.135448598585814e-07, "loss": 0.1538, "step": 7212 }, { "epoch": 2.34, "learning_rate": 6.129710524262758e-07, "loss": 0.1526, "step": 7213 }, { "epoch": 2.34, "learning_rate": 6.123974759480469e-07, "loss": 0.1547, "step": 7214 }, { "epoch": 2.34, "learning_rate": 6.118241304940928e-07, "loss": 0.1399, "step": 7215 }, { "epoch": 2.34, "learning_rate": 6.112510161345861e-07, "loss": 0.166, "step": 7216 }, { "epoch": 2.34, "learning_rate": 6.106781329396714e-07, "loss": 0.1564, "step": 7217 }, { "epoch": 2.34, "learning_rate": 6.101054809794615e-07, "loss": 0.1279, "step": 7218 }, { "epoch": 2.34, "learning_rate": 6.095330603240468e-07, "loss": 0.1385, "step": 7219 }, { "epoch": 2.34, "learning_rate": 6.089608710434836e-07, "loss": 0.1557, "step": 7220 }, { "epoch": 2.34, "learning_rate": 6.083889132078033e-07, "loss": 0.1557, "step": 7221 }, { "epoch": 2.34, "learning_rate": 6.078171868870075e-07, "loss": 0.1526, "step": 7222 }, { "epoch": 2.34, "learning_rate": 6.072456921510703e-07, "loss": 0.1647, "step": 7223 }, { "epoch": 2.34, "learning_rate": 6.066744290699372e-07, "loss": 0.1567, "step": 7224 }, { "epoch": 2.34, "learning_rate": 6.061033977135253e-07, "loss": 0.1582, "step": 7225 }, { "epoch": 2.34, "learning_rate": 6.055325981517238e-07, "loss": 0.1354, "step": 7226 }, { "epoch": 2.34, "learning_rate": 6.049620304543916e-07, "loss": 0.1474, "step": 7227 }, { "epoch": 2.34, "learning_rate": 6.043916946913613e-07, "loss": 0.1591, "step": 7228 }, { "epoch": 2.34, "learning_rate": 6.038215909324372e-07, "loss": 0.1506, "step": 7229 }, { "epoch": 2.34, "learning_rate": 6.032517192473935e-07, "loss": 0.1466, "step": 7230 }, { "epoch": 2.34, "learning_rate": 6.026820797059777e-07, "loss": 0.1489, "step": 7231 }, { "epoch": 2.34, "learning_rate": 6.021126723779075e-07, "loss": 0.1549, "step": 7232 }, { "epoch": 2.34, "learning_rate": 6.015434973328735e-07, "loss": 0.1402, "step": 7233 }, { "epoch": 2.34, "learning_rate": 6.009745546405377e-07, "loss": 0.14, "step": 7234 }, { "epoch": 2.34, "learning_rate": 6.00405844370531e-07, "loss": 0.1523, "step": 7235 }, { "epoch": 2.34, "learning_rate": 5.998373665924606e-07, "loss": 0.1456, "step": 7236 }, { "epoch": 2.35, "learning_rate": 5.992691213759011e-07, "loss": 0.1431, "step": 7237 }, { "epoch": 2.35, "learning_rate": 5.987011087904007e-07, "loss": 0.1523, "step": 7238 }, { "epoch": 2.35, "learning_rate": 5.981333289054792e-07, "loss": 0.1476, "step": 7239 }, { "epoch": 2.35, "learning_rate": 5.975657817906253e-07, "loss": 0.1466, "step": 7240 }, { "epoch": 2.35, "learning_rate": 5.96998467515304e-07, "loss": 0.1578, "step": 7241 }, { "epoch": 2.35, "learning_rate": 5.964313861489466e-07, "loss": 0.15, "step": 7242 }, { "epoch": 2.35, "learning_rate": 5.958645377609606e-07, "loss": 0.1319, "step": 7243 }, { "epoch": 2.35, "learning_rate": 5.952979224207205e-07, "loss": 0.1497, "step": 7244 }, { "epoch": 2.35, "learning_rate": 5.947315401975773e-07, "loss": 0.1297, "step": 7245 }, { "epoch": 2.35, "learning_rate": 5.941653911608486e-07, "loss": 0.1514, "step": 7246 }, { "epoch": 2.35, "learning_rate": 5.935994753798258e-07, "loss": 0.1506, "step": 7247 }, { "epoch": 2.35, "learning_rate": 5.930337929237726e-07, "loss": 0.1536, "step": 7248 }, { "epoch": 2.35, "learning_rate": 5.924683438619208e-07, "loss": 0.1542, "step": 7249 }, { "epoch": 2.35, "learning_rate": 5.91903128263479e-07, "loss": 0.1518, "step": 7250 }, { "epoch": 2.35, "learning_rate": 5.913381461976217e-07, "loss": 0.1395, "step": 7251 }, { "epoch": 2.35, "learning_rate": 5.907733977334978e-07, "loss": 0.1351, "step": 7252 }, { "epoch": 2.35, "learning_rate": 5.902088829402274e-07, "loss": 0.1515, "step": 7253 }, { "epoch": 2.35, "learning_rate": 5.896446018869018e-07, "loss": 0.1443, "step": 7254 }, { "epoch": 2.35, "learning_rate": 5.890805546425832e-07, "loss": 0.1406, "step": 7255 }, { "epoch": 2.35, "learning_rate": 5.885167412763051e-07, "loss": 0.1621, "step": 7256 }, { "epoch": 2.35, "learning_rate": 5.879531618570738e-07, "loss": 0.1458, "step": 7257 }, { "epoch": 2.35, "learning_rate": 5.873898164538658e-07, "loss": 0.1477, "step": 7258 }, { "epoch": 2.35, "learning_rate": 5.868267051356283e-07, "loss": 0.1477, "step": 7259 }, { "epoch": 2.35, "learning_rate": 5.86263827971281e-07, "loss": 0.152, "step": 7260 }, { "epoch": 2.35, "learning_rate": 5.857011850297148e-07, "loss": 0.1468, "step": 7261 }, { "epoch": 2.35, "learning_rate": 5.851387763797916e-07, "loss": 0.144, "step": 7262 }, { "epoch": 2.35, "learning_rate": 5.845766020903459e-07, "loss": 0.153, "step": 7263 }, { "epoch": 2.35, "learning_rate": 5.840146622301796e-07, "loss": 0.1521, "step": 7264 }, { "epoch": 2.35, "learning_rate": 5.834529568680722e-07, "loss": 0.1415, "step": 7265 }, { "epoch": 2.35, "learning_rate": 5.828914860727674e-07, "loss": 0.1507, "step": 7266 }, { "epoch": 2.35, "learning_rate": 5.823302499129873e-07, "loss": 0.1326, "step": 7267 }, { "epoch": 2.36, "learning_rate": 5.817692484574197e-07, "loss": 0.1556, "step": 7268 }, { "epoch": 2.36, "learning_rate": 5.81208481774726e-07, "loss": 0.1539, "step": 7269 }, { "epoch": 2.36, "learning_rate": 5.806479499335385e-07, "loss": 0.1443, "step": 7270 }, { "epoch": 2.36, "learning_rate": 5.800876530024615e-07, "loss": 0.153, "step": 7271 }, { "epoch": 2.36, "learning_rate": 5.795275910500703e-07, "loss": 0.147, "step": 7272 }, { "epoch": 2.36, "learning_rate": 5.789677641449087e-07, "loss": 0.1539, "step": 7273 }, { "epoch": 2.36, "learning_rate": 5.784081723554971e-07, "loss": 0.1505, "step": 7274 }, { "epoch": 2.36, "learning_rate": 5.778488157503223e-07, "loss": 0.1405, "step": 7275 }, { "epoch": 2.36, "learning_rate": 5.772896943978446e-07, "loss": 0.1539, "step": 7276 }, { "epoch": 2.36, "learning_rate": 5.767308083664949e-07, "loss": 0.1317, "step": 7277 }, { "epoch": 2.36, "learning_rate": 5.761721577246754e-07, "loss": 0.1515, "step": 7278 }, { "epoch": 2.36, "learning_rate": 5.756137425407598e-07, "loss": 0.1589, "step": 7279 }, { "epoch": 2.36, "learning_rate": 5.750555628830928e-07, "loss": 0.1679, "step": 7280 }, { "epoch": 2.36, "learning_rate": 5.744976188199905e-07, "loss": 0.1458, "step": 7281 }, { "epoch": 2.36, "learning_rate": 5.739399104197388e-07, "loss": 0.1543, "step": 7282 }, { "epoch": 2.36, "learning_rate": 5.733824377505965e-07, "loss": 0.1576, "step": 7283 }, { "epoch": 2.36, "learning_rate": 5.728252008807925e-07, "loss": 0.1522, "step": 7284 }, { "epoch": 2.36, "learning_rate": 5.722681998785273e-07, "loss": 0.1417, "step": 7285 }, { "epoch": 2.36, "learning_rate": 5.717114348119726e-07, "loss": 0.1657, "step": 7286 }, { "epoch": 2.36, "learning_rate": 5.711549057492718e-07, "loss": 0.157, "step": 7287 }, { "epoch": 2.36, "learning_rate": 5.705986127585364e-07, "loss": 0.166, "step": 7288 }, { "epoch": 2.36, "learning_rate": 5.700425559078543e-07, "loss": 0.1473, "step": 7289 }, { "epoch": 2.36, "learning_rate": 5.694867352652791e-07, "loss": 0.1523, "step": 7290 }, { "epoch": 2.36, "learning_rate": 5.689311508988385e-07, "loss": 0.1292, "step": 7291 }, { "epoch": 2.36, "learning_rate": 5.68375802876531e-07, "loss": 0.1488, "step": 7292 }, { "epoch": 2.36, "learning_rate": 5.678206912663259e-07, "loss": 0.1417, "step": 7293 }, { "epoch": 2.36, "learning_rate": 5.672658161361636e-07, "loss": 0.1556, "step": 7294 }, { "epoch": 2.36, "learning_rate": 5.667111775539538e-07, "loss": 0.1423, "step": 7295 }, { "epoch": 2.36, "learning_rate": 5.661567755875816e-07, "loss": 0.1458, "step": 7296 }, { "epoch": 2.36, "learning_rate": 5.656026103048975e-07, "loss": 0.151, "step": 7297 }, { "epoch": 2.36, "learning_rate": 5.650486817737291e-07, "loss": 0.1472, "step": 7298 }, { "epoch": 2.37, "learning_rate": 5.644949900618696e-07, "loss": 0.175, "step": 7299 }, { "epoch": 2.37, "learning_rate": 5.639415352370858e-07, "loss": 0.142, "step": 7300 }, { "epoch": 2.37, "learning_rate": 5.633883173671159e-07, "loss": 0.1581, "step": 7301 }, { "epoch": 2.37, "learning_rate": 5.628353365196682e-07, "loss": 0.1802, "step": 7302 }, { "epoch": 2.37, "learning_rate": 5.622825927624226e-07, "loss": 0.1513, "step": 7303 }, { "epoch": 2.37, "learning_rate": 5.617300861630276e-07, "loss": 0.1532, "step": 7304 }, { "epoch": 2.37, "learning_rate": 5.611778167891077e-07, "loss": 0.1546, "step": 7305 }, { "epoch": 2.37, "learning_rate": 5.60625784708253e-07, "loss": 0.1328, "step": 7306 }, { "epoch": 2.37, "learning_rate": 5.600739899880275e-07, "loss": 0.1454, "step": 7307 }, { "epoch": 2.37, "learning_rate": 5.595224326959662e-07, "loss": 0.154, "step": 7308 }, { "epoch": 2.37, "learning_rate": 5.589711128995734e-07, "loss": 0.1472, "step": 7309 }, { "epoch": 2.37, "learning_rate": 5.584200306663259e-07, "loss": 0.144, "step": 7310 }, { "epoch": 2.37, "learning_rate": 5.578691860636706e-07, "loss": 0.1454, "step": 7311 }, { "epoch": 2.37, "learning_rate": 5.573185791590266e-07, "loss": 0.1457, "step": 7312 }, { "epoch": 2.37, "learning_rate": 5.567682100197808e-07, "loss": 0.1437, "step": 7313 }, { "epoch": 2.37, "learning_rate": 5.562180787132945e-07, "loss": 0.153, "step": 7314 }, { "epoch": 2.37, "learning_rate": 5.55668185306898e-07, "loss": 0.166, "step": 7315 }, { "epoch": 2.37, "learning_rate": 5.551185298678929e-07, "loss": 0.1393, "step": 7316 }, { "epoch": 2.37, "learning_rate": 5.545691124635518e-07, "loss": 0.1482, "step": 7317 }, { "epoch": 2.37, "learning_rate": 5.54019933161119e-07, "loss": 0.1407, "step": 7318 }, { "epoch": 2.37, "learning_rate": 5.534709920278064e-07, "loss": 0.1312, "step": 7319 }, { "epoch": 2.37, "learning_rate": 5.52922289130802e-07, "loss": 0.1429, "step": 7320 }, { "epoch": 2.37, "learning_rate": 5.523738245372596e-07, "loss": 0.1569, "step": 7321 }, { "epoch": 2.37, "learning_rate": 5.518255983143061e-07, "loss": 0.1455, "step": 7322 }, { "epoch": 2.37, "learning_rate": 5.512776105290402e-07, "loss": 0.1397, "step": 7323 }, { "epoch": 2.37, "learning_rate": 5.507298612485293e-07, "loss": 0.1395, "step": 7324 }, { "epoch": 2.37, "learning_rate": 5.501823505398137e-07, "loss": 0.1545, "step": 7325 }, { "epoch": 2.37, "learning_rate": 5.496350784699015e-07, "loss": 0.1429, "step": 7326 }, { "epoch": 2.37, "learning_rate": 5.490880451057759e-07, "loss": 0.1486, "step": 7327 }, { "epoch": 2.37, "learning_rate": 5.485412505143858e-07, "loss": 0.157, "step": 7328 }, { "epoch": 2.37, "learning_rate": 5.479946947626566e-07, "loss": 0.1404, "step": 7329 }, { "epoch": 2.38, "learning_rate": 5.474483779174791e-07, "loss": 0.1533, "step": 7330 }, { "epoch": 2.38, "learning_rate": 5.469023000457183e-07, "loss": 0.1387, "step": 7331 }, { "epoch": 2.38, "learning_rate": 5.463564612142083e-07, "loss": 0.1432, "step": 7332 }, { "epoch": 2.38, "learning_rate": 5.458108614897545e-07, "loss": 0.1539, "step": 7333 }, { "epoch": 2.38, "learning_rate": 5.452655009391341e-07, "loss": 0.1519, "step": 7334 }, { "epoch": 2.38, "learning_rate": 5.447203796290918e-07, "loss": 0.1485, "step": 7335 }, { "epoch": 2.38, "learning_rate": 5.441754976263478e-07, "loss": 0.144, "step": 7336 }, { "epoch": 2.38, "learning_rate": 5.436308549975883e-07, "loss": 0.1468, "step": 7337 }, { "epoch": 2.38, "learning_rate": 5.430864518094731e-07, "loss": 0.1672, "step": 7338 }, { "epoch": 2.38, "learning_rate": 5.425422881286319e-07, "loss": 0.1759, "step": 7339 }, { "epoch": 2.38, "learning_rate": 5.419983640216647e-07, "loss": 0.1294, "step": 7340 }, { "epoch": 2.38, "learning_rate": 5.414546795551429e-07, "loss": 0.1477, "step": 7341 }, { "epoch": 2.38, "learning_rate": 5.409112347956089e-07, "loss": 0.1465, "step": 7342 }, { "epoch": 2.38, "learning_rate": 5.403680298095737e-07, "loss": 0.1446, "step": 7343 }, { "epoch": 2.38, "learning_rate": 5.398250646635209e-07, "loss": 0.1343, "step": 7344 }, { "epoch": 2.38, "learning_rate": 5.392823394239042e-07, "loss": 0.1469, "step": 7345 }, { "epoch": 2.38, "learning_rate": 5.387398541571479e-07, "loss": 0.1341, "step": 7346 }, { "epoch": 2.38, "learning_rate": 5.381976089296467e-07, "loss": 0.1481, "step": 7347 }, { "epoch": 2.38, "learning_rate": 5.376556038077668e-07, "loss": 0.1567, "step": 7348 }, { "epoch": 2.38, "learning_rate": 5.371138388578448e-07, "loss": 0.1524, "step": 7349 }, { "epoch": 2.38, "learning_rate": 5.365723141461851e-07, "loss": 0.149, "step": 7350 }, { "epoch": 2.38, "learning_rate": 5.360310297390681e-07, "loss": 0.1302, "step": 7351 }, { "epoch": 2.38, "learning_rate": 5.354899857027398e-07, "loss": 0.1484, "step": 7352 }, { "epoch": 2.38, "learning_rate": 5.349491821034192e-07, "loss": 0.1497, "step": 7353 }, { "epoch": 2.38, "learning_rate": 5.344086190072955e-07, "loss": 0.1633, "step": 7354 }, { "epoch": 2.38, "learning_rate": 5.338682964805286e-07, "loss": 0.15, "step": 7355 }, { "epoch": 2.38, "learning_rate": 5.333282145892493e-07, "loss": 0.1306, "step": 7356 }, { "epoch": 2.38, "learning_rate": 5.327883733995562e-07, "loss": 0.1495, "step": 7357 }, { "epoch": 2.38, "learning_rate": 5.322487729775233e-07, "loss": 0.1499, "step": 7358 }, { "epoch": 2.38, "learning_rate": 5.317094133891903e-07, "loss": 0.1583, "step": 7359 }, { "epoch": 2.38, "learning_rate": 5.311702947005718e-07, "loss": 0.1634, "step": 7360 }, { "epoch": 2.39, "learning_rate": 5.306314169776486e-07, "loss": 0.1431, "step": 7361 }, { "epoch": 2.39, "learning_rate": 5.30092780286375e-07, "loss": 0.1565, "step": 7362 }, { "epoch": 2.39, "learning_rate": 5.295543846926752e-07, "loss": 0.1462, "step": 7363 }, { "epoch": 2.39, "learning_rate": 5.290162302624433e-07, "loss": 0.1449, "step": 7364 }, { "epoch": 2.39, "learning_rate": 5.284783170615446e-07, "loss": 0.1506, "step": 7365 }, { "epoch": 2.39, "learning_rate": 5.279406451558136e-07, "loss": 0.1505, "step": 7366 }, { "epoch": 2.39, "learning_rate": 5.274032146110567e-07, "loss": 0.15, "step": 7367 }, { "epoch": 2.39, "learning_rate": 5.268660254930499e-07, "loss": 0.1476, "step": 7368 }, { "epoch": 2.39, "learning_rate": 5.263290778675401e-07, "loss": 0.1593, "step": 7369 }, { "epoch": 2.39, "learning_rate": 5.257923718002447e-07, "loss": 0.1483, "step": 7370 }, { "epoch": 2.39, "learning_rate": 5.252559073568514e-07, "loss": 0.1336, "step": 7371 }, { "epoch": 2.39, "learning_rate": 5.247196846030178e-07, "loss": 0.1579, "step": 7372 }, { "epoch": 2.39, "learning_rate": 5.241837036043731e-07, "loss": 0.1604, "step": 7373 }, { "epoch": 2.39, "learning_rate": 5.236479644265153e-07, "loss": 0.1507, "step": 7374 }, { "epoch": 2.39, "learning_rate": 5.231124671350141e-07, "loss": 0.1285, "step": 7375 }, { "epoch": 2.39, "learning_rate": 5.225772117954089e-07, "loss": 0.1516, "step": 7376 }, { "epoch": 2.39, "learning_rate": 5.220421984732104e-07, "loss": 0.1522, "step": 7377 }, { "epoch": 2.39, "learning_rate": 5.215074272338986e-07, "loss": 0.1493, "step": 7378 }, { "epoch": 2.39, "learning_rate": 5.20972898142924e-07, "loss": 0.1532, "step": 7379 }, { "epoch": 2.39, "learning_rate": 5.204386112657095e-07, "loss": 0.1422, "step": 7380 }, { "epoch": 2.39, "learning_rate": 5.199045666676436e-07, "loss": 0.1644, "step": 7381 }, { "epoch": 2.39, "learning_rate": 5.193707644140913e-07, "loss": 0.155, "step": 7382 }, { "epoch": 2.39, "learning_rate": 5.188372045703824e-07, "loss": 0.1395, "step": 7383 }, { "epoch": 2.39, "learning_rate": 5.183038872018215e-07, "loss": 0.1438, "step": 7384 }, { "epoch": 2.39, "learning_rate": 5.1777081237368e-07, "loss": 0.1479, "step": 7385 }, { "epoch": 2.39, "learning_rate": 5.172379801512014e-07, "loss": 0.1607, "step": 7386 }, { "epoch": 2.39, "learning_rate": 5.167053905996003e-07, "loss": 0.1424, "step": 7387 }, { "epoch": 2.39, "learning_rate": 5.161730437840585e-07, "loss": 0.1599, "step": 7388 }, { "epoch": 2.39, "learning_rate": 5.15640939769732e-07, "loss": 0.1529, "step": 7389 }, { "epoch": 2.39, "learning_rate": 5.151090786217433e-07, "loss": 0.1586, "step": 7390 }, { "epoch": 2.4, "learning_rate": 5.145774604051895e-07, "loss": 0.1501, "step": 7391 }, { "epoch": 2.4, "learning_rate": 5.140460851851336e-07, "loss": 0.1373, "step": 7392 }, { "epoch": 2.4, "learning_rate": 5.135149530266112e-07, "loss": 0.143, "step": 7393 }, { "epoch": 2.4, "learning_rate": 5.129840639946279e-07, "loss": 0.1599, "step": 7394 }, { "epoch": 2.4, "learning_rate": 5.124534181541596e-07, "loss": 0.1491, "step": 7395 }, { "epoch": 2.4, "learning_rate": 5.119230155701515e-07, "loss": 0.1404, "step": 7396 }, { "epoch": 2.4, "learning_rate": 5.113928563075213e-07, "loss": 0.1638, "step": 7397 }, { "epoch": 2.4, "learning_rate": 5.108629404311535e-07, "loss": 0.1707, "step": 7398 }, { "epoch": 2.4, "learning_rate": 5.103332680059053e-07, "loss": 0.1461, "step": 7399 }, { "epoch": 2.4, "learning_rate": 5.098038390966039e-07, "loss": 0.1394, "step": 7400 }, { "epoch": 2.4, "learning_rate": 5.09274653768046e-07, "loss": 0.1604, "step": 7401 }, { "epoch": 2.4, "learning_rate": 5.087457120849984e-07, "loss": 0.1473, "step": 7402 }, { "epoch": 2.4, "learning_rate": 5.082170141121992e-07, "loss": 0.152, "step": 7403 }, { "epoch": 2.4, "learning_rate": 5.076885599143558e-07, "loss": 0.1611, "step": 7404 }, { "epoch": 2.4, "learning_rate": 5.071603495561444e-07, "loss": 0.161, "step": 7405 }, { "epoch": 2.4, "learning_rate": 5.066323831022155e-07, "loss": 0.1599, "step": 7406 }, { "epoch": 2.4, "learning_rate": 5.061046606171849e-07, "loss": 0.1405, "step": 7407 }, { "epoch": 2.4, "learning_rate": 5.055771821656416e-07, "loss": 0.1492, "step": 7408 }, { "epoch": 2.4, "learning_rate": 5.05049947812144e-07, "loss": 0.1464, "step": 7409 }, { "epoch": 2.4, "learning_rate": 5.045229576212191e-07, "loss": 0.1432, "step": 7410 }, { "epoch": 2.4, "learning_rate": 5.039962116573676e-07, "loss": 0.1471, "step": 7411 }, { "epoch": 2.4, "learning_rate": 5.034697099850557e-07, "loss": 0.1428, "step": 7412 }, { "epoch": 2.4, "learning_rate": 5.029434526687249e-07, "loss": 0.1354, "step": 7413 }, { "epoch": 2.4, "learning_rate": 5.02417439772781e-07, "loss": 0.1333, "step": 7414 }, { "epoch": 2.4, "learning_rate": 5.01891671361606e-07, "loss": 0.1621, "step": 7415 }, { "epoch": 2.4, "learning_rate": 5.013661474995463e-07, "loss": 0.1499, "step": 7416 }, { "epoch": 2.4, "learning_rate": 5.008408682509219e-07, "loss": 0.1582, "step": 7417 }, { "epoch": 2.4, "learning_rate": 5.003158336800218e-07, "loss": 0.1491, "step": 7418 }, { "epoch": 2.4, "learning_rate": 4.997910438511052e-07, "loss": 0.1489, "step": 7419 }, { "epoch": 2.4, "learning_rate": 4.992664988284021e-07, "loss": 0.1541, "step": 7420 }, { "epoch": 2.4, "learning_rate": 4.987421986761101e-07, "loss": 0.149, "step": 7421 }, { "epoch": 2.41, "learning_rate": 4.982181434583996e-07, "loss": 0.1424, "step": 7422 }, { "epoch": 2.41, "learning_rate": 4.976943332394093e-07, "loss": 0.1446, "step": 7423 }, { "epoch": 2.41, "learning_rate": 4.971707680832491e-07, "loss": 0.1665, "step": 7424 }, { "epoch": 2.41, "learning_rate": 4.966474480539976e-07, "loss": 0.1602, "step": 7425 }, { "epoch": 2.41, "learning_rate": 4.961243732157048e-07, "loss": 0.1381, "step": 7426 }, { "epoch": 2.41, "learning_rate": 4.956015436323897e-07, "loss": 0.146, "step": 7427 }, { "epoch": 2.41, "learning_rate": 4.950789593680422e-07, "loss": 0.1471, "step": 7428 }, { "epoch": 2.41, "learning_rate": 4.945566204866201e-07, "loss": 0.1315, "step": 7429 }, { "epoch": 2.41, "learning_rate": 4.940345270520536e-07, "loss": 0.156, "step": 7430 }, { "epoch": 2.41, "learning_rate": 4.935126791282419e-07, "loss": 0.1518, "step": 7431 }, { "epoch": 2.41, "learning_rate": 4.929910767790536e-07, "loss": 0.1534, "step": 7432 }, { "epoch": 2.41, "learning_rate": 4.92469720068329e-07, "loss": 0.1485, "step": 7433 }, { "epoch": 2.41, "learning_rate": 4.919486090598749e-07, "loss": 0.1505, "step": 7434 }, { "epoch": 2.41, "learning_rate": 4.91427743817473e-07, "loss": 0.1563, "step": 7435 }, { "epoch": 2.41, "learning_rate": 4.909071244048694e-07, "loss": 0.1544, "step": 7436 }, { "epoch": 2.41, "learning_rate": 4.903867508857857e-07, "loss": 0.1542, "step": 7437 }, { "epoch": 2.41, "learning_rate": 4.898666233239083e-07, "loss": 0.1434, "step": 7438 }, { "epoch": 2.41, "learning_rate": 4.893467417828967e-07, "loss": 0.1467, "step": 7439 }, { "epoch": 2.41, "learning_rate": 4.888271063263791e-07, "loss": 0.1375, "step": 7440 }, { "epoch": 2.41, "learning_rate": 4.883077170179542e-07, "loss": 0.1626, "step": 7441 }, { "epoch": 2.41, "learning_rate": 4.877885739211907e-07, "loss": 0.1482, "step": 7442 }, { "epoch": 2.41, "learning_rate": 4.872696770996246e-07, "loss": 0.1461, "step": 7443 }, { "epoch": 2.41, "learning_rate": 4.867510266167669e-07, "loss": 0.1467, "step": 7444 }, { "epoch": 2.41, "learning_rate": 4.862326225360927e-07, "loss": 0.1452, "step": 7445 }, { "epoch": 2.41, "learning_rate": 4.85714464921051e-07, "loss": 0.1355, "step": 7446 }, { "epoch": 2.41, "learning_rate": 4.851965538350589e-07, "loss": 0.1376, "step": 7447 }, { "epoch": 2.41, "learning_rate": 4.846788893415038e-07, "loss": 0.1615, "step": 7448 }, { "epoch": 2.41, "learning_rate": 4.841614715037429e-07, "loss": 0.1499, "step": 7449 }, { "epoch": 2.41, "learning_rate": 4.83644300385103e-07, "loss": 0.1564, "step": 7450 }, { "epoch": 2.41, "learning_rate": 4.831273760488816e-07, "loss": 0.1659, "step": 7451 }, { "epoch": 2.41, "learning_rate": 4.82610698558344e-07, "loss": 0.1454, "step": 7452 }, { "epoch": 2.42, "learning_rate": 4.820942679767268e-07, "loss": 0.1727, "step": 7453 }, { "epoch": 2.42, "learning_rate": 4.815780843672366e-07, "loss": 0.1636, "step": 7454 }, { "epoch": 2.42, "learning_rate": 4.810621477930488e-07, "loss": 0.1422, "step": 7455 }, { "epoch": 2.42, "learning_rate": 4.805464583173094e-07, "loss": 0.1416, "step": 7456 }, { "epoch": 2.42, "learning_rate": 4.800310160031335e-07, "loss": 0.16, "step": 7457 }, { "epoch": 2.42, "learning_rate": 4.795158209136067e-07, "loss": 0.1327, "step": 7458 }, { "epoch": 2.42, "learning_rate": 4.79000873111784e-07, "loss": 0.1433, "step": 7459 }, { "epoch": 2.42, "learning_rate": 4.784861726606893e-07, "loss": 0.1486, "step": 7460 }, { "epoch": 2.42, "learning_rate": 4.779717196233169e-07, "loss": 0.1504, "step": 7461 }, { "epoch": 2.42, "learning_rate": 4.774575140626317e-07, "loss": 0.1521, "step": 7462 }, { "epoch": 2.42, "learning_rate": 4.769435560415666e-07, "loss": 0.1601, "step": 7463 }, { "epoch": 2.42, "learning_rate": 4.764298456230265e-07, "loss": 0.1552, "step": 7464 }, { "epoch": 2.42, "learning_rate": 4.7591638286988234e-07, "loss": 0.1366, "step": 7465 }, { "epoch": 2.42, "learning_rate": 4.754031678449794e-07, "loss": 0.1501, "step": 7466 }, { "epoch": 2.42, "learning_rate": 4.7489020061112805e-07, "loss": 0.1659, "step": 7467 }, { "epoch": 2.42, "learning_rate": 4.743774812311125e-07, "loss": 0.1493, "step": 7468 }, { "epoch": 2.42, "learning_rate": 4.7386500976768337e-07, "loss": 0.1574, "step": 7469 }, { "epoch": 2.42, "learning_rate": 4.733527862835624e-07, "loss": 0.1513, "step": 7470 }, { "epoch": 2.42, "learning_rate": 4.728408108414409e-07, "loss": 0.1416, "step": 7471 }, { "epoch": 2.42, "learning_rate": 4.7232908350397984e-07, "loss": 0.1427, "step": 7472 }, { "epoch": 2.42, "learning_rate": 4.7181760433381017e-07, "loss": 0.153, "step": 7473 }, { "epoch": 2.42, "learning_rate": 4.7130637339352995e-07, "loss": 0.1613, "step": 7474 }, { "epoch": 2.42, "learning_rate": 4.707953907457119e-07, "loss": 0.1572, "step": 7475 }, { "epoch": 2.42, "learning_rate": 4.702846564528929e-07, "loss": 0.1497, "step": 7476 }, { "epoch": 2.42, "learning_rate": 4.6977417057758297e-07, "loss": 0.1531, "step": 7477 }, { "epoch": 2.42, "learning_rate": 4.6926393318226045e-07, "loss": 0.1378, "step": 7478 }, { "epoch": 2.42, "learning_rate": 4.6875394432937345e-07, "loss": 0.1552, "step": 7479 }, { "epoch": 2.42, "learning_rate": 4.6824420408133953e-07, "loss": 0.139, "step": 7480 }, { "epoch": 2.42, "learning_rate": 4.677347125005463e-07, "loss": 0.1483, "step": 7481 }, { "epoch": 2.42, "learning_rate": 4.6722546964935114e-07, "loss": 0.1418, "step": 7482 }, { "epoch": 2.42, "learning_rate": 4.6671647559007884e-07, "loss": 0.1544, "step": 7483 }, { "epoch": 2.43, "learning_rate": 4.6620773038502625e-07, "loss": 0.1444, "step": 7484 }, { "epoch": 2.43, "learning_rate": 4.656992340964589e-07, "loss": 0.1564, "step": 7485 }, { "epoch": 2.43, "learning_rate": 4.651909867866117e-07, "loss": 0.1509, "step": 7486 }, { "epoch": 2.43, "learning_rate": 4.64682988517689e-07, "loss": 0.1487, "step": 7487 }, { "epoch": 2.43, "learning_rate": 4.641752393518661e-07, "loss": 0.1468, "step": 7488 }, { "epoch": 2.43, "learning_rate": 4.6366773935128423e-07, "loss": 0.1549, "step": 7489 }, { "epoch": 2.43, "learning_rate": 4.631604885780591e-07, "loss": 0.1576, "step": 7490 }, { "epoch": 2.43, "learning_rate": 4.6265348709427146e-07, "loss": 0.1414, "step": 7491 }, { "epoch": 2.43, "learning_rate": 4.621467349619738e-07, "loss": 0.1593, "step": 7492 }, { "epoch": 2.43, "learning_rate": 4.6164023224318786e-07, "loss": 0.1417, "step": 7493 }, { "epoch": 2.43, "learning_rate": 4.6113397899990474e-07, "loss": 0.1578, "step": 7494 }, { "epoch": 2.43, "learning_rate": 4.6062797529408537e-07, "loss": 0.1403, "step": 7495 }, { "epoch": 2.43, "learning_rate": 4.6012222118765796e-07, "loss": 0.1536, "step": 7496 }, { "epoch": 2.43, "learning_rate": 4.5961671674252447e-07, "loss": 0.15, "step": 7497 }, { "epoch": 2.43, "learning_rate": 4.5911146202055113e-07, "loss": 0.1504, "step": 7498 }, { "epoch": 2.43, "learning_rate": 4.5860645708357855e-07, "loss": 0.145, "step": 7499 }, { "epoch": 2.43, "learning_rate": 4.581017019934131e-07, "loss": 0.1595, "step": 7500 }, { "epoch": 2.43, "learning_rate": 4.57597196811832e-07, "loss": 0.1453, "step": 7501 }, { "epoch": 2.43, "learning_rate": 4.5709294160058204e-07, "loss": 0.1461, "step": 7502 }, { "epoch": 2.43, "learning_rate": 4.565889364213791e-07, "loss": 0.1571, "step": 7503 }, { "epoch": 2.43, "learning_rate": 4.5608518133590933e-07, "loss": 0.1504, "step": 7504 }, { "epoch": 2.43, "learning_rate": 4.5558167640582545e-07, "loss": 0.15, "step": 7505 }, { "epoch": 2.43, "learning_rate": 4.550784216927542e-07, "loss": 0.1433, "step": 7506 }, { "epoch": 2.43, "learning_rate": 4.5457541725828696e-07, "loss": 0.1365, "step": 7507 }, { "epoch": 2.43, "learning_rate": 4.5407266316398745e-07, "loss": 0.1404, "step": 7508 }, { "epoch": 2.43, "learning_rate": 4.5357015947138786e-07, "loss": 0.1612, "step": 7509 }, { "epoch": 2.43, "learning_rate": 4.530679062419899e-07, "loss": 0.154, "step": 7510 }, { "epoch": 2.43, "learning_rate": 4.5256590353726426e-07, "loss": 0.14, "step": 7511 }, { "epoch": 2.43, "learning_rate": 4.520641514186522e-07, "loss": 0.153, "step": 7512 }, { "epoch": 2.43, "learning_rate": 4.5156264994756144e-07, "loss": 0.1485, "step": 7513 }, { "epoch": 2.43, "learning_rate": 4.510613991853721e-07, "loss": 0.1493, "step": 7514 }, { "epoch": 2.44, "learning_rate": 4.5056039919343236e-07, "loss": 0.1597, "step": 7515 }, { "epoch": 2.44, "learning_rate": 4.5005965003305953e-07, "loss": 0.1496, "step": 7516 }, { "epoch": 2.44, "learning_rate": 4.4955915176554065e-07, "loss": 0.1427, "step": 7517 }, { "epoch": 2.44, "learning_rate": 4.490589044521315e-07, "loss": 0.153, "step": 7518 }, { "epoch": 2.44, "learning_rate": 4.4855890815405867e-07, "loss": 0.1601, "step": 7519 }, { "epoch": 2.44, "learning_rate": 4.4805916293251486e-07, "loss": 0.1429, "step": 7520 }, { "epoch": 2.44, "learning_rate": 4.4755966884866606e-07, "loss": 0.1435, "step": 7521 }, { "epoch": 2.44, "learning_rate": 4.470604259636438e-07, "loss": 0.1538, "step": 7522 }, { "epoch": 2.44, "learning_rate": 4.465614343385524e-07, "loss": 0.1562, "step": 7523 }, { "epoch": 2.44, "learning_rate": 4.46062694034462e-07, "loss": 0.1362, "step": 7524 }, { "epoch": 2.44, "learning_rate": 4.455642051124143e-07, "loss": 0.1477, "step": 7525 }, { "epoch": 2.44, "learning_rate": 4.4506596763341985e-07, "loss": 0.1377, "step": 7526 }, { "epoch": 2.44, "learning_rate": 4.445679816584567e-07, "loss": 0.1572, "step": 7527 }, { "epoch": 2.44, "learning_rate": 4.4407024724847534e-07, "loss": 0.1581, "step": 7528 }, { "epoch": 2.44, "learning_rate": 4.4357276446439197e-07, "loss": 0.1445, "step": 7529 }, { "epoch": 2.44, "learning_rate": 4.4307553336709525e-07, "loss": 0.1487, "step": 7530 }, { "epoch": 2.44, "learning_rate": 4.4257855401744044e-07, "loss": 0.1514, "step": 7531 }, { "epoch": 2.44, "learning_rate": 4.42081826476253e-07, "loss": 0.1373, "step": 7532 }, { "epoch": 2.44, "learning_rate": 4.4158535080432803e-07, "loss": 0.152, "step": 7533 }, { "epoch": 2.44, "learning_rate": 4.4108912706242876e-07, "loss": 0.1445, "step": 7534 }, { "epoch": 2.44, "learning_rate": 4.405931553112894e-07, "loss": 0.1426, "step": 7535 }, { "epoch": 2.44, "learning_rate": 4.4009743561161e-07, "loss": 0.1459, "step": 7536 }, { "epoch": 2.44, "learning_rate": 4.396019680240643e-07, "loss": 0.1411, "step": 7537 }, { "epoch": 2.44, "learning_rate": 4.3910675260929096e-07, "loss": 0.1546, "step": 7538 }, { "epoch": 2.44, "learning_rate": 4.386117894278999e-07, "loss": 0.1452, "step": 7539 }, { "epoch": 2.44, "learning_rate": 4.381170785404704e-07, "loss": 0.1459, "step": 7540 }, { "epoch": 2.44, "learning_rate": 4.376226200075495e-07, "loss": 0.1432, "step": 7541 }, { "epoch": 2.44, "learning_rate": 4.3712841388965476e-07, "loss": 0.1561, "step": 7542 }, { "epoch": 2.44, "learning_rate": 4.3663446024727247e-07, "loss": 0.1573, "step": 7543 }, { "epoch": 2.44, "learning_rate": 4.3614075914085617e-07, "loss": 0.1445, "step": 7544 }, { "epoch": 2.44, "learning_rate": 4.356473106308326e-07, "loss": 0.1455, "step": 7545 }, { "epoch": 2.45, "learning_rate": 4.351541147775931e-07, "loss": 0.1486, "step": 7546 }, { "epoch": 2.45, "learning_rate": 4.346611716415006e-07, "loss": 0.1411, "step": 7547 }, { "epoch": 2.45, "learning_rate": 4.341684812828867e-07, "loss": 0.1437, "step": 7548 }, { "epoch": 2.45, "learning_rate": 4.336760437620519e-07, "loss": 0.1583, "step": 7549 }, { "epoch": 2.45, "learning_rate": 4.331838591392662e-07, "loss": 0.1521, "step": 7550 }, { "epoch": 2.45, "learning_rate": 4.326919274747668e-07, "loss": 0.1326, "step": 7551 }, { "epoch": 2.45, "learning_rate": 4.322002488287635e-07, "loss": 0.1342, "step": 7552 }, { "epoch": 2.45, "learning_rate": 4.317088232614308e-07, "loss": 0.1486, "step": 7553 }, { "epoch": 2.45, "learning_rate": 4.3121765083291663e-07, "loss": 0.1476, "step": 7554 }, { "epoch": 2.45, "learning_rate": 4.307267316033342e-07, "loss": 0.1575, "step": 7555 }, { "epoch": 2.45, "learning_rate": 4.3023606563276753e-07, "loss": 0.1606, "step": 7556 }, { "epoch": 2.45, "learning_rate": 4.297456529812702e-07, "loss": 0.1439, "step": 7557 }, { "epoch": 2.45, "learning_rate": 4.292554937088622e-07, "loss": 0.1361, "step": 7558 }, { "epoch": 2.45, "learning_rate": 4.287655878755365e-07, "loss": 0.1525, "step": 7559 }, { "epoch": 2.45, "learning_rate": 4.282759355412505e-07, "loss": 0.1472, "step": 7560 }, { "epoch": 2.45, "learning_rate": 4.2778653676593534e-07, "loss": 0.1393, "step": 7561 }, { "epoch": 2.45, "learning_rate": 4.272973916094872e-07, "loss": 0.1387, "step": 7562 }, { "epoch": 2.45, "learning_rate": 4.268085001317726e-07, "loss": 0.1564, "step": 7563 }, { "epoch": 2.45, "learning_rate": 4.263198623926279e-07, "loss": 0.1523, "step": 7564 }, { "epoch": 2.45, "learning_rate": 4.258314784518569e-07, "loss": 0.135, "step": 7565 }, { "epoch": 2.45, "learning_rate": 4.253433483692337e-07, "loss": 0.1505, "step": 7566 }, { "epoch": 2.45, "learning_rate": 4.248554722045009e-07, "loss": 0.1533, "step": 7567 }, { "epoch": 2.45, "learning_rate": 4.2436785001736896e-07, "loss": 0.1631, "step": 7568 }, { "epoch": 2.45, "learning_rate": 4.2388048186751823e-07, "loss": 0.1353, "step": 7569 }, { "epoch": 2.45, "learning_rate": 4.233933678145982e-07, "loss": 0.1515, "step": 7570 }, { "epoch": 2.45, "learning_rate": 4.229065079182268e-07, "loss": 0.1387, "step": 7571 }, { "epoch": 2.45, "learning_rate": 4.224199022379913e-07, "loss": 0.1488, "step": 7572 }, { "epoch": 2.45, "learning_rate": 4.2193355083344684e-07, "loss": 0.1725, "step": 7573 }, { "epoch": 2.45, "learning_rate": 4.2144745376411946e-07, "loss": 0.1493, "step": 7574 }, { "epoch": 2.45, "learning_rate": 4.2096161108950015e-07, "loss": 0.1515, "step": 7575 }, { "epoch": 2.45, "learning_rate": 4.204760228690546e-07, "loss": 0.1442, "step": 7576 }, { "epoch": 2.46, "learning_rate": 4.1999068916221184e-07, "loss": 0.1537, "step": 7577 }, { "epoch": 2.46, "learning_rate": 4.1950561002837257e-07, "loss": 0.134, "step": 7578 }, { "epoch": 2.46, "learning_rate": 4.1902078552690573e-07, "loss": 0.1458, "step": 7579 }, { "epoch": 2.46, "learning_rate": 4.185362157171496e-07, "loss": 0.1486, "step": 7580 }, { "epoch": 2.46, "learning_rate": 4.1805190065841107e-07, "loss": 0.1407, "step": 7581 }, { "epoch": 2.46, "learning_rate": 4.175678404099637e-07, "loss": 0.1573, "step": 7582 }, { "epoch": 2.46, "learning_rate": 4.1708403503105456e-07, "loss": 0.1543, "step": 7583 }, { "epoch": 2.46, "learning_rate": 4.166004845808941e-07, "loss": 0.143, "step": 7584 }, { "epoch": 2.46, "learning_rate": 4.1611718911866663e-07, "loss": 0.1436, "step": 7585 }, { "epoch": 2.46, "learning_rate": 4.1563414870352093e-07, "loss": 0.1496, "step": 7586 }, { "epoch": 2.46, "learning_rate": 4.1515136339457725e-07, "loss": 0.1584, "step": 7587 }, { "epoch": 2.46, "learning_rate": 4.146688332509241e-07, "loss": 0.1497, "step": 7588 }, { "epoch": 2.46, "learning_rate": 4.1418655833161794e-07, "loss": 0.1506, "step": 7589 }, { "epoch": 2.46, "learning_rate": 4.137045386956853e-07, "loss": 0.1367, "step": 7590 }, { "epoch": 2.46, "learning_rate": 4.1322277440211973e-07, "loss": 0.1365, "step": 7591 }, { "epoch": 2.46, "learning_rate": 4.1274126550988505e-07, "loss": 0.1578, "step": 7592 }, { "epoch": 2.46, "learning_rate": 4.1226001207791327e-07, "loss": 0.1398, "step": 7593 }, { "epoch": 2.46, "learning_rate": 4.1177901416510485e-07, "loss": 0.1523, "step": 7594 }, { "epoch": 2.46, "learning_rate": 4.112982718303299e-07, "loss": 0.1455, "step": 7595 }, { "epoch": 2.46, "learning_rate": 4.1081778513242606e-07, "loss": 0.1573, "step": 7596 }, { "epoch": 2.46, "learning_rate": 4.103375541302007e-07, "loss": 0.1695, "step": 7597 }, { "epoch": 2.46, "learning_rate": 4.0985757888242965e-07, "loss": 0.1594, "step": 7598 }, { "epoch": 2.46, "learning_rate": 4.0937785944785617e-07, "loss": 0.1528, "step": 7599 }, { "epoch": 2.46, "learning_rate": 4.0889839588519386e-07, "loss": 0.1477, "step": 7600 }, { "epoch": 2.46, "learning_rate": 4.0841918825312465e-07, "loss": 0.1534, "step": 7601 }, { "epoch": 2.46, "learning_rate": 4.0794023661029856e-07, "loss": 0.161, "step": 7602 }, { "epoch": 2.46, "learning_rate": 4.0746154101533485e-07, "loss": 0.1517, "step": 7603 }, { "epoch": 2.46, "learning_rate": 4.0698310152682107e-07, "loss": 0.1538, "step": 7604 }, { "epoch": 2.46, "learning_rate": 4.065049182033146e-07, "loss": 0.1539, "step": 7605 }, { "epoch": 2.46, "learning_rate": 4.0602699110333795e-07, "loss": 0.1554, "step": 7606 }, { "epoch": 2.47, "learning_rate": 4.0554932028538774e-07, "loss": 0.1395, "step": 7607 }, { "epoch": 2.47, "learning_rate": 4.050719058079244e-07, "loss": 0.1548, "step": 7608 }, { "epoch": 2.47, "learning_rate": 4.045947477293791e-07, "loss": 0.1526, "step": 7609 }, { "epoch": 2.47, "learning_rate": 4.041178461081519e-07, "loss": 0.1534, "step": 7610 }, { "epoch": 2.47, "learning_rate": 4.036412010026103e-07, "loss": 0.1605, "step": 7611 }, { "epoch": 2.47, "learning_rate": 4.0316481247109215e-07, "loss": 0.1378, "step": 7612 }, { "epoch": 2.47, "learning_rate": 4.0268868057190075e-07, "loss": 0.1568, "step": 7613 }, { "epoch": 2.47, "learning_rate": 4.022128053633123e-07, "loss": 0.1636, "step": 7614 }, { "epoch": 2.47, "learning_rate": 4.017371869035674e-07, "loss": 0.1407, "step": 7615 }, { "epoch": 2.47, "learning_rate": 4.01261825250879e-07, "loss": 0.1538, "step": 7616 }, { "epoch": 2.47, "learning_rate": 4.0078672046342553e-07, "loss": 0.1438, "step": 7617 }, { "epoch": 2.47, "learning_rate": 4.0031187259935546e-07, "loss": 0.1402, "step": 7618 }, { "epoch": 2.47, "learning_rate": 3.998372817167856e-07, "loss": 0.1439, "step": 7619 }, { "epoch": 2.47, "learning_rate": 3.993629478738012e-07, "loss": 0.1627, "step": 7620 }, { "epoch": 2.47, "learning_rate": 3.988888711284569e-07, "loss": 0.143, "step": 7621 }, { "epoch": 2.47, "learning_rate": 3.9841505153877387e-07, "loss": 0.1459, "step": 7622 }, { "epoch": 2.47, "learning_rate": 3.9794148916274365e-07, "loss": 0.1568, "step": 7623 }, { "epoch": 2.47, "learning_rate": 3.974681840583255e-07, "loss": 0.1365, "step": 7624 }, { "epoch": 2.47, "learning_rate": 3.969951362834476e-07, "loss": 0.1531, "step": 7625 }, { "epoch": 2.47, "learning_rate": 3.965223458960063e-07, "loss": 0.1436, "step": 7626 }, { "epoch": 2.47, "learning_rate": 3.9604981295386673e-07, "loss": 0.1485, "step": 7627 }, { "epoch": 2.47, "learning_rate": 3.9557753751486237e-07, "loss": 0.1503, "step": 7628 }, { "epoch": 2.47, "learning_rate": 3.9510551963679534e-07, "loss": 0.1363, "step": 7629 }, { "epoch": 2.47, "learning_rate": 3.9463375937743546e-07, "loss": 0.1571, "step": 7630 }, { "epoch": 2.47, "learning_rate": 3.941622567945216e-07, "loss": 0.1568, "step": 7631 }, { "epoch": 2.47, "learning_rate": 3.9369101194576156e-07, "loss": 0.1516, "step": 7632 }, { "epoch": 2.47, "learning_rate": 3.93220024888831e-07, "loss": 0.1379, "step": 7633 }, { "epoch": 2.47, "learning_rate": 3.927492956813747e-07, "loss": 0.1472, "step": 7634 }, { "epoch": 2.47, "learning_rate": 3.922788243810038e-07, "loss": 0.1427, "step": 7635 }, { "epoch": 2.47, "learning_rate": 3.918086110453015e-07, "loss": 0.147, "step": 7636 }, { "epoch": 2.47, "learning_rate": 3.9133865573181524e-07, "loss": 0.15, "step": 7637 }, { "epoch": 2.48, "learning_rate": 3.9086895849806547e-07, "loss": 0.1468, "step": 7638 }, { "epoch": 2.48, "learning_rate": 3.903995194015364e-07, "loss": 0.1406, "step": 7639 }, { "epoch": 2.48, "learning_rate": 3.899303384996836e-07, "loss": 0.1596, "step": 7640 }, { "epoch": 2.48, "learning_rate": 3.894614158499302e-07, "loss": 0.1325, "step": 7641 }, { "epoch": 2.48, "learning_rate": 3.889927515096681e-07, "loss": 0.1386, "step": 7642 }, { "epoch": 2.48, "learning_rate": 3.885243455362578e-07, "loss": 0.1519, "step": 7643 }, { "epoch": 2.48, "learning_rate": 3.8805619798702565e-07, "loss": 0.1562, "step": 7644 }, { "epoch": 2.48, "learning_rate": 3.8758830891927056e-07, "loss": 0.1312, "step": 7645 }, { "epoch": 2.48, "learning_rate": 3.8712067839025647e-07, "loss": 0.1661, "step": 7646 }, { "epoch": 2.48, "learning_rate": 3.86653306457217e-07, "loss": 0.1475, "step": 7647 }, { "epoch": 2.48, "learning_rate": 3.861861931773542e-07, "loss": 0.1378, "step": 7648 }, { "epoch": 2.48, "learning_rate": 3.8571933860783785e-07, "loss": 0.1407, "step": 7649 }, { "epoch": 2.48, "learning_rate": 3.8525274280580646e-07, "loss": 0.1392, "step": 7650 }, { "epoch": 2.48, "learning_rate": 3.8478640582836733e-07, "loss": 0.1609, "step": 7651 }, { "epoch": 2.48, "learning_rate": 3.8432032773259574e-07, "loss": 0.1459, "step": 7652 }, { "epoch": 2.48, "learning_rate": 3.838545085755341e-07, "loss": 0.1747, "step": 7653 }, { "epoch": 2.48, "learning_rate": 3.8338894841419476e-07, "loss": 0.1665, "step": 7654 }, { "epoch": 2.48, "learning_rate": 3.8292364730555754e-07, "loss": 0.1537, "step": 7655 }, { "epoch": 2.48, "learning_rate": 3.8245860530657126e-07, "loss": 0.1413, "step": 7656 }, { "epoch": 2.48, "learning_rate": 3.8199382247415236e-07, "loss": 0.1537, "step": 7657 }, { "epoch": 2.48, "learning_rate": 3.8152929886518587e-07, "loss": 0.1573, "step": 7658 }, { "epoch": 2.48, "learning_rate": 3.810650345365241e-07, "loss": 0.14, "step": 7659 }, { "epoch": 2.48, "learning_rate": 3.8060102954499024e-07, "loss": 0.1508, "step": 7660 }, { "epoch": 2.48, "learning_rate": 3.8013728394737216e-07, "loss": 0.1508, "step": 7661 }, { "epoch": 2.48, "learning_rate": 3.796737978004289e-07, "loss": 0.149, "step": 7662 }, { "epoch": 2.48, "learning_rate": 3.792105711608865e-07, "loss": 0.1604, "step": 7663 }, { "epoch": 2.48, "learning_rate": 3.7874760408543933e-07, "loss": 0.1479, "step": 7664 }, { "epoch": 2.48, "learning_rate": 3.7828489663075065e-07, "loss": 0.1477, "step": 7665 }, { "epoch": 2.48, "learning_rate": 3.778224488534496e-07, "loss": 0.1268, "step": 7666 }, { "epoch": 2.48, "learning_rate": 3.773602608101376e-07, "loss": 0.1351, "step": 7667 }, { "epoch": 2.48, "learning_rate": 3.7689833255737995e-07, "loss": 0.1665, "step": 7668 }, { "epoch": 2.49, "learning_rate": 3.764366641517145e-07, "loss": 0.1541, "step": 7669 }, { "epoch": 2.49, "learning_rate": 3.759752556496421e-07, "loss": 0.1593, "step": 7670 }, { "epoch": 2.49, "learning_rate": 3.7551410710763764e-07, "loss": 0.1581, "step": 7671 }, { "epoch": 2.49, "learning_rate": 3.7505321858213926e-07, "loss": 0.1337, "step": 7672 }, { "epoch": 2.49, "learning_rate": 3.7459259012955606e-07, "loss": 0.1444, "step": 7673 }, { "epoch": 2.49, "learning_rate": 3.7413222180626455e-07, "loss": 0.1485, "step": 7674 }, { "epoch": 2.49, "learning_rate": 3.736721136686081e-07, "loss": 0.1658, "step": 7675 }, { "epoch": 2.49, "learning_rate": 3.7321226577290147e-07, "loss": 0.1503, "step": 7676 }, { "epoch": 2.49, "learning_rate": 3.7275267817542425e-07, "loss": 0.151, "step": 7677 }, { "epoch": 2.49, "learning_rate": 3.7229335093242587e-07, "loss": 0.1589, "step": 7678 }, { "epoch": 2.49, "learning_rate": 3.7183428410012326e-07, "loss": 0.1597, "step": 7679 }, { "epoch": 2.49, "learning_rate": 3.713754777347023e-07, "loss": 0.1685, "step": 7680 }, { "epoch": 2.49, "learning_rate": 3.7091693189231615e-07, "loss": 0.1553, "step": 7681 }, { "epoch": 2.49, "learning_rate": 3.704586466290863e-07, "loss": 0.144, "step": 7682 }, { "epoch": 2.49, "learning_rate": 3.7000062200110266e-07, "loss": 0.1495, "step": 7683 }, { "epoch": 2.49, "learning_rate": 3.6954285806442337e-07, "loss": 0.1575, "step": 7684 }, { "epoch": 2.49, "learning_rate": 3.6908535487507335e-07, "loss": 0.1467, "step": 7685 }, { "epoch": 2.49, "learning_rate": 3.68628112489047e-07, "loss": 0.1527, "step": 7686 }, { "epoch": 2.49, "learning_rate": 3.681711309623065e-07, "loss": 0.1607, "step": 7687 }, { "epoch": 2.49, "learning_rate": 3.677144103507818e-07, "loss": 0.1641, "step": 7688 }, { "epoch": 2.49, "learning_rate": 3.672579507103716e-07, "loss": 0.1512, "step": 7689 }, { "epoch": 2.49, "learning_rate": 3.668017520969405e-07, "loss": 0.1484, "step": 7690 }, { "epoch": 2.49, "learning_rate": 3.663458145663254e-07, "loss": 0.1343, "step": 7691 }, { "epoch": 2.49, "learning_rate": 3.65890138174326e-07, "loss": 0.1433, "step": 7692 }, { "epoch": 2.49, "learning_rate": 3.6543472297671495e-07, "loss": 0.1427, "step": 7693 }, { "epoch": 2.49, "learning_rate": 3.6497956902922904e-07, "loss": 0.1424, "step": 7694 }, { "epoch": 2.49, "learning_rate": 3.645246763875754e-07, "loss": 0.144, "step": 7695 }, { "epoch": 2.49, "learning_rate": 3.640700451074289e-07, "loss": 0.1648, "step": 7696 }, { "epoch": 2.49, "learning_rate": 3.636156752444303e-07, "loss": 0.1379, "step": 7697 }, { "epoch": 2.49, "learning_rate": 3.631615668541921e-07, "loss": 0.1504, "step": 7698 }, { "epoch": 2.49, "learning_rate": 3.6270771999229124e-07, "loss": 0.1528, "step": 7699 }, { "epoch": 2.5, "learning_rate": 3.622541347142758e-07, "loss": 0.1537, "step": 7700 }, { "epoch": 2.5, "learning_rate": 3.618008110756588e-07, "loss": 0.1599, "step": 7701 }, { "epoch": 2.5, "learning_rate": 3.6134774913192314e-07, "loss": 0.1587, "step": 7702 }, { "epoch": 2.5, "learning_rate": 3.608949489385191e-07, "loss": 0.1457, "step": 7703 }, { "epoch": 2.5, "learning_rate": 3.6044241055086525e-07, "loss": 0.1481, "step": 7704 }, { "epoch": 2.5, "learning_rate": 3.599901340243478e-07, "loss": 0.1476, "step": 7705 }, { "epoch": 2.5, "learning_rate": 3.5953811941432104e-07, "loss": 0.1432, "step": 7706 }, { "epoch": 2.5, "learning_rate": 3.590863667761077e-07, "loss": 0.1396, "step": 7707 }, { "epoch": 2.5, "learning_rate": 3.5863487616499713e-07, "loss": 0.1519, "step": 7708 }, { "epoch": 2.5, "learning_rate": 3.581836476362474e-07, "loss": 0.1643, "step": 7709 }, { "epoch": 2.5, "learning_rate": 3.5773268124508485e-07, "loss": 0.1477, "step": 7710 }, { "epoch": 2.5, "learning_rate": 3.5728197704670344e-07, "loss": 0.1573, "step": 7711 }, { "epoch": 2.5, "learning_rate": 3.5683153509626504e-07, "loss": 0.1585, "step": 7712 }, { "epoch": 2.5, "learning_rate": 3.563813554488996e-07, "loss": 0.1563, "step": 7713 }, { "epoch": 2.5, "learning_rate": 3.559314381597034e-07, "loss": 0.1472, "step": 7714 }, { "epoch": 2.5, "learning_rate": 3.55481783283744e-07, "loss": 0.1591, "step": 7715 }, { "epoch": 2.5, "learning_rate": 3.5503239087605337e-07, "loss": 0.1267, "step": 7716 }, { "epoch": 2.5, "learning_rate": 3.54583260991633e-07, "loss": 0.1538, "step": 7717 }, { "epoch": 2.5, "learning_rate": 3.541343936854524e-07, "loss": 0.1573, "step": 7718 }, { "epoch": 2.5, "learning_rate": 3.5368578901244843e-07, "loss": 0.1474, "step": 7719 }, { "epoch": 2.5, "learning_rate": 3.5323744702752657e-07, "loss": 0.1677, "step": 7720 }, { "epoch": 2.5, "learning_rate": 3.5278936778555763e-07, "loss": 0.1412, "step": 7721 }, { "epoch": 2.5, "learning_rate": 3.523415513413847e-07, "loss": 0.1608, "step": 7722 }, { "epoch": 2.5, "learning_rate": 3.518939977498137e-07, "loss": 0.1456, "step": 7723 }, { "epoch": 2.5, "learning_rate": 3.514467070656233e-07, "loss": 0.1653, "step": 7724 }, { "epoch": 2.5, "learning_rate": 3.509996793435558e-07, "loss": 0.1486, "step": 7725 }, { "epoch": 2.5, "learning_rate": 3.505529146383235e-07, "loss": 0.148, "step": 7726 }, { "epoch": 2.5, "learning_rate": 3.501064130046064e-07, "loss": 0.1336, "step": 7727 }, { "epoch": 2.5, "learning_rate": 3.496601744970518e-07, "loss": 0.1572, "step": 7728 }, { "epoch": 2.5, "learning_rate": 3.492141991702752e-07, "loss": 0.1276, "step": 7729 }, { "epoch": 2.5, "learning_rate": 3.4876848707885854e-07, "loss": 0.1397, "step": 7730 }, { "epoch": 2.51, "learning_rate": 3.483230382773545e-07, "loss": 0.1598, "step": 7731 }, { "epoch": 2.51, "learning_rate": 3.478778528202803e-07, "loss": 0.1503, "step": 7732 }, { "epoch": 2.51, "learning_rate": 3.474329307621227e-07, "loss": 0.1583, "step": 7733 }, { "epoch": 2.51, "learning_rate": 3.469882721573356e-07, "loss": 0.1403, "step": 7734 }, { "epoch": 2.51, "learning_rate": 3.465438770603416e-07, "loss": 0.1509, "step": 7735 }, { "epoch": 2.51, "learning_rate": 3.4609974552552993e-07, "loss": 0.1271, "step": 7736 }, { "epoch": 2.51, "learning_rate": 3.456558776072585e-07, "loss": 0.1424, "step": 7737 }, { "epoch": 2.51, "learning_rate": 3.4521227335985146e-07, "loss": 0.1597, "step": 7738 }, { "epoch": 2.51, "learning_rate": 3.447689328376022e-07, "loss": 0.137, "step": 7739 }, { "epoch": 2.51, "learning_rate": 3.4432585609477125e-07, "loss": 0.155, "step": 7740 }, { "epoch": 2.51, "learning_rate": 3.438830431855872e-07, "loss": 0.1687, "step": 7741 }, { "epoch": 2.51, "learning_rate": 3.434404941642455e-07, "loss": 0.1309, "step": 7742 }, { "epoch": 2.51, "learning_rate": 3.4299820908491045e-07, "loss": 0.1533, "step": 7743 }, { "epoch": 2.51, "learning_rate": 3.4255618800171366e-07, "loss": 0.1432, "step": 7744 }, { "epoch": 2.51, "learning_rate": 3.421144309687527e-07, "loss": 0.1291, "step": 7745 }, { "epoch": 2.51, "learning_rate": 3.4167293804009656e-07, "loss": 0.1533, "step": 7746 }, { "epoch": 2.51, "learning_rate": 3.412317092697781e-07, "loss": 0.1558, "step": 7747 }, { "epoch": 2.51, "learning_rate": 3.407907447117997e-07, "loss": 0.1471, "step": 7748 }, { "epoch": 2.51, "learning_rate": 3.4035004442013157e-07, "loss": 0.1534, "step": 7749 }, { "epoch": 2.51, "learning_rate": 3.399096084487108e-07, "loss": 0.132, "step": 7750 }, { "epoch": 2.51, "learning_rate": 3.394694368514434e-07, "loss": 0.1426, "step": 7751 }, { "epoch": 2.51, "learning_rate": 3.390295296822002e-07, "loss": 0.1299, "step": 7752 }, { "epoch": 2.51, "learning_rate": 3.3858988699482397e-07, "loss": 0.1472, "step": 7753 }, { "epoch": 2.51, "learning_rate": 3.381505088431203e-07, "loss": 0.1399, "step": 7754 }, { "epoch": 2.51, "learning_rate": 3.377113952808669e-07, "loss": 0.1487, "step": 7755 }, { "epoch": 2.51, "learning_rate": 3.3727254636180597e-07, "loss": 0.1525, "step": 7756 }, { "epoch": 2.51, "learning_rate": 3.3683396213964826e-07, "loss": 0.1474, "step": 7757 }, { "epoch": 2.51, "learning_rate": 3.363956426680728e-07, "loss": 0.1369, "step": 7758 }, { "epoch": 2.51, "learning_rate": 3.3595758800072515e-07, "loss": 0.1405, "step": 7759 }, { "epoch": 2.51, "learning_rate": 3.355197981912198e-07, "loss": 0.1453, "step": 7760 }, { "epoch": 2.51, "learning_rate": 3.350822732931361e-07, "loss": 0.1524, "step": 7761 }, { "epoch": 2.52, "learning_rate": 3.3464501336002544e-07, "loss": 0.1358, "step": 7762 }, { "epoch": 2.52, "learning_rate": 3.342080184454022e-07, "loss": 0.1651, "step": 7763 }, { "epoch": 2.52, "learning_rate": 3.337712886027511e-07, "loss": 0.1542, "step": 7764 }, { "epoch": 2.52, "learning_rate": 3.3333482388552356e-07, "loss": 0.1493, "step": 7765 }, { "epoch": 2.52, "learning_rate": 3.3289862434713857e-07, "loss": 0.1473, "step": 7766 }, { "epoch": 2.52, "learning_rate": 3.3246269004098275e-07, "loss": 0.1458, "step": 7767 }, { "epoch": 2.52, "learning_rate": 3.320270210204107e-07, "loss": 0.148, "step": 7768 }, { "epoch": 2.52, "learning_rate": 3.3159161733874347e-07, "loss": 0.1376, "step": 7769 }, { "epoch": 2.52, "learning_rate": 3.311564790492702e-07, "loss": 0.15, "step": 7770 }, { "epoch": 2.52, "learning_rate": 3.307216062052479e-07, "loss": 0.1457, "step": 7771 }, { "epoch": 2.52, "learning_rate": 3.3028699885990085e-07, "loss": 0.1529, "step": 7772 }, { "epoch": 2.52, "learning_rate": 3.298526570664207e-07, "loss": 0.1486, "step": 7773 }, { "epoch": 2.52, "learning_rate": 3.294185808779665e-07, "loss": 0.1528, "step": 7774 }, { "epoch": 2.52, "learning_rate": 3.289847703476659e-07, "loss": 0.1357, "step": 7775 }, { "epoch": 2.52, "learning_rate": 3.285512255286111e-07, "loss": 0.1619, "step": 7776 }, { "epoch": 2.52, "learning_rate": 3.2811794647386625e-07, "loss": 0.1511, "step": 7777 }, { "epoch": 2.52, "learning_rate": 3.276849332364587e-07, "loss": 0.146, "step": 7778 }, { "epoch": 2.52, "learning_rate": 3.2725218586938584e-07, "loss": 0.1542, "step": 7779 }, { "epoch": 2.52, "learning_rate": 3.2681970442561134e-07, "loss": 0.1696, "step": 7780 }, { "epoch": 2.52, "learning_rate": 3.2638748895806705e-07, "loss": 0.1601, "step": 7781 }, { "epoch": 2.52, "learning_rate": 3.259555395196526e-07, "loss": 0.1373, "step": 7782 }, { "epoch": 2.52, "learning_rate": 3.255238561632326e-07, "loss": 0.1401, "step": 7783 }, { "epoch": 2.52, "learning_rate": 3.250924389416432e-07, "loss": 0.15, "step": 7784 }, { "epoch": 2.52, "learning_rate": 3.2466128790768327e-07, "loss": 0.17, "step": 7785 }, { "epoch": 2.52, "learning_rate": 3.2423040311412384e-07, "loss": 0.1363, "step": 7786 }, { "epoch": 2.52, "learning_rate": 3.2379978461369976e-07, "loss": 0.1542, "step": 7787 }, { "epoch": 2.52, "learning_rate": 3.233694324591144e-07, "loss": 0.14, "step": 7788 }, { "epoch": 2.52, "learning_rate": 3.229393467030395e-07, "loss": 0.1596, "step": 7789 }, { "epoch": 2.52, "learning_rate": 3.225095273981127e-07, "loss": 0.1474, "step": 7790 }, { "epoch": 2.52, "learning_rate": 3.2207997459694053e-07, "loss": 0.1437, "step": 7791 }, { "epoch": 2.52, "learning_rate": 3.2165068835209506e-07, "loss": 0.1482, "step": 7792 }, { "epoch": 2.53, "learning_rate": 3.2122166871611736e-07, "loss": 0.1511, "step": 7793 }, { "epoch": 2.53, "learning_rate": 3.207929157415152e-07, "loss": 0.1475, "step": 7794 }, { "epoch": 2.53, "learning_rate": 3.2036442948076395e-07, "loss": 0.1465, "step": 7795 }, { "epoch": 2.53, "learning_rate": 3.199362099863057e-07, "loss": 0.1442, "step": 7796 }, { "epoch": 2.53, "learning_rate": 3.19508257310551e-07, "loss": 0.1454, "step": 7797 }, { "epoch": 2.53, "learning_rate": 3.190805715058765e-07, "loss": 0.16, "step": 7798 }, { "epoch": 2.53, "learning_rate": 3.1865315262462783e-07, "loss": 0.1639, "step": 7799 }, { "epoch": 2.53, "learning_rate": 3.182260007191157e-07, "loss": 0.136, "step": 7800 }, { "epoch": 2.53, "learning_rate": 3.1779911584161963e-07, "loss": 0.1426, "step": 7801 }, { "epoch": 2.53, "learning_rate": 3.173724980443868e-07, "loss": 0.1562, "step": 7802 }, { "epoch": 2.53, "learning_rate": 3.1694614737963036e-07, "loss": 0.1459, "step": 7803 }, { "epoch": 2.53, "learning_rate": 3.165200638995328e-07, "loss": 0.1498, "step": 7804 }, { "epoch": 2.53, "learning_rate": 3.160942476562404e-07, "loss": 0.152, "step": 7805 }, { "epoch": 2.53, "learning_rate": 3.1566869870187115e-07, "loss": 0.1582, "step": 7806 }, { "epoch": 2.53, "learning_rate": 3.1524341708850633e-07, "loss": 0.1574, "step": 7807 }, { "epoch": 2.53, "learning_rate": 3.148184028681983e-07, "loss": 0.1538, "step": 7808 }, { "epoch": 2.53, "learning_rate": 3.1439365609296253e-07, "loss": 0.1495, "step": 7809 }, { "epoch": 2.53, "learning_rate": 3.1396917681478595e-07, "loss": 0.1481, "step": 7810 }, { "epoch": 2.53, "learning_rate": 3.13544965085619e-07, "loss": 0.1633, "step": 7811 }, { "epoch": 2.53, "learning_rate": 3.1312102095738205e-07, "loss": 0.1514, "step": 7812 }, { "epoch": 2.53, "learning_rate": 3.12697344481962e-07, "loss": 0.1565, "step": 7813 }, { "epoch": 2.53, "learning_rate": 3.1227393571121117e-07, "loss": 0.1468, "step": 7814 }, { "epoch": 2.53, "learning_rate": 3.1185079469695263e-07, "loss": 0.1539, "step": 7815 }, { "epoch": 2.53, "learning_rate": 3.1142792149097297e-07, "loss": 0.1511, "step": 7816 }, { "epoch": 2.53, "learning_rate": 3.110053161450299e-07, "loss": 0.1401, "step": 7817 }, { "epoch": 2.53, "learning_rate": 3.105829787108444e-07, "loss": 0.1415, "step": 7818 }, { "epoch": 2.53, "learning_rate": 3.10160909240107e-07, "loss": 0.1454, "step": 7819 }, { "epoch": 2.53, "learning_rate": 3.0973910778447523e-07, "loss": 0.1286, "step": 7820 }, { "epoch": 2.53, "learning_rate": 3.0931757439557313e-07, "loss": 0.1557, "step": 7821 }, { "epoch": 2.53, "learning_rate": 3.08896309124993e-07, "loss": 0.1626, "step": 7822 }, { "epoch": 2.53, "learning_rate": 3.084753120242928e-07, "loss": 0.1509, "step": 7823 }, { "epoch": 2.54, "learning_rate": 3.0805458314499855e-07, "loss": 0.156, "step": 7824 }, { "epoch": 2.54, "learning_rate": 3.076341225386037e-07, "loss": 0.1487, "step": 7825 }, { "epoch": 2.54, "learning_rate": 3.0721393025656853e-07, "loss": 0.1488, "step": 7826 }, { "epoch": 2.54, "learning_rate": 3.0679400635032053e-07, "loss": 0.148, "step": 7827 }, { "epoch": 2.54, "learning_rate": 3.063743508712544e-07, "loss": 0.1474, "step": 7828 }, { "epoch": 2.54, "learning_rate": 3.059549638707315e-07, "loss": 0.1743, "step": 7829 }, { "epoch": 2.54, "learning_rate": 3.0553584540008176e-07, "loss": 0.1564, "step": 7830 }, { "epoch": 2.54, "learning_rate": 3.0511699551059927e-07, "loss": 0.1478, "step": 7831 }, { "epoch": 2.54, "learning_rate": 3.0469841425354945e-07, "loss": 0.1504, "step": 7832 }, { "epoch": 2.54, "learning_rate": 3.0428010168016107e-07, "loss": 0.1447, "step": 7833 }, { "epoch": 2.54, "learning_rate": 3.0386205784163207e-07, "loss": 0.1461, "step": 7834 }, { "epoch": 2.54, "learning_rate": 3.0344428278912765e-07, "loss": 0.1463, "step": 7835 }, { "epoch": 2.54, "learning_rate": 3.030267765737774e-07, "loss": 0.1358, "step": 7836 }, { "epoch": 2.54, "learning_rate": 3.026095392466824e-07, "loss": 0.1506, "step": 7837 }, { "epoch": 2.54, "learning_rate": 3.021925708589066e-07, "loss": 0.1415, "step": 7838 }, { "epoch": 2.54, "learning_rate": 3.0177587146148435e-07, "loss": 0.1501, "step": 7839 }, { "epoch": 2.54, "learning_rate": 3.013594411054144e-07, "loss": 0.1602, "step": 7840 }, { "epoch": 2.54, "learning_rate": 3.0094327984166506e-07, "loss": 0.165, "step": 7841 }, { "epoch": 2.54, "learning_rate": 3.0052738772116925e-07, "loss": 0.1394, "step": 7842 }, { "epoch": 2.54, "learning_rate": 3.001117647948287e-07, "loss": 0.1383, "step": 7843 }, { "epoch": 2.54, "learning_rate": 2.996964111135123e-07, "loss": 0.1378, "step": 7844 }, { "epoch": 2.54, "learning_rate": 2.992813267280531e-07, "loss": 0.1412, "step": 7845 }, { "epoch": 2.54, "learning_rate": 2.988665116892564e-07, "loss": 0.1603, "step": 7846 }, { "epoch": 2.54, "learning_rate": 2.9845196604788935e-07, "loss": 0.1533, "step": 7847 }, { "epoch": 2.54, "learning_rate": 2.980376898546888e-07, "loss": 0.1426, "step": 7848 }, { "epoch": 2.54, "learning_rate": 2.976236831603588e-07, "loss": 0.1493, "step": 7849 }, { "epoch": 2.54, "learning_rate": 2.972099460155689e-07, "loss": 0.1476, "step": 7850 }, { "epoch": 2.54, "learning_rate": 2.9679647847095735e-07, "loss": 0.1494, "step": 7851 }, { "epoch": 2.54, "learning_rate": 2.9638328057712775e-07, "loss": 0.1524, "step": 7852 }, { "epoch": 2.54, "learning_rate": 2.9597035238465214e-07, "loss": 0.1681, "step": 7853 }, { "epoch": 2.55, "learning_rate": 2.9555769394406934e-07, "loss": 0.1496, "step": 7854 }, { "epoch": 2.55, "learning_rate": 2.9514530530588367e-07, "loss": 0.1495, "step": 7855 }, { "epoch": 2.55, "learning_rate": 2.947331865205677e-07, "loss": 0.1396, "step": 7856 }, { "epoch": 2.55, "learning_rate": 2.943213376385612e-07, "loss": 0.1591, "step": 7857 }, { "epoch": 2.55, "learning_rate": 2.9390975871027046e-07, "loss": 0.1583, "step": 7858 }, { "epoch": 2.55, "learning_rate": 2.934984497860691e-07, "loss": 0.1455, "step": 7859 }, { "epoch": 2.55, "learning_rate": 2.9308741091629596e-07, "loss": 0.1559, "step": 7860 }, { "epoch": 2.55, "learning_rate": 2.9267664215126e-07, "loss": 0.1629, "step": 7861 }, { "epoch": 2.55, "learning_rate": 2.9226614354123356e-07, "loss": 0.1548, "step": 7862 }, { "epoch": 2.55, "learning_rate": 2.9185591513645947e-07, "loss": 0.156, "step": 7863 }, { "epoch": 2.55, "learning_rate": 2.914459569871447e-07, "loss": 0.1397, "step": 7864 }, { "epoch": 2.55, "learning_rate": 2.91036269143464e-07, "loss": 0.1498, "step": 7865 }, { "epoch": 2.55, "learning_rate": 2.9062685165555963e-07, "loss": 0.1741, "step": 7866 }, { "epoch": 2.55, "learning_rate": 2.9021770457354046e-07, "loss": 0.1572, "step": 7867 }, { "epoch": 2.55, "learning_rate": 2.8980882794748227e-07, "loss": 0.1584, "step": 7868 }, { "epoch": 2.55, "learning_rate": 2.894002218274261e-07, "loss": 0.156, "step": 7869 }, { "epoch": 2.55, "learning_rate": 2.8899188626338363e-07, "loss": 0.149, "step": 7870 }, { "epoch": 2.55, "learning_rate": 2.8858382130532965e-07, "loss": 0.1616, "step": 7871 }, { "epoch": 2.55, "learning_rate": 2.8817602700320747e-07, "loss": 0.1332, "step": 7872 }, { "epoch": 2.55, "learning_rate": 2.8776850340692777e-07, "loss": 0.1481, "step": 7873 }, { "epoch": 2.55, "learning_rate": 2.87361250566367e-07, "loss": 0.1466, "step": 7874 }, { "epoch": 2.55, "learning_rate": 2.869542685313692e-07, "loss": 0.1434, "step": 7875 }, { "epoch": 2.55, "learning_rate": 2.865475573517451e-07, "loss": 0.1416, "step": 7876 }, { "epoch": 2.55, "learning_rate": 2.8614111707727267e-07, "loss": 0.1339, "step": 7877 }, { "epoch": 2.55, "learning_rate": 2.8573494775769485e-07, "loss": 0.1389, "step": 7878 }, { "epoch": 2.55, "learning_rate": 2.853290494427238e-07, "loss": 0.1441, "step": 7879 }, { "epoch": 2.55, "learning_rate": 2.8492342218203766e-07, "loss": 0.1538, "step": 7880 }, { "epoch": 2.55, "learning_rate": 2.845180660252808e-07, "loss": 0.1348, "step": 7881 }, { "epoch": 2.55, "learning_rate": 2.8411298102206524e-07, "loss": 0.1546, "step": 7882 }, { "epoch": 2.55, "learning_rate": 2.837081672219694e-07, "loss": 0.1427, "step": 7883 }, { "epoch": 2.55, "learning_rate": 2.833036246745385e-07, "loss": 0.1481, "step": 7884 }, { "epoch": 2.56, "learning_rate": 2.828993534292851e-07, "loss": 0.1365, "step": 7885 }, { "epoch": 2.56, "learning_rate": 2.824953535356872e-07, "loss": 0.1347, "step": 7886 }, { "epoch": 2.56, "learning_rate": 2.820916250431907e-07, "loss": 0.1521, "step": 7887 }, { "epoch": 2.56, "learning_rate": 2.8168816800120845e-07, "loss": 0.1379, "step": 7888 }, { "epoch": 2.56, "learning_rate": 2.812849824591196e-07, "loss": 0.161, "step": 7889 }, { "epoch": 2.56, "learning_rate": 2.808820684662705e-07, "loss": 0.1511, "step": 7890 }, { "epoch": 2.56, "learning_rate": 2.804794260719726e-07, "loss": 0.1489, "step": 7891 }, { "epoch": 2.56, "learning_rate": 2.800770553255072e-07, "loss": 0.1474, "step": 7892 }, { "epoch": 2.56, "learning_rate": 2.796749562761186e-07, "loss": 0.1417, "step": 7893 }, { "epoch": 2.56, "learning_rate": 2.7927312897302217e-07, "loss": 0.1569, "step": 7894 }, { "epoch": 2.56, "learning_rate": 2.7887157346539574e-07, "loss": 0.1472, "step": 7895 }, { "epoch": 2.56, "learning_rate": 2.7847028980238666e-07, "loss": 0.1466, "step": 7896 }, { "epoch": 2.56, "learning_rate": 2.780692780331079e-07, "loss": 0.1494, "step": 7897 }, { "epoch": 2.56, "learning_rate": 2.7766853820663963e-07, "loss": 0.1426, "step": 7898 }, { "epoch": 2.56, "learning_rate": 2.7726807037202903e-07, "loss": 0.129, "step": 7899 }, { "epoch": 2.56, "learning_rate": 2.7686787457828796e-07, "loss": 0.1541, "step": 7900 }, { "epoch": 2.56, "learning_rate": 2.764679508743981e-07, "loss": 0.1579, "step": 7901 }, { "epoch": 2.56, "learning_rate": 2.7606829930930555e-07, "loss": 0.1592, "step": 7902 }, { "epoch": 2.56, "learning_rate": 2.7566891993192347e-07, "loss": 0.144, "step": 7903 }, { "epoch": 2.56, "learning_rate": 2.752698127911327e-07, "loss": 0.1494, "step": 7904 }, { "epoch": 2.56, "learning_rate": 2.748709779357794e-07, "loss": 0.1669, "step": 7905 }, { "epoch": 2.56, "learning_rate": 2.744724154146777e-07, "loss": 0.1525, "step": 7906 }, { "epoch": 2.56, "learning_rate": 2.740741252766077e-07, "loss": 0.1504, "step": 7907 }, { "epoch": 2.56, "learning_rate": 2.736761075703165e-07, "loss": 0.1475, "step": 7908 }, { "epoch": 2.56, "learning_rate": 2.732783623445168e-07, "loss": 0.1698, "step": 7909 }, { "epoch": 2.56, "learning_rate": 2.728808896478891e-07, "loss": 0.1474, "step": 7910 }, { "epoch": 2.56, "learning_rate": 2.7248368952908055e-07, "loss": 0.1597, "step": 7911 }, { "epoch": 2.56, "learning_rate": 2.7208676203670406e-07, "loss": 0.1561, "step": 7912 }, { "epoch": 2.56, "learning_rate": 2.716901072193404e-07, "loss": 0.1711, "step": 7913 }, { "epoch": 2.56, "learning_rate": 2.71293725125536e-07, "loss": 0.1341, "step": 7914 }, { "epoch": 2.56, "learning_rate": 2.7089761580380346e-07, "loss": 0.1462, "step": 7915 }, { "epoch": 2.57, "learning_rate": 2.7050177930262406e-07, "loss": 0.1484, "step": 7916 }, { "epoch": 2.57, "learning_rate": 2.701062156704434e-07, "loss": 0.1482, "step": 7917 }, { "epoch": 2.57, "learning_rate": 2.697109249556748e-07, "loss": 0.1654, "step": 7918 }, { "epoch": 2.57, "learning_rate": 2.6931590720669807e-07, "loss": 0.1541, "step": 7919 }, { "epoch": 2.57, "learning_rate": 2.6892116247185964e-07, "loss": 0.1451, "step": 7920 }, { "epoch": 2.57, "learning_rate": 2.6852669079947294e-07, "loss": 0.152, "step": 7921 }, { "epoch": 2.57, "learning_rate": 2.681324922378159e-07, "loss": 0.1468, "step": 7922 }, { "epoch": 2.57, "learning_rate": 2.6773856683513677e-07, "loss": 0.1419, "step": 7923 }, { "epoch": 2.57, "learning_rate": 2.673449146396459e-07, "loss": 0.1449, "step": 7924 }, { "epoch": 2.57, "learning_rate": 2.6695153569952475e-07, "loss": 0.1544, "step": 7925 }, { "epoch": 2.57, "learning_rate": 2.665584300629176e-07, "loss": 0.1398, "step": 7926 }, { "epoch": 2.57, "learning_rate": 2.661655977779373e-07, "loss": 0.1391, "step": 7927 }, { "epoch": 2.57, "learning_rate": 2.6577303889266244e-07, "loss": 0.1365, "step": 7928 }, { "epoch": 2.57, "learning_rate": 2.6538075345513864e-07, "loss": 0.143, "step": 7929 }, { "epoch": 2.57, "learning_rate": 2.6498874151337865e-07, "loss": 0.148, "step": 7930 }, { "epoch": 2.57, "learning_rate": 2.6459700311535885e-07, "loss": 0.1501, "step": 7931 }, { "epoch": 2.57, "learning_rate": 2.642055383090264e-07, "loss": 0.1563, "step": 7932 }, { "epoch": 2.57, "learning_rate": 2.638143471422916e-07, "loss": 0.1378, "step": 7933 }, { "epoch": 2.57, "learning_rate": 2.634234296630328e-07, "loss": 0.1517, "step": 7934 }, { "epoch": 2.57, "learning_rate": 2.6303278591909426e-07, "loss": 0.1676, "step": 7935 }, { "epoch": 2.57, "learning_rate": 2.626424159582872e-07, "loss": 0.1578, "step": 7936 }, { "epoch": 2.57, "learning_rate": 2.622523198283894e-07, "loss": 0.1419, "step": 7937 }, { "epoch": 2.57, "learning_rate": 2.6186249757714474e-07, "loss": 0.143, "step": 7938 }, { "epoch": 2.57, "learning_rate": 2.614729492522633e-07, "loss": 0.1446, "step": 7939 }, { "epoch": 2.57, "learning_rate": 2.61083674901422e-07, "loss": 0.1531, "step": 7940 }, { "epoch": 2.57, "learning_rate": 2.6069467457226467e-07, "loss": 0.154, "step": 7941 }, { "epoch": 2.57, "learning_rate": 2.6030594831240094e-07, "loss": 0.152, "step": 7942 }, { "epoch": 2.57, "learning_rate": 2.599174961694073e-07, "loss": 0.1466, "step": 7943 }, { "epoch": 2.57, "learning_rate": 2.595293181908265e-07, "loss": 0.139, "step": 7944 }, { "epoch": 2.57, "learning_rate": 2.59141414424168e-07, "loss": 0.1568, "step": 7945 }, { "epoch": 2.57, "learning_rate": 2.587537849169064e-07, "loss": 0.1294, "step": 7946 }, { "epoch": 2.58, "learning_rate": 2.5836642971648534e-07, "loss": 0.1542, "step": 7947 }, { "epoch": 2.58, "learning_rate": 2.579793488703122e-07, "loss": 0.1601, "step": 7948 }, { "epoch": 2.58, "learning_rate": 2.5759254242576246e-07, "loss": 0.1549, "step": 7949 }, { "epoch": 2.58, "learning_rate": 2.572060104301771e-07, "loss": 0.1569, "step": 7950 }, { "epoch": 2.58, "learning_rate": 2.5681975293086443e-07, "loss": 0.1463, "step": 7951 }, { "epoch": 2.58, "learning_rate": 2.564337699750985e-07, "loss": 0.1402, "step": 7952 }, { "epoch": 2.58, "learning_rate": 2.560480616101191e-07, "loss": 0.149, "step": 7953 }, { "epoch": 2.58, "learning_rate": 2.556626278831345e-07, "loss": 0.1452, "step": 7954 }, { "epoch": 2.58, "learning_rate": 2.552774688413165e-07, "loss": 0.1496, "step": 7955 }, { "epoch": 2.58, "learning_rate": 2.5489258453180676e-07, "loss": 0.1237, "step": 7956 }, { "epoch": 2.58, "learning_rate": 2.545079750017099e-07, "loss": 0.1583, "step": 7957 }, { "epoch": 2.58, "learning_rate": 2.541236402980987e-07, "loss": 0.1495, "step": 7958 }, { "epoch": 2.58, "learning_rate": 2.5373958046801207e-07, "loss": 0.1787, "step": 7959 }, { "epoch": 2.58, "learning_rate": 2.5335579555845563e-07, "loss": 0.1428, "step": 7960 }, { "epoch": 2.58, "learning_rate": 2.5297228561640075e-07, "loss": 0.1526, "step": 7961 }, { "epoch": 2.58, "learning_rate": 2.5258905068878433e-07, "loss": 0.1445, "step": 7962 }, { "epoch": 2.58, "learning_rate": 2.522060908225127e-07, "loss": 0.1537, "step": 7963 }, { "epoch": 2.58, "learning_rate": 2.518234060644545e-07, "loss": 0.1485, "step": 7964 }, { "epoch": 2.58, "learning_rate": 2.5144099646144724e-07, "loss": 0.1425, "step": 7965 }, { "epoch": 2.58, "learning_rate": 2.510588620602947e-07, "loss": 0.1513, "step": 7966 }, { "epoch": 2.58, "learning_rate": 2.506770029077657e-07, "loss": 0.1604, "step": 7967 }, { "epoch": 2.58, "learning_rate": 2.502954190505963e-07, "loss": 0.1506, "step": 7968 }, { "epoch": 2.58, "learning_rate": 2.499141105354894e-07, "loss": 0.1447, "step": 7969 }, { "epoch": 2.58, "learning_rate": 2.495330774091126e-07, "loss": 0.1582, "step": 7970 }, { "epoch": 2.58, "learning_rate": 2.4915231971810064e-07, "loss": 0.1426, "step": 7971 }, { "epoch": 2.58, "learning_rate": 2.4877183750905475e-07, "loss": 0.1537, "step": 7972 }, { "epoch": 2.58, "learning_rate": 2.483916308285425e-07, "loss": 0.1438, "step": 7973 }, { "epoch": 2.58, "learning_rate": 2.4801169972309745e-07, "loss": 0.1608, "step": 7974 }, { "epoch": 2.58, "learning_rate": 2.4763204423921937e-07, "loss": 0.1455, "step": 7975 }, { "epoch": 2.58, "learning_rate": 2.47252664423375e-07, "loss": 0.1433, "step": 7976 }, { "epoch": 2.58, "learning_rate": 2.4687356032199516e-07, "loss": 0.1531, "step": 7977 }, { "epoch": 2.59, "learning_rate": 2.464947319814806e-07, "loss": 0.1493, "step": 7978 }, { "epoch": 2.59, "learning_rate": 2.461161794481945e-07, "loss": 0.145, "step": 7979 }, { "epoch": 2.59, "learning_rate": 2.4573790276846947e-07, "loss": 0.1723, "step": 7980 }, { "epoch": 2.59, "learning_rate": 2.453599019886016e-07, "loss": 0.1383, "step": 7981 }, { "epoch": 2.59, "learning_rate": 2.449821771548552e-07, "loss": 0.1323, "step": 7982 }, { "epoch": 2.59, "learning_rate": 2.446047283134606e-07, "loss": 0.1439, "step": 7983 }, { "epoch": 2.59, "learning_rate": 2.4422755551061246e-07, "loss": 0.147, "step": 7984 }, { "epoch": 2.59, "learning_rate": 2.4385065879247466e-07, "loss": 0.1388, "step": 7985 }, { "epoch": 2.59, "learning_rate": 2.4347403820517423e-07, "loss": 0.1485, "step": 7986 }, { "epoch": 2.59, "learning_rate": 2.4309769379480764e-07, "loss": 0.1411, "step": 7987 }, { "epoch": 2.59, "learning_rate": 2.427216256074341e-07, "loss": 0.1556, "step": 7988 }, { "epoch": 2.59, "learning_rate": 2.423458336890816e-07, "loss": 0.1544, "step": 7989 }, { "epoch": 2.59, "learning_rate": 2.4197031808574327e-07, "loss": 0.1474, "step": 7990 }, { "epoch": 2.59, "learning_rate": 2.4159507884337877e-07, "loss": 0.1433, "step": 7991 }, { "epoch": 2.59, "learning_rate": 2.4122011600791334e-07, "loss": 0.1374, "step": 7992 }, { "epoch": 2.59, "learning_rate": 2.408454296252397e-07, "loss": 0.1494, "step": 7993 }, { "epoch": 2.59, "learning_rate": 2.404710197412144e-07, "loss": 0.139, "step": 7994 }, { "epoch": 2.59, "learning_rate": 2.4009688640166257e-07, "loss": 0.1454, "step": 7995 }, { "epoch": 2.59, "learning_rate": 2.397230296523742e-07, "loss": 0.1569, "step": 7996 }, { "epoch": 2.59, "learning_rate": 2.3934944953910576e-07, "loss": 0.1603, "step": 7997 }, { "epoch": 2.59, "learning_rate": 2.3897614610757984e-07, "loss": 0.148, "step": 7998 }, { "epoch": 2.59, "learning_rate": 2.386031194034855e-07, "loss": 0.1397, "step": 7999 }, { "epoch": 2.59, "learning_rate": 2.3823036947247773e-07, "loss": 0.1619, "step": 8000 }, { "epoch": 2.59, "learning_rate": 2.3785789636017604e-07, "loss": 0.1404, "step": 8001 }, { "epoch": 2.59, "learning_rate": 2.374857001121697e-07, "loss": 0.1429, "step": 8002 }, { "epoch": 2.59, "learning_rate": 2.371137807740101e-07, "loss": 0.1543, "step": 8003 }, { "epoch": 2.59, "learning_rate": 2.3674213839121745e-07, "loss": 0.1508, "step": 8004 }, { "epoch": 2.59, "learning_rate": 2.3637077300927762e-07, "loss": 0.1432, "step": 8005 }, { "epoch": 2.59, "learning_rate": 2.3599968467364037e-07, "loss": 0.1476, "step": 8006 }, { "epoch": 2.59, "learning_rate": 2.3562887342972574e-07, "loss": 0.1348, "step": 8007 }, { "epoch": 2.59, "learning_rate": 2.3525833932291491e-07, "loss": 0.1571, "step": 8008 }, { "epoch": 2.6, "learning_rate": 2.3488808239855998e-07, "loss": 0.1621, "step": 8009 }, { "epoch": 2.6, "learning_rate": 2.3451810270197494e-07, "loss": 0.1428, "step": 8010 }, { "epoch": 2.6, "learning_rate": 2.341484002784436e-07, "loss": 0.1526, "step": 8011 }, { "epoch": 2.6, "learning_rate": 2.3377897517321224e-07, "loss": 0.1525, "step": 8012 }, { "epoch": 2.6, "learning_rate": 2.3340982743149582e-07, "loss": 0.1357, "step": 8013 }, { "epoch": 2.6, "learning_rate": 2.3304095709847402e-07, "loss": 0.1423, "step": 8014 }, { "epoch": 2.6, "learning_rate": 2.3267236421929323e-07, "loss": 0.153, "step": 8015 }, { "epoch": 2.6, "learning_rate": 2.3230404883906626e-07, "loss": 0.1375, "step": 8016 }, { "epoch": 2.6, "learning_rate": 2.319360110028701e-07, "loss": 0.1473, "step": 8017 }, { "epoch": 2.6, "learning_rate": 2.3156825075574956e-07, "loss": 0.1509, "step": 8018 }, { "epoch": 2.6, "learning_rate": 2.312007681427153e-07, "loss": 0.1601, "step": 8019 }, { "epoch": 2.6, "learning_rate": 2.30833563208743e-07, "loss": 0.1457, "step": 8020 }, { "epoch": 2.6, "learning_rate": 2.304666359987756e-07, "loss": 0.1709, "step": 8021 }, { "epoch": 2.6, "learning_rate": 2.300999865577211e-07, "loss": 0.1305, "step": 8022 }, { "epoch": 2.6, "learning_rate": 2.2973361493045382e-07, "loss": 0.1571, "step": 8023 }, { "epoch": 2.6, "learning_rate": 2.293675211618146e-07, "loss": 0.147, "step": 8024 }, { "epoch": 2.6, "learning_rate": 2.2900170529660898e-07, "loss": 0.141, "step": 8025 }, { "epoch": 2.6, "learning_rate": 2.2863616737960976e-07, "loss": 0.1512, "step": 8026 }, { "epoch": 2.6, "learning_rate": 2.2827090745555502e-07, "loss": 0.1456, "step": 8027 }, { "epoch": 2.6, "learning_rate": 2.279059255691493e-07, "loss": 0.1579, "step": 8028 }, { "epoch": 2.6, "learning_rate": 2.2754122176506244e-07, "loss": 0.1565, "step": 8029 }, { "epoch": 2.6, "learning_rate": 2.271767960879312e-07, "loss": 0.1476, "step": 8030 }, { "epoch": 2.6, "learning_rate": 2.2681264858235797e-07, "loss": 0.1606, "step": 8031 }, { "epoch": 2.6, "learning_rate": 2.2644877929290932e-07, "loss": 0.1398, "step": 8032 }, { "epoch": 2.6, "learning_rate": 2.2608518826412128e-07, "loss": 0.1551, "step": 8033 }, { "epoch": 2.6, "learning_rate": 2.2572187554049274e-07, "loss": 0.1356, "step": 8034 }, { "epoch": 2.6, "learning_rate": 2.2535884116648976e-07, "loss": 0.149, "step": 8035 }, { "epoch": 2.6, "learning_rate": 2.2499608518654432e-07, "loss": 0.1629, "step": 8036 }, { "epoch": 2.6, "learning_rate": 2.2463360764505448e-07, "loss": 0.1416, "step": 8037 }, { "epoch": 2.6, "learning_rate": 2.2427140858638424e-07, "loss": 0.1454, "step": 8038 }, { "epoch": 2.6, "learning_rate": 2.2390948805486174e-07, "loss": 0.1635, "step": 8039 }, { "epoch": 2.61, "learning_rate": 2.2354784609478485e-07, "loss": 0.1338, "step": 8040 }, { "epoch": 2.61, "learning_rate": 2.2318648275041267e-07, "loss": 0.1679, "step": 8041 }, { "epoch": 2.61, "learning_rate": 2.2282539806597476e-07, "loss": 0.1488, "step": 8042 }, { "epoch": 2.61, "learning_rate": 2.22464592085663e-07, "loss": 0.145, "step": 8043 }, { "epoch": 2.61, "learning_rate": 2.2210406485363656e-07, "loss": 0.1399, "step": 8044 }, { "epoch": 2.61, "learning_rate": 2.217438164140212e-07, "loss": 0.1507, "step": 8045 }, { "epoch": 2.61, "learning_rate": 2.213838468109075e-07, "loss": 0.1607, "step": 8046 }, { "epoch": 2.61, "learning_rate": 2.210241560883525e-07, "loss": 0.1463, "step": 8047 }, { "epoch": 2.61, "learning_rate": 2.206647442903781e-07, "loss": 0.1439, "step": 8048 }, { "epoch": 2.61, "learning_rate": 2.2030561146097363e-07, "loss": 0.1451, "step": 8049 }, { "epoch": 2.61, "learning_rate": 2.199467576440928e-07, "loss": 0.1698, "step": 8050 }, { "epoch": 2.61, "learning_rate": 2.195881828836563e-07, "loss": 0.1646, "step": 8051 }, { "epoch": 2.61, "learning_rate": 2.1922988722355044e-07, "loss": 0.1524, "step": 8052 }, { "epoch": 2.61, "learning_rate": 2.188718707076265e-07, "loss": 0.1576, "step": 8053 }, { "epoch": 2.61, "learning_rate": 2.185141333797025e-07, "loss": 0.1522, "step": 8054 }, { "epoch": 2.61, "learning_rate": 2.181566752835626e-07, "loss": 0.145, "step": 8055 }, { "epoch": 2.61, "learning_rate": 2.177994964629554e-07, "loss": 0.1636, "step": 8056 }, { "epoch": 2.61, "learning_rate": 2.174425969615962e-07, "loss": 0.1606, "step": 8057 }, { "epoch": 2.61, "learning_rate": 2.1708597682316645e-07, "loss": 0.1477, "step": 8058 }, { "epoch": 2.61, "learning_rate": 2.1672963609131292e-07, "loss": 0.1573, "step": 8059 }, { "epoch": 2.61, "learning_rate": 2.1637357480964821e-07, "loss": 0.1575, "step": 8060 }, { "epoch": 2.61, "learning_rate": 2.1601779302175026e-07, "loss": 0.1371, "step": 8061 }, { "epoch": 2.61, "learning_rate": 2.1566229077116445e-07, "loss": 0.1654, "step": 8062 }, { "epoch": 2.61, "learning_rate": 2.1530706810139913e-07, "loss": 0.1348, "step": 8063 }, { "epoch": 2.61, "learning_rate": 2.1495212505593221e-07, "loss": 0.18, "step": 8064 }, { "epoch": 2.61, "learning_rate": 2.1459746167820372e-07, "loss": 0.1517, "step": 8065 }, { "epoch": 2.61, "learning_rate": 2.142430780116214e-07, "loss": 0.1291, "step": 8066 }, { "epoch": 2.61, "learning_rate": 2.1388897409955867e-07, "loss": 0.1422, "step": 8067 }, { "epoch": 2.61, "learning_rate": 2.1353514998535414e-07, "loss": 0.155, "step": 8068 }, { "epoch": 2.61, "learning_rate": 2.1318160571231316e-07, "loss": 0.1445, "step": 8069 }, { "epoch": 2.62, "learning_rate": 2.128283413237045e-07, "loss": 0.1647, "step": 8070 }, { "epoch": 2.62, "learning_rate": 2.1247535686276632e-07, "loss": 0.1598, "step": 8071 }, { "epoch": 2.62, "learning_rate": 2.121226523726988e-07, "loss": 0.1465, "step": 8072 }, { "epoch": 2.62, "learning_rate": 2.1177022789667045e-07, "loss": 0.1383, "step": 8073 }, { "epoch": 2.62, "learning_rate": 2.1141808347781428e-07, "loss": 0.1392, "step": 8074 }, { "epoch": 2.62, "learning_rate": 2.110662191592297e-07, "loss": 0.1567, "step": 8075 }, { "epoch": 2.62, "learning_rate": 2.1071463498398114e-07, "loss": 0.1415, "step": 8076 }, { "epoch": 2.62, "learning_rate": 2.103633309950995e-07, "loss": 0.1611, "step": 8077 }, { "epoch": 2.62, "learning_rate": 2.1001230723558087e-07, "loss": 0.136, "step": 8078 }, { "epoch": 2.62, "learning_rate": 2.0966156374838677e-07, "loss": 0.1403, "step": 8079 }, { "epoch": 2.62, "learning_rate": 2.0931110057644505e-07, "loss": 0.1568, "step": 8080 }, { "epoch": 2.62, "learning_rate": 2.089609177626492e-07, "loss": 0.1511, "step": 8081 }, { "epoch": 2.62, "learning_rate": 2.0861101534985774e-07, "loss": 0.1511, "step": 8082 }, { "epoch": 2.62, "learning_rate": 2.082613933808958e-07, "loss": 0.1487, "step": 8083 }, { "epoch": 2.62, "learning_rate": 2.079120518985539e-07, "loss": 0.1674, "step": 8084 }, { "epoch": 2.62, "learning_rate": 2.07562990945587e-07, "loss": 0.1561, "step": 8085 }, { "epoch": 2.62, "learning_rate": 2.0721421056471818e-07, "loss": 0.1425, "step": 8086 }, { "epoch": 2.62, "learning_rate": 2.0686571079863383e-07, "loss": 0.1644, "step": 8087 }, { "epoch": 2.62, "learning_rate": 2.0651749168998703e-07, "loss": 0.1499, "step": 8088 }, { "epoch": 2.62, "learning_rate": 2.0616955328139675e-07, "loss": 0.1376, "step": 8089 }, { "epoch": 2.62, "learning_rate": 2.058218956154473e-07, "loss": 0.1592, "step": 8090 }, { "epoch": 2.62, "learning_rate": 2.0547451873468877e-07, "loss": 0.1539, "step": 8091 }, { "epoch": 2.62, "learning_rate": 2.051274226816355e-07, "loss": 0.1522, "step": 8092 }, { "epoch": 2.62, "learning_rate": 2.0478060749877044e-07, "loss": 0.1542, "step": 8093 }, { "epoch": 2.62, "learning_rate": 2.0443407322853882e-07, "loss": 0.1435, "step": 8094 }, { "epoch": 2.62, "learning_rate": 2.0408781991335446e-07, "loss": 0.1508, "step": 8095 }, { "epoch": 2.62, "learning_rate": 2.0374184759559463e-07, "loss": 0.1405, "step": 8096 }, { "epoch": 2.62, "learning_rate": 2.033961563176029e-07, "loss": 0.1695, "step": 8097 }, { "epoch": 2.62, "learning_rate": 2.0305074612168906e-07, "loss": 0.1551, "step": 8098 }, { "epoch": 2.62, "learning_rate": 2.0270561705012765e-07, "loss": 0.1498, "step": 8099 }, { "epoch": 2.62, "learning_rate": 2.0236076914515956e-07, "loss": 0.1385, "step": 8100 }, { "epoch": 2.63, "learning_rate": 2.020162024489894e-07, "loss": 0.1553, "step": 8101 }, { "epoch": 2.63, "learning_rate": 2.0167191700379092e-07, "loss": 0.1638, "step": 8102 }, { "epoch": 2.63, "learning_rate": 2.0132791285169985e-07, "loss": 0.1492, "step": 8103 }, { "epoch": 2.63, "learning_rate": 2.0098419003481946e-07, "loss": 0.1405, "step": 8104 }, { "epoch": 2.63, "learning_rate": 2.0064074859521777e-07, "loss": 0.1488, "step": 8105 }, { "epoch": 2.63, "learning_rate": 2.0029758857492893e-07, "loss": 0.1507, "step": 8106 }, { "epoch": 2.63, "learning_rate": 1.9995471001595267e-07, "loss": 0.142, "step": 8107 }, { "epoch": 2.63, "learning_rate": 1.9961211296025352e-07, "loss": 0.1368, "step": 8108 }, { "epoch": 2.63, "learning_rate": 1.992697974497629e-07, "loss": 0.1588, "step": 8109 }, { "epoch": 2.63, "learning_rate": 1.989277635263756e-07, "loss": 0.1556, "step": 8110 }, { "epoch": 2.63, "learning_rate": 1.9858601123195403e-07, "loss": 0.1488, "step": 8111 }, { "epoch": 2.63, "learning_rate": 1.9824454060832526e-07, "loss": 0.1442, "step": 8112 }, { "epoch": 2.63, "learning_rate": 1.9790335169728197e-07, "loss": 0.147, "step": 8113 }, { "epoch": 2.63, "learning_rate": 1.9756244454058244e-07, "loss": 0.1704, "step": 8114 }, { "epoch": 2.63, "learning_rate": 1.9722181917995103e-07, "loss": 0.1437, "step": 8115 }, { "epoch": 2.63, "learning_rate": 1.9688147565707528e-07, "loss": 0.1658, "step": 8116 }, { "epoch": 2.63, "learning_rate": 1.9654141401361183e-07, "loss": 0.1518, "step": 8117 }, { "epoch": 2.63, "learning_rate": 1.9620163429117906e-07, "loss": 0.1376, "step": 8118 }, { "epoch": 2.63, "learning_rate": 1.958621365313648e-07, "loss": 0.151, "step": 8119 }, { "epoch": 2.63, "learning_rate": 1.9552292077571894e-07, "loss": 0.1363, "step": 8120 }, { "epoch": 2.63, "learning_rate": 1.9518398706575846e-07, "loss": 0.1621, "step": 8121 }, { "epoch": 2.63, "learning_rate": 1.948453354429661e-07, "loss": 0.148, "step": 8122 }, { "epoch": 2.63, "learning_rate": 1.9450696594878804e-07, "loss": 0.1562, "step": 8123 }, { "epoch": 2.63, "learning_rate": 1.941688786246393e-07, "loss": 0.1466, "step": 8124 }, { "epoch": 2.63, "learning_rate": 1.9383107351189672e-07, "loss": 0.1415, "step": 8125 }, { "epoch": 2.63, "learning_rate": 1.9349355065190618e-07, "loss": 0.1558, "step": 8126 }, { "epoch": 2.63, "learning_rate": 1.9315631008597596e-07, "loss": 0.1597, "step": 8127 }, { "epoch": 2.63, "learning_rate": 1.9281935185538141e-07, "loss": 0.1469, "step": 8128 }, { "epoch": 2.63, "learning_rate": 1.9248267600136317e-07, "loss": 0.1346, "step": 8129 }, { "epoch": 2.63, "learning_rate": 1.9214628256512656e-07, "loss": 0.1411, "step": 8130 }, { "epoch": 2.63, "learning_rate": 1.918101715878437e-07, "loss": 0.1491, "step": 8131 }, { "epoch": 2.64, "learning_rate": 1.9147434311065028e-07, "loss": 0.1383, "step": 8132 }, { "epoch": 2.64, "learning_rate": 1.911387971746495e-07, "loss": 0.1527, "step": 8133 }, { "epoch": 2.64, "learning_rate": 1.9080353382090798e-07, "loss": 0.1696, "step": 8134 }, { "epoch": 2.64, "learning_rate": 1.9046855309045957e-07, "loss": 0.1595, "step": 8135 }, { "epoch": 2.64, "learning_rate": 1.9013385502430175e-07, "loss": 0.1465, "step": 8136 }, { "epoch": 2.64, "learning_rate": 1.8979943966339924e-07, "loss": 0.1474, "step": 8137 }, { "epoch": 2.64, "learning_rate": 1.8946530704868072e-07, "loss": 0.1471, "step": 8138 }, { "epoch": 2.64, "learning_rate": 1.891314572210412e-07, "loss": 0.1297, "step": 8139 }, { "epoch": 2.64, "learning_rate": 1.887978902213397e-07, "loss": 0.1541, "step": 8140 }, { "epoch": 2.64, "learning_rate": 1.8846460609040302e-07, "loss": 0.1478, "step": 8141 }, { "epoch": 2.64, "learning_rate": 1.881316048690207e-07, "loss": 0.1631, "step": 8142 }, { "epoch": 2.64, "learning_rate": 1.8779888659794937e-07, "loss": 0.162, "step": 8143 }, { "epoch": 2.64, "learning_rate": 1.874664513179106e-07, "loss": 0.1461, "step": 8144 }, { "epoch": 2.64, "learning_rate": 1.8713429906959097e-07, "loss": 0.1446, "step": 8145 }, { "epoch": 2.64, "learning_rate": 1.8680242989364327e-07, "loss": 0.1355, "step": 8146 }, { "epoch": 2.64, "learning_rate": 1.8647084383068393e-07, "loss": 0.1444, "step": 8147 }, { "epoch": 2.64, "learning_rate": 1.8613954092129738e-07, "loss": 0.1507, "step": 8148 }, { "epoch": 2.64, "learning_rate": 1.858085212060304e-07, "loss": 0.1386, "step": 8149 }, { "epoch": 2.64, "learning_rate": 1.85477784725398e-07, "loss": 0.1394, "step": 8150 }, { "epoch": 2.64, "learning_rate": 1.851473315198782e-07, "loss": 0.1473, "step": 8151 }, { "epoch": 2.64, "learning_rate": 1.848171616299152e-07, "loss": 0.1457, "step": 8152 }, { "epoch": 2.64, "learning_rate": 1.8448727509591951e-07, "loss": 0.1444, "step": 8153 }, { "epoch": 2.64, "learning_rate": 1.8415767195826468e-07, "loss": 0.1566, "step": 8154 }, { "epoch": 2.64, "learning_rate": 1.8382835225729256e-07, "loss": 0.1613, "step": 8155 }, { "epoch": 2.64, "learning_rate": 1.834993160333068e-07, "loss": 0.1631, "step": 8156 }, { "epoch": 2.64, "learning_rate": 1.831705633265804e-07, "loss": 0.1538, "step": 8157 }, { "epoch": 2.64, "learning_rate": 1.8284209417734762e-07, "loss": 0.1575, "step": 8158 }, { "epoch": 2.64, "learning_rate": 1.8251390862581097e-07, "loss": 0.1466, "step": 8159 }, { "epoch": 2.64, "learning_rate": 1.8218600671213698e-07, "loss": 0.1348, "step": 8160 }, { "epoch": 2.64, "learning_rate": 1.8185838847645743e-07, "loss": 0.1597, "step": 8161 }, { "epoch": 2.64, "learning_rate": 1.8153105395886967e-07, "loss": 0.1439, "step": 8162 }, { "epoch": 2.65, "learning_rate": 1.8120400319943692e-07, "loss": 0.1419, "step": 8163 }, { "epoch": 2.65, "learning_rate": 1.8087723623818608e-07, "loss": 0.1508, "step": 8164 }, { "epoch": 2.65, "learning_rate": 1.805507531151107e-07, "loss": 0.1465, "step": 8165 }, { "epoch": 2.65, "learning_rate": 1.8022455387016913e-07, "loss": 0.1541, "step": 8166 }, { "epoch": 2.65, "learning_rate": 1.7989863854328492e-07, "loss": 0.1543, "step": 8167 }, { "epoch": 2.65, "learning_rate": 1.7957300717434706e-07, "loss": 0.1589, "step": 8168 }, { "epoch": 2.65, "learning_rate": 1.7924765980320974e-07, "loss": 0.1369, "step": 8169 }, { "epoch": 2.65, "learning_rate": 1.7892259646969278e-07, "loss": 0.1392, "step": 8170 }, { "epoch": 2.65, "learning_rate": 1.785978172135791e-07, "loss": 0.136, "step": 8171 }, { "epoch": 2.65, "learning_rate": 1.782733220746205e-07, "loss": 0.1542, "step": 8172 }, { "epoch": 2.65, "learning_rate": 1.7794911109253105e-07, "loss": 0.1491, "step": 8173 }, { "epoch": 2.65, "learning_rate": 1.7762518430699122e-07, "loss": 0.1481, "step": 8174 }, { "epoch": 2.65, "learning_rate": 1.7730154175764623e-07, "loss": 0.1584, "step": 8175 }, { "epoch": 2.65, "learning_rate": 1.7697818348410722e-07, "loss": 0.1479, "step": 8176 }, { "epoch": 2.65, "learning_rate": 1.7665510952595027e-07, "loss": 0.1393, "step": 8177 }, { "epoch": 2.65, "learning_rate": 1.7633231992271572e-07, "loss": 0.1573, "step": 8178 }, { "epoch": 2.65, "learning_rate": 1.7600981471391083e-07, "loss": 0.1657, "step": 8179 }, { "epoch": 2.65, "learning_rate": 1.7568759393900597e-07, "loss": 0.1391, "step": 8180 }, { "epoch": 2.65, "learning_rate": 1.7536565763743934e-07, "loss": 0.1284, "step": 8181 }, { "epoch": 2.65, "learning_rate": 1.7504400584861137e-07, "loss": 0.142, "step": 8182 }, { "epoch": 2.65, "learning_rate": 1.7472263861189e-07, "loss": 0.1427, "step": 8183 }, { "epoch": 2.65, "learning_rate": 1.7440155596660735e-07, "loss": 0.1585, "step": 8184 }, { "epoch": 2.65, "learning_rate": 1.7408075795206037e-07, "loss": 0.1429, "step": 8185 }, { "epoch": 2.65, "learning_rate": 1.7376024460751262e-07, "loss": 0.1492, "step": 8186 }, { "epoch": 2.65, "learning_rate": 1.7344001597219024e-07, "loss": 0.1583, "step": 8187 }, { "epoch": 2.65, "learning_rate": 1.7312007208528796e-07, "loss": 0.14, "step": 8188 }, { "epoch": 2.65, "learning_rate": 1.7280041298596257e-07, "loss": 0.1514, "step": 8189 }, { "epoch": 2.65, "learning_rate": 1.7248103871333743e-07, "loss": 0.1486, "step": 8190 }, { "epoch": 2.65, "learning_rate": 1.7216194930650105e-07, "loss": 0.1585, "step": 8191 }, { "epoch": 2.65, "learning_rate": 1.7184314480450713e-07, "loss": 0.1522, "step": 8192 }, { "epoch": 2.65, "learning_rate": 1.715246252463737e-07, "loss": 0.1404, "step": 8193 }, { "epoch": 2.66, "learning_rate": 1.7120639067108508e-07, "loss": 0.1443, "step": 8194 }, { "epoch": 2.66, "learning_rate": 1.7088844111758956e-07, "loss": 0.1496, "step": 8195 }, { "epoch": 2.66, "learning_rate": 1.7057077662480131e-07, "loss": 0.1495, "step": 8196 }, { "epoch": 2.66, "learning_rate": 1.7025339723159924e-07, "loss": 0.1486, "step": 8197 }, { "epoch": 2.66, "learning_rate": 1.6993630297682778e-07, "loss": 0.1479, "step": 8198 }, { "epoch": 2.66, "learning_rate": 1.6961949389929593e-07, "loss": 0.164, "step": 8199 }, { "epoch": 2.66, "learning_rate": 1.693029700377785e-07, "loss": 0.1424, "step": 8200 }, { "epoch": 2.66, "learning_rate": 1.6898673143101479e-07, "loss": 0.138, "step": 8201 }, { "epoch": 2.66, "learning_rate": 1.6867077811770826e-07, "loss": 0.1513, "step": 8202 }, { "epoch": 2.66, "learning_rate": 1.683551101365305e-07, "loss": 0.1425, "step": 8203 }, { "epoch": 2.66, "learning_rate": 1.6803972752611475e-07, "loss": 0.1574, "step": 8204 }, { "epoch": 2.66, "learning_rate": 1.6772463032506126e-07, "loss": 0.1526, "step": 8205 }, { "epoch": 2.66, "learning_rate": 1.6740981857193471e-07, "loss": 0.1663, "step": 8206 }, { "epoch": 2.66, "learning_rate": 1.6709529230526544e-07, "loss": 0.1596, "step": 8207 }, { "epoch": 2.66, "learning_rate": 1.667810515635482e-07, "loss": 0.1456, "step": 8208 }, { "epoch": 2.66, "learning_rate": 1.6646709638524216e-07, "loss": 0.138, "step": 8209 }, { "epoch": 2.66, "learning_rate": 1.6615342680877417e-07, "loss": 0.1355, "step": 8210 }, { "epoch": 2.66, "learning_rate": 1.6584004287253235e-07, "loss": 0.1489, "step": 8211 }, { "epoch": 2.66, "learning_rate": 1.6552694461487385e-07, "loss": 0.1563, "step": 8212 }, { "epoch": 2.66, "learning_rate": 1.652141320741174e-07, "loss": 0.1617, "step": 8213 }, { "epoch": 2.66, "learning_rate": 1.6490160528854855e-07, "loss": 0.1541, "step": 8214 }, { "epoch": 2.66, "learning_rate": 1.6458936429641803e-07, "loss": 0.1611, "step": 8215 }, { "epoch": 2.66, "learning_rate": 1.642774091359406e-07, "loss": 0.151, "step": 8216 }, { "epoch": 2.66, "learning_rate": 1.6396573984529707e-07, "loss": 0.1687, "step": 8217 }, { "epoch": 2.66, "learning_rate": 1.6365435646263223e-07, "loss": 0.1721, "step": 8218 }, { "epoch": 2.66, "learning_rate": 1.6334325902605642e-07, "loss": 0.1399, "step": 8219 }, { "epoch": 2.66, "learning_rate": 1.63032447573645e-07, "loss": 0.1376, "step": 8220 }, { "epoch": 2.66, "learning_rate": 1.6272192214343868e-07, "loss": 0.1399, "step": 8221 }, { "epoch": 2.66, "learning_rate": 1.6241168277344232e-07, "loss": 0.1555, "step": 8222 }, { "epoch": 2.66, "learning_rate": 1.6210172950162639e-07, "loss": 0.1586, "step": 8223 }, { "epoch": 2.66, "learning_rate": 1.617920623659261e-07, "loss": 0.1493, "step": 8224 }, { "epoch": 2.67, "learning_rate": 1.6148268140424224e-07, "loss": 0.1418, "step": 8225 }, { "epoch": 2.67, "learning_rate": 1.6117358665443922e-07, "loss": 0.151, "step": 8226 }, { "epoch": 2.67, "learning_rate": 1.6086477815434763e-07, "loss": 0.1628, "step": 8227 }, { "epoch": 2.67, "learning_rate": 1.6055625594176254e-07, "loss": 0.1412, "step": 8228 }, { "epoch": 2.67, "learning_rate": 1.602480200544443e-07, "loss": 0.1522, "step": 8229 }, { "epoch": 2.67, "learning_rate": 1.5994007053011796e-07, "loss": 0.154, "step": 8230 }, { "epoch": 2.67, "learning_rate": 1.5963240740647285e-07, "loss": 0.1525, "step": 8231 }, { "epoch": 2.67, "learning_rate": 1.5932503072116524e-07, "loss": 0.1427, "step": 8232 }, { "epoch": 2.67, "learning_rate": 1.5901794051181362e-07, "loss": 0.1366, "step": 8233 }, { "epoch": 2.67, "learning_rate": 1.5871113681600464e-07, "loss": 0.1495, "step": 8234 }, { "epoch": 2.67, "learning_rate": 1.5840461967128628e-07, "loss": 0.1414, "step": 8235 }, { "epoch": 2.67, "learning_rate": 1.5809838911517438e-07, "loss": 0.1441, "step": 8236 }, { "epoch": 2.67, "learning_rate": 1.5779244518514813e-07, "loss": 0.1531, "step": 8237 }, { "epoch": 2.67, "learning_rate": 1.574867879186523e-07, "loss": 0.164, "step": 8238 }, { "epoch": 2.67, "learning_rate": 1.5718141735309695e-07, "loss": 0.1399, "step": 8239 }, { "epoch": 2.67, "learning_rate": 1.5687633352585467e-07, "loss": 0.1451, "step": 8240 }, { "epoch": 2.67, "learning_rate": 1.5657153647426703e-07, "loss": 0.1388, "step": 8241 }, { "epoch": 2.67, "learning_rate": 1.5626702623563694e-07, "loss": 0.1555, "step": 8242 }, { "epoch": 2.67, "learning_rate": 1.5596280284723348e-07, "loss": 0.1512, "step": 8243 }, { "epoch": 2.67, "learning_rate": 1.5565886634629102e-07, "loss": 0.1466, "step": 8244 }, { "epoch": 2.67, "learning_rate": 1.5535521677000813e-07, "loss": 0.1546, "step": 8245 }, { "epoch": 2.67, "learning_rate": 1.5505185415554903e-07, "loss": 0.1495, "step": 8246 }, { "epoch": 2.67, "learning_rate": 1.54748778540042e-07, "loss": 0.1387, "step": 8247 }, { "epoch": 2.67, "learning_rate": 1.544459899605813e-07, "loss": 0.1321, "step": 8248 }, { "epoch": 2.67, "learning_rate": 1.5414348845422394e-07, "loss": 0.1428, "step": 8249 }, { "epoch": 2.67, "learning_rate": 1.538412740579942e-07, "loss": 0.159, "step": 8250 }, { "epoch": 2.67, "learning_rate": 1.5353934680888e-07, "loss": 0.1477, "step": 8251 }, { "epoch": 2.67, "learning_rate": 1.5323770674383398e-07, "loss": 0.1415, "step": 8252 }, { "epoch": 2.67, "learning_rate": 1.529363538997744e-07, "loss": 0.1468, "step": 8253 }, { "epoch": 2.67, "learning_rate": 1.526352883135837e-07, "loss": 0.1584, "step": 8254 }, { "epoch": 2.67, "learning_rate": 1.5233451002210964e-07, "loss": 0.1485, "step": 8255 }, { "epoch": 2.68, "learning_rate": 1.520340190621647e-07, "loss": 0.1383, "step": 8256 }, { "epoch": 2.68, "learning_rate": 1.5173381547052528e-07, "loss": 0.1549, "step": 8257 }, { "epoch": 2.68, "learning_rate": 1.5143389928393398e-07, "loss": 0.1532, "step": 8258 }, { "epoch": 2.68, "learning_rate": 1.5113427053909725e-07, "loss": 0.1549, "step": 8259 }, { "epoch": 2.68, "learning_rate": 1.508349292726874e-07, "loss": 0.1465, "step": 8260 }, { "epoch": 2.68, "learning_rate": 1.505358755213407e-07, "loss": 0.1504, "step": 8261 }, { "epoch": 2.68, "learning_rate": 1.5023710932165758e-07, "loss": 0.1556, "step": 8262 }, { "epoch": 2.68, "learning_rate": 1.4993863071020548e-07, "loss": 0.1521, "step": 8263 }, { "epoch": 2.68, "learning_rate": 1.4964043972351377e-07, "loss": 0.1391, "step": 8264 }, { "epoch": 2.68, "learning_rate": 1.4934253639807994e-07, "loss": 0.1436, "step": 8265 }, { "epoch": 2.68, "learning_rate": 1.4904492077036286e-07, "loss": 0.1446, "step": 8266 }, { "epoch": 2.68, "learning_rate": 1.4874759287678898e-07, "loss": 0.1356, "step": 8267 }, { "epoch": 2.68, "learning_rate": 1.484505527537475e-07, "loss": 0.149, "step": 8268 }, { "epoch": 2.68, "learning_rate": 1.4815380043759374e-07, "loss": 0.1456, "step": 8269 }, { "epoch": 2.68, "learning_rate": 1.4785733596464736e-07, "loss": 0.1627, "step": 8270 }, { "epoch": 2.68, "learning_rate": 1.4756115937119202e-07, "loss": 0.1555, "step": 8271 }, { "epoch": 2.68, "learning_rate": 1.4726527069347796e-07, "loss": 0.1539, "step": 8272 }, { "epoch": 2.68, "learning_rate": 1.4696966996771838e-07, "loss": 0.1446, "step": 8273 }, { "epoch": 2.68, "learning_rate": 1.4667435723009187e-07, "loss": 0.1622, "step": 8274 }, { "epoch": 2.68, "learning_rate": 1.463793325167423e-07, "loss": 0.1609, "step": 8275 }, { "epoch": 2.68, "learning_rate": 1.4608459586377743e-07, "loss": 0.1377, "step": 8276 }, { "epoch": 2.68, "learning_rate": 1.4579014730727037e-07, "loss": 0.1566, "step": 8277 }, { "epoch": 2.68, "learning_rate": 1.4549598688325896e-07, "loss": 0.1519, "step": 8278 }, { "epoch": 2.68, "learning_rate": 1.4520211462774548e-07, "loss": 0.1556, "step": 8279 }, { "epoch": 2.68, "learning_rate": 1.4490853057669675e-07, "loss": 0.1417, "step": 8280 }, { "epoch": 2.68, "learning_rate": 1.4461523476604482e-07, "loss": 0.148, "step": 8281 }, { "epoch": 2.68, "learning_rate": 1.4432222723168632e-07, "loss": 0.1393, "step": 8282 }, { "epoch": 2.68, "learning_rate": 1.4402950800948223e-07, "loss": 0.14, "step": 8283 }, { "epoch": 2.68, "learning_rate": 1.437370771352589e-07, "loss": 0.1414, "step": 8284 }, { "epoch": 2.68, "learning_rate": 1.4344493464480745e-07, "loss": 0.1442, "step": 8285 }, { "epoch": 2.69, "learning_rate": 1.4315308057388206e-07, "loss": 0.1452, "step": 8286 }, { "epoch": 2.69, "learning_rate": 1.428615149582041e-07, "loss": 0.1499, "step": 8287 }, { "epoch": 2.69, "learning_rate": 1.425702378334573e-07, "loss": 0.1649, "step": 8288 }, { "epoch": 2.69, "learning_rate": 1.4227924923529228e-07, "loss": 0.1485, "step": 8289 }, { "epoch": 2.69, "learning_rate": 1.4198854919932225e-07, "loss": 0.1591, "step": 8290 }, { "epoch": 2.69, "learning_rate": 1.4169813776112652e-07, "loss": 0.1431, "step": 8291 }, { "epoch": 2.69, "learning_rate": 1.4140801495624913e-07, "loss": 0.1451, "step": 8292 }, { "epoch": 2.69, "learning_rate": 1.4111818082019696e-07, "loss": 0.1515, "step": 8293 }, { "epoch": 2.69, "learning_rate": 1.4082863538844444e-07, "loss": 0.1376, "step": 8294 }, { "epoch": 2.69, "learning_rate": 1.4053937869642737e-07, "loss": 0.1655, "step": 8295 }, { "epoch": 2.69, "learning_rate": 1.402504107795502e-07, "loss": 0.1505, "step": 8296 }, { "epoch": 2.69, "learning_rate": 1.39961731673178e-07, "loss": 0.1377, "step": 8297 }, { "epoch": 2.69, "learning_rate": 1.3967334141264277e-07, "loss": 0.1416, "step": 8298 }, { "epoch": 2.69, "learning_rate": 1.39385240033241e-07, "loss": 0.1428, "step": 8299 }, { "epoch": 2.69, "learning_rate": 1.3909742757023336e-07, "loss": 0.1492, "step": 8300 }, { "epoch": 2.69, "learning_rate": 1.3880990405884532e-07, "loss": 0.1559, "step": 8301 }, { "epoch": 2.69, "learning_rate": 1.3852266953426674e-07, "loss": 0.1608, "step": 8302 }, { "epoch": 2.69, "learning_rate": 1.3823572403165285e-07, "loss": 0.1494, "step": 8303 }, { "epoch": 2.69, "learning_rate": 1.3794906758612252e-07, "loss": 0.1364, "step": 8304 }, { "epoch": 2.69, "learning_rate": 1.376627002327599e-07, "loss": 0.1516, "step": 8305 }, { "epoch": 2.69, "learning_rate": 1.373766220066136e-07, "loss": 0.1397, "step": 8306 }, { "epoch": 2.69, "learning_rate": 1.3709083294269676e-07, "loss": 0.1547, "step": 8307 }, { "epoch": 2.69, "learning_rate": 1.368053330759872e-07, "loss": 0.1463, "step": 8308 }, { "epoch": 2.69, "learning_rate": 1.3652012244142754e-07, "loss": 0.1504, "step": 8309 }, { "epoch": 2.69, "learning_rate": 1.362352010739243e-07, "loss": 0.15, "step": 8310 }, { "epoch": 2.69, "learning_rate": 1.3595056900834986e-07, "loss": 0.1428, "step": 8311 }, { "epoch": 2.69, "learning_rate": 1.3566622627953968e-07, "loss": 0.1571, "step": 8312 }, { "epoch": 2.69, "learning_rate": 1.3538217292229482e-07, "loss": 0.1411, "step": 8313 }, { "epoch": 2.69, "learning_rate": 1.3509840897138083e-07, "loss": 0.1538, "step": 8314 }, { "epoch": 2.69, "learning_rate": 1.3481493446152766e-07, "loss": 0.151, "step": 8315 }, { "epoch": 2.69, "learning_rate": 1.3453174942743008e-07, "loss": 0.1608, "step": 8316 }, { "epoch": 2.7, "learning_rate": 1.3424885390374593e-07, "loss": 0.1282, "step": 8317 }, { "epoch": 2.7, "learning_rate": 1.3396624792510082e-07, "loss": 0.1444, "step": 8318 }, { "epoch": 2.7, "learning_rate": 1.33683931526081e-07, "loss": 0.1415, "step": 8319 }, { "epoch": 2.7, "learning_rate": 1.3340190474124104e-07, "loss": 0.1625, "step": 8320 }, { "epoch": 2.7, "learning_rate": 1.3312016760509722e-07, "loss": 0.1579, "step": 8321 }, { "epoch": 2.7, "learning_rate": 1.3283872015213168e-07, "loss": 0.1456, "step": 8322 }, { "epoch": 2.7, "learning_rate": 1.3255756241679102e-07, "loss": 0.1242, "step": 8323 }, { "epoch": 2.7, "learning_rate": 1.3227669443348578e-07, "loss": 0.1732, "step": 8324 }, { "epoch": 2.7, "learning_rate": 1.3199611623659235e-07, "loss": 0.1451, "step": 8325 }, { "epoch": 2.7, "learning_rate": 1.3171582786044968e-07, "loss": 0.1538, "step": 8326 }, { "epoch": 2.7, "learning_rate": 1.3143582933936333e-07, "loss": 0.1563, "step": 8327 }, { "epoch": 2.7, "learning_rate": 1.3115612070760174e-07, "loss": 0.1682, "step": 8328 }, { "epoch": 2.7, "learning_rate": 1.3087670199939894e-07, "loss": 0.1537, "step": 8329 }, { "epoch": 2.7, "learning_rate": 1.3059757324895283e-07, "loss": 0.1598, "step": 8330 }, { "epoch": 2.7, "learning_rate": 1.303187344904261e-07, "loss": 0.1504, "step": 8331 }, { "epoch": 2.7, "learning_rate": 1.3004018575794586e-07, "loss": 0.1595, "step": 8332 }, { "epoch": 2.7, "learning_rate": 1.2976192708560432e-07, "loss": 0.1377, "step": 8333 }, { "epoch": 2.7, "learning_rate": 1.2948395850745726e-07, "loss": 0.1361, "step": 8334 }, { "epoch": 2.7, "learning_rate": 1.29206280057525e-07, "loss": 0.1497, "step": 8335 }, { "epoch": 2.7, "learning_rate": 1.2892889176979284e-07, "loss": 0.1395, "step": 8336 }, { "epoch": 2.7, "learning_rate": 1.2865179367821083e-07, "loss": 0.1493, "step": 8337 }, { "epoch": 2.7, "learning_rate": 1.283749858166927e-07, "loss": 0.1452, "step": 8338 }, { "epoch": 2.7, "learning_rate": 1.280984682191172e-07, "loss": 0.1622, "step": 8339 }, { "epoch": 2.7, "learning_rate": 1.2782224091932775e-07, "loss": 0.1523, "step": 8340 }, { "epoch": 2.7, "learning_rate": 1.2754630395113098e-07, "loss": 0.1438, "step": 8341 }, { "epoch": 2.7, "learning_rate": 1.2727065734830013e-07, "loss": 0.1493, "step": 8342 }, { "epoch": 2.7, "learning_rate": 1.269953011445707e-07, "loss": 0.1438, "step": 8343 }, { "epoch": 2.7, "learning_rate": 1.267202353736438e-07, "loss": 0.1564, "step": 8344 }, { "epoch": 2.7, "learning_rate": 1.26445460069185e-07, "loss": 0.1399, "step": 8345 }, { "epoch": 2.7, "learning_rate": 1.2617097526482407e-07, "loss": 0.1414, "step": 8346 }, { "epoch": 2.7, "learning_rate": 1.2589678099415582e-07, "loss": 0.141, "step": 8347 }, { "epoch": 2.71, "learning_rate": 1.256228772907378e-07, "loss": 0.1487, "step": 8348 }, { "epoch": 2.71, "learning_rate": 1.2534926418809433e-07, "loss": 0.1517, "step": 8349 }, { "epoch": 2.71, "learning_rate": 1.2507594171971198e-07, "loss": 0.1528, "step": 8350 }, { "epoch": 2.71, "learning_rate": 1.2480290991904398e-07, "loss": 0.1542, "step": 8351 }, { "epoch": 2.71, "learning_rate": 1.245301688195058e-07, "loss": 0.1412, "step": 8352 }, { "epoch": 2.71, "learning_rate": 1.2425771845447853e-07, "loss": 0.1425, "step": 8353 }, { "epoch": 2.71, "learning_rate": 1.2398555885730774e-07, "loss": 0.1511, "step": 8354 }, { "epoch": 2.71, "learning_rate": 1.2371369006130256e-07, "loss": 0.1707, "step": 8355 }, { "epoch": 2.71, "learning_rate": 1.2344211209973811e-07, "loss": 0.1402, "step": 8356 }, { "epoch": 2.71, "learning_rate": 1.2317082500585163e-07, "loss": 0.1354, "step": 8357 }, { "epoch": 2.71, "learning_rate": 1.2289982881284718e-07, "loss": 0.142, "step": 8358 }, { "epoch": 2.71, "learning_rate": 1.226291235538915e-07, "loss": 0.1533, "step": 8359 }, { "epoch": 2.71, "learning_rate": 1.223587092621162e-07, "loss": 0.1489, "step": 8360 }, { "epoch": 2.71, "learning_rate": 1.2208858597061752e-07, "loss": 0.1407, "step": 8361 }, { "epoch": 2.71, "learning_rate": 1.21818753712456e-07, "loss": 0.1514, "step": 8362 }, { "epoch": 2.71, "learning_rate": 1.2154921252065633e-07, "loss": 0.1563, "step": 8363 }, { "epoch": 2.71, "learning_rate": 1.2127996242820822e-07, "loss": 0.1458, "step": 8364 }, { "epoch": 2.71, "learning_rate": 1.2101100346806478e-07, "loss": 0.1585, "step": 8365 }, { "epoch": 2.71, "learning_rate": 1.2074233567314408e-07, "loss": 0.146, "step": 8366 }, { "epoch": 2.71, "learning_rate": 1.2047395907632818e-07, "loss": 0.1484, "step": 8367 }, { "epoch": 2.71, "learning_rate": 1.2020587371046445e-07, "loss": 0.1314, "step": 8368 }, { "epoch": 2.71, "learning_rate": 1.1993807960836322e-07, "loss": 0.1614, "step": 8369 }, { "epoch": 2.71, "learning_rate": 1.1967057680280058e-07, "loss": 0.1465, "step": 8370 }, { "epoch": 2.71, "learning_rate": 1.1940336532651614e-07, "loss": 0.1586, "step": 8371 }, { "epoch": 2.71, "learning_rate": 1.1913644521221345e-07, "loss": 0.1594, "step": 8372 }, { "epoch": 2.71, "learning_rate": 1.1886981649256169e-07, "loss": 0.1427, "step": 8373 }, { "epoch": 2.71, "learning_rate": 1.1860347920019304e-07, "loss": 0.1521, "step": 8374 }, { "epoch": 2.71, "learning_rate": 1.1833743336770482e-07, "loss": 0.1519, "step": 8375 }, { "epoch": 2.71, "learning_rate": 1.1807167902765843e-07, "loss": 0.1465, "step": 8376 }, { "epoch": 2.71, "learning_rate": 1.1780621621257953e-07, "loss": 0.1408, "step": 8377 }, { "epoch": 2.71, "learning_rate": 1.1754104495495882e-07, "loss": 0.1557, "step": 8378 }, { "epoch": 2.72, "learning_rate": 1.1727616528724949e-07, "loss": 0.1574, "step": 8379 }, { "epoch": 2.72, "learning_rate": 1.1701157724187173e-07, "loss": 0.146, "step": 8380 }, { "epoch": 2.72, "learning_rate": 1.1674728085120713e-07, "loss": 0.138, "step": 8381 }, { "epoch": 2.72, "learning_rate": 1.1648327614760452e-07, "loss": 0.1368, "step": 8382 }, { "epoch": 2.72, "learning_rate": 1.1621956316337391e-07, "loss": 0.1522, "step": 8383 }, { "epoch": 2.72, "learning_rate": 1.1595614193079224e-07, "loss": 0.1409, "step": 8384 }, { "epoch": 2.72, "learning_rate": 1.1569301248209958e-07, "loss": 0.1567, "step": 8385 }, { "epoch": 2.72, "learning_rate": 1.1543017484950015e-07, "loss": 0.1512, "step": 8386 }, { "epoch": 2.72, "learning_rate": 1.1516762906516322e-07, "loss": 0.1512, "step": 8387 }, { "epoch": 2.72, "learning_rate": 1.1490537516122141e-07, "loss": 0.152, "step": 8388 }, { "epoch": 2.72, "learning_rate": 1.1464341316977184e-07, "loss": 0.1426, "step": 8389 }, { "epoch": 2.72, "learning_rate": 1.1438174312287664e-07, "loss": 0.1439, "step": 8390 }, { "epoch": 2.72, "learning_rate": 1.1412036505256158e-07, "loss": 0.1578, "step": 8391 }, { "epoch": 2.72, "learning_rate": 1.1385927899081661e-07, "loss": 0.1599, "step": 8392 }, { "epoch": 2.72, "learning_rate": 1.1359848496959618e-07, "loss": 0.1469, "step": 8393 }, { "epoch": 2.72, "learning_rate": 1.1333798302081922e-07, "loss": 0.1327, "step": 8394 }, { "epoch": 2.72, "learning_rate": 1.1307777317636882e-07, "loss": 0.1435, "step": 8395 }, { "epoch": 2.72, "learning_rate": 1.1281785546809115e-07, "loss": 0.1579, "step": 8396 }, { "epoch": 2.72, "learning_rate": 1.1255822992779858e-07, "loss": 0.1582, "step": 8397 }, { "epoch": 2.72, "learning_rate": 1.1229889658726623e-07, "loss": 0.1373, "step": 8398 }, { "epoch": 2.72, "learning_rate": 1.1203985547823427e-07, "loss": 0.1568, "step": 8399 }, { "epoch": 2.72, "learning_rate": 1.1178110663240676e-07, "loss": 0.1503, "step": 8400 }, { "epoch": 2.72, "learning_rate": 1.1152265008145202e-07, "loss": 0.1396, "step": 8401 }, { "epoch": 2.72, "learning_rate": 1.1126448585700306e-07, "loss": 0.1452, "step": 8402 }, { "epoch": 2.72, "learning_rate": 1.110066139906557e-07, "loss": 0.1477, "step": 8403 }, { "epoch": 2.72, "learning_rate": 1.1074903451397195e-07, "loss": 0.1469, "step": 8404 }, { "epoch": 2.72, "learning_rate": 1.1049174745847657e-07, "loss": 0.1434, "step": 8405 }, { "epoch": 2.72, "learning_rate": 1.1023475285565882e-07, "loss": 0.1393, "step": 8406 }, { "epoch": 2.72, "learning_rate": 1.099780507369727e-07, "loss": 0.1459, "step": 8407 }, { "epoch": 2.72, "learning_rate": 1.0972164113383616e-07, "loss": 0.1495, "step": 8408 }, { "epoch": 2.72, "learning_rate": 1.09465524077631e-07, "loss": 0.1451, "step": 8409 }, { "epoch": 2.73, "learning_rate": 1.0920969959970301e-07, "loss": 0.1489, "step": 8410 }, { "epoch": 2.73, "learning_rate": 1.0895416773136408e-07, "loss": 0.1681, "step": 8411 }, { "epoch": 2.73, "learning_rate": 1.0869892850388697e-07, "loss": 0.1553, "step": 8412 }, { "epoch": 2.73, "learning_rate": 1.0844398194851197e-07, "loss": 0.1382, "step": 8413 }, { "epoch": 2.73, "learning_rate": 1.0818932809644161e-07, "loss": 0.1449, "step": 8414 }, { "epoch": 2.73, "learning_rate": 1.0793496697884265e-07, "loss": 0.1446, "step": 8415 }, { "epoch": 2.73, "learning_rate": 1.0768089862684684e-07, "loss": 0.1473, "step": 8416 }, { "epoch": 2.73, "learning_rate": 1.0742712307154957e-07, "loss": 0.1504, "step": 8417 }, { "epoch": 2.73, "learning_rate": 1.0717364034401073e-07, "loss": 0.1468, "step": 8418 }, { "epoch": 2.73, "learning_rate": 1.0692045047525384e-07, "loss": 0.144, "step": 8419 }, { "epoch": 2.73, "learning_rate": 1.066675534962669e-07, "loss": 0.1404, "step": 8420 }, { "epoch": 2.73, "learning_rate": 1.0641494943800234e-07, "loss": 0.1613, "step": 8421 }, { "epoch": 2.73, "learning_rate": 1.0616263833137602e-07, "loss": 0.1537, "step": 8422 }, { "epoch": 2.73, "learning_rate": 1.0591062020726878e-07, "loss": 0.1525, "step": 8423 }, { "epoch": 2.73, "learning_rate": 1.0565889509652483e-07, "loss": 0.1453, "step": 8424 }, { "epoch": 2.73, "learning_rate": 1.0540746302995341e-07, "loss": 0.1452, "step": 8425 }, { "epoch": 2.73, "learning_rate": 1.0515632403832715e-07, "loss": 0.1412, "step": 8426 }, { "epoch": 2.73, "learning_rate": 1.0490547815238228e-07, "loss": 0.1381, "step": 8427 }, { "epoch": 2.73, "learning_rate": 1.0465492540282146e-07, "loss": 0.1467, "step": 8428 }, { "epoch": 2.73, "learning_rate": 1.044046658203085e-07, "loss": 0.1562, "step": 8429 }, { "epoch": 2.73, "learning_rate": 1.0415469943547335e-07, "loss": 0.1474, "step": 8430 }, { "epoch": 2.73, "learning_rate": 1.0390502627890986e-07, "loss": 0.154, "step": 8431 }, { "epoch": 2.73, "learning_rate": 1.0365564638117442e-07, "loss": 0.1538, "step": 8432 }, { "epoch": 2.73, "learning_rate": 1.0340655977279012e-07, "loss": 0.1398, "step": 8433 }, { "epoch": 2.73, "learning_rate": 1.0315776648424119e-07, "loss": 0.1335, "step": 8434 }, { "epoch": 2.73, "learning_rate": 1.0290926654597938e-07, "loss": 0.1604, "step": 8435 }, { "epoch": 2.73, "learning_rate": 1.0266105998841702e-07, "loss": 0.1385, "step": 8436 }, { "epoch": 2.73, "learning_rate": 1.0241314684193343e-07, "loss": 0.1259, "step": 8437 }, { "epoch": 2.73, "learning_rate": 1.0216552713686989e-07, "loss": 0.1459, "step": 8438 }, { "epoch": 2.73, "learning_rate": 1.01918200903533e-07, "loss": 0.1478, "step": 8439 }, { "epoch": 2.73, "learning_rate": 1.0167116817219325e-07, "loss": 0.1777, "step": 8440 }, { "epoch": 2.74, "learning_rate": 1.0142442897308453e-07, "loss": 0.1501, "step": 8441 }, { "epoch": 2.74, "learning_rate": 1.0117798333640627e-07, "loss": 0.1413, "step": 8442 }, { "epoch": 2.74, "learning_rate": 1.0093183129231993e-07, "loss": 0.1668, "step": 8443 }, { "epoch": 2.74, "learning_rate": 1.0068597287095305e-07, "loss": 0.1383, "step": 8444 }, { "epoch": 2.74, "learning_rate": 1.0044040810239547e-07, "loss": 0.1603, "step": 8445 }, { "epoch": 2.74, "learning_rate": 1.0019513701670285e-07, "loss": 0.1553, "step": 8446 }, { "epoch": 2.74, "learning_rate": 9.995015964389315e-08, "loss": 0.1455, "step": 8447 }, { "epoch": 2.74, "learning_rate": 9.970547601394986e-08, "loss": 0.1408, "step": 8448 }, { "epoch": 2.74, "learning_rate": 9.94610861568196e-08, "loss": 0.1624, "step": 8449 }, { "epoch": 2.74, "learning_rate": 9.92169901024137e-08, "loss": 0.1528, "step": 8450 }, { "epoch": 2.74, "learning_rate": 9.897318788060662e-08, "loss": 0.1515, "step": 8451 }, { "epoch": 2.74, "learning_rate": 9.872967952123752e-08, "loss": 0.1599, "step": 8452 }, { "epoch": 2.74, "learning_rate": 9.848646505410953e-08, "loss": 0.1443, "step": 8453 }, { "epoch": 2.74, "learning_rate": 9.824354450898966e-08, "loss": 0.1602, "step": 8454 }, { "epoch": 2.74, "learning_rate": 9.800091791560939e-08, "loss": 0.1466, "step": 8455 }, { "epoch": 2.74, "learning_rate": 9.775858530366334e-08, "loss": 0.1667, "step": 8456 }, { "epoch": 2.74, "learning_rate": 9.751654670281135e-08, "loss": 0.166, "step": 8457 }, { "epoch": 2.74, "learning_rate": 9.727480214267559e-08, "loss": 0.151, "step": 8458 }, { "epoch": 2.74, "learning_rate": 9.70333516528446e-08, "loss": 0.1551, "step": 8459 }, { "epoch": 2.74, "learning_rate": 9.679219526286837e-08, "loss": 0.1541, "step": 8460 }, { "epoch": 2.74, "learning_rate": 9.655133300226271e-08, "loss": 0.1507, "step": 8461 }, { "epoch": 2.74, "learning_rate": 9.631076490050684e-08, "loss": 0.1464, "step": 8462 }, { "epoch": 2.74, "learning_rate": 9.60704909870433e-08, "loss": 0.1542, "step": 8463 }, { "epoch": 2.74, "learning_rate": 9.583051129128051e-08, "loss": 0.1419, "step": 8464 }, { "epoch": 2.74, "learning_rate": 9.559082584258833e-08, "loss": 0.1492, "step": 8465 }, { "epoch": 2.74, "learning_rate": 9.535143467030327e-08, "loss": 0.15, "step": 8466 }, { "epoch": 2.74, "learning_rate": 9.511233780372303e-08, "loss": 0.156, "step": 8467 }, { "epoch": 2.74, "learning_rate": 9.487353527211223e-08, "loss": 0.146, "step": 8468 }, { "epoch": 2.74, "learning_rate": 9.463502710469697e-08, "loss": 0.1428, "step": 8469 }, { "epoch": 2.74, "learning_rate": 9.439681333066858e-08, "loss": 0.1494, "step": 8470 }, { "epoch": 2.74, "learning_rate": 9.415889397918238e-08, "loss": 0.1422, "step": 8471 }, { "epoch": 2.75, "learning_rate": 9.3921269079357e-08, "loss": 0.1388, "step": 8472 }, { "epoch": 2.75, "learning_rate": 9.368393866027614e-08, "loss": 0.1521, "step": 8473 }, { "epoch": 2.75, "learning_rate": 9.344690275098573e-08, "loss": 0.1505, "step": 8474 }, { "epoch": 2.75, "learning_rate": 9.321016138049727e-08, "loss": 0.1534, "step": 8475 }, { "epoch": 2.75, "learning_rate": 9.297371457778565e-08, "loss": 0.1392, "step": 8476 }, { "epoch": 2.75, "learning_rate": 9.273756237178938e-08, "loss": 0.1356, "step": 8477 }, { "epoch": 2.75, "learning_rate": 9.250170479141146e-08, "loss": 0.1502, "step": 8478 }, { "epoch": 2.75, "learning_rate": 9.226614186551852e-08, "loss": 0.1542, "step": 8479 }, { "epoch": 2.75, "learning_rate": 9.20308736229411e-08, "loss": 0.1376, "step": 8480 }, { "epoch": 2.75, "learning_rate": 9.179590009247397e-08, "loss": 0.1479, "step": 8481 }, { "epoch": 2.75, "learning_rate": 9.15612213028752e-08, "loss": 0.1588, "step": 8482 }, { "epoch": 2.75, "learning_rate": 9.132683728286767e-08, "loss": 0.1606, "step": 8483 }, { "epoch": 2.75, "learning_rate": 9.109274806113732e-08, "loss": 0.1564, "step": 8484 }, { "epoch": 2.75, "learning_rate": 9.085895366633457e-08, "loss": 0.1379, "step": 8485 }, { "epoch": 2.75, "learning_rate": 9.062545412707375e-08, "loss": 0.1772, "step": 8486 }, { "epoch": 2.75, "learning_rate": 9.039224947193254e-08, "loss": 0.1502, "step": 8487 }, { "epoch": 2.75, "learning_rate": 9.01593397294534e-08, "loss": 0.1466, "step": 8488 }, { "epoch": 2.75, "learning_rate": 8.992672492814158e-08, "loss": 0.1503, "step": 8489 }, { "epoch": 2.75, "learning_rate": 8.969440509646821e-08, "loss": 0.1465, "step": 8490 }, { "epoch": 2.75, "learning_rate": 8.946238026286552e-08, "loss": 0.1558, "step": 8491 }, { "epoch": 2.75, "learning_rate": 8.923065045573165e-08, "loss": 0.1539, "step": 8492 }, { "epoch": 2.75, "learning_rate": 8.899921570342807e-08, "loss": 0.1516, "step": 8493 }, { "epoch": 2.75, "learning_rate": 8.876807603428017e-08, "loss": 0.1508, "step": 8494 }, { "epoch": 2.75, "learning_rate": 8.853723147657755e-08, "loss": 0.1407, "step": 8495 }, { "epoch": 2.75, "learning_rate": 8.830668205857263e-08, "loss": 0.1527, "step": 8496 }, { "epoch": 2.75, "learning_rate": 8.807642780848335e-08, "loss": 0.1414, "step": 8497 }, { "epoch": 2.75, "learning_rate": 8.784646875448971e-08, "loss": 0.1512, "step": 8498 }, { "epoch": 2.75, "learning_rate": 8.761680492473668e-08, "loss": 0.1475, "step": 8499 }, { "epoch": 2.75, "learning_rate": 8.738743634733316e-08, "loss": 0.1434, "step": 8500 }, { "epoch": 2.75, "learning_rate": 8.715836305035169e-08, "loss": 0.1336, "step": 8501 }, { "epoch": 2.76, "learning_rate": 8.692958506182847e-08, "loss": 0.1618, "step": 8502 }, { "epoch": 2.76, "learning_rate": 8.67011024097636e-08, "loss": 0.1606, "step": 8503 }, { "epoch": 2.76, "learning_rate": 8.647291512212136e-08, "loss": 0.156, "step": 8504 }, { "epoch": 2.76, "learning_rate": 8.624502322682942e-08, "loss": 0.1567, "step": 8505 }, { "epoch": 2.76, "learning_rate": 8.601742675177993e-08, "loss": 0.1487, "step": 8506 }, { "epoch": 2.76, "learning_rate": 8.57901257248278e-08, "loss": 0.1514, "step": 8507 }, { "epoch": 2.76, "learning_rate": 8.556312017379332e-08, "loss": 0.1482, "step": 8508 }, { "epoch": 2.76, "learning_rate": 8.533641012645921e-08, "loss": 0.1379, "step": 8509 }, { "epoch": 2.76, "learning_rate": 8.510999561057276e-08, "loss": 0.1461, "step": 8510 }, { "epoch": 2.76, "learning_rate": 8.488387665384457e-08, "loss": 0.1487, "step": 8511 }, { "epoch": 2.76, "learning_rate": 8.465805328395055e-08, "loss": 0.16, "step": 8512 }, { "epoch": 2.76, "learning_rate": 8.443252552852776e-08, "loss": 0.1459, "step": 8513 }, { "epoch": 2.76, "learning_rate": 8.42072934151797e-08, "loss": 0.138, "step": 8514 }, { "epoch": 2.76, "learning_rate": 8.398235697147205e-08, "loss": 0.1367, "step": 8515 }, { "epoch": 2.76, "learning_rate": 8.375771622493506e-08, "loss": 0.1434, "step": 8516 }, { "epoch": 2.76, "learning_rate": 8.353337120306282e-08, "loss": 0.1689, "step": 8517 }, { "epoch": 2.76, "learning_rate": 8.330932193331226e-08, "loss": 0.1504, "step": 8518 }, { "epoch": 2.76, "learning_rate": 8.308556844310589e-08, "loss": 0.1353, "step": 8519 }, { "epoch": 2.76, "learning_rate": 8.286211075982764e-08, "loss": 0.1458, "step": 8520 }, { "epoch": 2.76, "learning_rate": 8.263894891082813e-08, "loss": 0.1873, "step": 8521 }, { "epoch": 2.76, "learning_rate": 8.241608292341913e-08, "loss": 0.1542, "step": 8522 }, { "epoch": 2.76, "learning_rate": 8.219351282487742e-08, "loss": 0.1598, "step": 8523 }, { "epoch": 2.76, "learning_rate": 8.197123864244344e-08, "loss": 0.1474, "step": 8524 }, { "epoch": 2.76, "learning_rate": 8.174926040332182e-08, "loss": 0.1375, "step": 8525 }, { "epoch": 2.76, "learning_rate": 8.152757813468027e-08, "loss": 0.1475, "step": 8526 }, { "epoch": 2.76, "learning_rate": 8.130619186365012e-08, "loss": 0.1594, "step": 8527 }, { "epoch": 2.76, "learning_rate": 8.10851016173278e-08, "loss": 0.1576, "step": 8528 }, { "epoch": 2.76, "learning_rate": 8.086430742277191e-08, "loss": 0.1557, "step": 8529 }, { "epoch": 2.76, "learning_rate": 8.064380930700556e-08, "loss": 0.154, "step": 8530 }, { "epoch": 2.76, "learning_rate": 8.042360729701604e-08, "loss": 0.1462, "step": 8531 }, { "epoch": 2.76, "learning_rate": 8.020370141975347e-08, "loss": 0.1552, "step": 8532 }, { "epoch": 2.77, "learning_rate": 7.998409170213245e-08, "loss": 0.1626, "step": 8533 }, { "epoch": 2.77, "learning_rate": 7.976477817103117e-08, "loss": 0.1503, "step": 8534 }, { "epoch": 2.77, "learning_rate": 7.954576085329152e-08, "loss": 0.1407, "step": 8535 }, { "epoch": 2.77, "learning_rate": 7.93270397757187e-08, "loss": 0.1392, "step": 8536 }, { "epoch": 2.77, "learning_rate": 7.910861496508216e-08, "loss": 0.1565, "step": 8537 }, { "epoch": 2.77, "learning_rate": 7.88904864481152e-08, "loss": 0.1478, "step": 8538 }, { "epoch": 2.77, "learning_rate": 7.867265425151454e-08, "loss": 0.1381, "step": 8539 }, { "epoch": 2.77, "learning_rate": 7.845511840194081e-08, "loss": 0.1469, "step": 8540 }, { "epoch": 2.77, "learning_rate": 7.823787892601825e-08, "loss": 0.1584, "step": 8541 }, { "epoch": 2.77, "learning_rate": 7.802093585033449e-08, "loss": 0.1522, "step": 8542 }, { "epoch": 2.77, "learning_rate": 7.780428920144217e-08, "loss": 0.1526, "step": 8543 }, { "epoch": 2.77, "learning_rate": 7.758793900585565e-08, "loss": 0.1573, "step": 8544 }, { "epoch": 2.77, "learning_rate": 7.737188529005484e-08, "loss": 0.141, "step": 8545 }, { "epoch": 2.77, "learning_rate": 7.715612808048251e-08, "loss": 0.1484, "step": 8546 }, { "epoch": 2.77, "learning_rate": 7.6940667403545e-08, "loss": 0.1477, "step": 8547 }, { "epoch": 2.77, "learning_rate": 7.672550328561318e-08, "loss": 0.1498, "step": 8548 }, { "epoch": 2.77, "learning_rate": 7.651063575301986e-08, "loss": 0.1318, "step": 8549 }, { "epoch": 2.77, "learning_rate": 7.62960648320643e-08, "loss": 0.1471, "step": 8550 }, { "epoch": 2.77, "learning_rate": 7.608179054900634e-08, "loss": 0.1559, "step": 8551 }, { "epoch": 2.77, "learning_rate": 7.586781293007273e-08, "loss": 0.1352, "step": 8552 }, { "epoch": 2.77, "learning_rate": 7.565413200145089e-08, "loss": 0.157, "step": 8553 }, { "epoch": 2.77, "learning_rate": 7.544074778929378e-08, "loss": 0.147, "step": 8554 }, { "epoch": 2.77, "learning_rate": 7.522766031971774e-08, "loss": 0.1507, "step": 8555 }, { "epoch": 2.77, "learning_rate": 7.501486961880245e-08, "loss": 0.1534, "step": 8556 }, { "epoch": 2.77, "learning_rate": 7.480237571259153e-08, "loss": 0.1395, "step": 8557 }, { "epoch": 2.77, "learning_rate": 7.459017862709194e-08, "loss": 0.1519, "step": 8558 }, { "epoch": 2.77, "learning_rate": 7.437827838827488e-08, "loss": 0.138, "step": 8559 }, { "epoch": 2.77, "learning_rate": 7.416667502207458e-08, "loss": 0.1499, "step": 8560 }, { "epoch": 2.77, "learning_rate": 7.395536855438923e-08, "loss": 0.1491, "step": 8561 }, { "epoch": 2.77, "learning_rate": 7.37443590110809e-08, "loss": 0.1475, "step": 8562 }, { "epoch": 2.77, "learning_rate": 7.353364641797533e-08, "loss": 0.1354, "step": 8563 }, { "epoch": 2.78, "learning_rate": 7.332323080086106e-08, "loss": 0.1318, "step": 8564 }, { "epoch": 2.78, "learning_rate": 7.311311218549166e-08, "loss": 0.1538, "step": 8565 }, { "epoch": 2.78, "learning_rate": 7.290329059758294e-08, "loss": 0.1393, "step": 8566 }, { "epoch": 2.78, "learning_rate": 7.269376606281547e-08, "loss": 0.1492, "step": 8567 }, { "epoch": 2.78, "learning_rate": 7.248453860683291e-08, "loss": 0.1412, "step": 8568 }, { "epoch": 2.78, "learning_rate": 7.227560825524255e-08, "loss": 0.1426, "step": 8569 }, { "epoch": 2.78, "learning_rate": 7.20669750336156e-08, "loss": 0.157, "step": 8570 }, { "epoch": 2.78, "learning_rate": 7.185863896748662e-08, "loss": 0.1556, "step": 8571 }, { "epoch": 2.78, "learning_rate": 7.165060008235414e-08, "loss": 0.1429, "step": 8572 }, { "epoch": 2.78, "learning_rate": 7.14428584036797e-08, "loss": 0.1478, "step": 8573 }, { "epoch": 2.78, "learning_rate": 7.123541395688966e-08, "loss": 0.1454, "step": 8574 }, { "epoch": 2.78, "learning_rate": 7.102826676737202e-08, "loss": 0.1442, "step": 8575 }, { "epoch": 2.78, "learning_rate": 7.082141686048066e-08, "loss": 0.1545, "step": 8576 }, { "epoch": 2.78, "learning_rate": 7.061486426153146e-08, "loss": 0.146, "step": 8577 }, { "epoch": 2.78, "learning_rate": 7.040860899580475e-08, "loss": 0.1458, "step": 8578 }, { "epoch": 2.78, "learning_rate": 7.020265108854423e-08, "loss": 0.1573, "step": 8579 }, { "epoch": 2.78, "learning_rate": 6.99969905649564e-08, "loss": 0.1565, "step": 8580 }, { "epoch": 2.78, "learning_rate": 6.979162745021306e-08, "loss": 0.1476, "step": 8581 }, { "epoch": 2.78, "learning_rate": 6.958656176944801e-08, "loss": 0.1435, "step": 8582 }, { "epoch": 2.78, "learning_rate": 6.938179354776003e-08, "loss": 0.1419, "step": 8583 }, { "epoch": 2.78, "learning_rate": 6.917732281020995e-08, "loss": 0.1498, "step": 8584 }, { "epoch": 2.78, "learning_rate": 6.897314958182327e-08, "loss": 0.1451, "step": 8585 }, { "epoch": 2.78, "learning_rate": 6.87692738875892e-08, "loss": 0.1583, "step": 8586 }, { "epoch": 2.78, "learning_rate": 6.856569575245969e-08, "loss": 0.1616, "step": 8587 }, { "epoch": 2.78, "learning_rate": 6.836241520135123e-08, "loss": 0.1493, "step": 8588 }, { "epoch": 2.78, "learning_rate": 6.815943225914278e-08, "loss": 0.1296, "step": 8589 }, { "epoch": 2.78, "learning_rate": 6.795674695067783e-08, "loss": 0.1459, "step": 8590 }, { "epoch": 2.78, "learning_rate": 6.77543593007629e-08, "loss": 0.1439, "step": 8591 }, { "epoch": 2.78, "learning_rate": 6.755226933416876e-08, "loss": 0.1468, "step": 8592 }, { "epoch": 2.78, "learning_rate": 6.735047707562863e-08, "loss": 0.1541, "step": 8593 }, { "epoch": 2.78, "learning_rate": 6.714898254984031e-08, "loss": 0.1436, "step": 8594 }, { "epoch": 2.79, "learning_rate": 6.69477857814646e-08, "loss": 0.154, "step": 8595 }, { "epoch": 2.79, "learning_rate": 6.674688679512654e-08, "loss": 0.1464, "step": 8596 }, { "epoch": 2.79, "learning_rate": 6.654628561541337e-08, "loss": 0.1336, "step": 8597 }, { "epoch": 2.79, "learning_rate": 6.634598226687772e-08, "loss": 0.1636, "step": 8598 }, { "epoch": 2.79, "learning_rate": 6.614597677403384e-08, "loss": 0.1358, "step": 8599 }, { "epoch": 2.79, "learning_rate": 6.594626916136077e-08, "loss": 0.1653, "step": 8600 }, { "epoch": 2.79, "learning_rate": 6.574685945330145e-08, "loss": 0.1457, "step": 8601 }, { "epoch": 2.79, "learning_rate": 6.554774767426026e-08, "loss": 0.1607, "step": 8602 }, { "epoch": 2.79, "learning_rate": 6.534893384860824e-08, "loss": 0.1461, "step": 8603 }, { "epoch": 2.79, "learning_rate": 6.515041800067678e-08, "loss": 0.1423, "step": 8604 }, { "epoch": 2.79, "learning_rate": 6.495220015476366e-08, "loss": 0.1405, "step": 8605 }, { "epoch": 2.79, "learning_rate": 6.475428033512754e-08, "loss": 0.1487, "step": 8606 }, { "epoch": 2.79, "learning_rate": 6.455665856599291e-08, "loss": 0.1379, "step": 8607 }, { "epoch": 2.79, "learning_rate": 6.435933487154627e-08, "loss": 0.1521, "step": 8608 }, { "epoch": 2.79, "learning_rate": 6.416230927593803e-08, "loss": 0.1425, "step": 8609 }, { "epoch": 2.79, "learning_rate": 6.39655818032825e-08, "loss": 0.1524, "step": 8610 }, { "epoch": 2.79, "learning_rate": 6.376915247765735e-08, "loss": 0.1474, "step": 8611 }, { "epoch": 2.79, "learning_rate": 6.357302132310338e-08, "loss": 0.1503, "step": 8612 }, { "epoch": 2.79, "learning_rate": 6.337718836362473e-08, "loss": 0.1346, "step": 8613 }, { "epoch": 2.79, "learning_rate": 6.318165362319023e-08, "loss": 0.1549, "step": 8614 }, { "epoch": 2.79, "learning_rate": 6.298641712573105e-08, "loss": 0.1404, "step": 8615 }, { "epoch": 2.79, "learning_rate": 6.279147889514226e-08, "loss": 0.1632, "step": 8616 }, { "epoch": 2.79, "learning_rate": 6.259683895528251e-08, "loss": 0.1508, "step": 8617 }, { "epoch": 2.79, "learning_rate": 6.24024973299736e-08, "loss": 0.1364, "step": 8618 }, { "epoch": 2.79, "learning_rate": 6.220845404300124e-08, "loss": 0.1432, "step": 8619 }, { "epoch": 2.79, "learning_rate": 6.201470911811474e-08, "loss": 0.139, "step": 8620 }, { "epoch": 2.79, "learning_rate": 6.182126257902626e-08, "loss": 0.1484, "step": 8621 }, { "epoch": 2.79, "learning_rate": 6.162811444941159e-08, "loss": 0.1581, "step": 8622 }, { "epoch": 2.79, "learning_rate": 6.143526475291067e-08, "loss": 0.1424, "step": 8623 }, { "epoch": 2.79, "learning_rate": 6.124271351312605e-08, "loss": 0.1569, "step": 8624 }, { "epoch": 2.79, "learning_rate": 6.105046075362441e-08, "loss": 0.1515, "step": 8625 }, { "epoch": 2.8, "learning_rate": 6.085850649793529e-08, "loss": 0.1576, "step": 8626 }, { "epoch": 2.8, "learning_rate": 6.066685076955264e-08, "loss": 0.157, "step": 8627 }, { "epoch": 2.8, "learning_rate": 6.047549359193245e-08, "loss": 0.1424, "step": 8628 }, { "epoch": 2.8, "learning_rate": 6.028443498849596e-08, "loss": 0.148, "step": 8629 }, { "epoch": 2.8, "learning_rate": 6.009367498262587e-08, "loss": 0.161, "step": 8630 }, { "epoch": 2.8, "learning_rate": 5.990321359767015e-08, "loss": 0.139, "step": 8631 }, { "epoch": 2.8, "learning_rate": 5.97130508569388e-08, "loss": 0.1258, "step": 8632 }, { "epoch": 2.8, "learning_rate": 5.95231867837065e-08, "loss": 0.1446, "step": 8633 }, { "epoch": 2.8, "learning_rate": 5.933362140121052e-08, "loss": 0.1623, "step": 8634 }, { "epoch": 2.8, "learning_rate": 5.9144354732651455e-08, "loss": 0.1543, "step": 8635 }, { "epoch": 2.8, "learning_rate": 5.8955386801194394e-08, "loss": 0.1621, "step": 8636 }, { "epoch": 2.8, "learning_rate": 5.8766717629966387e-08, "loss": 0.158, "step": 8637 }, { "epoch": 2.8, "learning_rate": 5.857834724205979e-08, "loss": 0.1489, "step": 8638 }, { "epoch": 2.8, "learning_rate": 5.839027566052841e-08, "loss": 0.1613, "step": 8639 }, { "epoch": 2.8, "learning_rate": 5.820250290839047e-08, "loss": 0.1359, "step": 8640 }, { "epoch": 2.8, "learning_rate": 5.801502900862788e-08, "loss": 0.1344, "step": 8641 }, { "epoch": 2.8, "learning_rate": 5.782785398418561e-08, "loss": 0.1591, "step": 8642 }, { "epoch": 2.8, "learning_rate": 5.7640977857972016e-08, "loss": 0.1412, "step": 8643 }, { "epoch": 2.8, "learning_rate": 5.745440065285879e-08, "loss": 0.1468, "step": 8644 }, { "epoch": 2.8, "learning_rate": 5.726812239168128e-08, "loss": 0.1489, "step": 8645 }, { "epoch": 2.8, "learning_rate": 5.708214309723792e-08, "loss": 0.1455, "step": 8646 }, { "epoch": 2.8, "learning_rate": 5.689646279229105e-08, "loss": 0.1444, "step": 8647 }, { "epoch": 2.8, "learning_rate": 5.671108149956611e-08, "loss": 0.1481, "step": 8648 }, { "epoch": 2.8, "learning_rate": 5.6525999241751894e-08, "loss": 0.1438, "step": 8649 }, { "epoch": 2.8, "learning_rate": 5.6341216041500555e-08, "loss": 0.1378, "step": 8650 }, { "epoch": 2.8, "learning_rate": 5.6156731921428455e-08, "loss": 0.1522, "step": 8651 }, { "epoch": 2.8, "learning_rate": 5.597254690411363e-08, "loss": 0.1411, "step": 8652 }, { "epoch": 2.8, "learning_rate": 5.5788661012099176e-08, "loss": 0.1397, "step": 8653 }, { "epoch": 2.8, "learning_rate": 5.560507426789069e-08, "loss": 0.1552, "step": 8654 }, { "epoch": 2.8, "learning_rate": 5.5421786693957705e-08, "loss": 0.1524, "step": 8655 }, { "epoch": 2.8, "learning_rate": 5.523879831273282e-08, "loss": 0.1454, "step": 8656 }, { "epoch": 2.81, "learning_rate": 5.505610914661147e-08, "loss": 0.1358, "step": 8657 }, { "epoch": 2.81, "learning_rate": 5.487371921795381e-08, "loss": 0.1685, "step": 8658 }, { "epoch": 2.81, "learning_rate": 5.4691628549082e-08, "loss": 0.1433, "step": 8659 }, { "epoch": 2.81, "learning_rate": 5.450983716228292e-08, "loss": 0.1443, "step": 8660 }, { "epoch": 2.81, "learning_rate": 5.4328345079805164e-08, "loss": 0.1533, "step": 8661 }, { "epoch": 2.81, "learning_rate": 5.4147152323862085e-08, "loss": 0.1326, "step": 8662 }, { "epoch": 2.81, "learning_rate": 5.3966258916629824e-08, "loss": 0.1541, "step": 8663 }, { "epoch": 2.81, "learning_rate": 5.378566488024817e-08, "loss": 0.1573, "step": 8664 }, { "epoch": 2.81, "learning_rate": 5.3605370236820276e-08, "loss": 0.142, "step": 8665 }, { "epoch": 2.81, "learning_rate": 5.3425375008411276e-08, "loss": 0.1719, "step": 8666 }, { "epoch": 2.81, "learning_rate": 5.3245679217052424e-08, "loss": 0.1461, "step": 8667 }, { "epoch": 2.81, "learning_rate": 5.3066282884735863e-08, "loss": 0.1622, "step": 8668 }, { "epoch": 2.81, "learning_rate": 5.2887186033417914e-08, "loss": 0.1408, "step": 8669 }, { "epoch": 2.81, "learning_rate": 5.270838868501854e-08, "loss": 0.1424, "step": 8670 }, { "epoch": 2.81, "learning_rate": 5.252989086142107e-08, "loss": 0.1574, "step": 8671 }, { "epoch": 2.81, "learning_rate": 5.235169258447137e-08, "loss": 0.1573, "step": 8672 }, { "epoch": 2.81, "learning_rate": 5.2173793875979204e-08, "loss": 0.1319, "step": 8673 }, { "epoch": 2.81, "learning_rate": 5.199619475771856e-08, "loss": 0.1411, "step": 8674 }, { "epoch": 2.81, "learning_rate": 5.181889525142453e-08, "loss": 0.1524, "step": 8675 }, { "epoch": 2.81, "learning_rate": 5.164189537879782e-08, "loss": 0.1526, "step": 8676 }, { "epoch": 2.81, "learning_rate": 5.146519516150084e-08, "loss": 0.1377, "step": 8677 }, { "epoch": 2.81, "learning_rate": 5.128879462116071e-08, "loss": 0.1553, "step": 8678 }, { "epoch": 2.81, "learning_rate": 5.111269377936656e-08, "loss": 0.1389, "step": 8679 }, { "epoch": 2.81, "learning_rate": 5.093689265767143e-08, "loss": 0.1741, "step": 8680 }, { "epoch": 2.81, "learning_rate": 5.0761391277591996e-08, "loss": 0.1501, "step": 8681 }, { "epoch": 2.81, "learning_rate": 5.05861896606083e-08, "loss": 0.1296, "step": 8682 }, { "epoch": 2.81, "learning_rate": 5.0411287828162346e-08, "loss": 0.144, "step": 8683 }, { "epoch": 2.81, "learning_rate": 5.023668580166091e-08, "loss": 0.1498, "step": 8684 }, { "epoch": 2.81, "learning_rate": 5.0062383602473566e-08, "loss": 0.1532, "step": 8685 }, { "epoch": 2.81, "learning_rate": 4.9888381251933237e-08, "loss": 0.1278, "step": 8686 }, { "epoch": 2.81, "learning_rate": 4.971467877133651e-08, "loss": 0.1408, "step": 8687 }, { "epoch": 2.82, "learning_rate": 4.954127618194193e-08, "loss": 0.1416, "step": 8688 }, { "epoch": 2.82, "learning_rate": 4.936817350497336e-08, "loss": 0.1405, "step": 8689 }, { "epoch": 2.82, "learning_rate": 4.919537076161579e-08, "loss": 0.1461, "step": 8690 }, { "epoch": 2.82, "learning_rate": 4.90228679730198e-08, "loss": 0.1486, "step": 8691 }, { "epoch": 2.82, "learning_rate": 4.8850665160297406e-08, "loss": 0.1622, "step": 8692 }, { "epoch": 2.82, "learning_rate": 4.867876234452423e-08, "loss": 0.1524, "step": 8693 }, { "epoch": 2.82, "learning_rate": 4.85071595467404e-08, "loss": 0.1456, "step": 8694 }, { "epoch": 2.82, "learning_rate": 4.8335856787947447e-08, "loss": 0.1462, "step": 8695 }, { "epoch": 2.82, "learning_rate": 4.81648540891122e-08, "loss": 0.1426, "step": 8696 }, { "epoch": 2.82, "learning_rate": 4.799415147116265e-08, "loss": 0.1353, "step": 8697 }, { "epoch": 2.82, "learning_rate": 4.782374895499236e-08, "loss": 0.1478, "step": 8698 }, { "epoch": 2.82, "learning_rate": 4.7653646561455767e-08, "loss": 0.1492, "step": 8699 }, { "epoch": 2.82, "learning_rate": 4.7483844311372594e-08, "loss": 0.1559, "step": 8700 }, { "epoch": 2.82, "learning_rate": 4.731434222552456e-08, "loss": 0.158, "step": 8701 }, { "epoch": 2.82, "learning_rate": 4.7145140324657e-08, "loss": 0.1383, "step": 8702 }, { "epoch": 2.82, "learning_rate": 4.697623862947892e-08, "loss": 0.1479, "step": 8703 }, { "epoch": 2.82, "learning_rate": 4.680763716066239e-08, "loss": 0.1483, "step": 8704 }, { "epoch": 2.82, "learning_rate": 4.663933593884229e-08, "loss": 0.1592, "step": 8705 }, { "epoch": 2.82, "learning_rate": 4.6471334984616866e-08, "loss": 0.1479, "step": 8706 }, { "epoch": 2.82, "learning_rate": 4.6303634318548006e-08, "loss": 0.1522, "step": 8707 }, { "epoch": 2.82, "learning_rate": 4.613623396116068e-08, "loss": 0.1506, "step": 8708 }, { "epoch": 2.82, "learning_rate": 4.596913393294322e-08, "loss": 0.1414, "step": 8709 }, { "epoch": 2.82, "learning_rate": 4.580233425434677e-08, "loss": 0.1549, "step": 8710 }, { "epoch": 2.82, "learning_rate": 4.563583494578638e-08, "loss": 0.1506, "step": 8711 }, { "epoch": 2.82, "learning_rate": 4.546963602763937e-08, "loss": 0.1482, "step": 8712 }, { "epoch": 2.82, "learning_rate": 4.530373752024753e-08, "loss": 0.1442, "step": 8713 }, { "epoch": 2.82, "learning_rate": 4.51381394439146e-08, "loss": 0.1523, "step": 8714 }, { "epoch": 2.82, "learning_rate": 4.497284181890882e-08, "loss": 0.147, "step": 8715 }, { "epoch": 2.82, "learning_rate": 4.480784466546068e-08, "loss": 0.1573, "step": 8716 }, { "epoch": 2.82, "learning_rate": 4.4643148003764015e-08, "loss": 0.1473, "step": 8717 }, { "epoch": 2.83, "learning_rate": 4.44787518539766e-08, "loss": 0.1674, "step": 8718 }, { "epoch": 2.83, "learning_rate": 4.4314656236218444e-08, "loss": 0.1463, "step": 8719 }, { "epoch": 2.83, "learning_rate": 4.415086117057377e-08, "loss": 0.1435, "step": 8720 }, { "epoch": 2.83, "learning_rate": 4.398736667708875e-08, "loss": 0.145, "step": 8721 }, { "epoch": 2.83, "learning_rate": 4.382417277577433e-08, "loss": 0.1451, "step": 8722 }, { "epoch": 2.83, "learning_rate": 4.3661279486603424e-08, "loss": 0.1564, "step": 8723 }, { "epoch": 2.83, "learning_rate": 4.349868682951286e-08, "loss": 0.1436, "step": 8724 }, { "epoch": 2.83, "learning_rate": 4.333639482440199e-08, "loss": 0.1482, "step": 8725 }, { "epoch": 2.83, "learning_rate": 4.3174403491134385e-08, "loss": 0.149, "step": 8726 }, { "epoch": 2.83, "learning_rate": 4.301271284953584e-08, "loss": 0.1493, "step": 8727 }, { "epoch": 2.83, "learning_rate": 4.285132291939526e-08, "loss": 0.1475, "step": 8728 }, { "epoch": 2.83, "learning_rate": 4.2690233720466265e-08, "loss": 0.1516, "step": 8729 }, { "epoch": 2.83, "learning_rate": 4.2529445272463946e-08, "loss": 0.144, "step": 8730 }, { "epoch": 2.83, "learning_rate": 4.2368957595067264e-08, "loss": 0.1485, "step": 8731 }, { "epoch": 2.83, "learning_rate": 4.220877070791857e-08, "loss": 0.1522, "step": 8732 }, { "epoch": 2.83, "learning_rate": 4.204888463062273e-08, "loss": 0.1607, "step": 8733 }, { "epoch": 2.83, "learning_rate": 4.188929938274911e-08, "loss": 0.1586, "step": 8734 }, { "epoch": 2.83, "learning_rate": 4.1730014983828724e-08, "loss": 0.143, "step": 8735 }, { "epoch": 2.83, "learning_rate": 4.157103145335628e-08, "loss": 0.1397, "step": 8736 }, { "epoch": 2.83, "learning_rate": 4.141234881079065e-08, "loss": 0.1532, "step": 8737 }, { "epoch": 2.83, "learning_rate": 4.125396707555213e-08, "loss": 0.1553, "step": 8738 }, { "epoch": 2.83, "learning_rate": 4.109588626702576e-08, "loss": 0.1436, "step": 8739 }, { "epoch": 2.83, "learning_rate": 4.0938106404558864e-08, "loss": 0.151, "step": 8740 }, { "epoch": 2.83, "learning_rate": 4.078062750746209e-08, "loss": 0.132, "step": 8741 }, { "epoch": 2.83, "learning_rate": 4.062344959500947e-08, "loss": 0.1388, "step": 8742 }, { "epoch": 2.83, "learning_rate": 4.0466572686437833e-08, "loss": 0.1736, "step": 8743 }, { "epoch": 2.83, "learning_rate": 4.0309996800947936e-08, "loss": 0.138, "step": 8744 }, { "epoch": 2.83, "learning_rate": 4.0153721957702504e-08, "loss": 0.1341, "step": 8745 }, { "epoch": 2.83, "learning_rate": 3.9997748175828467e-08, "loss": 0.1494, "step": 8746 }, { "epoch": 2.83, "learning_rate": 3.9842075474415545e-08, "loss": 0.146, "step": 8747 }, { "epoch": 2.83, "learning_rate": 3.9686703872516e-08, "loss": 0.1456, "step": 8748 }, { "epoch": 2.84, "learning_rate": 3.953163338914656e-08, "loss": 0.147, "step": 8749 }, { "epoch": 2.84, "learning_rate": 3.9376864043285943e-08, "loss": 0.1582, "step": 8750 }, { "epoch": 2.84, "learning_rate": 3.922239585387649e-08, "loss": 0.1577, "step": 8751 }, { "epoch": 2.84, "learning_rate": 3.906822883982336e-08, "loss": 0.154, "step": 8752 }, { "epoch": 2.84, "learning_rate": 3.891436301999563e-08, "loss": 0.1482, "step": 8753 }, { "epoch": 2.84, "learning_rate": 3.876079841322461e-08, "loss": 0.1388, "step": 8754 }, { "epoch": 2.84, "learning_rate": 3.8607535038305276e-08, "loss": 0.1365, "step": 8755 }, { "epoch": 2.84, "learning_rate": 3.84545729139954e-08, "loss": 0.1549, "step": 8756 }, { "epoch": 2.84, "learning_rate": 3.83019120590164e-08, "loss": 0.1585, "step": 8757 }, { "epoch": 2.84, "learning_rate": 3.814955249205221e-08, "loss": 0.1493, "step": 8758 }, { "epoch": 2.84, "learning_rate": 3.7997494231750145e-08, "loss": 0.163, "step": 8759 }, { "epoch": 2.84, "learning_rate": 3.784573729672086e-08, "loss": 0.1263, "step": 8760 }, { "epoch": 2.84, "learning_rate": 3.769428170553785e-08, "loss": 0.1573, "step": 8761 }, { "epoch": 2.84, "learning_rate": 3.754312747673766e-08, "loss": 0.1552, "step": 8762 }, { "epoch": 2.84, "learning_rate": 3.739227462882022e-08, "loss": 0.1338, "step": 8763 }, { "epoch": 2.84, "learning_rate": 3.724172318024854e-08, "loss": 0.1481, "step": 8764 }, { "epoch": 2.84, "learning_rate": 3.709147314944872e-08, "loss": 0.1439, "step": 8765 }, { "epoch": 2.84, "learning_rate": 3.6941524554809924e-08, "loss": 0.1399, "step": 8766 }, { "epoch": 2.84, "learning_rate": 3.6791877414683594e-08, "loss": 0.1434, "step": 8767 }, { "epoch": 2.84, "learning_rate": 3.664253174738647e-08, "loss": 0.1464, "step": 8768 }, { "epoch": 2.84, "learning_rate": 3.649348757119614e-08, "loss": 0.1629, "step": 8769 }, { "epoch": 2.84, "learning_rate": 3.634474490435413e-08, "loss": 0.1528, "step": 8770 }, { "epoch": 2.84, "learning_rate": 3.6196303765065333e-08, "loss": 0.1653, "step": 8771 }, { "epoch": 2.84, "learning_rate": 3.60481641714977e-08, "loss": 0.1514, "step": 8772 }, { "epoch": 2.84, "learning_rate": 3.590032614178174e-08, "loss": 0.1602, "step": 8773 }, { "epoch": 2.84, "learning_rate": 3.57527896940113e-08, "loss": 0.145, "step": 8774 }, { "epoch": 2.84, "learning_rate": 3.560555484624417e-08, "loss": 0.1522, "step": 8775 }, { "epoch": 2.84, "learning_rate": 3.545862161649927e-08, "loss": 0.152, "step": 8776 }, { "epoch": 2.84, "learning_rate": 3.531199002276109e-08, "loss": 0.1559, "step": 8777 }, { "epoch": 2.84, "learning_rate": 3.5165660082975006e-08, "loss": 0.1426, "step": 8778 }, { "epoch": 2.84, "learning_rate": 3.501963181505058e-08, "loss": 0.1425, "step": 8779 }, { "epoch": 2.85, "learning_rate": 3.487390523686074e-08, "loss": 0.1431, "step": 8780 }, { "epoch": 2.85, "learning_rate": 3.472848036624038e-08, "loss": 0.149, "step": 8781 }, { "epoch": 2.85, "learning_rate": 3.4583357220988326e-08, "loss": 0.1382, "step": 8782 }, { "epoch": 2.85, "learning_rate": 3.443853581886619e-08, "loss": 0.1397, "step": 8783 }, { "epoch": 2.85, "learning_rate": 3.4294016177598974e-08, "loss": 0.1445, "step": 8784 }, { "epoch": 2.85, "learning_rate": 3.4149798314874195e-08, "loss": 0.1616, "step": 8785 }, { "epoch": 2.85, "learning_rate": 3.4005882248343e-08, "loss": 0.1534, "step": 8786 }, { "epoch": 2.85, "learning_rate": 3.3862267995618817e-08, "loss": 0.1447, "step": 8787 }, { "epoch": 2.85, "learning_rate": 3.3718955574279234e-08, "loss": 0.1401, "step": 8788 }, { "epoch": 2.85, "learning_rate": 3.357594500186384e-08, "loss": 0.1405, "step": 8789 }, { "epoch": 2.85, "learning_rate": 3.3433236295876134e-08, "loss": 0.1346, "step": 8790 }, { "epoch": 2.85, "learning_rate": 3.329082947378215e-08, "loss": 0.1507, "step": 8791 }, { "epoch": 2.85, "learning_rate": 3.314872455301071e-08, "loss": 0.1485, "step": 8792 }, { "epoch": 2.85, "learning_rate": 3.300692155095458e-08, "loss": 0.1306, "step": 8793 }, { "epoch": 2.85, "learning_rate": 3.286542048496904e-08, "loss": 0.1553, "step": 8794 }, { "epoch": 2.85, "learning_rate": 3.272422137237219e-08, "loss": 0.1411, "step": 8795 }, { "epoch": 2.85, "learning_rate": 3.258332423044547e-08, "loss": 0.1512, "step": 8796 }, { "epoch": 2.85, "learning_rate": 3.2442729076433697e-08, "loss": 0.135, "step": 8797 }, { "epoch": 2.85, "learning_rate": 3.230243592754368e-08, "loss": 0.136, "step": 8798 }, { "epoch": 2.85, "learning_rate": 3.2162444800946655e-08, "loss": 0.1418, "step": 8799 }, { "epoch": 2.85, "learning_rate": 3.202275571377589e-08, "loss": 0.1479, "step": 8800 }, { "epoch": 2.85, "learning_rate": 3.188336868312769e-08, "loss": 0.1478, "step": 8801 }, { "epoch": 2.85, "learning_rate": 3.1744283726062306e-08, "loss": 0.1531, "step": 8802 }, { "epoch": 2.85, "learning_rate": 3.160550085960168e-08, "loss": 0.1539, "step": 8803 }, { "epoch": 2.85, "learning_rate": 3.1467020100732215e-08, "loss": 0.1581, "step": 8804 }, { "epoch": 2.85, "learning_rate": 3.1328841466401746e-08, "loss": 0.1485, "step": 8805 }, { "epoch": 2.85, "learning_rate": 3.1190964973522865e-08, "loss": 0.1514, "step": 8806 }, { "epoch": 2.85, "learning_rate": 3.105339063896956e-08, "loss": 0.1441, "step": 8807 }, { "epoch": 2.85, "learning_rate": 3.0916118479580593e-08, "loss": 0.1542, "step": 8808 }, { "epoch": 2.85, "learning_rate": 3.077914851215585e-08, "loss": 0.1397, "step": 8809 }, { "epoch": 2.85, "learning_rate": 3.064248075345916e-08, "loss": 0.1502, "step": 8810 }, { "epoch": 2.86, "learning_rate": 3.050611522021796e-08, "loss": 0.1457, "step": 8811 }, { "epoch": 2.86, "learning_rate": 3.0370051929121405e-08, "loss": 0.1373, "step": 8812 }, { "epoch": 2.86, "learning_rate": 3.023429089682284e-08, "loss": 0.1516, "step": 8813 }, { "epoch": 2.86, "learning_rate": 3.009883213993786e-08, "loss": 0.1555, "step": 8814 }, { "epoch": 2.86, "learning_rate": 2.996367567504544e-08, "loss": 0.1448, "step": 8815 }, { "epoch": 2.86, "learning_rate": 2.9828821518687045e-08, "loss": 0.156, "step": 8816 }, { "epoch": 2.86, "learning_rate": 2.9694269687367826e-08, "loss": 0.1437, "step": 8817 }, { "epoch": 2.86, "learning_rate": 2.9560020197555716e-08, "loss": 0.1511, "step": 8818 }, { "epoch": 2.86, "learning_rate": 2.9426073065681183e-08, "loss": 0.1483, "step": 8819 }, { "epoch": 2.86, "learning_rate": 2.929242830813861e-08, "loss": 0.1392, "step": 8820 }, { "epoch": 2.86, "learning_rate": 2.915908594128436e-08, "loss": 0.1537, "step": 8821 }, { "epoch": 2.86, "learning_rate": 2.9026045981438434e-08, "loss": 0.1316, "step": 8822 }, { "epoch": 2.86, "learning_rate": 2.889330844488364e-08, "loss": 0.1449, "step": 8823 }, { "epoch": 2.86, "learning_rate": 2.8760873347865593e-08, "loss": 0.1477, "step": 8824 }, { "epoch": 2.86, "learning_rate": 2.862874070659327e-08, "loss": 0.1398, "step": 8825 }, { "epoch": 2.86, "learning_rate": 2.8496910537238185e-08, "loss": 0.1387, "step": 8826 }, { "epoch": 2.86, "learning_rate": 2.8365382855935487e-08, "loss": 0.151, "step": 8827 }, { "epoch": 2.86, "learning_rate": 2.8234157678782846e-08, "loss": 0.1262, "step": 8828 }, { "epoch": 2.86, "learning_rate": 2.8103235021840204e-08, "loss": 0.1608, "step": 8829 }, { "epoch": 2.86, "learning_rate": 2.7972614901132235e-08, "loss": 0.1459, "step": 8830 }, { "epoch": 2.86, "learning_rate": 2.784229733264504e-08, "loss": 0.1544, "step": 8831 }, { "epoch": 2.86, "learning_rate": 2.771228233232809e-08, "loss": 0.1422, "step": 8832 }, { "epoch": 2.86, "learning_rate": 2.7582569916094205e-08, "loss": 0.1433, "step": 8833 }, { "epoch": 2.86, "learning_rate": 2.745316009981902e-08, "loss": 0.1505, "step": 8834 }, { "epoch": 2.86, "learning_rate": 2.732405289934098e-08, "loss": 0.1477, "step": 8835 }, { "epoch": 2.86, "learning_rate": 2.719524833046133e-08, "loss": 0.1666, "step": 8836 }, { "epoch": 2.86, "learning_rate": 2.7066746408944968e-08, "loss": 0.1548, "step": 8837 }, { "epoch": 2.86, "learning_rate": 2.6938547150518746e-08, "loss": 0.1365, "step": 8838 }, { "epoch": 2.86, "learning_rate": 2.6810650570873454e-08, "loss": 0.1476, "step": 8839 }, { "epoch": 2.86, "learning_rate": 2.6683056685662122e-08, "loss": 0.1558, "step": 8840 }, { "epoch": 2.86, "learning_rate": 2.6555765510500875e-08, "loss": 0.1536, "step": 8841 }, { "epoch": 2.87, "learning_rate": 2.6428777060969468e-08, "loss": 0.1427, "step": 8842 }, { "epoch": 2.87, "learning_rate": 2.6302091352609637e-08, "loss": 0.1567, "step": 8843 }, { "epoch": 2.87, "learning_rate": 2.617570840092648e-08, "loss": 0.1462, "step": 8844 }, { "epoch": 2.87, "learning_rate": 2.604962822138818e-08, "loss": 0.1465, "step": 8845 }, { "epoch": 2.87, "learning_rate": 2.5923850829425723e-08, "loss": 0.1634, "step": 8846 }, { "epoch": 2.87, "learning_rate": 2.579837624043291e-08, "loss": 0.1546, "step": 8847 }, { "epoch": 2.87, "learning_rate": 2.5673204469766898e-08, "loss": 0.1373, "step": 8848 }, { "epoch": 2.87, "learning_rate": 2.5548335532747105e-08, "loss": 0.1573, "step": 8849 }, { "epoch": 2.87, "learning_rate": 2.5423769444656575e-08, "loss": 0.1438, "step": 8850 }, { "epoch": 2.87, "learning_rate": 2.52995062207409e-08, "loss": 0.1515, "step": 8851 }, { "epoch": 2.87, "learning_rate": 2.517554587620874e-08, "loss": 0.1567, "step": 8852 }, { "epoch": 2.87, "learning_rate": 2.5051888426231574e-08, "loss": 0.1546, "step": 8853 }, { "epoch": 2.87, "learning_rate": 2.492853388594396e-08, "loss": 0.139, "step": 8854 }, { "epoch": 2.87, "learning_rate": 2.480548227044327e-08, "loss": 0.1321, "step": 8855 }, { "epoch": 2.87, "learning_rate": 2.4682733594789677e-08, "loss": 0.1608, "step": 8856 }, { "epoch": 2.87, "learning_rate": 2.4560287874006716e-08, "loss": 0.1406, "step": 8857 }, { "epoch": 2.87, "learning_rate": 2.443814512308018e-08, "loss": 0.1481, "step": 8858 }, { "epoch": 2.87, "learning_rate": 2.431630535695978e-08, "loss": 0.1371, "step": 8859 }, { "epoch": 2.87, "learning_rate": 2.419476859055664e-08, "loss": 0.1471, "step": 8860 }, { "epoch": 2.87, "learning_rate": 2.4073534838746637e-08, "loss": 0.1494, "step": 8861 }, { "epoch": 2.87, "learning_rate": 2.3952604116366795e-08, "loss": 0.1506, "step": 8862 }, { "epoch": 2.87, "learning_rate": 2.383197643821833e-08, "loss": 0.1409, "step": 8863 }, { "epoch": 2.87, "learning_rate": 2.3711651819064984e-08, "loss": 0.1518, "step": 8864 }, { "epoch": 2.87, "learning_rate": 2.359163027363276e-08, "loss": 0.1601, "step": 8865 }, { "epoch": 2.87, "learning_rate": 2.3471911816611846e-08, "loss": 0.1575, "step": 8866 }, { "epoch": 2.87, "learning_rate": 2.335249646265414e-08, "loss": 0.1537, "step": 8867 }, { "epoch": 2.87, "learning_rate": 2.3233384226375167e-08, "loss": 0.1481, "step": 8868 }, { "epoch": 2.87, "learning_rate": 2.311457512235271e-08, "loss": 0.1487, "step": 8869 }, { "epoch": 2.87, "learning_rate": 2.2996069165128198e-08, "loss": 0.148, "step": 8870 }, { "epoch": 2.87, "learning_rate": 2.2877866369205858e-08, "loss": 0.1349, "step": 8871 }, { "epoch": 2.87, "learning_rate": 2.2759966749051897e-08, "loss": 0.1461, "step": 8872 }, { "epoch": 2.88, "learning_rate": 2.2642370319096718e-08, "loss": 0.158, "step": 8873 }, { "epoch": 2.88, "learning_rate": 2.2525077093732695e-08, "loss": 0.1331, "step": 8874 }, { "epoch": 2.88, "learning_rate": 2.2408087087315567e-08, "loss": 0.1508, "step": 8875 }, { "epoch": 2.88, "learning_rate": 2.2291400314163325e-08, "loss": 0.1451, "step": 8876 }, { "epoch": 2.88, "learning_rate": 2.217501678855788e-08, "loss": 0.1424, "step": 8877 }, { "epoch": 2.88, "learning_rate": 2.2058936524742835e-08, "loss": 0.1462, "step": 8878 }, { "epoch": 2.88, "learning_rate": 2.1943159536925994e-08, "loss": 0.1499, "step": 8879 }, { "epoch": 2.88, "learning_rate": 2.1827685839276856e-08, "loss": 0.1484, "step": 8880 }, { "epoch": 2.88, "learning_rate": 2.1712515445928285e-08, "loss": 0.1483, "step": 8881 }, { "epoch": 2.88, "learning_rate": 2.159764837097622e-08, "loss": 0.1459, "step": 8882 }, { "epoch": 2.88, "learning_rate": 2.1483084628479145e-08, "loss": 0.1382, "step": 8883 }, { "epoch": 2.88, "learning_rate": 2.1368824232458618e-08, "loss": 0.1492, "step": 8884 }, { "epoch": 2.88, "learning_rate": 2.125486719689929e-08, "loss": 0.1456, "step": 8885 }, { "epoch": 2.88, "learning_rate": 2.1141213535747772e-08, "loss": 0.1444, "step": 8886 }, { "epoch": 2.88, "learning_rate": 2.1027863262914617e-08, "loss": 0.1671, "step": 8887 }, { "epoch": 2.88, "learning_rate": 2.0914816392272608e-08, "loss": 0.1456, "step": 8888 }, { "epoch": 2.88, "learning_rate": 2.0802072937657624e-08, "loss": 0.148, "step": 8889 }, { "epoch": 2.88, "learning_rate": 2.068963291286863e-08, "loss": 0.1605, "step": 8890 }, { "epoch": 2.88, "learning_rate": 2.0577496331666837e-08, "loss": 0.1404, "step": 8891 }, { "epoch": 2.88, "learning_rate": 2.046566320777682e-08, "loss": 0.1394, "step": 8892 }, { "epoch": 2.88, "learning_rate": 2.0354133554885967e-08, "loss": 0.1467, "step": 8893 }, { "epoch": 2.88, "learning_rate": 2.0242907386644195e-08, "loss": 0.1521, "step": 8894 }, { "epoch": 2.88, "learning_rate": 2.0131984716664776e-08, "loss": 0.1591, "step": 8895 }, { "epoch": 2.88, "learning_rate": 2.002136555852352e-08, "loss": 0.1305, "step": 8896 }, { "epoch": 2.88, "learning_rate": 1.9911049925758765e-08, "loss": 0.1315, "step": 8897 }, { "epoch": 2.88, "learning_rate": 1.9801037831872482e-08, "loss": 0.1278, "step": 8898 }, { "epoch": 2.88, "learning_rate": 1.9691329290329185e-08, "loss": 0.1468, "step": 8899 }, { "epoch": 2.88, "learning_rate": 1.958192431455591e-08, "loss": 0.1441, "step": 8900 }, { "epoch": 2.88, "learning_rate": 1.9472822917942778e-08, "loss": 0.152, "step": 8901 }, { "epoch": 2.88, "learning_rate": 1.9364025113842444e-08, "loss": 0.1513, "step": 8902 }, { "epoch": 2.88, "learning_rate": 1.9255530915571197e-08, "loss": 0.1393, "step": 8903 }, { "epoch": 2.89, "learning_rate": 1.9147340336407584e-08, "loss": 0.1456, "step": 8904 }, { "epoch": 2.89, "learning_rate": 1.9039453389592954e-08, "loss": 0.1457, "step": 8905 }, { "epoch": 2.89, "learning_rate": 1.893187008833175e-08, "loss": 0.1413, "step": 8906 }, { "epoch": 2.89, "learning_rate": 1.8824590445790935e-08, "loss": 0.1501, "step": 8907 }, { "epoch": 2.89, "learning_rate": 1.871761447510084e-08, "loss": 0.1549, "step": 8908 }, { "epoch": 2.89, "learning_rate": 1.8610942189353777e-08, "loss": 0.161, "step": 8909 }, { "epoch": 2.89, "learning_rate": 1.850457360160568e-08, "loss": 0.1423, "step": 8910 }, { "epoch": 2.89, "learning_rate": 1.839850872487503e-08, "loss": 0.1438, "step": 8911 }, { "epoch": 2.89, "learning_rate": 1.829274757214339e-08, "loss": 0.1747, "step": 8912 }, { "epoch": 2.89, "learning_rate": 1.8187290156354565e-08, "loss": 0.1575, "step": 8913 }, { "epoch": 2.89, "learning_rate": 1.808213649041546e-08, "loss": 0.1378, "step": 8914 }, { "epoch": 2.89, "learning_rate": 1.7977286587196053e-08, "loss": 0.1519, "step": 8915 }, { "epoch": 2.89, "learning_rate": 1.7872740459529135e-08, "loss": 0.1511, "step": 8916 }, { "epoch": 2.89, "learning_rate": 1.7768498120209755e-08, "loss": 0.1407, "step": 8917 }, { "epoch": 2.89, "learning_rate": 1.766455958199631e-08, "loss": 0.1417, "step": 8918 }, { "epoch": 2.89, "learning_rate": 1.7560924857610016e-08, "loss": 0.1488, "step": 8919 }, { "epoch": 2.89, "learning_rate": 1.745759395973462e-08, "loss": 0.1566, "step": 8920 }, { "epoch": 2.89, "learning_rate": 1.7354566901016944e-08, "loss": 0.1459, "step": 8921 }, { "epoch": 2.89, "learning_rate": 1.7251843694066074e-08, "loss": 0.1529, "step": 8922 }, { "epoch": 2.89, "learning_rate": 1.7149424351455003e-08, "loss": 0.1493, "step": 8923 }, { "epoch": 2.89, "learning_rate": 1.7047308885718427e-08, "loss": 0.1442, "step": 8924 }, { "epoch": 2.89, "learning_rate": 1.694549730935441e-08, "loss": 0.1494, "step": 8925 }, { "epoch": 2.89, "learning_rate": 1.684398963482381e-08, "loss": 0.1563, "step": 8926 }, { "epoch": 2.89, "learning_rate": 1.674278587454975e-08, "loss": 0.1452, "step": 8927 }, { "epoch": 2.89, "learning_rate": 1.6641886040919263e-08, "loss": 0.1339, "step": 8928 }, { "epoch": 2.89, "learning_rate": 1.654129014628081e-08, "loss": 0.1336, "step": 8929 }, { "epoch": 2.89, "learning_rate": 1.6440998202947034e-08, "loss": 0.1695, "step": 8930 }, { "epoch": 2.89, "learning_rate": 1.634101022319229e-08, "loss": 0.16, "step": 8931 }, { "epoch": 2.89, "learning_rate": 1.6241326219254006e-08, "loss": 0.1517, "step": 8932 }, { "epoch": 2.89, "learning_rate": 1.6141946203332703e-08, "loss": 0.1451, "step": 8933 }, { "epoch": 2.9, "learning_rate": 1.6042870187591985e-08, "loss": 0.1413, "step": 8934 }, { "epoch": 2.9, "learning_rate": 1.5944098184156876e-08, "loss": 0.159, "step": 8935 }, { "epoch": 2.9, "learning_rate": 1.5845630205117147e-08, "loss": 0.1434, "step": 8936 }, { "epoch": 2.9, "learning_rate": 1.5747466262523438e-08, "loss": 0.1519, "step": 8937 }, { "epoch": 2.9, "learning_rate": 1.5649606368390578e-08, "loss": 0.145, "step": 8938 }, { "epoch": 2.9, "learning_rate": 1.5552050534695383e-08, "loss": 0.1551, "step": 8939 }, { "epoch": 2.9, "learning_rate": 1.5454798773378023e-08, "loss": 0.1529, "step": 8940 }, { "epoch": 2.9, "learning_rate": 1.5357851096340915e-08, "loss": 0.15, "step": 8941 }, { "epoch": 2.9, "learning_rate": 1.526120751544985e-08, "loss": 0.1633, "step": 8942 }, { "epoch": 2.9, "learning_rate": 1.5164868042532864e-08, "loss": 0.1421, "step": 8943 }, { "epoch": 2.9, "learning_rate": 1.506883268938053e-08, "loss": 0.1498, "step": 8944 }, { "epoch": 2.9, "learning_rate": 1.4973101467747608e-08, "loss": 0.1399, "step": 8945 }, { "epoch": 2.9, "learning_rate": 1.4877674389349728e-08, "loss": 0.1392, "step": 8946 }, { "epoch": 2.9, "learning_rate": 1.4782551465866713e-08, "loss": 0.1555, "step": 8947 }, { "epoch": 2.9, "learning_rate": 1.4687732708940916e-08, "loss": 0.1584, "step": 8948 }, { "epoch": 2.9, "learning_rate": 1.4593218130176668e-08, "loss": 0.1483, "step": 8949 }, { "epoch": 2.9, "learning_rate": 1.4499007741141934e-08, "loss": 0.1567, "step": 8950 }, { "epoch": 2.9, "learning_rate": 1.4405101553367218e-08, "loss": 0.1424, "step": 8951 }, { "epoch": 2.9, "learning_rate": 1.4311499578345821e-08, "loss": 0.1377, "step": 8952 }, { "epoch": 2.9, "learning_rate": 1.421820182753303e-08, "loss": 0.15, "step": 8953 }, { "epoch": 2.9, "learning_rate": 1.4125208312348593e-08, "loss": 0.1456, "step": 8954 }, { "epoch": 2.9, "learning_rate": 1.403251904417341e-08, "loss": 0.1498, "step": 8955 }, { "epoch": 2.9, "learning_rate": 1.3940134034351738e-08, "loss": 0.1466, "step": 8956 }, { "epoch": 2.9, "learning_rate": 1.3848053294190922e-08, "loss": 0.145, "step": 8957 }, { "epoch": 2.9, "learning_rate": 1.3756276834960558e-08, "loss": 0.1542, "step": 8958 }, { "epoch": 2.9, "learning_rate": 1.366480466789305e-08, "loss": 0.1521, "step": 8959 }, { "epoch": 2.9, "learning_rate": 1.3573636804183887e-08, "loss": 0.1495, "step": 8960 }, { "epoch": 2.9, "learning_rate": 1.3482773254991365e-08, "loss": 0.1459, "step": 8961 }, { "epoch": 2.9, "learning_rate": 1.3392214031435757e-08, "loss": 0.1484, "step": 8962 }, { "epoch": 2.9, "learning_rate": 1.3301959144600974e-08, "loss": 0.1617, "step": 8963 }, { "epoch": 2.9, "learning_rate": 1.3212008605533177e-08, "loss": 0.1413, "step": 8964 }, { "epoch": 2.91, "learning_rate": 1.312236242524162e-08, "loss": 0.1583, "step": 8965 }, { "epoch": 2.91, "learning_rate": 1.3033020614698078e-08, "loss": 0.1473, "step": 8966 }, { "epoch": 2.91, "learning_rate": 1.2943983184837417e-08, "loss": 0.1472, "step": 8967 }, { "epoch": 2.91, "learning_rate": 1.2855250146556197e-08, "loss": 0.1459, "step": 8968 }, { "epoch": 2.91, "learning_rate": 1.2766821510715177e-08, "loss": 0.1547, "step": 8969 }, { "epoch": 2.91, "learning_rate": 1.2678697288136809e-08, "loss": 0.1531, "step": 8970 }, { "epoch": 2.91, "learning_rate": 1.2590877489606911e-08, "loss": 0.1407, "step": 8971 }, { "epoch": 2.91, "learning_rate": 1.2503362125873552e-08, "loss": 0.163, "step": 8972 }, { "epoch": 2.91, "learning_rate": 1.241615120764761e-08, "loss": 0.1565, "step": 8973 }, { "epoch": 2.91, "learning_rate": 1.2329244745603596e-08, "loss": 0.1608, "step": 8974 }, { "epoch": 2.91, "learning_rate": 1.2242642750376899e-08, "loss": 0.139, "step": 8975 }, { "epoch": 2.91, "learning_rate": 1.2156345232567923e-08, "loss": 0.1381, "step": 8976 }, { "epoch": 2.91, "learning_rate": 1.2070352202737668e-08, "loss": 0.1455, "step": 8977 }, { "epoch": 2.91, "learning_rate": 1.19846636714116e-08, "loss": 0.1431, "step": 8978 }, { "epoch": 2.91, "learning_rate": 1.1899279649076612e-08, "loss": 0.1592, "step": 8979 }, { "epoch": 2.91, "learning_rate": 1.181420014618323e-08, "loss": 0.1567, "step": 8980 }, { "epoch": 2.91, "learning_rate": 1.172942517314396e-08, "loss": 0.1402, "step": 8981 }, { "epoch": 2.91, "learning_rate": 1.1644954740334946e-08, "loss": 0.136, "step": 8982 }, { "epoch": 2.91, "learning_rate": 1.1560788858094584e-08, "loss": 0.1546, "step": 8983 }, { "epoch": 2.91, "learning_rate": 1.1476927536723248e-08, "loss": 0.154, "step": 8984 }, { "epoch": 2.91, "learning_rate": 1.1393370786485502e-08, "loss": 0.1463, "step": 8985 }, { "epoch": 2.91, "learning_rate": 1.1310118617607613e-08, "loss": 0.1605, "step": 8986 }, { "epoch": 2.91, "learning_rate": 1.122717104027865e-08, "loss": 0.156, "step": 8987 }, { "epoch": 2.91, "learning_rate": 1.1144528064650772e-08, "loss": 0.1628, "step": 8988 }, { "epoch": 2.91, "learning_rate": 1.1062189700838944e-08, "loss": 0.1354, "step": 8989 }, { "epoch": 2.91, "learning_rate": 1.0980155958920103e-08, "loss": 0.1523, "step": 8990 }, { "epoch": 2.91, "learning_rate": 1.089842684893455e-08, "loss": 0.159, "step": 8991 }, { "epoch": 2.91, "learning_rate": 1.0817002380885123e-08, "loss": 0.1364, "step": 8992 }, { "epoch": 2.91, "learning_rate": 1.0735882564737732e-08, "loss": 0.1548, "step": 8993 }, { "epoch": 2.91, "learning_rate": 1.0655067410419994e-08, "loss": 0.1525, "step": 8994 }, { "epoch": 2.91, "learning_rate": 1.057455692782372e-08, "loss": 0.131, "step": 8995 }, { "epoch": 2.92, "learning_rate": 1.049435112680186e-08, "loss": 0.1361, "step": 8996 }, { "epoch": 2.92, "learning_rate": 1.0414450017171007e-08, "loss": 0.1356, "step": 8997 }, { "epoch": 2.92, "learning_rate": 1.0334853608710838e-08, "loss": 0.1368, "step": 8998 }, { "epoch": 2.92, "learning_rate": 1.0255561911162449e-08, "loss": 0.15, "step": 8999 }, { "epoch": 2.92, "learning_rate": 1.0176574934230854e-08, "loss": 0.1541, "step": 9000 }, { "epoch": 2.92, "learning_rate": 1.0097892687583044e-08, "loss": 0.1494, "step": 9001 }, { "epoch": 2.92, "learning_rate": 1.0019515180849094e-08, "loss": 0.146, "step": 9002 }, { "epoch": 2.92, "learning_rate": 9.941442423621606e-09, "loss": 0.1568, "step": 9003 }, { "epoch": 2.92, "learning_rate": 9.863674425455716e-09, "loss": 0.1612, "step": 9004 }, { "epoch": 2.92, "learning_rate": 9.78621119586992e-09, "loss": 0.161, "step": 9005 }, { "epoch": 2.92, "learning_rate": 9.709052744344694e-09, "loss": 0.14, "step": 9006 }, { "epoch": 2.92, "learning_rate": 9.63219908032359e-09, "loss": 0.161, "step": 9007 }, { "epoch": 2.92, "learning_rate": 9.55565021321242e-09, "loss": 0.1375, "step": 9008 }, { "epoch": 2.92, "learning_rate": 9.479406152380632e-09, "loss": 0.1444, "step": 9009 }, { "epoch": 2.92, "learning_rate": 9.403466907159375e-09, "loss": 0.151, "step": 9010 }, { "epoch": 2.92, "learning_rate": 9.327832486842603e-09, "loss": 0.1729, "step": 9011 }, { "epoch": 2.92, "learning_rate": 9.25250290068791e-09, "loss": 0.1265, "step": 9012 }, { "epoch": 2.92, "learning_rate": 9.17747815791431e-09, "loss": 0.1435, "step": 9013 }, { "epoch": 2.92, "learning_rate": 9.102758267704736e-09, "loss": 0.1602, "step": 9014 }, { "epoch": 2.92, "learning_rate": 9.02834323920354e-09, "loss": 0.1489, "step": 9015 }, { "epoch": 2.92, "learning_rate": 8.954233081518438e-09, "loss": 0.1586, "step": 9016 }, { "epoch": 2.92, "learning_rate": 8.880427803720226e-09, "loss": 0.1487, "step": 9017 }, { "epoch": 2.92, "learning_rate": 8.806927414841959e-09, "loss": 0.1589, "step": 9018 }, { "epoch": 2.92, "learning_rate": 8.73373192387894e-09, "loss": 0.1468, "step": 9019 }, { "epoch": 2.92, "learning_rate": 8.660841339789561e-09, "loss": 0.1449, "step": 9020 }, { "epoch": 2.92, "learning_rate": 8.58825567149557e-09, "loss": 0.1467, "step": 9021 }, { "epoch": 2.92, "learning_rate": 8.515974927880144e-09, "loss": 0.1385, "step": 9022 }, { "epoch": 2.92, "learning_rate": 8.443999117790091e-09, "loss": 0.1614, "step": 9023 }, { "epoch": 2.92, "learning_rate": 8.3723282500342e-09, "loss": 0.1432, "step": 9024 }, { "epoch": 2.92, "learning_rate": 8.3009623333849e-09, "loss": 0.1492, "step": 9025 }, { "epoch": 2.92, "learning_rate": 8.229901376575755e-09, "loss": 0.1501, "step": 9026 }, { "epoch": 2.93, "learning_rate": 8.15914538830509e-09, "loss": 0.1477, "step": 9027 }, { "epoch": 2.93, "learning_rate": 8.088694377231532e-09, "loss": 0.1582, "step": 9028 }, { "epoch": 2.93, "learning_rate": 8.018548351978738e-09, "loss": 0.1395, "step": 9029 }, { "epoch": 2.93, "learning_rate": 7.948707321130956e-09, "loss": 0.1462, "step": 9030 }, { "epoch": 2.93, "learning_rate": 7.879171293236621e-09, "loss": 0.1643, "step": 9031 }, { "epoch": 2.93, "learning_rate": 7.80994027680615e-09, "loss": 0.1414, "step": 9032 }, { "epoch": 2.93, "learning_rate": 7.741014280312765e-09, "loss": 0.1441, "step": 9033 }, { "epoch": 2.93, "learning_rate": 7.672393312192218e-09, "loss": 0.1436, "step": 9034 }, { "epoch": 2.93, "learning_rate": 7.604077380843067e-09, "loss": 0.1376, "step": 9035 }, { "epoch": 2.93, "learning_rate": 7.536066494626681e-09, "loss": 0.1563, "step": 9036 }, { "epoch": 2.93, "learning_rate": 7.468360661866957e-09, "loss": 0.1539, "step": 9037 }, { "epoch": 2.93, "learning_rate": 7.400959890850046e-09, "loss": 0.144, "step": 9038 }, { "epoch": 2.93, "learning_rate": 7.333864189825735e-09, "loss": 0.155, "step": 9039 }, { "epoch": 2.93, "learning_rate": 7.267073567005234e-09, "loss": 0.1322, "step": 9040 }, { "epoch": 2.93, "learning_rate": 7.2005880305636714e-09, "loss": 0.1405, "step": 9041 }, { "epoch": 2.93, "learning_rate": 7.134407588637871e-09, "loss": 0.1489, "step": 9042 }, { "epoch": 2.93, "learning_rate": 7.068532249327742e-09, "loss": 0.1522, "step": 9043 }, { "epoch": 2.93, "learning_rate": 7.002962020695725e-09, "loss": 0.1507, "step": 9044 }, { "epoch": 2.93, "learning_rate": 6.937696910767067e-09, "loss": 0.1414, "step": 9045 }, { "epoch": 2.93, "learning_rate": 6.872736927529822e-09, "loss": 0.1486, "step": 9046 }, { "epoch": 2.93, "learning_rate": 6.8080820789340195e-09, "loss": 0.1603, "step": 9047 }, { "epoch": 2.93, "learning_rate": 6.743732372893053e-09, "loss": 0.1397, "step": 9048 }, { "epoch": 2.93, "learning_rate": 6.679687817282843e-09, "loss": 0.1577, "step": 9049 }, { "epoch": 2.93, "learning_rate": 6.615948419941565e-09, "loss": 0.166, "step": 9050 }, { "epoch": 2.93, "learning_rate": 6.5525141886702005e-09, "loss": 0.1424, "step": 9051 }, { "epoch": 2.93, "learning_rate": 6.489385131232817e-09, "loss": 0.1449, "step": 9052 }, { "epoch": 2.93, "learning_rate": 6.426561255355457e-09, "loss": 0.1506, "step": 9053 }, { "epoch": 2.93, "learning_rate": 6.364042568727524e-09, "loss": 0.1472, "step": 9054 }, { "epoch": 2.93, "learning_rate": 6.301829079000399e-09, "loss": 0.1666, "step": 9055 }, { "epoch": 2.93, "learning_rate": 6.239920793788546e-09, "loss": 0.1508, "step": 9056 }, { "epoch": 2.93, "learning_rate": 6.178317720668958e-09, "loss": 0.1489, "step": 9057 }, { "epoch": 2.94, "learning_rate": 6.117019867181162e-09, "loss": 0.1486, "step": 9058 }, { "epoch": 2.94, "learning_rate": 6.056027240827489e-09, "loss": 0.1398, "step": 9059 }, { "epoch": 2.94, "learning_rate": 5.995339849073079e-09, "loss": 0.1706, "step": 9060 }, { "epoch": 2.94, "learning_rate": 5.9349576993447675e-09, "loss": 0.1486, "step": 9061 }, { "epoch": 2.94, "learning_rate": 5.874880799033589e-09, "loss": 0.1443, "step": 9062 }, { "epoch": 2.94, "learning_rate": 5.815109155491716e-09, "loss": 0.1495, "step": 9063 }, { "epoch": 2.94, "learning_rate": 5.755642776035242e-09, "loss": 0.1469, "step": 9064 }, { "epoch": 2.94, "learning_rate": 5.696481667941678e-09, "loss": 0.1432, "step": 9065 }, { "epoch": 2.94, "learning_rate": 5.637625838452176e-09, "loss": 0.1507, "step": 9066 }, { "epoch": 2.94, "learning_rate": 5.579075294769864e-09, "loss": 0.1349, "step": 9067 }, { "epoch": 2.94, "learning_rate": 5.520830044060677e-09, "loss": 0.1585, "step": 9068 }, { "epoch": 2.94, "learning_rate": 5.46289009345391e-09, "loss": 0.1625, "step": 9069 }, { "epoch": 2.94, "learning_rate": 5.405255450040003e-09, "loss": 0.1514, "step": 9070 }, { "epoch": 2.94, "learning_rate": 5.347926120873592e-09, "loss": 0.1471, "step": 9071 }, { "epoch": 2.94, "learning_rate": 5.290902112970731e-09, "loss": 0.1297, "step": 9072 }, { "epoch": 2.94, "learning_rate": 5.234183433310835e-09, "loss": 0.1358, "step": 9073 }, { "epoch": 2.94, "learning_rate": 5.177770088835854e-09, "loss": 0.1588, "step": 9074 }, { "epoch": 2.94, "learning_rate": 5.121662086449708e-09, "loss": 0.1693, "step": 9075 }, { "epoch": 2.94, "learning_rate": 5.065859433019959e-09, "loss": 0.1651, "step": 9076 }, { "epoch": 2.94, "learning_rate": 5.010362135376423e-09, "loss": 0.1406, "step": 9077 }, { "epoch": 2.94, "learning_rate": 4.955170200310888e-09, "loss": 0.1456, "step": 9078 }, { "epoch": 2.94, "learning_rate": 4.9002836345787845e-09, "loss": 0.1375, "step": 9079 }, { "epoch": 2.94, "learning_rate": 4.845702444897515e-09, "loss": 0.1601, "step": 9080 }, { "epoch": 2.94, "learning_rate": 4.791426637947294e-09, "loss": 0.1498, "step": 9081 }, { "epoch": 2.94, "learning_rate": 4.7374562203708615e-09, "loss": 0.1553, "step": 9082 }, { "epoch": 2.94, "learning_rate": 4.683791198773768e-09, "loss": 0.1568, "step": 9083 }, { "epoch": 2.94, "learning_rate": 4.630431579724371e-09, "loss": 0.151, "step": 9084 }, { "epoch": 2.94, "learning_rate": 4.577377369752722e-09, "loss": 0.1456, "step": 9085 }, { "epoch": 2.94, "learning_rate": 4.524628575352796e-09, "loss": 0.1457, "step": 9086 }, { "epoch": 2.94, "learning_rate": 4.472185202980261e-09, "loss": 0.1575, "step": 9087 }, { "epoch": 2.94, "learning_rate": 4.420047259053595e-09, "loss": 0.1391, "step": 9088 }, { "epoch": 2.95, "learning_rate": 4.36821474995408e-09, "loss": 0.1559, "step": 9089 }, { "epoch": 2.95, "learning_rate": 4.316687682025256e-09, "loss": 0.1548, "step": 9090 }, { "epoch": 2.95, "learning_rate": 4.26546606157402e-09, "loss": 0.1419, "step": 9091 }, { "epoch": 2.95, "learning_rate": 4.2145498948692465e-09, "loss": 0.1445, "step": 9092 }, { "epoch": 2.95, "learning_rate": 4.163939188142341e-09, "loss": 0.1483, "step": 9093 }, { "epoch": 2.95, "learning_rate": 4.113633947587792e-09, "loss": 0.1489, "step": 9094 }, { "epoch": 2.95, "learning_rate": 4.063634179362341e-09, "loss": 0.1557, "step": 9095 }, { "epoch": 2.95, "learning_rate": 4.013939889585538e-09, "loss": 0.142, "step": 9096 }, { "epoch": 2.95, "learning_rate": 3.964551084339463e-09, "loss": 0.1456, "step": 9097 }, { "epoch": 2.95, "learning_rate": 3.915467769668724e-09, "loss": 0.1555, "step": 9098 }, { "epoch": 2.95, "learning_rate": 3.866689951580738e-09, "loss": 0.1616, "step": 9099 }, { "epoch": 2.95, "learning_rate": 3.818217636045729e-09, "loss": 0.1335, "step": 9100 }, { "epoch": 2.95, "learning_rate": 3.770050828995897e-09, "loss": 0.1553, "step": 9101 }, { "epoch": 2.95, "learning_rate": 3.7221895363262485e-09, "loss": 0.1554, "step": 9102 }, { "epoch": 2.95, "learning_rate": 3.674633763894875e-09, "loss": 0.1528, "step": 9103 }, { "epoch": 2.95, "learning_rate": 3.6273835175221204e-09, "loss": 0.1541, "step": 9104 }, { "epoch": 2.95, "learning_rate": 3.58043880299086e-09, "loss": 0.1408, "step": 9105 }, { "epoch": 2.95, "learning_rate": 3.533799626046497e-09, "loss": 0.1402, "step": 9106 }, { "epoch": 2.95, "learning_rate": 3.487465992397521e-09, "loss": 0.1502, "step": 9107 }, { "epoch": 2.95, "learning_rate": 3.4414379077146733e-09, "loss": 0.1505, "step": 9108 }, { "epoch": 2.95, "learning_rate": 3.3957153776312257e-09, "loss": 0.1361, "step": 9109 }, { "epoch": 2.95, "learning_rate": 3.3502984077429803e-09, "loss": 0.157, "step": 9110 }, { "epoch": 2.95, "learning_rate": 3.3051870036091004e-09, "loss": 0.153, "step": 9111 }, { "epoch": 2.95, "learning_rate": 3.260381170750171e-09, "loss": 0.1453, "step": 9112 }, { "epoch": 2.95, "learning_rate": 3.215880914650693e-09, "loss": 0.156, "step": 9113 }, { "epoch": 2.95, "learning_rate": 3.171686240756033e-09, "loss": 0.1502, "step": 9114 }, { "epoch": 2.95, "learning_rate": 3.1277971544763088e-09, "loss": 0.1569, "step": 9115 }, { "epoch": 2.95, "learning_rate": 3.0842136611825004e-09, "loss": 0.1545, "step": 9116 }, { "epoch": 2.95, "learning_rate": 3.0409357662086748e-09, "loss": 0.1464, "step": 9117 }, { "epoch": 2.95, "learning_rate": 2.997963474852261e-09, "loss": 0.1375, "step": 9118 }, { "epoch": 2.95, "learning_rate": 2.9552967923721086e-09, "loss": 0.1525, "step": 9119 }, { "epoch": 2.96, "learning_rate": 2.9129357239901514e-09, "loss": 0.1454, "step": 9120 }, { "epoch": 2.96, "learning_rate": 2.8708802748914077e-09, "loss": 0.145, "step": 9121 }, { "epoch": 2.96, "learning_rate": 2.829130450222872e-09, "loss": 0.1277, "step": 9122 }, { "epoch": 2.96, "learning_rate": 2.7876862550940685e-09, "loss": 0.15, "step": 9123 }, { "epoch": 2.96, "learning_rate": 2.7465476945778835e-09, "loss": 0.1499, "step": 9124 }, { "epoch": 2.96, "learning_rate": 2.705714773708623e-09, "loss": 0.1498, "step": 9125 }, { "epoch": 2.96, "learning_rate": 2.6651874974845115e-09, "loss": 0.1438, "step": 9126 }, { "epoch": 2.96, "learning_rate": 2.6249658708651928e-09, "loss": 0.1432, "step": 9127 }, { "epoch": 2.96, "learning_rate": 2.5850498987733952e-09, "loss": 0.1489, "step": 9128 }, { "epoch": 2.96, "learning_rate": 2.545439586094933e-09, "loss": 0.1526, "step": 9129 }, { "epoch": 2.96, "learning_rate": 2.506134937677318e-09, "loss": 0.1391, "step": 9130 }, { "epoch": 2.96, "learning_rate": 2.4671359583314237e-09, "loss": 0.1516, "step": 9131 }, { "epoch": 2.96, "learning_rate": 2.4284426528298212e-09, "loss": 0.1405, "step": 9132 }, { "epoch": 2.96, "learning_rate": 2.3900550259084445e-09, "loss": 0.1511, "step": 9133 }, { "epoch": 2.96, "learning_rate": 2.351973082265757e-09, "loss": 0.1555, "step": 9134 }, { "epoch": 2.96, "learning_rate": 2.314196826562476e-09, "loss": 0.1547, "step": 9135 }, { "epoch": 2.96, "learning_rate": 2.2767262634218466e-09, "loss": 0.1663, "step": 9136 }, { "epoch": 2.96, "learning_rate": 2.239561397430201e-09, "loss": 0.1554, "step": 9137 }, { "epoch": 2.96, "learning_rate": 2.2027022331361226e-09, "loss": 0.1538, "step": 9138 }, { "epoch": 2.96, "learning_rate": 2.1661487750504473e-09, "loss": 0.1705, "step": 9139 }, { "epoch": 2.96, "learning_rate": 2.129901027647652e-09, "loss": 0.1504, "step": 9140 }, { "epoch": 2.96, "learning_rate": 2.0939589953633542e-09, "loss": 0.1463, "step": 9141 }, { "epoch": 2.96, "learning_rate": 2.0583226825970915e-09, "loss": 0.1534, "step": 9142 }, { "epoch": 2.96, "learning_rate": 2.022992093710097e-09, "loss": 0.1425, "step": 9143 }, { "epoch": 2.96, "learning_rate": 1.9879672330266886e-09, "loss": 0.1338, "step": 9144 }, { "epoch": 2.96, "learning_rate": 1.9532481048334383e-09, "loss": 0.1661, "step": 9145 }, { "epoch": 2.96, "learning_rate": 1.918834713379447e-09, "loss": 0.1509, "step": 9146 }, { "epoch": 2.96, "learning_rate": 1.884727062876901e-09, "loss": 0.133, "step": 9147 }, { "epoch": 2.96, "learning_rate": 1.8509251575002386e-09, "loss": 0.1452, "step": 9148 }, { "epoch": 2.96, "learning_rate": 1.8174290013864282e-09, "loss": 0.1474, "step": 9149 }, { "epoch": 2.97, "learning_rate": 1.784238598634691e-09, "loss": 0.1671, "step": 9150 }, { "epoch": 2.97, "learning_rate": 1.7513539533078882e-09, "loss": 0.1556, "step": 9151 }, { "epoch": 2.97, "learning_rate": 1.7187750694303007e-09, "loss": 0.1749, "step": 9152 }, { "epoch": 2.97, "learning_rate": 1.686501950989572e-09, "loss": 0.152, "step": 9153 }, { "epoch": 2.97, "learning_rate": 1.6545346019350427e-09, "loss": 0.1464, "step": 9154 }, { "epoch": 2.97, "learning_rate": 1.6228730261799718e-09, "loss": 0.1413, "step": 9155 }, { "epoch": 2.97, "learning_rate": 1.5915172275990375e-09, "loss": 0.1375, "step": 9156 }, { "epoch": 2.97, "learning_rate": 1.5604672100297258e-09, "loss": 0.1485, "step": 9157 }, { "epoch": 2.97, "learning_rate": 1.5297229772726075e-09, "loss": 0.1545, "step": 9158 }, { "epoch": 2.97, "learning_rate": 1.499284533090506e-09, "loss": 0.14, "step": 9159 }, { "epoch": 2.97, "learning_rate": 1.469151881208497e-09, "loss": 0.1505, "step": 9160 }, { "epoch": 2.97, "learning_rate": 1.4393250253144642e-09, "loss": 0.1461, "step": 9161 }, { "epoch": 2.97, "learning_rate": 1.4098039690593756e-09, "loss": 0.1338, "step": 9162 }, { "epoch": 2.97, "learning_rate": 1.3805887160558973e-09, "loss": 0.1496, "step": 9163 }, { "epoch": 2.97, "learning_rate": 1.3516792698797797e-09, "loss": 0.1458, "step": 9164 }, { "epoch": 2.97, "learning_rate": 1.323075634069304e-09, "loss": 0.1334, "step": 9165 }, { "epoch": 2.97, "learning_rate": 1.2947778121255584e-09, "loss": 0.1382, "step": 9166 }, { "epoch": 2.97, "learning_rate": 1.2667858075113281e-09, "loss": 0.1397, "step": 9167 }, { "epoch": 2.97, "learning_rate": 1.239099623653317e-09, "loss": 0.1421, "step": 9168 }, { "epoch": 2.97, "learning_rate": 1.2117192639393704e-09, "loss": 0.1565, "step": 9169 }, { "epoch": 2.97, "learning_rate": 1.1846447317206967e-09, "loss": 0.1661, "step": 9170 }, { "epoch": 2.97, "learning_rate": 1.1578760303113113e-09, "loss": 0.1339, "step": 9171 }, { "epoch": 2.97, "learning_rate": 1.131413162987205e-09, "loss": 0.155, "step": 9172 }, { "epoch": 2.97, "learning_rate": 1.1052561329871757e-09, "loss": 0.1515, "step": 9173 }, { "epoch": 2.97, "learning_rate": 1.0794049435128296e-09, "loss": 0.1538, "step": 9174 }, { "epoch": 2.97, "learning_rate": 1.0538595977277466e-09, "loss": 0.1488, "step": 9175 }, { "epoch": 2.97, "learning_rate": 1.028620098758315e-09, "loss": 0.15, "step": 9176 }, { "epoch": 2.97, "learning_rate": 1.0036864496942856e-09, "loss": 0.1608, "step": 9177 }, { "epoch": 2.97, "learning_rate": 9.79058653586551e-10, "loss": 0.145, "step": 9178 }, { "epoch": 2.97, "learning_rate": 9.54736713449922e-10, "loss": 0.1492, "step": 9179 }, { "epoch": 2.97, "learning_rate": 9.307206322606288e-10, "loss": 0.1341, "step": 9180 }, { "epoch": 2.98, "learning_rate": 9.070104129582647e-10, "loss": 0.159, "step": 9181 }, { "epoch": 2.98, "learning_rate": 8.836060584449524e-10, "loss": 0.1528, "step": 9182 }, { "epoch": 2.98, "learning_rate": 8.6050757158479e-10, "loss": 0.1404, "step": 9183 }, { "epoch": 2.98, "learning_rate": 8.377149552049602e-10, "loss": 0.1457, "step": 9184 }, { "epoch": 2.98, "learning_rate": 8.15228212095176e-10, "loss": 0.1506, "step": 9185 }, { "epoch": 2.98, "learning_rate": 7.930473450074028e-10, "loss": 0.1419, "step": 9186 }, { "epoch": 2.98, "learning_rate": 7.711723566564133e-10, "loss": 0.1368, "step": 9187 }, { "epoch": 2.98, "learning_rate": 7.496032497195105e-10, "loss": 0.1558, "step": 9188 }, { "epoch": 2.98, "learning_rate": 7.283400268365271e-10, "loss": 0.1371, "step": 9189 }, { "epoch": 2.98, "learning_rate": 7.073826906098258e-10, "loss": 0.1394, "step": 9190 }, { "epoch": 2.98, "learning_rate": 6.867312436045769e-10, "loss": 0.1498, "step": 9191 }, { "epoch": 2.98, "learning_rate": 6.663856883482034e-10, "loss": 0.1511, "step": 9192 }, { "epoch": 2.98, "learning_rate": 6.463460273306577e-10, "loss": 0.1596, "step": 9193 }, { "epoch": 2.98, "learning_rate": 6.266122630049776e-10, "loss": 0.1489, "step": 9194 }, { "epoch": 2.98, "learning_rate": 6.071843977861758e-10, "loss": 0.1569, "step": 9195 }, { "epoch": 2.98, "learning_rate": 5.880624340517948e-10, "loss": 0.1567, "step": 9196 }, { "epoch": 2.98, "learning_rate": 5.692463741424625e-10, "loss": 0.1584, "step": 9197 }, { "epoch": 2.98, "learning_rate": 5.507362203607814e-10, "loss": 0.1488, "step": 9198 }, { "epoch": 2.98, "learning_rate": 5.325319749727165e-10, "loss": 0.1481, "step": 9199 }, { "epoch": 2.98, "learning_rate": 5.146336402059304e-10, "loss": 0.1689, "step": 9200 }, { "epoch": 2.98, "learning_rate": 4.970412182511708e-10, "loss": 0.1347, "step": 9201 }, { "epoch": 2.98, "learning_rate": 4.797547112614376e-10, "loss": 0.156, "step": 9202 }, { "epoch": 2.98, "learning_rate": 4.627741213525383e-10, "loss": 0.1509, "step": 9203 }, { "epoch": 2.98, "learning_rate": 4.460994506028105e-10, "loss": 0.1233, "step": 9204 }, { "epoch": 2.98, "learning_rate": 4.2973070105256643e-10, "loss": 0.1434, "step": 9205 }, { "epoch": 2.98, "learning_rate": 4.136678747060363e-10, "loss": 0.1564, "step": 9206 }, { "epoch": 2.98, "learning_rate": 3.9791097352831487e-10, "loss": 0.1554, "step": 9207 }, { "epoch": 2.98, "learning_rate": 3.824599994484146e-10, "loss": 0.1463, "step": 9208 }, { "epoch": 2.98, "learning_rate": 3.673149543573229e-10, "loss": 0.153, "step": 9209 }, { "epoch": 2.98, "learning_rate": 3.5247584010827953e-10, "loss": 0.1475, "step": 9210 }, { "epoch": 2.98, "learning_rate": 3.3794265851816444e-10, "loss": 0.1544, "step": 9211 }, { "epoch": 2.99, "learning_rate": 3.237154113649998e-10, "loss": 0.1464, "step": 9212 }, { "epoch": 2.99, "learning_rate": 3.0979410039017053e-10, "loss": 0.1572, "step": 9213 }, { "epoch": 2.99, "learning_rate": 2.961787272978689e-10, "loss": 0.1502, "step": 9214 }, { "epoch": 2.99, "learning_rate": 2.828692937542621e-10, "loss": 0.157, "step": 9215 }, { "epoch": 2.99, "learning_rate": 2.6986580138832487e-10, "loss": 0.1371, "step": 9216 }, { "epoch": 2.99, "learning_rate": 2.571682517915619e-10, "loss": 0.1507, "step": 9217 }, { "epoch": 2.99, "learning_rate": 2.447766465180079e-10, "loss": 0.1551, "step": 9218 }, { "epoch": 2.99, "learning_rate": 2.3269098708422754e-10, "loss": 0.1533, "step": 9219 }, { "epoch": 2.99, "learning_rate": 2.2091127496959298e-10, "loss": 0.1494, "step": 9220 }, { "epoch": 2.99, "learning_rate": 2.0943751161545122e-10, "loss": 0.1544, "step": 9221 }, { "epoch": 2.99, "learning_rate": 1.98269698426512e-10, "loss": 0.145, "step": 9222 }, { "epoch": 2.99, "learning_rate": 1.8740783676945984e-10, "loss": 0.141, "step": 9223 }, { "epoch": 2.99, "learning_rate": 1.768519279732317e-10, "loss": 0.1656, "step": 9224 }, { "epoch": 2.99, "learning_rate": 1.666019733306823e-10, "loss": 0.1418, "step": 9225 }, { "epoch": 2.99, "learning_rate": 1.5665797409553097e-10, "loss": 0.1695, "step": 9226 }, { "epoch": 2.99, "learning_rate": 1.4701993148485972e-10, "loss": 0.1697, "step": 9227 }, { "epoch": 2.99, "learning_rate": 1.3768784667883562e-10, "loss": 0.1543, "step": 9228 }, { "epoch": 2.99, "learning_rate": 1.2866172081904548e-10, "loss": 0.1534, "step": 9229 }, { "epoch": 2.99, "learning_rate": 1.1994155501071636e-10, "loss": 0.1572, "step": 9230 }, { "epoch": 2.99, "learning_rate": 1.1152735032077255e-10, "loss": 0.1622, "step": 9231 }, { "epoch": 2.99, "learning_rate": 1.0341910777894593e-10, "loss": 0.1461, "step": 9232 }, { "epoch": 2.99, "learning_rate": 9.561682837777586e-11, "loss": 0.1562, "step": 9233 }, { "epoch": 2.99, "learning_rate": 8.812051307205416e-11, "loss": 0.1478, "step": 9234 }, { "epoch": 2.99, "learning_rate": 8.093016277938015e-11, "loss": 0.1345, "step": 9235 }, { "epoch": 2.99, "learning_rate": 7.404577837988313e-11, "loss": 0.1395, "step": 9236 }, { "epoch": 2.99, "learning_rate": 6.746736071594484e-11, "loss": 0.1343, "step": 9237 }, { "epoch": 2.99, "learning_rate": 6.119491059303206e-11, "loss": 0.1495, "step": 9238 }, { "epoch": 2.99, "learning_rate": 5.522842877830892e-11, "loss": 0.1525, "step": 9239 }, { "epoch": 2.99, "learning_rate": 4.956791600230215e-11, "loss": 0.1457, "step": 9240 }, { "epoch": 2.99, "learning_rate": 4.4213372957790935e-11, "loss": 0.1357, "step": 9241 }, { "epoch": 2.99, "learning_rate": 3.9164800300084404e-11, "loss": 0.1546, "step": 9242 }, { "epoch": 3.0, "learning_rate": 3.442219864729923e-11, "loss": 0.1598, "step": 9243 }, { "epoch": 3.0, "learning_rate": 2.998556857952695e-11, "loss": 0.1595, "step": 9244 }, { "epoch": 3.0, "learning_rate": 2.5854910639944165e-11, "loss": 0.1571, "step": 9245 }, { "epoch": 3.0, "learning_rate": 2.203022533425747e-11, "loss": 0.1448, "step": 9246 }, { "epoch": 3.0, "learning_rate": 1.8511513130148317e-11, "loss": 0.1315, "step": 9247 }, { "epoch": 3.0, "learning_rate": 1.529877445866079e-11, "loss": 0.1513, "step": 9248 }, { "epoch": 3.0, "learning_rate": 1.2392009713091402e-11, "loss": 0.1352, "step": 9249 }, { "epoch": 3.0, "learning_rate": 9.791219248711515e-12, "loss": 0.1479, "step": 9250 }, { "epoch": 3.0, "learning_rate": 7.496403384155137e-12, "loss": 0.147, "step": 9251 }, { "epoch": 3.0, "learning_rate": 5.507562400308697e-12, "loss": 0.1453, "step": 9252 }, { "epoch": 3.0, "learning_rate": 3.8246965403110344e-12, "loss": 0.1571, "step": 9253 }, { "epoch": 3.0, "learning_rate": 2.4478060103860777e-12, "loss": 0.1419, "step": 9254 }, { "epoch": 3.0, "learning_rate": 1.376890979287726e-12, "loss": 0.1416, "step": 9255 }, { "epoch": 3.0, "learning_rate": 6.119515774671847e-13, "loss": 0.1414, "step": 9256 }, { "epoch": 3.0, "learning_rate": 1.529878990158551e-13, "loss": 0.1501, "step": 9257 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.1455, "step": 9258 }, { "epoch": 3.0, "step": 9258, "total_flos": 6.941098484086866e+17, "train_loss": 0.17310240187377002, "train_runtime": 21369.0849, "train_samples_per_second": 55.454, "train_steps_per_second": 0.433 } ], "logging_steps": 1.0, "max_steps": 9258, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "total_flos": 6.941098484086866e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }