{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.2222222222222223, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9268292682926833e-06, "loss": 2.0292, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.853658536585367e-06, "loss": 2.0417, "step": 2 }, { "epoch": 0.01, "learning_rate": 8.780487804878048e-06, "loss": 1.5471, "step": 3 }, { "epoch": 0.01, "learning_rate": 1.1707317073170733e-05, "loss": 1.4769, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.4634146341463415e-05, "loss": 1.3682, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.7560975609756096e-05, "loss": 1.3256, "step": 6 }, { "epoch": 0.02, "learning_rate": 2.048780487804878e-05, "loss": 1.317, "step": 7 }, { "epoch": 0.02, "learning_rate": 2.3414634146341466e-05, "loss": 1.3441, "step": 8 }, { "epoch": 0.02, "learning_rate": 2.6341463414634148e-05, "loss": 1.3028, "step": 9 }, { "epoch": 0.02, "learning_rate": 2.926829268292683e-05, "loss": 1.3212, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.2195121951219514e-05, "loss": 1.2535, "step": 11 }, { "epoch": 0.03, "learning_rate": 3.512195121951219e-05, "loss": 1.2933, "step": 12 }, { "epoch": 0.03, "learning_rate": 3.804878048780488e-05, "loss": 1.2811, "step": 13 }, { "epoch": 0.03, "learning_rate": 4.097560975609756e-05, "loss": 1.304, "step": 14 }, { "epoch": 0.03, "learning_rate": 4.390243902439024e-05, "loss": 1.3205, "step": 15 }, { "epoch": 0.04, "learning_rate": 4.682926829268293e-05, "loss": 1.2825, "step": 16 }, { "epoch": 0.04, "learning_rate": 4.975609756097561e-05, "loss": 1.2774, "step": 17 }, { "epoch": 0.04, "learning_rate": 5.2682926829268296e-05, "loss": 1.2641, "step": 18 }, { "epoch": 0.04, "learning_rate": 5.560975609756098e-05, "loss": 1.2429, "step": 19 }, { "epoch": 0.04, "learning_rate": 5.853658536585366e-05, "loss": 1.2503, "step": 20 }, { "epoch": 0.05, "learning_rate": 6.146341463414634e-05, "loss": 1.2882, "step": 21 }, { "epoch": 0.05, "learning_rate": 6.439024390243903e-05, "loss": 1.3068, "step": 22 }, { "epoch": 0.05, "learning_rate": 6.731707317073171e-05, "loss": 1.2592, "step": 23 }, { "epoch": 0.05, "learning_rate": 7.024390243902439e-05, "loss": 1.2538, "step": 24 }, { "epoch": 0.06, "learning_rate": 7.317073170731707e-05, "loss": 1.3129, "step": 25 }, { "epoch": 0.06, "learning_rate": 7.609756097560976e-05, "loss": 1.2578, "step": 26 }, { "epoch": 0.06, "learning_rate": 7.902439024390244e-05, "loss": 1.2534, "step": 27 }, { "epoch": 0.06, "learning_rate": 8.195121951219513e-05, "loss": 1.3043, "step": 28 }, { "epoch": 0.06, "learning_rate": 8.48780487804878e-05, "loss": 1.2677, "step": 29 }, { "epoch": 0.07, "learning_rate": 8.780487804878048e-05, "loss": 1.2535, "step": 30 }, { "epoch": 0.07, "learning_rate": 9.073170731707318e-05, "loss": 1.2639, "step": 31 }, { "epoch": 0.07, "learning_rate": 9.365853658536587e-05, "loss": 1.3038, "step": 32 }, { "epoch": 0.07, "learning_rate": 9.658536585365855e-05, "loss": 1.2966, "step": 33 }, { "epoch": 0.08, "learning_rate": 9.951219512195122e-05, "loss": 1.2355, "step": 34 }, { "epoch": 0.08, "learning_rate": 0.0001024390243902439, "loss": 1.2861, "step": 35 }, { "epoch": 0.08, "learning_rate": 0.00010536585365853659, "loss": 1.3257, "step": 36 }, { "epoch": 0.08, "learning_rate": 0.00010829268292682928, "loss": 1.3195, "step": 37 }, { "epoch": 0.08, "learning_rate": 0.00011121951219512196, "loss": 1.3274, "step": 38 }, { "epoch": 0.09, "learning_rate": 0.00011414634146341463, "loss": 1.2388, "step": 39 }, { "epoch": 0.09, "learning_rate": 0.00011707317073170732, "loss": 1.2686, "step": 40 }, { "epoch": 0.09, "learning_rate": 0.00012, "loss": 1.274, "step": 41 }, { "epoch": 0.09, "learning_rate": 0.00011999982720089112, "loss": 1.2754, "step": 42 }, { "epoch": 0.1, "learning_rate": 0.00011999930880455974, "loss": 1.2609, "step": 43 }, { "epoch": 0.1, "learning_rate": 0.00011999844481399185, "loss": 1.3151, "step": 44 }, { "epoch": 0.1, "learning_rate": 0.00011999723523416397, "loss": 1.3002, "step": 45 }, { "epoch": 0.1, "learning_rate": 0.00011999568007204328, "loss": 1.2993, "step": 46 }, { "epoch": 0.1, "learning_rate": 0.00011999377933658745, "loss": 1.301, "step": 47 }, { "epoch": 0.11, "learning_rate": 0.00011999153303874466, "loss": 1.3308, "step": 48 }, { "epoch": 0.11, "learning_rate": 0.00011998894119145353, "loss": 1.2533, "step": 49 }, { "epoch": 0.11, "learning_rate": 0.00011998600380964302, "loss": 1.2932, "step": 50 }, { "epoch": 0.11, "learning_rate": 0.00011998272091023235, "loss": 1.2577, "step": 51 }, { "epoch": 0.12, "learning_rate": 0.00011997909251213094, "loss": 1.3185, "step": 52 }, { "epoch": 0.12, "learning_rate": 0.00011997511863623823, "loss": 1.2698, "step": 53 }, { "epoch": 0.12, "learning_rate": 0.00011997079930544366, "loss": 1.2681, "step": 54 }, { "epoch": 0.12, "learning_rate": 0.00011996613454462643, "loss": 1.2688, "step": 55 }, { "epoch": 0.12, "learning_rate": 0.0001199611243806554, "loss": 1.2947, "step": 56 }, { "epoch": 0.13, "learning_rate": 0.000119955768842389, "loss": 1.3007, "step": 57 }, { "epoch": 0.13, "learning_rate": 0.00011995006796067497, "loss": 1.2609, "step": 58 }, { "epoch": 0.13, "learning_rate": 0.00011994402176835021, "loss": 1.3019, "step": 59 }, { "epoch": 0.13, "learning_rate": 0.0001199376303002406, "loss": 1.3291, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.00011993089359316082, "loss": 1.271, "step": 61 }, { "epoch": 0.14, "learning_rate": 0.00011992381168591412, "loss": 1.3238, "step": 62 }, { "epoch": 0.14, "learning_rate": 0.00011991638461929203, "loss": 1.325, "step": 63 }, { "epoch": 0.14, "learning_rate": 0.00011990861243607424, "loss": 1.249, "step": 64 }, { "epoch": 0.14, "learning_rate": 0.00011990049518102833, "loss": 1.2804, "step": 65 }, { "epoch": 0.15, "learning_rate": 0.00011989203290090944, "loss": 1.2872, "step": 66 }, { "epoch": 0.15, "learning_rate": 0.00011988322564446003, "loss": 1.3123, "step": 67 }, { "epoch": 0.15, "learning_rate": 0.00011987407346240964, "loss": 1.3108, "step": 68 }, { "epoch": 0.15, "learning_rate": 0.00011986457640747457, "loss": 1.3069, "step": 69 }, { "epoch": 0.16, "learning_rate": 0.00011985473453435758, "loss": 1.2944, "step": 70 }, { "epoch": 0.16, "learning_rate": 0.00011984454789974758, "loss": 1.2525, "step": 71 }, { "epoch": 0.16, "learning_rate": 0.00011983401656231926, "loss": 1.2582, "step": 72 }, { "epoch": 0.16, "learning_rate": 0.0001198231405827328, "loss": 1.3009, "step": 73 }, { "epoch": 0.16, "learning_rate": 0.00011981192002363357, "loss": 1.3136, "step": 74 }, { "epoch": 0.17, "learning_rate": 0.00011980035494965159, "loss": 1.2795, "step": 75 }, { "epoch": 0.17, "learning_rate": 0.0001197884454274014, "loss": 1.3302, "step": 76 }, { "epoch": 0.17, "learning_rate": 0.00011977619152548147, "loss": 1.3062, "step": 77 }, { "epoch": 0.17, "learning_rate": 0.0001197635933144739, "loss": 1.2965, "step": 78 }, { "epoch": 0.18, "learning_rate": 0.00011975065086694404, "loss": 1.27, "step": 79 }, { "epoch": 0.18, "learning_rate": 0.00011973736425743998, "loss": 1.2587, "step": 80 }, { "epoch": 0.18, "learning_rate": 0.0001197237335624922, "loss": 1.3065, "step": 81 }, { "epoch": 0.18, "learning_rate": 0.00011970975886061309, "loss": 1.2612, "step": 82 }, { "epoch": 0.18, "learning_rate": 0.00011969544023229654, "loss": 1.317, "step": 83 }, { "epoch": 0.19, "learning_rate": 0.00011968077776001742, "loss": 1.3093, "step": 84 }, { "epoch": 0.19, "learning_rate": 0.00011966577152823111, "loss": 1.2632, "step": 85 }, { "epoch": 0.19, "learning_rate": 0.00011965042162337308, "loss": 1.2846, "step": 86 }, { "epoch": 0.19, "learning_rate": 0.00011963472813385833, "loss": 1.3133, "step": 87 }, { "epoch": 0.2, "learning_rate": 0.00011961869115008088, "loss": 1.3272, "step": 88 }, { "epoch": 0.2, "learning_rate": 0.0001196023107644133, "loss": 1.2728, "step": 89 }, { "epoch": 0.2, "learning_rate": 0.0001195855870712061, "loss": 1.2478, "step": 90 }, { "epoch": 0.2, "learning_rate": 0.00011956852016678727, "loss": 1.2854, "step": 91 }, { "epoch": 0.2, "learning_rate": 0.00011955111014946166, "loss": 1.2712, "step": 92 }, { "epoch": 0.21, "learning_rate": 0.00011953335711951047, "loss": 1.2605, "step": 93 }, { "epoch": 0.21, "learning_rate": 0.00011951526117919063, "loss": 1.3021, "step": 94 }, { "epoch": 0.21, "learning_rate": 0.00011949682243273419, "loss": 1.2482, "step": 95 }, { "epoch": 0.21, "learning_rate": 0.0001194780409863478, "loss": 1.2801, "step": 96 }, { "epoch": 0.22, "learning_rate": 0.00011945891694821206, "loss": 1.2746, "step": 97 }, { "epoch": 0.22, "learning_rate": 0.0001194394504284808, "loss": 1.28, "step": 98 }, { "epoch": 0.22, "learning_rate": 0.00011941964153928065, "loss": 1.2296, "step": 99 }, { "epoch": 0.22, "learning_rate": 0.00011939949039471018, "loss": 1.2531, "step": 100 }, { "epoch": 0.22, "learning_rate": 0.00011937899711083942, "loss": 1.2641, "step": 101 }, { "epoch": 0.23, "learning_rate": 0.00011935816180570905, "loss": 1.3043, "step": 102 }, { "epoch": 0.23, "learning_rate": 0.00011933698459932983, "loss": 1.2511, "step": 103 }, { "epoch": 0.23, "learning_rate": 0.00011931546561368184, "loss": 1.2838, "step": 104 }, { "epoch": 0.23, "learning_rate": 0.00011929360497271377, "loss": 1.2616, "step": 105 }, { "epoch": 0.24, "learning_rate": 0.0001192714028023423, "loss": 1.3006, "step": 106 }, { "epoch": 0.24, "learning_rate": 0.00011924885923045124, "loss": 1.2762, "step": 107 }, { "epoch": 0.24, "learning_rate": 0.00011922597438689093, "loss": 1.2797, "step": 108 }, { "epoch": 0.24, "learning_rate": 0.00011920274840347734, "loss": 1.2526, "step": 109 }, { "epoch": 0.24, "learning_rate": 0.00011917918141399149, "loss": 1.2742, "step": 110 }, { "epoch": 0.25, "learning_rate": 0.0001191552735541785, "loss": 1.2698, "step": 111 }, { "epoch": 0.25, "learning_rate": 0.00011913102496174698, "loss": 1.2516, "step": 112 }, { "epoch": 0.25, "learning_rate": 0.00011910643577636807, "loss": 1.2814, "step": 113 }, { "epoch": 0.25, "learning_rate": 0.00011908150613967473, "loss": 1.3506, "step": 114 }, { "epoch": 0.26, "learning_rate": 0.00011905623619526097, "loss": 1.3044, "step": 115 }, { "epoch": 0.26, "learning_rate": 0.0001190306260886809, "loss": 1.2427, "step": 116 }, { "epoch": 0.26, "learning_rate": 0.00011900467596744797, "loss": 1.3192, "step": 117 }, { "epoch": 0.26, "learning_rate": 0.00011897838598103412, "loss": 1.2457, "step": 118 }, { "epoch": 0.26, "learning_rate": 0.00011895175628086887, "loss": 1.2686, "step": 119 }, { "epoch": 0.27, "learning_rate": 0.0001189247870203385, "loss": 1.2782, "step": 120 }, { "epoch": 0.27, "learning_rate": 0.00011889747835478518, "loss": 1.3098, "step": 121 }, { "epoch": 0.27, "learning_rate": 0.00011886983044150598, "loss": 1.266, "step": 122 }, { "epoch": 0.27, "learning_rate": 0.00011884184343975209, "loss": 1.2918, "step": 123 }, { "epoch": 0.28, "learning_rate": 0.00011881351751072778, "loss": 1.2611, "step": 124 }, { "epoch": 0.28, "learning_rate": 0.00011878485281758958, "loss": 1.2192, "step": 125 }, { "epoch": 0.28, "learning_rate": 0.00011875584952544527, "loss": 1.3018, "step": 126 }, { "epoch": 0.28, "learning_rate": 0.00011872650780135294, "loss": 1.304, "step": 127 }, { "epoch": 0.28, "learning_rate": 0.00011869682781432005, "loss": 1.3303, "step": 128 }, { "epoch": 0.29, "learning_rate": 0.00011866680973530246, "loss": 1.2869, "step": 129 }, { "epoch": 0.29, "learning_rate": 0.00011863645373720338, "loss": 1.2533, "step": 130 }, { "epoch": 0.29, "learning_rate": 0.00011860575999487249, "loss": 1.2678, "step": 131 }, { "epoch": 0.29, "learning_rate": 0.00011857472868510483, "loss": 1.2895, "step": 132 }, { "epoch": 0.3, "learning_rate": 0.0001185433599866398, "loss": 1.3199, "step": 133 }, { "epoch": 0.3, "learning_rate": 0.0001185116540801602, "loss": 1.264, "step": 134 }, { "epoch": 0.3, "learning_rate": 0.00011847961114829109, "loss": 1.2979, "step": 135 }, { "epoch": 0.3, "learning_rate": 0.0001184472313755988, "loss": 1.2764, "step": 136 }, { "epoch": 0.3, "learning_rate": 0.0001184145149485899, "loss": 1.286, "step": 137 }, { "epoch": 0.31, "learning_rate": 0.00011838146205571, "loss": 1.2782, "step": 138 }, { "epoch": 0.31, "learning_rate": 0.00011834807288734277, "loss": 1.2893, "step": 139 }, { "epoch": 0.31, "learning_rate": 0.00011831434763580886, "loss": 1.2874, "step": 140 }, { "epoch": 0.31, "learning_rate": 0.0001182802864953647, "loss": 1.3005, "step": 141 }, { "epoch": 0.32, "learning_rate": 0.00011824588966220147, "loss": 1.2885, "step": 142 }, { "epoch": 0.32, "learning_rate": 0.00011821115733444388, "loss": 1.291, "step": 143 }, { "epoch": 0.32, "learning_rate": 0.00011817608971214912, "loss": 1.2475, "step": 144 }, { "epoch": 0.32, "learning_rate": 0.00011814068699730562, "loss": 1.2787, "step": 145 }, { "epoch": 0.32, "learning_rate": 0.00011810494939383203, "loss": 1.2816, "step": 146 }, { "epoch": 0.33, "learning_rate": 0.00011806887710757583, "loss": 1.3126, "step": 147 }, { "epoch": 0.33, "learning_rate": 0.00011803247034631235, "loss": 1.3111, "step": 148 }, { "epoch": 0.33, "learning_rate": 0.00011799572931974343, "loss": 1.2751, "step": 149 }, { "epoch": 0.33, "learning_rate": 0.0001179586542394963, "loss": 1.232, "step": 150 }, { "epoch": 0.34, "learning_rate": 0.00011792124531912233, "loss": 1.2673, "step": 151 }, { "epoch": 0.34, "learning_rate": 0.00011788350277409578, "loss": 1.2299, "step": 152 }, { "epoch": 0.34, "learning_rate": 0.00011784542682181257, "loss": 1.2662, "step": 153 }, { "epoch": 0.34, "learning_rate": 0.0001178070176815891, "loss": 1.2464, "step": 154 }, { "epoch": 0.34, "learning_rate": 0.00011776827557466086, "loss": 1.2414, "step": 155 }, { "epoch": 0.35, "learning_rate": 0.00011772920072418121, "loss": 1.254, "step": 156 }, { "epoch": 0.35, "learning_rate": 0.00011768979335522015, "loss": 1.2713, "step": 157 }, { "epoch": 0.35, "learning_rate": 0.00011765005369476294, "loss": 1.2481, "step": 158 }, { "epoch": 0.35, "learning_rate": 0.00011760998197170885, "loss": 1.2539, "step": 159 }, { "epoch": 0.36, "learning_rate": 0.00011756957841686985, "loss": 1.2687, "step": 160 }, { "epoch": 0.36, "learning_rate": 0.00011752884326296917, "loss": 1.2749, "step": 161 }, { "epoch": 0.36, "learning_rate": 0.00011748777674464008, "loss": 1.2518, "step": 162 }, { "epoch": 0.36, "learning_rate": 0.00011744637909842455, "loss": 1.3132, "step": 163 }, { "epoch": 0.36, "learning_rate": 0.00011740465056277176, "loss": 1.3026, "step": 164 }, { "epoch": 0.37, "learning_rate": 0.00011736259137803685, "loss": 1.2225, "step": 165 }, { "epoch": 0.37, "learning_rate": 0.00011732020178647945, "loss": 1.2805, "step": 166 }, { "epoch": 0.37, "learning_rate": 0.0001172774820322624, "loss": 1.2332, "step": 167 }, { "epoch": 0.37, "learning_rate": 0.00011723443236145015, "loss": 1.2702, "step": 168 }, { "epoch": 0.38, "learning_rate": 0.00011719105302200757, "loss": 1.2665, "step": 169 }, { "epoch": 0.38, "learning_rate": 0.00011714734426379837, "loss": 1.2261, "step": 170 }, { "epoch": 0.38, "learning_rate": 0.00011710330633858367, "loss": 1.2395, "step": 171 }, { "epoch": 0.38, "learning_rate": 0.00011705893950002063, "loss": 1.2588, "step": 172 }, { "epoch": 0.38, "learning_rate": 0.0001170142440036609, "loss": 1.2609, "step": 173 }, { "epoch": 0.39, "learning_rate": 0.00011696922010694925, "loss": 1.2454, "step": 174 }, { "epoch": 0.39, "learning_rate": 0.00011692386806922196, "loss": 1.2901, "step": 175 }, { "epoch": 0.39, "learning_rate": 0.00011687818815170541, "loss": 1.2764, "step": 176 }, { "epoch": 0.39, "learning_rate": 0.0001168321806175146, "loss": 1.2632, "step": 177 }, { "epoch": 0.4, "learning_rate": 0.00011678584573165155, "loss": 1.2594, "step": 178 }, { "epoch": 0.4, "learning_rate": 0.0001167391837610038, "loss": 1.2609, "step": 179 }, { "epoch": 0.4, "learning_rate": 0.00011669219497434297, "loss": 1.2928, "step": 180 }, { "epoch": 0.4, "learning_rate": 0.00011664487964232302, "loss": 1.2612, "step": 181 }, { "epoch": 0.4, "learning_rate": 0.00011659723803747888, "loss": 1.3024, "step": 182 }, { "epoch": 0.41, "learning_rate": 0.00011654927043422479, "loss": 1.2663, "step": 183 }, { "epoch": 0.41, "learning_rate": 0.0001165009771088527, "loss": 1.2345, "step": 184 }, { "epoch": 0.41, "learning_rate": 0.00011645235833953074, "loss": 1.3054, "step": 185 }, { "epoch": 0.41, "learning_rate": 0.00011640341440630155, "loss": 1.2414, "step": 186 }, { "epoch": 0.42, "learning_rate": 0.00011635414559108078, "loss": 1.304, "step": 187 }, { "epoch": 0.42, "learning_rate": 0.00011630455217765531, "loss": 1.2611, "step": 188 }, { "epoch": 0.42, "learning_rate": 0.00011625463445168175, "loss": 1.2403, "step": 189 }, { "epoch": 0.42, "learning_rate": 0.00011620439270068469, "loss": 1.2718, "step": 190 }, { "epoch": 0.42, "learning_rate": 0.00011615382721405513, "loss": 1.2647, "step": 191 }, { "epoch": 0.43, "learning_rate": 0.0001161029382830488, "loss": 1.2481, "step": 192 }, { "epoch": 0.43, "learning_rate": 0.00011605172620078439, "loss": 1.25, "step": 193 }, { "epoch": 0.43, "learning_rate": 0.000116000191262242, "loss": 1.2761, "step": 194 }, { "epoch": 0.43, "learning_rate": 0.00011594833376426134, "loss": 1.3131, "step": 195 }, { "epoch": 0.44, "learning_rate": 0.00011589615400554007, "loss": 1.222, "step": 196 }, { "epoch": 0.44, "learning_rate": 0.00011584365228663202, "loss": 1.2354, "step": 197 }, { "epoch": 0.44, "learning_rate": 0.00011579082890994557, "loss": 1.2224, "step": 198 }, { "epoch": 0.44, "learning_rate": 0.00011573768417974176, "loss": 1.2783, "step": 199 }, { "epoch": 0.44, "learning_rate": 0.00011568421840213267, "loss": 1.2999, "step": 200 }, { "epoch": 0.45, "learning_rate": 0.00011563043188507961, "loss": 1.2273, "step": 201 }, { "epoch": 0.45, "learning_rate": 0.0001155763249383913, "loss": 1.2849, "step": 202 }, { "epoch": 0.45, "learning_rate": 0.00011552189787372217, "loss": 1.2166, "step": 203 }, { "epoch": 0.45, "learning_rate": 0.00011546715100457046, "loss": 1.2472, "step": 204 }, { "epoch": 0.46, "learning_rate": 0.00011541208464627652, "loss": 1.2523, "step": 205 }, { "epoch": 0.46, "learning_rate": 0.00011535669911602097, "loss": 1.2413, "step": 206 }, { "epoch": 0.46, "learning_rate": 0.00011530099473282279, "loss": 1.2395, "step": 207 }, { "epoch": 0.46, "learning_rate": 0.00011524497181753759, "loss": 1.2717, "step": 208 }, { "epoch": 0.46, "learning_rate": 0.00011518863069285567, "loss": 1.2152, "step": 209 }, { "epoch": 0.47, "learning_rate": 0.00011513197168330026, "loss": 1.2583, "step": 210 }, { "epoch": 0.47, "learning_rate": 0.00011507499511522556, "loss": 1.2311, "step": 211 }, { "epoch": 0.47, "learning_rate": 0.00011501770131681491, "loss": 1.276, "step": 212 }, { "epoch": 0.47, "learning_rate": 0.0001149600906180789, "loss": 1.2171, "step": 213 }, { "epoch": 0.48, "learning_rate": 0.00011490216335085345, "loss": 1.2381, "step": 214 }, { "epoch": 0.48, "learning_rate": 0.00011484391984879785, "loss": 1.2577, "step": 215 }, { "epoch": 0.48, "learning_rate": 0.000114785360447393, "loss": 1.2659, "step": 216 }, { "epoch": 0.48, "learning_rate": 0.00011472648548393928, "loss": 1.2436, "step": 217 }, { "epoch": 0.48, "learning_rate": 0.00011466729529755472, "loss": 1.2542, "step": 218 }, { "epoch": 0.49, "learning_rate": 0.00011460779022917307, "loss": 1.2525, "step": 219 }, { "epoch": 0.49, "learning_rate": 0.00011454797062154173, "loss": 1.2511, "step": 220 }, { "epoch": 0.49, "learning_rate": 0.00011448783681921988, "loss": 1.2654, "step": 221 }, { "epoch": 0.49, "learning_rate": 0.00011442738916857643, "loss": 1.2532, "step": 222 }, { "epoch": 0.5, "learning_rate": 0.00011436662801778805, "loss": 1.2714, "step": 223 }, { "epoch": 0.5, "learning_rate": 0.00011430555371683716, "loss": 1.2708, "step": 224 }, { "epoch": 0.5, "learning_rate": 0.00011424416661750994, "loss": 1.1997, "step": 225 }, { "epoch": 0.5, "learning_rate": 0.00011418246707339422, "loss": 1.2324, "step": 226 }, { "epoch": 0.5, "learning_rate": 0.00011412045543987757, "loss": 1.2788, "step": 227 }, { "epoch": 0.51, "learning_rate": 0.00011405813207414514, "loss": 1.2543, "step": 228 }, { "epoch": 0.51, "learning_rate": 0.0001139954973351777, "loss": 1.2854, "step": 229 }, { "epoch": 0.51, "learning_rate": 0.00011393255158374945, "loss": 1.2034, "step": 230 }, { "epoch": 0.51, "learning_rate": 0.00011386929518242606, "loss": 1.2724, "step": 231 }, { "epoch": 0.52, "learning_rate": 0.00011380572849556251, "loss": 1.2631, "step": 232 }, { "epoch": 0.52, "learning_rate": 0.00011374185188930107, "loss": 1.281, "step": 233 }, { "epoch": 0.52, "learning_rate": 0.00011367766573156905, "loss": 1.2162, "step": 234 }, { "epoch": 0.52, "learning_rate": 0.00011361317039207682, "loss": 1.2456, "step": 235 }, { "epoch": 0.52, "learning_rate": 0.00011354836624231564, "loss": 1.2486, "step": 236 }, { "epoch": 0.53, "learning_rate": 0.00011348325365555547, "loss": 1.2923, "step": 237 }, { "epoch": 0.53, "learning_rate": 0.00011341783300684288, "loss": 1.2268, "step": 238 }, { "epoch": 0.53, "learning_rate": 0.00011335210467299887, "loss": 1.2583, "step": 239 }, { "epoch": 0.53, "learning_rate": 0.00011328606903261669, "loss": 1.2731, "step": 240 }, { "epoch": 0.54, "learning_rate": 0.00011321972646605965, "loss": 1.2224, "step": 241 }, { "epoch": 0.54, "learning_rate": 0.00011315307735545897, "loss": 1.2495, "step": 242 }, { "epoch": 0.54, "learning_rate": 0.00011308612208471157, "loss": 1.2641, "step": 243 }, { "epoch": 0.54, "learning_rate": 0.0001130188610394778, "loss": 1.2308, "step": 244 }, { "epoch": 0.54, "learning_rate": 0.00011295129460717928, "loss": 1.2406, "step": 245 }, { "epoch": 0.55, "learning_rate": 0.00011288342317699666, "loss": 1.252, "step": 246 }, { "epoch": 0.55, "learning_rate": 0.00011281524713986736, "loss": 1.2578, "step": 247 }, { "epoch": 0.55, "learning_rate": 0.00011274676688848332, "loss": 1.2609, "step": 248 }, { "epoch": 0.55, "learning_rate": 0.00011267798281728878, "loss": 1.2398, "step": 249 }, { "epoch": 0.56, "learning_rate": 0.00011260889532247793, "loss": 1.2689, "step": 250 }, { "epoch": 0.56, "learning_rate": 0.00011253950480199267, "loss": 1.2193, "step": 251 }, { "epoch": 0.56, "learning_rate": 0.00011246981165552038, "loss": 1.2361, "step": 252 }, { "epoch": 0.56, "learning_rate": 0.00011239981628449148, "loss": 1.2999, "step": 253 }, { "epoch": 0.56, "learning_rate": 0.00011232951909207721, "loss": 1.2357, "step": 254 }, { "epoch": 0.57, "learning_rate": 0.00011225892048318737, "loss": 1.2478, "step": 255 }, { "epoch": 0.57, "learning_rate": 0.0001121880208644678, "loss": 1.2718, "step": 256 }, { "epoch": 0.57, "learning_rate": 0.00011211682064429823, "loss": 1.2786, "step": 257 }, { "epoch": 0.57, "learning_rate": 0.00011204532023278979, "loss": 1.2636, "step": 258 }, { "epoch": 0.58, "learning_rate": 0.00011197352004178271, "loss": 1.2662, "step": 259 }, { "epoch": 0.58, "learning_rate": 0.00011190142048484403, "loss": 1.2616, "step": 260 }, { "epoch": 0.58, "learning_rate": 0.00011182902197726497, "loss": 1.2511, "step": 261 }, { "epoch": 0.58, "learning_rate": 0.00011175632493605883, "loss": 1.2794, "step": 262 }, { "epoch": 0.58, "learning_rate": 0.00011168332977995841, "loss": 1.2709, "step": 263 }, { "epoch": 0.59, "learning_rate": 0.00011161003692941364, "loss": 1.2583, "step": 264 }, { "epoch": 0.59, "learning_rate": 0.00011153644680658915, "loss": 1.2573, "step": 265 }, { "epoch": 0.59, "learning_rate": 0.00011146255983536184, "loss": 1.2251, "step": 266 }, { "epoch": 0.59, "learning_rate": 0.0001113883764413185, "loss": 1.2647, "step": 267 }, { "epoch": 0.6, "learning_rate": 0.00011131389705175328, "loss": 1.2252, "step": 268 }, { "epoch": 0.6, "learning_rate": 0.00011123912209566525, "loss": 1.2373, "step": 269 }, { "epoch": 0.6, "learning_rate": 0.00011116405200375591, "loss": 1.2447, "step": 270 }, { "epoch": 0.6, "learning_rate": 0.00011108868720842679, "loss": 1.2294, "step": 271 }, { "epoch": 0.6, "learning_rate": 0.00011101302814377686, "loss": 1.2497, "step": 272 }, { "epoch": 0.61, "learning_rate": 0.00011093707524560006, "loss": 1.2151, "step": 273 }, { "epoch": 0.61, "learning_rate": 0.00011086082895138288, "loss": 1.2533, "step": 274 }, { "epoch": 0.61, "learning_rate": 0.00011078428970030167, "loss": 1.2424, "step": 275 }, { "epoch": 0.61, "learning_rate": 0.00011070745793322026, "loss": 1.2656, "step": 276 }, { "epoch": 0.62, "learning_rate": 0.00011063033409268734, "loss": 1.2047, "step": 277 }, { "epoch": 0.62, "learning_rate": 0.00011055291862293394, "loss": 1.2705, "step": 278 }, { "epoch": 0.62, "learning_rate": 0.00011047521196987087, "loss": 1.2819, "step": 279 }, { "epoch": 0.62, "learning_rate": 0.00011039721458108616, "loss": 1.2561, "step": 280 }, { "epoch": 0.62, "learning_rate": 0.00011031892690584239, "loss": 1.2291, "step": 281 }, { "epoch": 0.63, "learning_rate": 0.00011024034939507433, "loss": 1.2234, "step": 282 }, { "epoch": 0.63, "learning_rate": 0.00011016148250138605, "loss": 1.247, "step": 283 }, { "epoch": 0.63, "learning_rate": 0.00011008232667904853, "loss": 1.2593, "step": 284 }, { "epoch": 0.63, "learning_rate": 0.00011000288238399695, "loss": 1.255, "step": 285 }, { "epoch": 0.64, "learning_rate": 0.0001099231500738281, "loss": 1.2471, "step": 286 }, { "epoch": 0.64, "learning_rate": 0.00010984313020779771, "loss": 1.2817, "step": 287 }, { "epoch": 0.64, "learning_rate": 0.00010976282324681785, "loss": 1.2424, "step": 288 }, { "epoch": 0.64, "learning_rate": 0.00010968222965345421, "loss": 1.2042, "step": 289 }, { "epoch": 0.64, "learning_rate": 0.0001096013498919235, "loss": 1.2324, "step": 290 }, { "epoch": 0.65, "learning_rate": 0.00010952018442809074, "loss": 1.2063, "step": 291 }, { "epoch": 0.65, "learning_rate": 0.00010943873372946661, "loss": 1.2345, "step": 292 }, { "epoch": 0.65, "learning_rate": 0.00010935699826520467, "loss": 1.2373, "step": 293 }, { "epoch": 0.65, "learning_rate": 0.00010927497850609882, "loss": 1.2694, "step": 294 }, { "epoch": 0.66, "learning_rate": 0.00010919267492458041, "loss": 1.2393, "step": 295 }, { "epoch": 0.66, "learning_rate": 0.00010911008799471562, "loss": 1.2307, "step": 296 }, { "epoch": 0.66, "learning_rate": 0.00010902721819220271, "loss": 1.2317, "step": 297 }, { "epoch": 0.66, "learning_rate": 0.0001089440659943693, "loss": 1.2129, "step": 298 }, { "epoch": 0.66, "learning_rate": 0.00010886063188016958, "loss": 1.2341, "step": 299 }, { "epoch": 0.67, "learning_rate": 0.00010877691633018154, "loss": 1.2456, "step": 300 }, { "epoch": 0.67, "learning_rate": 0.0001086929198266043, "loss": 1.2511, "step": 301 }, { "epoch": 0.67, "learning_rate": 0.0001086086428532552, "loss": 1.247, "step": 302 }, { "epoch": 0.67, "learning_rate": 0.0001085240858955671, "loss": 1.2702, "step": 303 }, { "epoch": 0.68, "learning_rate": 0.00010843924944058557, "loss": 1.2601, "step": 304 }, { "epoch": 0.68, "learning_rate": 0.0001083541339769661, "loss": 1.1809, "step": 305 }, { "epoch": 0.68, "learning_rate": 0.00010826873999497118, "loss": 1.2388, "step": 306 }, { "epoch": 0.68, "learning_rate": 0.00010818306798646766, "loss": 1.2608, "step": 307 }, { "epoch": 0.68, "learning_rate": 0.00010809711844492373, "loss": 1.2479, "step": 308 }, { "epoch": 0.69, "learning_rate": 0.00010801089186540621, "loss": 1.2148, "step": 309 }, { "epoch": 0.69, "learning_rate": 0.00010792438874457763, "loss": 1.225, "step": 310 }, { "epoch": 0.69, "learning_rate": 0.00010783760958069341, "loss": 1.2321, "step": 311 }, { "epoch": 0.69, "learning_rate": 0.00010775055487359894, "loss": 1.2556, "step": 312 }, { "epoch": 0.7, "learning_rate": 0.00010766322512472675, "loss": 1.2122, "step": 313 }, { "epoch": 0.7, "learning_rate": 0.00010757562083709362, "loss": 1.2606, "step": 314 }, { "epoch": 0.7, "learning_rate": 0.00010748774251529763, "loss": 1.2528, "step": 315 }, { "epoch": 0.7, "learning_rate": 0.00010739959066551528, "loss": 1.2228, "step": 316 }, { "epoch": 0.7, "learning_rate": 0.00010731116579549864, "loss": 1.1934, "step": 317 }, { "epoch": 0.71, "learning_rate": 0.00010722246841457232, "loss": 1.2169, "step": 318 }, { "epoch": 0.71, "learning_rate": 0.0001071334990336306, "loss": 1.2657, "step": 319 }, { "epoch": 0.71, "learning_rate": 0.0001070442581651345, "loss": 1.2457, "step": 320 }, { "epoch": 0.71, "learning_rate": 0.00010695474632310871, "loss": 1.2288, "step": 321 }, { "epoch": 0.72, "learning_rate": 0.00010686496402313882, "loss": 1.2224, "step": 322 }, { "epoch": 0.72, "learning_rate": 0.00010677491178236823, "loss": 1.2083, "step": 323 }, { "epoch": 0.72, "learning_rate": 0.00010668459011949512, "loss": 1.2031, "step": 324 }, { "epoch": 0.72, "learning_rate": 0.00010659399955476964, "loss": 1.2406, "step": 325 }, { "epoch": 0.72, "learning_rate": 0.00010650314060999073, "loss": 1.1586, "step": 326 }, { "epoch": 0.73, "learning_rate": 0.00010641201380850319, "loss": 1.2393, "step": 327 }, { "epoch": 0.73, "learning_rate": 0.00010632061967519473, "loss": 1.1987, "step": 328 }, { "epoch": 0.73, "learning_rate": 0.00010622895873649281, "loss": 1.1982, "step": 329 }, { "epoch": 0.73, "learning_rate": 0.00010613703152036172, "loss": 1.2368, "step": 330 }, { "epoch": 0.74, "learning_rate": 0.00010604483855629952, "loss": 1.2315, "step": 331 }, { "epoch": 0.74, "learning_rate": 0.00010595238037533491, "loss": 1.1949, "step": 332 }, { "epoch": 0.74, "learning_rate": 0.0001058596575100243, "loss": 1.2115, "step": 333 }, { "epoch": 0.74, "learning_rate": 0.00010576667049444861, "loss": 1.2225, "step": 334 }, { "epoch": 0.74, "learning_rate": 0.0001056734198642103, "loss": 1.2243, "step": 335 }, { "epoch": 0.75, "learning_rate": 0.00010557990615643023, "loss": 1.2686, "step": 336 }, { "epoch": 0.75, "learning_rate": 0.00010548612990974458, "loss": 1.2224, "step": 337 }, { "epoch": 0.75, "learning_rate": 0.00010539209166430176, "loss": 1.2389, "step": 338 }, { "epoch": 0.75, "learning_rate": 0.00010529779196175924, "loss": 1.2085, "step": 339 }, { "epoch": 0.76, "learning_rate": 0.00010520323134528051, "loss": 1.2501, "step": 340 }, { "epoch": 0.76, "learning_rate": 0.00010510841035953194, "loss": 1.2202, "step": 341 }, { "epoch": 0.76, "learning_rate": 0.00010501332955067958, "loss": 1.2156, "step": 342 }, { "epoch": 0.76, "learning_rate": 0.00010491798946638606, "loss": 1.2211, "step": 343 }, { "epoch": 0.76, "learning_rate": 0.00010482239065580742, "loss": 1.2209, "step": 344 }, { "epoch": 0.77, "learning_rate": 0.00010472653366958998, "loss": 1.2249, "step": 345 }, { "epoch": 0.77, "learning_rate": 0.00010463041905986715, "loss": 1.1985, "step": 346 }, { "epoch": 0.77, "learning_rate": 0.0001045340473802562, "loss": 1.2689, "step": 347 }, { "epoch": 0.77, "learning_rate": 0.00010443741918585517, "loss": 1.2237, "step": 348 }, { "epoch": 0.78, "learning_rate": 0.00010434053503323955, "loss": 1.2322, "step": 349 }, { "epoch": 0.78, "learning_rate": 0.00010424339548045921, "loss": 1.195, "step": 350 }, { "epoch": 0.78, "learning_rate": 0.0001041460010870351, "loss": 1.1922, "step": 351 }, { "epoch": 0.78, "learning_rate": 0.00010404835241395601, "loss": 1.2476, "step": 352 }, { "epoch": 0.78, "learning_rate": 0.00010395045002367541, "loss": 1.2289, "step": 353 }, { "epoch": 0.79, "learning_rate": 0.00010385229448010814, "loss": 1.249, "step": 354 }, { "epoch": 0.79, "learning_rate": 0.00010375388634862723, "loss": 1.2138, "step": 355 }, { "epoch": 0.79, "learning_rate": 0.00010365522619606062, "loss": 1.1953, "step": 356 }, { "epoch": 0.79, "learning_rate": 0.00010355631459068779, "loss": 1.2433, "step": 357 }, { "epoch": 0.8, "learning_rate": 0.00010345715210223671, "loss": 1.1922, "step": 358 }, { "epoch": 0.8, "learning_rate": 0.00010335773930188036, "loss": 1.2071, "step": 359 }, { "epoch": 0.8, "learning_rate": 0.0001032580767622335, "loss": 1.2189, "step": 360 }, { "epoch": 0.8, "learning_rate": 0.0001031581650573494, "loss": 1.2081, "step": 361 }, { "epoch": 0.8, "learning_rate": 0.00010305800476271651, "loss": 1.2028, "step": 362 }, { "epoch": 0.81, "learning_rate": 0.00010295759645525515, "loss": 1.2179, "step": 363 }, { "epoch": 0.81, "learning_rate": 0.00010285694071331422, "loss": 1.2041, "step": 364 }, { "epoch": 0.81, "learning_rate": 0.00010275603811666778, "loss": 1.2169, "step": 365 }, { "epoch": 0.81, "learning_rate": 0.00010265488924651176, "loss": 1.2272, "step": 366 }, { "epoch": 0.82, "learning_rate": 0.00010255349468546072, "loss": 1.2104, "step": 367 }, { "epoch": 0.82, "learning_rate": 0.00010245185501754425, "loss": 1.2381, "step": 368 }, { "epoch": 0.82, "learning_rate": 0.00010234997082820383, "loss": 1.2231, "step": 369 }, { "epoch": 0.82, "learning_rate": 0.00010224784270428942, "loss": 1.2252, "step": 370 }, { "epoch": 0.82, "learning_rate": 0.00010214547123405592, "loss": 1.2611, "step": 371 }, { "epoch": 0.83, "learning_rate": 0.00010204285700715998, "loss": 1.2557, "step": 372 }, { "epoch": 0.83, "learning_rate": 0.00010194000061465648, "loss": 1.2176, "step": 373 }, { "epoch": 0.83, "learning_rate": 0.00010183690264899521, "loss": 1.2211, "step": 374 }, { "epoch": 0.83, "learning_rate": 0.00010173356370401741, "loss": 1.2117, "step": 375 }, { "epoch": 0.84, "learning_rate": 0.00010162998437495228, "loss": 1.2404, "step": 376 }, { "epoch": 0.84, "learning_rate": 0.0001015261652584137, "loss": 1.2181, "step": 377 }, { "epoch": 0.84, "learning_rate": 0.0001014221069523967, "loss": 1.2391, "step": 378 }, { "epoch": 0.84, "learning_rate": 0.00010131781005627406, "loss": 1.2204, "step": 379 }, { "epoch": 0.84, "learning_rate": 0.00010121327517079276, "loss": 1.2545, "step": 380 }, { "epoch": 0.85, "learning_rate": 0.00010110850289807066, "loss": 1.2036, "step": 381 }, { "epoch": 0.85, "learning_rate": 0.00010100349384159291, "loss": 1.2642, "step": 382 }, { "epoch": 0.85, "learning_rate": 0.00010089824860620861, "loss": 1.2331, "step": 383 }, { "epoch": 0.85, "learning_rate": 0.00010079276779812714, "loss": 1.2063, "step": 384 }, { "epoch": 0.86, "learning_rate": 0.00010068705202491485, "loss": 1.1969, "step": 385 }, { "epoch": 0.86, "learning_rate": 0.00010058110189549143, "loss": 1.2161, "step": 386 }, { "epoch": 0.86, "learning_rate": 0.00010047491802012648, "loss": 1.2371, "step": 387 }, { "epoch": 0.86, "learning_rate": 0.000100368501010436, "loss": 1.2154, "step": 388 }, { "epoch": 0.86, "learning_rate": 0.00010026185147937877, "loss": 1.2139, "step": 389 }, { "epoch": 0.87, "learning_rate": 0.00010015497004125293, "loss": 1.2437, "step": 390 }, { "epoch": 0.87, "learning_rate": 0.00010004785731169242, "loss": 1.2077, "step": 391 }, { "epoch": 0.87, "learning_rate": 9.994051390766333e-05, "loss": 1.2164, "step": 392 }, { "epoch": 0.87, "learning_rate": 9.983294044746051e-05, "loss": 1.2177, "step": 393 }, { "epoch": 0.88, "learning_rate": 9.97251375507039e-05, "loss": 1.2081, "step": 394 }, { "epoch": 0.88, "learning_rate": 9.961710583833494e-05, "loss": 1.2373, "step": 395 }, { "epoch": 0.88, "learning_rate": 9.950884593261315e-05, "loss": 1.218, "step": 396 }, { "epoch": 0.88, "learning_rate": 9.940035845711232e-05, "loss": 1.24, "step": 397 }, { "epoch": 0.88, "learning_rate": 9.929164403671711e-05, "loss": 1.1771, "step": 398 }, { "epoch": 0.89, "learning_rate": 9.918270329761933e-05, "loss": 1.2131, "step": 399 }, { "epoch": 0.89, "learning_rate": 9.907353686731444e-05, "loss": 1.2335, "step": 400 }, { "epoch": 0.89, "learning_rate": 9.89641453745978e-05, "loss": 1.2427, "step": 401 }, { "epoch": 0.89, "learning_rate": 9.885452944956118e-05, "loss": 1.2184, "step": 402 }, { "epoch": 0.9, "learning_rate": 9.874468972358904e-05, "loss": 1.2157, "step": 403 }, { "epoch": 0.9, "learning_rate": 9.863462682935493e-05, "loss": 1.2399, "step": 404 }, { "epoch": 0.9, "learning_rate": 9.852434140081789e-05, "loss": 1.221, "step": 405 }, { "epoch": 0.9, "learning_rate": 9.841383407321866e-05, "loss": 1.2568, "step": 406 }, { "epoch": 0.9, "learning_rate": 9.830310548307622e-05, "loss": 1.2178, "step": 407 }, { "epoch": 0.91, "learning_rate": 9.819215626818392e-05, "loss": 1.2101, "step": 408 }, { "epoch": 0.91, "learning_rate": 9.808098706760595e-05, "loss": 1.212, "step": 409 }, { "epoch": 0.91, "learning_rate": 9.796959852167363e-05, "loss": 1.2028, "step": 410 }, { "epoch": 0.91, "learning_rate": 9.785799127198162e-05, "loss": 1.1922, "step": 411 }, { "epoch": 0.92, "learning_rate": 9.77461659613844e-05, "loss": 1.1668, "step": 412 }, { "epoch": 0.92, "learning_rate": 9.763412323399245e-05, "loss": 1.1926, "step": 413 }, { "epoch": 0.92, "learning_rate": 9.752186373516853e-05, "loss": 1.2085, "step": 414 }, { "epoch": 0.92, "learning_rate": 9.740938811152401e-05, "loss": 1.2137, "step": 415 }, { "epoch": 0.92, "learning_rate": 9.729669701091517e-05, "loss": 1.2701, "step": 416 }, { "epoch": 0.93, "learning_rate": 9.718379108243939e-05, "loss": 1.2591, "step": 417 }, { "epoch": 0.93, "learning_rate": 9.707067097643147e-05, "loss": 1.2277, "step": 418 }, { "epoch": 0.93, "learning_rate": 9.695733734445982e-05, "loss": 1.2128, "step": 419 }, { "epoch": 0.93, "learning_rate": 9.684379083932286e-05, "loss": 1.2091, "step": 420 }, { "epoch": 0.94, "learning_rate": 9.673003211504503e-05, "loss": 1.2067, "step": 421 }, { "epoch": 0.94, "learning_rate": 9.661606182687324e-05, "loss": 1.191, "step": 422 }, { "epoch": 0.94, "learning_rate": 9.650188063127296e-05, "loss": 1.1973, "step": 423 }, { "epoch": 0.94, "learning_rate": 9.638748918592445e-05, "loss": 1.2405, "step": 424 }, { "epoch": 0.94, "learning_rate": 9.627288814971908e-05, "loss": 1.2487, "step": 425 }, { "epoch": 0.95, "learning_rate": 9.615807818275539e-05, "loss": 1.1887, "step": 426 }, { "epoch": 0.95, "learning_rate": 9.604305994633539e-05, "loss": 1.2214, "step": 427 }, { "epoch": 0.95, "learning_rate": 9.592783410296071e-05, "loss": 1.2136, "step": 428 }, { "epoch": 0.95, "learning_rate": 9.581240131632876e-05, "loss": 1.2074, "step": 429 }, { "epoch": 0.96, "learning_rate": 9.569676225132898e-05, "loss": 1.1687, "step": 430 }, { "epoch": 0.96, "learning_rate": 9.558091757403897e-05, "loss": 1.1988, "step": 431 }, { "epoch": 0.96, "learning_rate": 9.54648679517206e-05, "loss": 1.2025, "step": 432 }, { "epoch": 0.96, "learning_rate": 9.534861405281625e-05, "loss": 1.2027, "step": 433 }, { "epoch": 0.96, "learning_rate": 9.523215654694493e-05, "loss": 1.2192, "step": 434 }, { "epoch": 0.97, "learning_rate": 9.511549610489844e-05, "loss": 1.2245, "step": 435 }, { "epoch": 0.97, "learning_rate": 9.499863339863741e-05, "loss": 1.1959, "step": 436 }, { "epoch": 0.97, "learning_rate": 9.48815691012876e-05, "loss": 1.2241, "step": 437 }, { "epoch": 0.97, "learning_rate": 9.476430388713586e-05, "loss": 1.2185, "step": 438 }, { "epoch": 0.98, "learning_rate": 9.464683843162635e-05, "loss": 1.202, "step": 439 }, { "epoch": 0.98, "learning_rate": 9.45291734113566e-05, "loss": 1.2031, "step": 440 }, { "epoch": 0.98, "learning_rate": 9.441130950407367e-05, "loss": 1.2341, "step": 441 }, { "epoch": 0.98, "learning_rate": 9.42932473886701e-05, "loss": 1.2146, "step": 442 }, { "epoch": 0.98, "learning_rate": 9.417498774518019e-05, "loss": 1.2221, "step": 443 }, { "epoch": 0.99, "learning_rate": 9.4056531254776e-05, "loss": 1.2147, "step": 444 }, { "epoch": 0.99, "learning_rate": 9.393787859976338e-05, "loss": 1.229, "step": 445 }, { "epoch": 0.99, "learning_rate": 9.381903046357809e-05, "loss": 1.2305, "step": 446 }, { "epoch": 0.99, "learning_rate": 9.369998753078188e-05, "loss": 1.2403, "step": 447 }, { "epoch": 1.0, "learning_rate": 9.35807504870585e-05, "loss": 1.2102, "step": 448 }, { "epoch": 1.0, "learning_rate": 9.346132001920977e-05, "loss": 1.1927, "step": 449 }, { "epoch": 1.0, "learning_rate": 9.334169681515164e-05, "loss": 1.1759, "step": 450 }, { "epoch": 1.0, "learning_rate": 9.322188156391023e-05, "loss": 0.9853, "step": 451 }, { "epoch": 1.0, "learning_rate": 9.31018749556178e-05, "loss": 0.9395, "step": 452 }, { "epoch": 1.01, "learning_rate": 9.29816776815089e-05, "loss": 0.9399, "step": 453 }, { "epoch": 1.01, "learning_rate": 9.28612904339162e-05, "loss": 0.8974, "step": 454 }, { "epoch": 1.01, "learning_rate": 9.27407139062667e-05, "loss": 0.9517, "step": 455 }, { "epoch": 1.01, "learning_rate": 9.261994879307761e-05, "loss": 0.889, "step": 456 }, { "epoch": 1.02, "learning_rate": 9.24989957899524e-05, "loss": 0.9045, "step": 457 }, { "epoch": 1.02, "learning_rate": 9.237785559357675e-05, "loss": 0.8965, "step": 458 }, { "epoch": 1.02, "learning_rate": 9.225652890171464e-05, "loss": 0.9576, "step": 459 }, { "epoch": 1.02, "learning_rate": 9.213501641320418e-05, "loss": 0.9374, "step": 460 }, { "epoch": 1.02, "learning_rate": 9.20133188279537e-05, "loss": 0.8864, "step": 461 }, { "epoch": 1.03, "learning_rate": 9.189143684693768e-05, "loss": 0.8912, "step": 462 }, { "epoch": 1.03, "learning_rate": 9.176937117219272e-05, "loss": 0.8981, "step": 463 }, { "epoch": 1.03, "learning_rate": 9.164712250681344e-05, "loss": 0.892, "step": 464 }, { "epoch": 1.03, "learning_rate": 9.152469155494857e-05, "loss": 0.8896, "step": 465 }, { "epoch": 1.04, "learning_rate": 9.140207902179673e-05, "loss": 0.9112, "step": 466 }, { "epoch": 1.04, "learning_rate": 9.127928561360246e-05, "loss": 0.8909, "step": 467 }, { "epoch": 1.04, "learning_rate": 9.115631203765218e-05, "loss": 0.9034, "step": 468 }, { "epoch": 1.04, "learning_rate": 9.103315900226999e-05, "loss": 0.903, "step": 469 }, { "epoch": 1.04, "learning_rate": 9.090982721681376e-05, "loss": 0.9069, "step": 470 }, { "epoch": 1.05, "learning_rate": 9.07863173916709e-05, "loss": 0.877, "step": 471 }, { "epoch": 1.05, "learning_rate": 9.06626302382543e-05, "loss": 0.874, "step": 472 }, { "epoch": 1.05, "learning_rate": 9.05387664689983e-05, "loss": 0.8924, "step": 473 }, { "epoch": 1.05, "learning_rate": 9.041472679735459e-05, "loss": 0.9288, "step": 474 }, { "epoch": 1.06, "learning_rate": 9.029051193778793e-05, "loss": 0.8858, "step": 475 }, { "epoch": 1.06, "learning_rate": 9.016612260577223e-05, "loss": 0.869, "step": 476 }, { "epoch": 1.06, "learning_rate": 9.004155951778635e-05, "loss": 0.8812, "step": 477 }, { "epoch": 1.06, "learning_rate": 8.991682339130999e-05, "loss": 0.8824, "step": 478 }, { "epoch": 1.06, "learning_rate": 8.979191494481956e-05, "loss": 0.891, "step": 479 }, { "epoch": 1.07, "learning_rate": 8.966683489778394e-05, "loss": 0.8898, "step": 480 }, { "epoch": 1.07, "learning_rate": 8.954158397066053e-05, "loss": 0.8971, "step": 481 }, { "epoch": 1.07, "learning_rate": 8.941616288489093e-05, "loss": 0.8956, "step": 482 }, { "epoch": 1.07, "learning_rate": 8.929057236289687e-05, "loss": 0.903, "step": 483 }, { "epoch": 1.08, "learning_rate": 8.916481312807606e-05, "loss": 0.9051, "step": 484 }, { "epoch": 1.08, "learning_rate": 8.90388859047979e-05, "loss": 0.8458, "step": 485 }, { "epoch": 1.08, "learning_rate": 8.891279141839948e-05, "loss": 0.8767, "step": 486 }, { "epoch": 1.08, "learning_rate": 8.878653039518131e-05, "loss": 0.878, "step": 487 }, { "epoch": 1.08, "learning_rate": 8.866010356240313e-05, "loss": 0.9164, "step": 488 }, { "epoch": 1.09, "learning_rate": 8.853351164827973e-05, "loss": 0.8943, "step": 489 }, { "epoch": 1.09, "learning_rate": 8.840675538197676e-05, "loss": 0.8845, "step": 490 }, { "epoch": 1.09, "learning_rate": 8.827983549360659e-05, "loss": 0.8685, "step": 491 }, { "epoch": 1.09, "learning_rate": 8.815275271422398e-05, "loss": 0.898, "step": 492 }, { "epoch": 1.1, "learning_rate": 8.802550777582197e-05, "loss": 0.8824, "step": 493 }, { "epoch": 1.1, "learning_rate": 8.789810141132762e-05, "loss": 0.9068, "step": 494 }, { "epoch": 1.1, "learning_rate": 8.777053435459781e-05, "loss": 0.9178, "step": 495 }, { "epoch": 1.1, "learning_rate": 8.7642807340415e-05, "loss": 0.886, "step": 496 }, { "epoch": 1.1, "learning_rate": 8.7514921104483e-05, "loss": 0.8724, "step": 497 }, { "epoch": 1.11, "learning_rate": 8.738687638342273e-05, "loss": 0.8978, "step": 498 }, { "epoch": 1.11, "learning_rate": 8.725867391476798e-05, "loss": 0.9092, "step": 499 }, { "epoch": 1.11, "learning_rate": 8.713031443696114e-05, "loss": 0.9074, "step": 500 }, { "epoch": 1.11, "learning_rate": 8.700179868934902e-05, "loss": 0.9011, "step": 501 }, { "epoch": 1.12, "learning_rate": 8.687312741217851e-05, "loss": 0.8765, "step": 502 }, { "epoch": 1.12, "learning_rate": 8.67443013465923e-05, "loss": 0.8833, "step": 503 }, { "epoch": 1.12, "learning_rate": 8.661532123462474e-05, "loss": 0.904, "step": 504 }, { "epoch": 1.12, "learning_rate": 8.648618781919745e-05, "loss": 0.9168, "step": 505 }, { "epoch": 1.12, "learning_rate": 8.635690184411505e-05, "loss": 0.9153, "step": 506 }, { "epoch": 1.13, "learning_rate": 8.62274640540609e-05, "loss": 0.8865, "step": 507 }, { "epoch": 1.13, "learning_rate": 8.609787519459285e-05, "loss": 0.9066, "step": 508 }, { "epoch": 1.13, "learning_rate": 8.596813601213889e-05, "loss": 0.8977, "step": 509 }, { "epoch": 1.13, "learning_rate": 8.583824725399285e-05, "loss": 0.8505, "step": 510 }, { "epoch": 1.14, "learning_rate": 8.570820966831008e-05, "loss": 0.9051, "step": 511 }, { "epoch": 1.14, "learning_rate": 8.557802400410326e-05, "loss": 0.8694, "step": 512 }, { "epoch": 1.14, "learning_rate": 8.544769101123793e-05, "loss": 0.8798, "step": 513 }, { "epoch": 1.14, "learning_rate": 8.531721144042826e-05, "loss": 0.8827, "step": 514 }, { "epoch": 1.14, "learning_rate": 8.518658604323272e-05, "loss": 0.8774, "step": 515 }, { "epoch": 1.15, "learning_rate": 8.505581557204968e-05, "loss": 0.9107, "step": 516 }, { "epoch": 1.15, "learning_rate": 8.49249007801132e-05, "loss": 0.8869, "step": 517 }, { "epoch": 1.15, "learning_rate": 8.479384242148856e-05, "loss": 0.9126, "step": 518 }, { "epoch": 1.15, "learning_rate": 8.466264125106806e-05, "loss": 0.9356, "step": 519 }, { "epoch": 1.16, "learning_rate": 8.45312980245665e-05, "loss": 0.8958, "step": 520 }, { "epoch": 1.16, "learning_rate": 8.4399813498517e-05, "loss": 0.8788, "step": 521 }, { "epoch": 1.16, "learning_rate": 8.42681884302665e-05, "loss": 0.8861, "step": 522 }, { "epoch": 1.16, "learning_rate": 8.413642357797148e-05, "loss": 0.8971, "step": 523 }, { "epoch": 1.16, "learning_rate": 8.40045197005936e-05, "loss": 0.882, "step": 524 }, { "epoch": 1.17, "learning_rate": 8.387247755789525e-05, "loss": 0.909, "step": 525 }, { "epoch": 1.17, "learning_rate": 8.374029791043527e-05, "loss": 0.8958, "step": 526 }, { "epoch": 1.17, "learning_rate": 8.36079815195645e-05, "loss": 0.9146, "step": 527 }, { "epoch": 1.17, "learning_rate": 8.347552914742142e-05, "loss": 0.8973, "step": 528 }, { "epoch": 1.18, "learning_rate": 8.334294155692774e-05, "loss": 0.9057, "step": 529 }, { "epoch": 1.18, "learning_rate": 8.321021951178405e-05, "loss": 0.8658, "step": 530 }, { "epoch": 1.18, "learning_rate": 8.30773637764654e-05, "loss": 0.9342, "step": 531 }, { "epoch": 1.18, "learning_rate": 8.29443751162169e-05, "loss": 0.8892, "step": 532 }, { "epoch": 1.18, "learning_rate": 8.281125429704923e-05, "loss": 0.9283, "step": 533 }, { "epoch": 1.19, "learning_rate": 8.26780020857344e-05, "loss": 0.8811, "step": 534 }, { "epoch": 1.19, "learning_rate": 8.254461924980116e-05, "loss": 0.8753, "step": 535 }, { "epoch": 1.19, "learning_rate": 8.24111065575307e-05, "loss": 0.8788, "step": 536 }, { "epoch": 1.19, "learning_rate": 8.227746477795215e-05, "loss": 0.9025, "step": 537 }, { "epoch": 1.2, "learning_rate": 8.21436946808382e-05, "loss": 0.9066, "step": 538 }, { "epoch": 1.2, "learning_rate": 8.200979703670062e-05, "loss": 0.8974, "step": 539 }, { "epoch": 1.2, "learning_rate": 8.18757726167859e-05, "loss": 0.898, "step": 540 }, { "epoch": 1.2, "learning_rate": 8.17416221930707e-05, "loss": 0.9442, "step": 541 }, { "epoch": 1.2, "learning_rate": 8.160734653825743e-05, "loss": 0.9257, "step": 542 }, { "epoch": 1.21, "learning_rate": 8.147294642576993e-05, "loss": 0.9328, "step": 543 }, { "epoch": 1.21, "learning_rate": 8.133842262974885e-05, "loss": 0.8714, "step": 544 }, { "epoch": 1.21, "learning_rate": 8.120377592504725e-05, "loss": 0.8962, "step": 545 }, { "epoch": 1.21, "learning_rate": 8.106900708722612e-05, "loss": 0.9002, "step": 546 }, { "epoch": 1.22, "learning_rate": 8.093411689255001e-05, "loss": 0.912, "step": 547 }, { "epoch": 1.22, "learning_rate": 8.07991061179824e-05, "loss": 0.8852, "step": 548 }, { "epoch": 1.22, "learning_rate": 8.066397554118136e-05, "loss": 0.8716, "step": 549 }, { "epoch": 1.22, "learning_rate": 8.052872594049501e-05, "loss": 0.9013, "step": 550 }, { "epoch": 1.22, "learning_rate": 8.0393358094957e-05, "loss": 0.9178, "step": 551 }, { "epoch": 1.23, "learning_rate": 8.025787278428213e-05, "loss": 0.9117, "step": 552 }, { "epoch": 1.23, "learning_rate": 8.012227078886174e-05, "loss": 0.9364, "step": 553 }, { "epoch": 1.23, "learning_rate": 7.998655288975931e-05, "loss": 0.886, "step": 554 }, { "epoch": 1.23, "learning_rate": 7.985071986870591e-05, "loss": 0.8811, "step": 555 }, { "epoch": 1.24, "learning_rate": 7.971477250809569e-05, "loss": 0.9123, "step": 556 }, { "epoch": 1.24, "learning_rate": 7.957871159098143e-05, "loss": 0.8964, "step": 557 }, { "epoch": 1.24, "learning_rate": 7.944253790106996e-05, "loss": 0.8677, "step": 558 }, { "epoch": 1.24, "learning_rate": 7.930625222271768e-05, "loss": 0.9193, "step": 559 }, { "epoch": 1.24, "learning_rate": 7.916985534092606e-05, "loss": 0.9202, "step": 560 }, { "epoch": 1.25, "learning_rate": 7.903334804133711e-05, "loss": 0.8913, "step": 561 }, { "epoch": 1.25, "learning_rate": 7.889673111022878e-05, "loss": 0.884, "step": 562 }, { "epoch": 1.25, "learning_rate": 7.876000533451057e-05, "loss": 0.8619, "step": 563 }, { "epoch": 1.25, "learning_rate": 7.862317150171886e-05, "loss": 0.9247, "step": 564 }, { "epoch": 1.26, "learning_rate": 7.848623040001246e-05, "loss": 0.9068, "step": 565 }, { "epoch": 1.26, "learning_rate": 7.834918281816805e-05, "loss": 0.8869, "step": 566 }, { "epoch": 1.26, "learning_rate": 7.821202954557568e-05, "loss": 0.8924, "step": 567 }, { "epoch": 1.26, "learning_rate": 7.807477137223406e-05, "loss": 0.906, "step": 568 }, { "epoch": 1.26, "learning_rate": 7.793740908874622e-05, "loss": 0.9001, "step": 569 }, { "epoch": 1.27, "learning_rate": 7.779994348631484e-05, "loss": 0.9018, "step": 570 }, { "epoch": 1.27, "learning_rate": 7.76623753567377e-05, "loss": 0.9202, "step": 571 }, { "epoch": 1.27, "learning_rate": 7.752470549240314e-05, "loss": 0.8876, "step": 572 }, { "epoch": 1.27, "learning_rate": 7.738693468628548e-05, "loss": 0.8735, "step": 573 }, { "epoch": 1.28, "learning_rate": 7.724906373194049e-05, "loss": 0.8747, "step": 574 }, { "epoch": 1.28, "learning_rate": 7.711109342350075e-05, "loss": 0.9046, "step": 575 }, { "epoch": 1.28, "learning_rate": 7.697302455567116e-05, "loss": 0.8478, "step": 576 }, { "epoch": 1.28, "learning_rate": 7.683485792372427e-05, "loss": 0.887, "step": 577 }, { "epoch": 1.28, "learning_rate": 7.669659432349581e-05, "loss": 0.8691, "step": 578 }, { "epoch": 1.29, "learning_rate": 7.655823455137998e-05, "loss": 0.9361, "step": 579 }, { "epoch": 1.29, "learning_rate": 7.641977940432499e-05, "loss": 0.8743, "step": 580 }, { "epoch": 1.29, "learning_rate": 7.628122967982834e-05, "loss": 0.8967, "step": 581 }, { "epoch": 1.29, "learning_rate": 7.614258617593234e-05, "loss": 0.8915, "step": 582 }, { "epoch": 1.3, "learning_rate": 7.600384969121945e-05, "loss": 0.8841, "step": 583 }, { "epoch": 1.3, "learning_rate": 7.586502102480773e-05, "loss": 0.8665, "step": 584 }, { "epoch": 1.3, "learning_rate": 7.572610097634613e-05, "loss": 0.8986, "step": 585 }, { "epoch": 1.3, "learning_rate": 7.558709034601004e-05, "loss": 0.9087, "step": 586 }, { "epoch": 1.3, "learning_rate": 7.544798993449654e-05, "loss": 0.9034, "step": 587 }, { "epoch": 1.31, "learning_rate": 7.53088005430199e-05, "loss": 0.8983, "step": 588 }, { "epoch": 1.31, "learning_rate": 7.516952297330684e-05, "loss": 0.9164, "step": 589 }, { "epoch": 1.31, "learning_rate": 7.503015802759202e-05, "loss": 0.8989, "step": 590 }, { "epoch": 1.31, "learning_rate": 7.489070650861344e-05, "loss": 0.921, "step": 591 }, { "epoch": 1.32, "learning_rate": 7.475116921960766e-05, "loss": 0.8921, "step": 592 }, { "epoch": 1.32, "learning_rate": 7.461154696430534e-05, "loss": 0.8911, "step": 593 }, { "epoch": 1.32, "learning_rate": 7.447184054692651e-05, "loss": 0.8925, "step": 594 }, { "epoch": 1.32, "learning_rate": 7.4332050772176e-05, "loss": 0.9112, "step": 595 }, { "epoch": 1.32, "learning_rate": 7.419217844523875e-05, "loss": 0.9141, "step": 596 }, { "epoch": 1.33, "learning_rate": 7.405222437177523e-05, "loss": 0.8961, "step": 597 }, { "epoch": 1.33, "learning_rate": 7.391218935791671e-05, "loss": 0.9301, "step": 598 }, { "epoch": 1.33, "learning_rate": 7.377207421026074e-05, "loss": 0.8793, "step": 599 }, { "epoch": 1.33, "learning_rate": 7.363187973586639e-05, "loss": 0.8918, "step": 600 }, { "epoch": 1.34, "learning_rate": 7.349160674224967e-05, "loss": 0.9046, "step": 601 }, { "epoch": 1.34, "learning_rate": 7.335125603737886e-05, "loss": 0.871, "step": 602 }, { "epoch": 1.34, "learning_rate": 7.321082842966986e-05, "loss": 0.8872, "step": 603 }, { "epoch": 1.34, "learning_rate": 7.307032472798151e-05, "loss": 0.91, "step": 604 }, { "epoch": 1.34, "learning_rate": 7.292974574161098e-05, "loss": 0.927, "step": 605 }, { "epoch": 1.35, "learning_rate": 7.278909228028903e-05, "loss": 0.9181, "step": 606 }, { "epoch": 1.35, "learning_rate": 7.26483651541754e-05, "loss": 0.8765, "step": 607 }, { "epoch": 1.35, "learning_rate": 7.25075651738542e-05, "loss": 0.9266, "step": 608 }, { "epoch": 1.35, "learning_rate": 7.236669315032912e-05, "loss": 0.9351, "step": 609 }, { "epoch": 1.36, "learning_rate": 7.222574989501881e-05, "loss": 0.8773, "step": 610 }, { "epoch": 1.36, "learning_rate": 7.208473621975227e-05, "loss": 0.9041, "step": 611 }, { "epoch": 1.36, "learning_rate": 7.194365293676404e-05, "loss": 0.9115, "step": 612 }, { "epoch": 1.36, "learning_rate": 7.180250085868969e-05, "loss": 0.8905, "step": 613 }, { "epoch": 1.36, "learning_rate": 7.166128079856096e-05, "loss": 0.883, "step": 614 }, { "epoch": 1.37, "learning_rate": 7.151999356980121e-05, "loss": 0.9163, "step": 615 }, { "epoch": 1.37, "learning_rate": 7.137863998622067e-05, "loss": 0.8888, "step": 616 }, { "epoch": 1.37, "learning_rate": 7.123722086201181e-05, "loss": 0.9359, "step": 617 }, { "epoch": 1.37, "learning_rate": 7.109573701174457e-05, "loss": 0.8758, "step": 618 }, { "epoch": 1.38, "learning_rate": 7.09541892503617e-05, "loss": 0.8859, "step": 619 }, { "epoch": 1.38, "learning_rate": 7.081257839317415e-05, "loss": 0.8991, "step": 620 }, { "epoch": 1.38, "learning_rate": 7.067090525585621e-05, "loss": 0.8943, "step": 621 }, { "epoch": 1.38, "learning_rate": 7.052917065444098e-05, "loss": 0.8796, "step": 622 }, { "epoch": 1.38, "learning_rate": 7.03873754053155e-05, "loss": 0.8951, "step": 623 }, { "epoch": 1.39, "learning_rate": 7.024552032521625e-05, "loss": 0.9414, "step": 624 }, { "epoch": 1.39, "learning_rate": 7.010360623122425e-05, "loss": 0.8948, "step": 625 }, { "epoch": 1.39, "learning_rate": 6.996163394076047e-05, "loss": 0.8822, "step": 626 }, { "epoch": 1.39, "learning_rate": 6.98196042715811e-05, "loss": 0.9204, "step": 627 }, { "epoch": 1.4, "learning_rate": 6.967751804177279e-05, "loss": 0.8788, "step": 628 }, { "epoch": 1.4, "learning_rate": 6.953537606974799e-05, "loss": 0.9132, "step": 629 }, { "epoch": 1.4, "learning_rate": 6.939317917424028e-05, "loss": 0.9086, "step": 630 }, { "epoch": 1.4, "learning_rate": 6.925092817429956e-05, "loss": 0.8749, "step": 631 }, { "epoch": 1.4, "learning_rate": 6.910862388928732e-05, "loss": 0.8853, "step": 632 }, { "epoch": 1.41, "learning_rate": 6.896626713887203e-05, "loss": 0.9135, "step": 633 }, { "epoch": 1.41, "learning_rate": 6.882385874302436e-05, "loss": 0.8551, "step": 634 }, { "epoch": 1.41, "learning_rate": 6.868139952201243e-05, "loss": 0.8678, "step": 635 }, { "epoch": 1.41, "learning_rate": 6.853889029639712e-05, "loss": 0.9216, "step": 636 }, { "epoch": 1.42, "learning_rate": 6.839633188702733e-05, "loss": 0.8722, "step": 637 }, { "epoch": 1.42, "learning_rate": 6.825372511503526e-05, "loss": 0.9105, "step": 638 }, { "epoch": 1.42, "learning_rate": 6.811107080183171e-05, "loss": 0.8553, "step": 639 }, { "epoch": 1.42, "learning_rate": 6.796836976910128e-05, "loss": 0.8705, "step": 640 }, { "epoch": 1.42, "learning_rate": 6.782562283879765e-05, "loss": 0.9197, "step": 641 }, { "epoch": 1.43, "learning_rate": 6.768283083313891e-05, "loss": 0.8856, "step": 642 }, { "epoch": 1.43, "learning_rate": 6.753999457460279e-05, "loss": 0.8936, "step": 643 }, { "epoch": 1.43, "learning_rate": 6.739711488592188e-05, "loss": 0.8796, "step": 644 }, { "epoch": 1.43, "learning_rate": 6.725419259007895e-05, "loss": 0.8901, "step": 645 }, { "epoch": 1.44, "learning_rate": 6.711122851030217e-05, "loss": 0.9187, "step": 646 }, { "epoch": 1.44, "learning_rate": 6.696822347006038e-05, "loss": 0.8882, "step": 647 }, { "epoch": 1.44, "learning_rate": 6.682517829305842e-05, "loss": 0.9129, "step": 648 }, { "epoch": 1.44, "learning_rate": 6.668209380323221e-05, "loss": 0.9057, "step": 649 }, { "epoch": 1.44, "learning_rate": 6.653897082474416e-05, "loss": 0.8826, "step": 650 }, { "epoch": 1.45, "learning_rate": 6.639581018197841e-05, "loss": 0.8933, "step": 651 }, { "epoch": 1.45, "learning_rate": 6.625261269953598e-05, "loss": 0.8698, "step": 652 }, { "epoch": 1.45, "learning_rate": 6.610937920223014e-05, "loss": 0.9076, "step": 653 }, { "epoch": 1.45, "learning_rate": 6.596611051508155e-05, "loss": 0.8564, "step": 654 }, { "epoch": 1.46, "learning_rate": 6.58228074633136e-05, "loss": 0.8766, "step": 655 }, { "epoch": 1.46, "learning_rate": 6.567947087234762e-05, "loss": 0.9037, "step": 656 }, { "epoch": 1.46, "learning_rate": 6.553610156779812e-05, "loss": 0.9012, "step": 657 }, { "epoch": 1.46, "learning_rate": 6.539270037546804e-05, "loss": 0.9266, "step": 658 }, { "epoch": 1.46, "learning_rate": 6.524926812134396e-05, "loss": 0.8679, "step": 659 }, { "epoch": 1.47, "learning_rate": 6.510580563159145e-05, "loss": 0.9083, "step": 660 }, { "epoch": 1.47, "learning_rate": 6.496231373255014e-05, "loss": 0.8996, "step": 661 }, { "epoch": 1.47, "learning_rate": 6.481879325072914e-05, "loss": 0.9023, "step": 662 }, { "epoch": 1.47, "learning_rate": 6.467524501280213e-05, "loss": 0.9016, "step": 663 }, { "epoch": 1.48, "learning_rate": 6.453166984560274e-05, "loss": 0.8643, "step": 664 }, { "epoch": 1.48, "learning_rate": 6.438806857611963e-05, "loss": 0.8907, "step": 665 }, { "epoch": 1.48, "learning_rate": 6.424444203149187e-05, "loss": 0.8973, "step": 666 }, { "epoch": 1.48, "learning_rate": 6.410079103900409e-05, "loss": 0.9139, "step": 667 }, { "epoch": 1.48, "learning_rate": 6.395711642608172e-05, "loss": 0.8845, "step": 668 }, { "epoch": 1.49, "learning_rate": 6.381341902028629e-05, "loss": 0.8828, "step": 669 }, { "epoch": 1.49, "learning_rate": 6.366969964931058e-05, "loss": 0.882, "step": 670 }, { "epoch": 1.49, "learning_rate": 6.352595914097388e-05, "loss": 0.8749, "step": 671 }, { "epoch": 1.49, "learning_rate": 6.338219832321725e-05, "loss": 0.9002, "step": 672 }, { "epoch": 1.5, "learning_rate": 6.323841802409875e-05, "loss": 0.8694, "step": 673 }, { "epoch": 1.5, "learning_rate": 6.309461907178863e-05, "loss": 0.9151, "step": 674 }, { "epoch": 1.5, "learning_rate": 6.295080229456456e-05, "loss": 0.9025, "step": 675 }, { "epoch": 1.5, "learning_rate": 6.280696852080694e-05, "loss": 0.8891, "step": 676 }, { "epoch": 1.5, "learning_rate": 6.2663118578994e-05, "loss": 0.9178, "step": 677 }, { "epoch": 1.51, "learning_rate": 6.251925329769718e-05, "loss": 0.9171, "step": 678 }, { "epoch": 1.51, "learning_rate": 6.237537350557617e-05, "loss": 0.9079, "step": 679 }, { "epoch": 1.51, "learning_rate": 6.223148003137435e-05, "loss": 0.8745, "step": 680 }, { "epoch": 1.51, "learning_rate": 6.208757370391379e-05, "loss": 0.9134, "step": 681 }, { "epoch": 1.52, "learning_rate": 6.194365535209074e-05, "loss": 0.8587, "step": 682 }, { "epoch": 1.52, "learning_rate": 6.179972580487057e-05, "loss": 0.9039, "step": 683 }, { "epoch": 1.52, "learning_rate": 6.165578589128323e-05, "loss": 0.9315, "step": 684 }, { "epoch": 1.52, "learning_rate": 6.151183644041834e-05, "loss": 0.9108, "step": 685 }, { "epoch": 1.52, "learning_rate": 6.136787828142047e-05, "loss": 0.8866, "step": 686 }, { "epoch": 1.53, "learning_rate": 6.122391224348433e-05, "loss": 0.8872, "step": 687 }, { "epoch": 1.53, "learning_rate": 6.107993915585001e-05, "loss": 0.9115, "step": 688 }, { "epoch": 1.53, "learning_rate": 6.0935959847798226e-05, "loss": 0.8776, "step": 689 }, { "epoch": 1.53, "learning_rate": 6.079197514864554e-05, "loss": 0.8889, "step": 690 }, { "epoch": 1.54, "learning_rate": 6.064798588773952e-05, "loss": 0.922, "step": 691 }, { "epoch": 1.54, "learning_rate": 6.0503992894454006e-05, "loss": 0.907, "step": 692 }, { "epoch": 1.54, "learning_rate": 6.035999699818442e-05, "loss": 0.9033, "step": 693 }, { "epoch": 1.54, "learning_rate": 6.02159990283428e-05, "loss": 0.8618, "step": 694 }, { "epoch": 1.54, "learning_rate": 6.0071999814353204e-05, "loss": 0.8837, "step": 695 }, { "epoch": 1.55, "learning_rate": 5.99280001856468e-05, "loss": 0.8993, "step": 696 }, { "epoch": 1.55, "learning_rate": 5.97840009716572e-05, "loss": 0.8508, "step": 697 }, { "epoch": 1.55, "learning_rate": 5.964000300181559e-05, "loss": 0.9043, "step": 698 }, { "epoch": 1.55, "learning_rate": 5.9496007105546004e-05, "loss": 0.9256, "step": 699 }, { "epoch": 1.56, "learning_rate": 5.935201411226049e-05, "loss": 0.8824, "step": 700 }, { "epoch": 1.56, "learning_rate": 5.920802485135447e-05, "loss": 0.9082, "step": 701 }, { "epoch": 1.56, "learning_rate": 5.9064040152201777e-05, "loss": 0.9122, "step": 702 }, { "epoch": 1.56, "learning_rate": 5.892006084415001e-05, "loss": 0.8684, "step": 703 }, { "epoch": 1.56, "learning_rate": 5.87760877565157e-05, "loss": 0.8681, "step": 704 }, { "epoch": 1.57, "learning_rate": 5.863212171857953e-05, "loss": 0.8688, "step": 705 }, { "epoch": 1.57, "learning_rate": 5.8488163559581656e-05, "loss": 0.867, "step": 706 }, { "epoch": 1.57, "learning_rate": 5.8344214108716775e-05, "loss": 0.8632, "step": 707 }, { "epoch": 1.57, "learning_rate": 5.820027419512944e-05, "loss": 0.8988, "step": 708 }, { "epoch": 1.58, "learning_rate": 5.805634464790927e-05, "loss": 0.8976, "step": 709 }, { "epoch": 1.58, "learning_rate": 5.791242629608622e-05, "loss": 0.8957, "step": 710 }, { "epoch": 1.58, "learning_rate": 5.7768519968625685e-05, "loss": 0.8449, "step": 711 }, { "epoch": 1.58, "learning_rate": 5.7624626494423846e-05, "loss": 0.9322, "step": 712 }, { "epoch": 1.58, "learning_rate": 5.748074670230282e-05, "loss": 0.8835, "step": 713 }, { "epoch": 1.59, "learning_rate": 5.733688142100598e-05, "loss": 0.9181, "step": 714 }, { "epoch": 1.59, "learning_rate": 5.7193031479193065e-05, "loss": 0.8979, "step": 715 }, { "epoch": 1.59, "learning_rate": 5.704919770543544e-05, "loss": 0.8659, "step": 716 }, { "epoch": 1.59, "learning_rate": 5.690538092821139e-05, "loss": 0.8898, "step": 717 }, { "epoch": 1.6, "learning_rate": 5.6761581975901255e-05, "loss": 0.8836, "step": 718 }, { "epoch": 1.6, "learning_rate": 5.661780167678277e-05, "loss": 0.9026, "step": 719 }, { "epoch": 1.6, "learning_rate": 5.6474040859026145e-05, "loss": 0.9006, "step": 720 }, { "epoch": 1.6, "learning_rate": 5.633030035068945e-05, "loss": 0.8909, "step": 721 }, { "epoch": 1.6, "learning_rate": 5.6186580979713706e-05, "loss": 0.9137, "step": 722 }, { "epoch": 1.61, "learning_rate": 5.604288357391828e-05, "loss": 0.912, "step": 723 }, { "epoch": 1.61, "learning_rate": 5.5899208960995915e-05, "loss": 0.9013, "step": 724 }, { "epoch": 1.61, "learning_rate": 5.575555796850813e-05, "loss": 0.8905, "step": 725 }, { "epoch": 1.61, "learning_rate": 5.561193142388037e-05, "loss": 0.9419, "step": 726 }, { "epoch": 1.62, "learning_rate": 5.546833015439727e-05, "loss": 0.9136, "step": 727 }, { "epoch": 1.62, "learning_rate": 5.5324754987197876e-05, "loss": 0.8949, "step": 728 }, { "epoch": 1.62, "learning_rate": 5.518120674927088e-05, "loss": 0.8903, "step": 729 }, { "epoch": 1.62, "learning_rate": 5.5037686267449886e-05, "loss": 0.8621, "step": 730 }, { "epoch": 1.62, "learning_rate": 5.489419436840856e-05, "loss": 0.873, "step": 731 }, { "epoch": 1.63, "learning_rate": 5.475073187865603e-05, "loss": 0.8902, "step": 732 }, { "epoch": 1.63, "learning_rate": 5.460729962453197e-05, "loss": 0.8985, "step": 733 }, { "epoch": 1.63, "learning_rate": 5.446389843220189e-05, "loss": 0.8906, "step": 734 }, { "epoch": 1.63, "learning_rate": 5.4320529127652394e-05, "loss": 0.8643, "step": 735 }, { "epoch": 1.64, "learning_rate": 5.417719253668641e-05, "loss": 0.8695, "step": 736 }, { "epoch": 1.64, "learning_rate": 5.4033889484918476e-05, "loss": 0.8997, "step": 737 }, { "epoch": 1.64, "learning_rate": 5.3890620797769894e-05, "loss": 0.8748, "step": 738 }, { "epoch": 1.64, "learning_rate": 5.374738730046401e-05, "loss": 0.8905, "step": 739 }, { "epoch": 1.64, "learning_rate": 5.360418981802159e-05, "loss": 0.9044, "step": 740 }, { "epoch": 1.65, "learning_rate": 5.3461029175255834e-05, "loss": 0.9008, "step": 741 }, { "epoch": 1.65, "learning_rate": 5.33179061967678e-05, "loss": 0.8648, "step": 742 }, { "epoch": 1.65, "learning_rate": 5.317482170694159e-05, "loss": 0.8987, "step": 743 }, { "epoch": 1.65, "learning_rate": 5.303177652993962e-05, "loss": 0.9102, "step": 744 }, { "epoch": 1.66, "learning_rate": 5.288877148969784e-05, "loss": 0.8639, "step": 745 }, { "epoch": 1.66, "learning_rate": 5.274580740992107e-05, "loss": 0.8713, "step": 746 }, { "epoch": 1.66, "learning_rate": 5.260288511407814e-05, "loss": 0.8878, "step": 747 }, { "epoch": 1.66, "learning_rate": 5.246000542539721e-05, "loss": 0.888, "step": 748 }, { "epoch": 1.66, "learning_rate": 5.2317169166861096e-05, "loss": 0.8598, "step": 749 }, { "epoch": 1.67, "learning_rate": 5.217437716120237e-05, "loss": 0.9094, "step": 750 }, { "epoch": 1.67, "learning_rate": 5.203163023089874e-05, "loss": 0.8648, "step": 751 }, { "epoch": 1.67, "learning_rate": 5.18889291981683e-05, "loss": 0.8844, "step": 752 }, { "epoch": 1.67, "learning_rate": 5.174627488496475e-05, "loss": 0.8593, "step": 753 }, { "epoch": 1.68, "learning_rate": 5.160366811297269e-05, "loss": 0.8916, "step": 754 }, { "epoch": 1.68, "learning_rate": 5.14611097036029e-05, "loss": 0.8647, "step": 755 }, { "epoch": 1.68, "learning_rate": 5.131860047798759e-05, "loss": 0.8975, "step": 756 }, { "epoch": 1.68, "learning_rate": 5.117614125697564e-05, "loss": 0.898, "step": 757 }, { "epoch": 1.68, "learning_rate": 5.103373286112797e-05, "loss": 0.8703, "step": 758 }, { "epoch": 1.69, "learning_rate": 5.089137611071269e-05, "loss": 0.865, "step": 759 }, { "epoch": 1.69, "learning_rate": 5.0749071825700455e-05, "loss": 0.888, "step": 760 }, { "epoch": 1.69, "learning_rate": 5.060682082575972e-05, "loss": 0.8707, "step": 761 }, { "epoch": 1.69, "learning_rate": 5.046462393025202e-05, "loss": 0.8743, "step": 762 }, { "epoch": 1.7, "learning_rate": 5.032248195822724e-05, "loss": 0.8957, "step": 763 }, { "epoch": 1.7, "learning_rate": 5.0180395728418923e-05, "loss": 0.8876, "step": 764 }, { "epoch": 1.7, "learning_rate": 5.003836605923952e-05, "loss": 0.8571, "step": 765 }, { "epoch": 1.7, "learning_rate": 4.989639376877574e-05, "loss": 0.8655, "step": 766 }, { "epoch": 1.7, "learning_rate": 4.9754479674783744e-05, "loss": 0.8823, "step": 767 }, { "epoch": 1.71, "learning_rate": 4.96126245946845e-05, "loss": 0.8753, "step": 768 }, { "epoch": 1.71, "learning_rate": 4.9470829345559044e-05, "loss": 0.8835, "step": 769 }, { "epoch": 1.71, "learning_rate": 4.93290947441438e-05, "loss": 0.8921, "step": 770 }, { "epoch": 1.71, "learning_rate": 4.918742160682586e-05, "loss": 0.876, "step": 771 }, { "epoch": 1.72, "learning_rate": 4.9045810749638305e-05, "loss": 0.912, "step": 772 }, { "epoch": 1.72, "learning_rate": 4.890426298825546e-05, "loss": 0.9259, "step": 773 }, { "epoch": 1.72, "learning_rate": 4.876277913798819e-05, "loss": 0.8822, "step": 774 }, { "epoch": 1.72, "learning_rate": 4.862136001377932e-05, "loss": 0.8957, "step": 775 }, { "epoch": 1.72, "learning_rate": 4.84800064301988e-05, "loss": 0.8935, "step": 776 }, { "epoch": 1.73, "learning_rate": 4.833871920143905e-05, "loss": 0.8739, "step": 777 }, { "epoch": 1.73, "learning_rate": 4.819749914131032e-05, "loss": 0.88, "step": 778 }, { "epoch": 1.73, "learning_rate": 4.805634706323596e-05, "loss": 0.8859, "step": 779 }, { "epoch": 1.73, "learning_rate": 4.7915263780247744e-05, "loss": 0.852, "step": 780 }, { "epoch": 1.74, "learning_rate": 4.77742501049812e-05, "loss": 0.8774, "step": 781 }, { "epoch": 1.74, "learning_rate": 4.7633306849670906e-05, "loss": 0.9166, "step": 782 }, { "epoch": 1.74, "learning_rate": 4.74924348261458e-05, "loss": 0.8773, "step": 783 }, { "epoch": 1.74, "learning_rate": 4.73516348458246e-05, "loss": 0.9, "step": 784 }, { "epoch": 1.74, "learning_rate": 4.7210907719710985e-05, "loss": 0.8678, "step": 785 }, { "epoch": 1.75, "learning_rate": 4.707025425838904e-05, "loss": 0.8743, "step": 786 }, { "epoch": 1.75, "learning_rate": 4.692967527201849e-05, "loss": 0.8748, "step": 787 }, { "epoch": 1.75, "learning_rate": 4.678917157033015e-05, "loss": 0.8972, "step": 788 }, { "epoch": 1.75, "learning_rate": 4.664874396262114e-05, "loss": 0.9149, "step": 789 }, { "epoch": 1.76, "learning_rate": 4.650839325775035e-05, "loss": 0.8609, "step": 790 }, { "epoch": 1.76, "learning_rate": 4.636812026413362e-05, "loss": 0.8849, "step": 791 }, { "epoch": 1.76, "learning_rate": 4.622792578973926e-05, "loss": 0.8396, "step": 792 }, { "epoch": 1.76, "learning_rate": 4.608781064208329e-05, "loss": 0.8749, "step": 793 }, { "epoch": 1.76, "learning_rate": 4.594777562822478e-05, "loss": 0.8876, "step": 794 }, { "epoch": 1.77, "learning_rate": 4.580782155476124e-05, "loss": 0.8608, "step": 795 }, { "epoch": 1.77, "learning_rate": 4.566794922782401e-05, "loss": 0.8541, "step": 796 }, { "epoch": 1.77, "learning_rate": 4.55281594530735e-05, "loss": 0.8943, "step": 797 }, { "epoch": 1.77, "learning_rate": 4.538845303569468e-05, "loss": 0.8855, "step": 798 }, { "epoch": 1.78, "learning_rate": 4.524883078039236e-05, "loss": 0.9044, "step": 799 }, { "epoch": 1.78, "learning_rate": 4.5109293491386574e-05, "loss": 0.8696, "step": 800 }, { "epoch": 1.78, "learning_rate": 4.4969841972407975e-05, "loss": 0.8856, "step": 801 }, { "epoch": 1.78, "learning_rate": 4.4830477026693174e-05, "loss": 0.8855, "step": 802 }, { "epoch": 1.78, "learning_rate": 4.4691199456980124e-05, "loss": 0.8757, "step": 803 }, { "epoch": 1.79, "learning_rate": 4.455201006550346e-05, "loss": 0.8659, "step": 804 }, { "epoch": 1.79, "learning_rate": 4.441290965398997e-05, "loss": 0.8783, "step": 805 }, { "epoch": 1.79, "learning_rate": 4.427389902365389e-05, "loss": 0.8709, "step": 806 }, { "epoch": 1.79, "learning_rate": 4.413497897519229e-05, "loss": 0.8902, "step": 807 }, { "epoch": 1.8, "learning_rate": 4.399615030878056e-05, "loss": 0.8592, "step": 808 }, { "epoch": 1.8, "learning_rate": 4.385741382406768e-05, "loss": 0.8872, "step": 809 }, { "epoch": 1.8, "learning_rate": 4.3718770320171675e-05, "loss": 0.8412, "step": 810 }, { "epoch": 1.8, "learning_rate": 4.3580220595675034e-05, "loss": 0.8991, "step": 811 }, { "epoch": 1.8, "learning_rate": 4.344176544862003e-05, "loss": 0.8699, "step": 812 }, { "epoch": 1.81, "learning_rate": 4.330340567650421e-05, "loss": 0.899, "step": 813 }, { "epoch": 1.81, "learning_rate": 4.316514207627574e-05, "loss": 0.8972, "step": 814 }, { "epoch": 1.81, "learning_rate": 4.302697544432887e-05, "loss": 0.9027, "step": 815 }, { "epoch": 1.81, "learning_rate": 4.288890657649926e-05, "loss": 0.8791, "step": 816 }, { "epoch": 1.82, "learning_rate": 4.275093626805952e-05, "loss": 0.8567, "step": 817 }, { "epoch": 1.82, "learning_rate": 4.261306531371451e-05, "loss": 0.8879, "step": 818 }, { "epoch": 1.82, "learning_rate": 4.247529450759686e-05, "loss": 0.8643, "step": 819 }, { "epoch": 1.82, "learning_rate": 4.2337624643262304e-05, "loss": 0.8892, "step": 820 }, { "epoch": 1.82, "learning_rate": 4.220005651368516e-05, "loss": 0.8808, "step": 821 }, { "epoch": 1.83, "learning_rate": 4.2062590911253785e-05, "loss": 0.8825, "step": 822 }, { "epoch": 1.83, "learning_rate": 4.192522862776596e-05, "loss": 0.9095, "step": 823 }, { "epoch": 1.83, "learning_rate": 4.178797045442435e-05, "loss": 0.8632, "step": 824 }, { "epoch": 1.83, "learning_rate": 4.165081718183195e-05, "loss": 0.8327, "step": 825 }, { "epoch": 1.84, "learning_rate": 4.151376959998756e-05, "loss": 0.8498, "step": 826 }, { "epoch": 1.84, "learning_rate": 4.1376828498281155e-05, "loss": 0.8993, "step": 827 }, { "epoch": 1.84, "learning_rate": 4.123999466548944e-05, "loss": 0.8822, "step": 828 }, { "epoch": 1.84, "learning_rate": 4.110326888977122e-05, "loss": 0.8647, "step": 829 }, { "epoch": 1.84, "learning_rate": 4.09666519586629e-05, "loss": 0.8726, "step": 830 }, { "epoch": 1.85, "learning_rate": 4.083014465907394e-05, "loss": 0.8798, "step": 831 }, { "epoch": 1.85, "learning_rate": 4.0693747777282334e-05, "loss": 0.909, "step": 832 }, { "epoch": 1.85, "learning_rate": 4.0557462098930055e-05, "loss": 0.8665, "step": 833 }, { "epoch": 1.85, "learning_rate": 4.042128840901858e-05, "loss": 0.8626, "step": 834 }, { "epoch": 1.86, "learning_rate": 4.028522749190431e-05, "loss": 0.8765, "step": 835 }, { "epoch": 1.86, "learning_rate": 4.014928013129409e-05, "loss": 0.8295, "step": 836 }, { "epoch": 1.86, "learning_rate": 4.001344711024069e-05, "loss": 0.8936, "step": 837 }, { "epoch": 1.86, "learning_rate": 3.987772921113826e-05, "loss": 0.893, "step": 838 }, { "epoch": 1.86, "learning_rate": 3.974212721571788e-05, "loss": 0.8726, "step": 839 }, { "epoch": 1.87, "learning_rate": 3.960664190504301e-05, "loss": 0.8881, "step": 840 }, { "epoch": 1.87, "learning_rate": 3.9471274059505005e-05, "loss": 0.8708, "step": 841 }, { "epoch": 1.87, "learning_rate": 3.9336024458818635e-05, "loss": 0.8602, "step": 842 }, { "epoch": 1.87, "learning_rate": 3.920089388201759e-05, "loss": 0.8906, "step": 843 }, { "epoch": 1.88, "learning_rate": 3.906588310745e-05, "loss": 0.8987, "step": 844 }, { "epoch": 1.88, "learning_rate": 3.893099291277388e-05, "loss": 0.898, "step": 845 }, { "epoch": 1.88, "learning_rate": 3.879622407495277e-05, "loss": 0.8516, "step": 846 }, { "epoch": 1.88, "learning_rate": 3.866157737025116e-05, "loss": 0.8571, "step": 847 }, { "epoch": 1.88, "learning_rate": 3.852705357423007e-05, "loss": 0.8516, "step": 848 }, { "epoch": 1.89, "learning_rate": 3.8392653461742577e-05, "loss": 0.8529, "step": 849 }, { "epoch": 1.89, "learning_rate": 3.825837780692934e-05, "loss": 0.8345, "step": 850 }, { "epoch": 1.89, "learning_rate": 3.812422738321411e-05, "loss": 0.8737, "step": 851 }, { "epoch": 1.89, "learning_rate": 3.7990202963299366e-05, "loss": 0.8744, "step": 852 }, { "epoch": 1.9, "learning_rate": 3.785630531916181e-05, "loss": 0.8445, "step": 853 }, { "epoch": 1.9, "learning_rate": 3.772253522204784e-05, "loss": 0.8953, "step": 854 }, { "epoch": 1.9, "learning_rate": 3.7588893442469306e-05, "loss": 0.8855, "step": 855 }, { "epoch": 1.9, "learning_rate": 3.7455380750198846e-05, "loss": 0.865, "step": 856 }, { "epoch": 1.9, "learning_rate": 3.7321997914265616e-05, "loss": 0.8641, "step": 857 }, { "epoch": 1.91, "learning_rate": 3.718874570295078e-05, "loss": 0.8741, "step": 858 }, { "epoch": 1.91, "learning_rate": 3.705562488378312e-05, "loss": 0.855, "step": 859 }, { "epoch": 1.91, "learning_rate": 3.69226362235346e-05, "loss": 0.8534, "step": 860 }, { "epoch": 1.91, "learning_rate": 3.678978048821595e-05, "loss": 0.8412, "step": 861 }, { "epoch": 1.92, "learning_rate": 3.665705844307227e-05, "loss": 0.8799, "step": 862 }, { "epoch": 1.92, "learning_rate": 3.65244708525786e-05, "loss": 0.88, "step": 863 }, { "epoch": 1.92, "learning_rate": 3.6392018480435505e-05, "loss": 0.8754, "step": 864 }, { "epoch": 1.92, "learning_rate": 3.6259702089564735e-05, "loss": 0.9, "step": 865 }, { "epoch": 1.92, "learning_rate": 3.612752244210476e-05, "loss": 0.8999, "step": 866 }, { "epoch": 1.93, "learning_rate": 3.599548029940642e-05, "loss": 0.8757, "step": 867 }, { "epoch": 1.93, "learning_rate": 3.5863576422028536e-05, "loss": 0.9028, "step": 868 }, { "epoch": 1.93, "learning_rate": 3.573181156973351e-05, "loss": 0.8594, "step": 869 }, { "epoch": 1.93, "learning_rate": 3.560018650148302e-05, "loss": 0.8576, "step": 870 }, { "epoch": 1.94, "learning_rate": 3.5468701975433504e-05, "loss": 0.8628, "step": 871 }, { "epoch": 1.94, "learning_rate": 3.5337358748931946e-05, "loss": 0.8762, "step": 872 }, { "epoch": 1.94, "learning_rate": 3.520615757851144e-05, "loss": 0.8671, "step": 873 }, { "epoch": 1.94, "learning_rate": 3.507509921988682e-05, "loss": 0.8806, "step": 874 }, { "epoch": 1.94, "learning_rate": 3.4944184427950325e-05, "loss": 0.8895, "step": 875 }, { "epoch": 1.95, "learning_rate": 3.4813413956767295e-05, "loss": 0.8812, "step": 876 }, { "epoch": 1.95, "learning_rate": 3.468278855957174e-05, "loss": 0.8756, "step": 877 }, { "epoch": 1.95, "learning_rate": 3.455230898876205e-05, "loss": 0.8863, "step": 878 }, { "epoch": 1.95, "learning_rate": 3.4421975995896726e-05, "loss": 0.8704, "step": 879 }, { "epoch": 1.96, "learning_rate": 3.429179033168992e-05, "loss": 0.8714, "step": 880 }, { "epoch": 1.96, "learning_rate": 3.416175274600717e-05, "loss": 0.8869, "step": 881 }, { "epoch": 1.96, "learning_rate": 3.403186398786112e-05, "loss": 0.8608, "step": 882 }, { "epoch": 1.96, "learning_rate": 3.3902124805407154e-05, "loss": 0.8984, "step": 883 }, { "epoch": 1.96, "learning_rate": 3.377253594593912e-05, "loss": 0.8477, "step": 884 }, { "epoch": 1.97, "learning_rate": 3.364309815588499e-05, "loss": 0.8735, "step": 885 }, { "epoch": 1.97, "learning_rate": 3.351381218080258e-05, "loss": 0.8592, "step": 886 }, { "epoch": 1.97, "learning_rate": 3.3384678765375257e-05, "loss": 0.837, "step": 887 }, { "epoch": 1.97, "learning_rate": 3.325569865340771e-05, "loss": 0.8653, "step": 888 }, { "epoch": 1.98, "learning_rate": 3.312687258782151e-05, "loss": 0.8385, "step": 889 }, { "epoch": 1.98, "learning_rate": 3.2998201310650995e-05, "loss": 0.8794, "step": 890 }, { "epoch": 1.98, "learning_rate": 3.286968556303887e-05, "loss": 0.8761, "step": 891 }, { "epoch": 1.98, "learning_rate": 3.274132608523204e-05, "loss": 0.882, "step": 892 }, { "epoch": 1.98, "learning_rate": 3.261312361657727e-05, "loss": 0.8849, "step": 893 }, { "epoch": 1.99, "learning_rate": 3.2485078895517e-05, "loss": 0.8571, "step": 894 }, { "epoch": 1.99, "learning_rate": 3.235719265958498e-05, "loss": 0.8554, "step": 895 }, { "epoch": 1.99, "learning_rate": 3.222946564540217e-05, "loss": 0.8459, "step": 896 }, { "epoch": 1.99, "learning_rate": 3.210189858867238e-05, "loss": 0.8627, "step": 897 }, { "epoch": 2.0, "learning_rate": 3.197449222417804e-05, "loss": 0.8543, "step": 898 }, { "epoch": 2.0, "learning_rate": 3.184724728577603e-05, "loss": 0.8538, "step": 899 }, { "epoch": 2.0, "learning_rate": 3.1720164506393426e-05, "loss": 0.8591, "step": 900 }, { "epoch": 2.0, "learning_rate": 3.1593244618023246e-05, "loss": 0.4844, "step": 901 }, { "epoch": 2.0, "learning_rate": 3.14664883517203e-05, "loss": 0.4434, "step": 902 }, { "epoch": 2.01, "learning_rate": 3.13398964375969e-05, "loss": 0.4157, "step": 903 }, { "epoch": 2.01, "learning_rate": 3.121346960481869e-05, "loss": 0.392, "step": 904 }, { "epoch": 2.01, "learning_rate": 3.108720858160052e-05, "loss": 0.4112, "step": 905 }, { "epoch": 2.01, "learning_rate": 3.0961114095202115e-05, "loss": 0.4132, "step": 906 }, { "epoch": 2.02, "learning_rate": 3.083518687192397e-05, "loss": 0.4043, "step": 907 }, { "epoch": 2.02, "learning_rate": 3.070942763710314e-05, "loss": 0.3904, "step": 908 }, { "epoch": 2.02, "learning_rate": 3.0583837115109085e-05, "loss": 0.3975, "step": 909 }, { "epoch": 2.02, "learning_rate": 3.045841602933947e-05, "loss": 0.3928, "step": 910 }, { "epoch": 2.02, "learning_rate": 3.0333165102216057e-05, "loss": 0.3887, "step": 911 }, { "epoch": 2.03, "learning_rate": 3.020808505518045e-05, "loss": 0.3765, "step": 912 }, { "epoch": 2.03, "learning_rate": 3.0083176608689983e-05, "loss": 0.3679, "step": 913 }, { "epoch": 2.03, "learning_rate": 2.9958440482213635e-05, "loss": 0.4016, "step": 914 }, { "epoch": 2.03, "learning_rate": 2.9833877394227778e-05, "loss": 0.3845, "step": 915 }, { "epoch": 2.04, "learning_rate": 2.9709488062212084e-05, "loss": 0.3582, "step": 916 }, { "epoch": 2.04, "learning_rate": 2.9585273202645425e-05, "loss": 0.3709, "step": 917 }, { "epoch": 2.04, "learning_rate": 2.9461233531001697e-05, "loss": 0.3715, "step": 918 }, { "epoch": 2.04, "learning_rate": 2.9337369761745715e-05, "loss": 0.3608, "step": 919 }, { "epoch": 2.04, "learning_rate": 2.9213682608329127e-05, "loss": 0.3733, "step": 920 }, { "epoch": 2.05, "learning_rate": 2.9090172783186234e-05, "loss": 0.3788, "step": 921 }, { "epoch": 2.05, "learning_rate": 2.8966840997729996e-05, "loss": 0.3656, "step": 922 }, { "epoch": 2.05, "learning_rate": 2.884368796234783e-05, "loss": 0.3637, "step": 923 }, { "epoch": 2.05, "learning_rate": 2.8720714386397548e-05, "loss": 0.3615, "step": 924 }, { "epoch": 2.06, "learning_rate": 2.8597920978203288e-05, "loss": 0.3734, "step": 925 }, { "epoch": 2.06, "learning_rate": 2.8475308445051456e-05, "loss": 0.3603, "step": 926 }, { "epoch": 2.06, "learning_rate": 2.835287749318658e-05, "loss": 0.3749, "step": 927 }, { "epoch": 2.06, "learning_rate": 2.8230628827807295e-05, "loss": 0.3683, "step": 928 }, { "epoch": 2.06, "learning_rate": 2.8108563153062326e-05, "loss": 0.3673, "step": 929 }, { "epoch": 2.07, "learning_rate": 2.7986681172046306e-05, "loss": 0.3817, "step": 930 }, { "epoch": 2.07, "learning_rate": 2.7864983586795808e-05, "loss": 0.3785, "step": 931 }, { "epoch": 2.07, "learning_rate": 2.774347109828535e-05, "loss": 0.3589, "step": 932 }, { "epoch": 2.07, "learning_rate": 2.7622144406423235e-05, "loss": 0.3586, "step": 933 }, { "epoch": 2.08, "learning_rate": 2.7501004210047614e-05, "loss": 0.3766, "step": 934 }, { "epoch": 2.08, "learning_rate": 2.73800512069224e-05, "loss": 0.359, "step": 935 }, { "epoch": 2.08, "learning_rate": 2.7259286093733313e-05, "loss": 0.3778, "step": 936 }, { "epoch": 2.08, "learning_rate": 2.7138709566083814e-05, "loss": 0.3664, "step": 937 }, { "epoch": 2.08, "learning_rate": 2.701832231849112e-05, "loss": 0.3484, "step": 938 }, { "epoch": 2.09, "learning_rate": 2.6898125044382186e-05, "loss": 0.3625, "step": 939 }, { "epoch": 2.09, "learning_rate": 2.6778118436089773e-05, "loss": 0.3585, "step": 940 }, { "epoch": 2.09, "learning_rate": 2.6658303184848363e-05, "loss": 0.3774, "step": 941 }, { "epoch": 2.09, "learning_rate": 2.6538679980790244e-05, "loss": 0.3574, "step": 942 }, { "epoch": 2.1, "learning_rate": 2.6419249512941523e-05, "loss": 0.342, "step": 943 }, { "epoch": 2.1, "learning_rate": 2.630001246921814e-05, "loss": 0.3684, "step": 944 }, { "epoch": 2.1, "learning_rate": 2.6180969536421928e-05, "loss": 0.373, "step": 945 }, { "epoch": 2.1, "learning_rate": 2.6062121400236623e-05, "loss": 0.3772, "step": 946 }, { "epoch": 2.1, "learning_rate": 2.5943468745224004e-05, "loss": 0.3624, "step": 947 }, { "epoch": 2.11, "learning_rate": 2.5825012254819793e-05, "loss": 0.3545, "step": 948 }, { "epoch": 2.11, "learning_rate": 2.5706752611329903e-05, "loss": 0.3572, "step": 949 }, { "epoch": 2.11, "learning_rate": 2.5588690495926343e-05, "loss": 0.3541, "step": 950 }, { "epoch": 2.11, "learning_rate": 2.5470826588643393e-05, "loss": 0.3597, "step": 951 }, { "epoch": 2.12, "learning_rate": 2.5353161568373653e-05, "loss": 0.3567, "step": 952 }, { "epoch": 2.12, "learning_rate": 2.523569611286415e-05, "loss": 0.3684, "step": 953 }, { "epoch": 2.12, "learning_rate": 2.5118430898712418e-05, "loss": 0.3586, "step": 954 }, { "epoch": 2.12, "learning_rate": 2.5001366601362606e-05, "loss": 0.3616, "step": 955 }, { "epoch": 2.12, "learning_rate": 2.488450389510157e-05, "loss": 0.3661, "step": 956 }, { "epoch": 2.13, "learning_rate": 2.4767843453055065e-05, "loss": 0.3443, "step": 957 }, { "epoch": 2.13, "learning_rate": 2.4651385947183756e-05, "loss": 0.3586, "step": 958 }, { "epoch": 2.13, "learning_rate": 2.4535132048279413e-05, "loss": 0.3522, "step": 959 }, { "epoch": 2.13, "learning_rate": 2.4419082425961047e-05, "loss": 0.3683, "step": 960 }, { "epoch": 2.14, "learning_rate": 2.4303237748671032e-05, "loss": 0.3437, "step": 961 }, { "epoch": 2.14, "learning_rate": 2.418759868367126e-05, "loss": 0.3581, "step": 962 }, { "epoch": 2.14, "learning_rate": 2.4072165897039327e-05, "loss": 0.376, "step": 963 }, { "epoch": 2.14, "learning_rate": 2.3956940053664616e-05, "loss": 0.3558, "step": 964 }, { "epoch": 2.14, "learning_rate": 2.384192181724462e-05, "loss": 0.3449, "step": 965 }, { "epoch": 2.15, "learning_rate": 2.3727111850280917e-05, "loss": 0.3712, "step": 966 }, { "epoch": 2.15, "learning_rate": 2.361251081407555e-05, "loss": 0.3631, "step": 967 }, { "epoch": 2.15, "learning_rate": 2.3498119368727052e-05, "loss": 0.3532, "step": 968 }, { "epoch": 2.15, "learning_rate": 2.3383938173126764e-05, "loss": 0.3545, "step": 969 }, { "epoch": 2.16, "learning_rate": 2.3269967884954974e-05, "loss": 0.3638, "step": 970 }, { "epoch": 2.16, "learning_rate": 2.315620916067716e-05, "loss": 0.3532, "step": 971 }, { "epoch": 2.16, "learning_rate": 2.304266265554019e-05, "loss": 0.3485, "step": 972 }, { "epoch": 2.16, "learning_rate": 2.2929329023568543e-05, "loss": 0.3604, "step": 973 }, { "epoch": 2.16, "learning_rate": 2.281620891756061e-05, "loss": 0.3547, "step": 974 }, { "epoch": 2.17, "learning_rate": 2.2703302989084833e-05, "loss": 0.3621, "step": 975 }, { "epoch": 2.17, "learning_rate": 2.2590611888475993e-05, "loss": 0.3608, "step": 976 }, { "epoch": 2.17, "learning_rate": 2.2478136264831488e-05, "loss": 0.3623, "step": 977 }, { "epoch": 2.17, "learning_rate": 2.236587676600757e-05, "loss": 0.3622, "step": 978 }, { "epoch": 2.18, "learning_rate": 2.225383403861562e-05, "loss": 0.3664, "step": 979 }, { "epoch": 2.18, "learning_rate": 2.214200872801841e-05, "loss": 0.3603, "step": 980 }, { "epoch": 2.18, "learning_rate": 2.2030401478326398e-05, "loss": 0.3437, "step": 981 }, { "epoch": 2.18, "learning_rate": 2.191901293239406e-05, "loss": 0.3715, "step": 982 }, { "epoch": 2.18, "learning_rate": 2.180784373181609e-05, "loss": 0.3613, "step": 983 }, { "epoch": 2.19, "learning_rate": 2.1696894516923776e-05, "loss": 0.3566, "step": 984 }, { "epoch": 2.19, "learning_rate": 2.1586165926781324e-05, "loss": 0.3476, "step": 985 }, { "epoch": 2.19, "learning_rate": 2.147565859918211e-05, "loss": 0.3506, "step": 986 }, { "epoch": 2.19, "learning_rate": 2.1365373170645063e-05, "loss": 0.3543, "step": 987 }, { "epoch": 2.2, "learning_rate": 2.1255310276410968e-05, "loss": 0.3517, "step": 988 }, { "epoch": 2.2, "learning_rate": 2.114547055043883e-05, "loss": 0.3567, "step": 989 }, { "epoch": 2.2, "learning_rate": 2.103585462540221e-05, "loss": 0.3647, "step": 990 }, { "epoch": 2.2, "learning_rate": 2.0926463132685555e-05, "loss": 0.364, "step": 991 }, { "epoch": 2.2, "learning_rate": 2.0817296702380662e-05, "loss": 0.3489, "step": 992 }, { "epoch": 2.21, "learning_rate": 2.07083559632829e-05, "loss": 0.3516, "step": 993 }, { "epoch": 2.21, "learning_rate": 2.059964154288769e-05, "loss": 0.3576, "step": 994 }, { "epoch": 2.21, "learning_rate": 2.049115406738687e-05, "loss": 0.3573, "step": 995 }, { "epoch": 2.21, "learning_rate": 2.0382894161665065e-05, "loss": 0.3637, "step": 996 }, { "epoch": 2.22, "learning_rate": 2.0274862449296132e-05, "loss": 0.348, "step": 997 }, { "epoch": 2.22, "learning_rate": 2.016705955253951e-05, "loss": 0.361, "step": 998 }, { "epoch": 2.22, "learning_rate": 2.005948609233668e-05, "loss": 0.3559, "step": 999 }, { "epoch": 2.22, "learning_rate": 1.99521426883076e-05, "loss": 0.3608, "step": 1000 } ], "max_steps": 1350, "num_train_epochs": 3, "total_flos": 9.936494717011231e+17, "trial_name": null, "trial_params": null }