{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 369, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "loss": 0.6599, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.3333333333333334e-06, "loss": 0.6877, "step": 2 }, { "epoch": 0.02, "learning_rate": 2.0000000000000003e-06, "loss": 0.6107, "step": 3 }, { "epoch": 0.03, "learning_rate": 2.666666666666667e-06, "loss": 0.599, "step": 4 }, { "epoch": 0.04, "learning_rate": 3.3333333333333333e-06, "loss": 0.6458, "step": 5 }, { "epoch": 0.05, "learning_rate": 4.000000000000001e-06, "loss": 0.5955, "step": 6 }, { "epoch": 0.06, "learning_rate": 4.666666666666667e-06, "loss": 0.6571, "step": 7 }, { "epoch": 0.07, "learning_rate": 5.333333333333334e-06, "loss": 0.5542, "step": 8 }, { "epoch": 0.07, "learning_rate": 6e-06, "loss": 0.6561, "step": 9 }, { "epoch": 0.08, "learning_rate": 6.666666666666667e-06, "loss": 0.5446, "step": 10 }, { "epoch": 0.09, "learning_rate": 7.333333333333333e-06, "loss": 0.5365, "step": 11 }, { "epoch": 0.1, "learning_rate": 8.000000000000001e-06, "loss": 0.557, "step": 12 }, { "epoch": 0.11, "learning_rate": 8.666666666666668e-06, "loss": 0.5364, "step": 13 }, { "epoch": 0.11, "learning_rate": 9.333333333333334e-06, "loss": 0.5132, "step": 14 }, { "epoch": 0.12, "learning_rate": 1e-05, "loss": 0.5247, "step": 15 }, { "epoch": 0.13, "learning_rate": 9.99980310695311e-06, "loss": 0.5467, "step": 16 }, { "epoch": 0.14, "learning_rate": 9.999212443319191e-06, "loss": 0.5564, "step": 17 }, { "epoch": 0.15, "learning_rate": 9.998228055617264e-06, "loss": 0.5345, "step": 18 }, { "epoch": 0.15, "learning_rate": 9.996850021374969e-06, "loss": 0.5275, "step": 19 }, { "epoch": 0.16, "learning_rate": 9.99507844912245e-06, "loss": 0.5486, "step": 20 }, { "epoch": 0.17, "learning_rate": 9.99291347838381e-06, "loss": 0.5356, "step": 21 }, { "epoch": 0.18, "learning_rate": 9.990355279666124e-06, "loss": 0.465, "step": 22 }, { "epoch": 0.19, "learning_rate": 9.987404054446009e-06, "loss": 0.461, "step": 23 }, { "epoch": 0.2, "learning_rate": 9.984060035153752e-06, "loss": 0.5196, "step": 24 }, { "epoch": 0.2, "learning_rate": 9.980323485155013e-06, "loss": 0.4973, "step": 25 }, { "epoch": 0.21, "learning_rate": 9.97619469873008e-06, "loss": 0.4969, "step": 26 }, { "epoch": 0.22, "learning_rate": 9.971674001050687e-06, "loss": 0.55, "step": 27 }, { "epoch": 0.23, "learning_rate": 9.96676174815441e-06, "loss": 0.4769, "step": 28 }, { "epoch": 0.24, "learning_rate": 9.961458326916624e-06, "loss": 0.524, "step": 29 }, { "epoch": 0.24, "learning_rate": 9.955764155020037e-06, "loss": 0.4532, "step": 30 }, { "epoch": 0.25, "learning_rate": 9.94967968092179e-06, "loss": 0.4578, "step": 31 }, { "epoch": 0.26, "learning_rate": 9.943205383818142e-06, "loss": 0.4492, "step": 32 }, { "epoch": 0.27, "learning_rate": 9.936341773606723e-06, "loss": 0.4494, "step": 33 }, { "epoch": 0.28, "learning_rate": 9.929089390846389e-06, "loss": 0.4929, "step": 34 }, { "epoch": 0.28, "learning_rate": 9.92144880671463e-06, "loss": 0.4793, "step": 35 }, { "epoch": 0.29, "learning_rate": 9.913420622962606e-06, "loss": 0.5119, "step": 36 }, { "epoch": 0.3, "learning_rate": 9.90500547186774e-06, "loss": 0.4647, "step": 37 }, { "epoch": 0.31, "learning_rate": 9.896204016183924e-06, "loss": 0.4212, "step": 38 }, { "epoch": 0.32, "learning_rate": 9.887016949089334e-06, "loss": 0.6057, "step": 39 }, { "epoch": 0.33, "learning_rate": 9.87744499413182e-06, "loss": 0.4191, "step": 40 }, { "epoch": 0.33, "learning_rate": 9.867488905171934e-06, "loss": 0.4644, "step": 41 }, { "epoch": 0.34, "learning_rate": 9.85714946632355e-06, "loss": 0.5441, "step": 42 }, { "epoch": 0.35, "learning_rate": 9.846427491892117e-06, "loss": 0.4522, "step": 43 }, { "epoch": 0.36, "learning_rate": 9.835323826310522e-06, "loss": 0.4858, "step": 44 }, { "epoch": 0.37, "learning_rate": 9.823839344072582e-06, "loss": 0.5271, "step": 45 }, { "epoch": 0.37, "learning_rate": 9.811974949664176e-06, "loss": 0.5581, "step": 46 }, { "epoch": 0.38, "learning_rate": 9.79973157749201e-06, "loss": 0.4977, "step": 47 }, { "epoch": 0.39, "learning_rate": 9.787110191810027e-06, "loss": 0.5291, "step": 48 }, { "epoch": 0.4, "learning_rate": 9.77411178664346e-06, "loss": 0.5594, "step": 49 }, { "epoch": 0.41, "learning_rate": 9.760737385710546e-06, "loss": 0.4527, "step": 50 }, { "epoch": 0.41, "learning_rate": 9.746988042341907e-06, "loss": 0.4942, "step": 51 }, { "epoch": 0.42, "learning_rate": 9.732864839397585e-06, "loss": 0.424, "step": 52 }, { "epoch": 0.43, "learning_rate": 9.718368889181763e-06, "loss": 0.4824, "step": 53 }, { "epoch": 0.44, "learning_rate": 9.703501333355167e-06, "loss": 0.5366, "step": 54 }, { "epoch": 0.45, "learning_rate": 9.68826334284514e-06, "loss": 0.5005, "step": 55 }, { "epoch": 0.46, "learning_rate": 9.672656117753435e-06, "loss": 0.4514, "step": 56 }, { "epoch": 0.46, "learning_rate": 9.656680887261693e-06, "loss": 0.4147, "step": 57 }, { "epoch": 0.47, "learning_rate": 9.640338909534636e-06, "loss": 0.4939, "step": 58 }, { "epoch": 0.48, "learning_rate": 9.62363147162098e-06, "loss": 0.4276, "step": 59 }, { "epoch": 0.49, "learning_rate": 9.606559889352065e-06, "loss": 0.4393, "step": 60 }, { "epoch": 0.5, "learning_rate": 9.589125507238234e-06, "loss": 0.4778, "step": 61 }, { "epoch": 0.5, "learning_rate": 9.571329698362931e-06, "loss": 0.4908, "step": 62 }, { "epoch": 0.51, "learning_rate": 9.553173864274567e-06, "loss": 0.478, "step": 63 }, { "epoch": 0.52, "learning_rate": 9.53465943487614e-06, "loss": 0.5457, "step": 64 }, { "epoch": 0.53, "learning_rate": 9.51578786831262e-06, "loss": 0.5119, "step": 65 }, { "epoch": 0.54, "learning_rate": 9.496560650856097e-06, "loss": 0.4648, "step": 66 }, { "epoch": 0.54, "learning_rate": 9.476979296788746e-06, "loss": 0.5101, "step": 67 }, { "epoch": 0.55, "learning_rate": 9.457045348283552e-06, "loss": 0.5433, "step": 68 }, { "epoch": 0.56, "learning_rate": 9.436760375282858e-06, "loss": 0.4319, "step": 69 }, { "epoch": 0.57, "learning_rate": 9.416125975374722e-06, "loss": 0.4584, "step": 70 }, { "epoch": 0.58, "learning_rate": 9.395143773667089e-06, "loss": 0.4193, "step": 71 }, { "epoch": 0.59, "learning_rate": 9.373815422659806e-06, "loss": 0.4883, "step": 72 }, { "epoch": 0.59, "learning_rate": 9.352142602114487e-06, "loss": 0.3313, "step": 73 }, { "epoch": 0.6, "learning_rate": 9.330127018922195e-06, "loss": 0.4623, "step": 74 }, { "epoch": 0.61, "learning_rate": 9.307770406969032e-06, "loss": 0.4359, "step": 75 }, { "epoch": 0.62, "learning_rate": 9.285074526999577e-06, "loss": 0.4673, "step": 76 }, { "epoch": 0.63, "learning_rate": 9.262041166478215e-06, "loss": 0.4999, "step": 77 }, { "epoch": 0.63, "learning_rate": 9.238672139448354e-06, "loss": 0.5058, "step": 78 }, { "epoch": 0.64, "learning_rate": 9.214969286389577e-06, "loss": 0.4482, "step": 79 }, { "epoch": 0.65, "learning_rate": 9.190934474072658e-06, "loss": 0.4803, "step": 80 }, { "epoch": 0.66, "learning_rate": 9.166569595412576e-06, "loss": 0.5325, "step": 81 }, { "epoch": 0.67, "learning_rate": 9.141876569319405e-06, "loss": 0.4484, "step": 82 }, { "epoch": 0.67, "learning_rate": 9.116857340547203e-06, "loss": 0.4946, "step": 83 }, { "epoch": 0.68, "learning_rate": 9.091513879540845e-06, "loss": 0.4395, "step": 84 }, { "epoch": 0.69, "learning_rate": 9.065848182280835e-06, "loss": 0.4457, "step": 85 }, { "epoch": 0.7, "learning_rate": 9.039862270126102e-06, "loss": 0.5122, "step": 86 }, { "epoch": 0.71, "learning_rate": 9.013558189654819e-06, "loss": 0.375, "step": 87 }, { "epoch": 0.72, "learning_rate": 8.986938012503203e-06, "loss": 0.4703, "step": 88 }, { "epoch": 0.72, "learning_rate": 8.960003835202369e-06, "loss": 0.4903, "step": 89 }, { "epoch": 0.73, "learning_rate": 8.932757779013214e-06, "loss": 0.4175, "step": 90 }, { "epoch": 0.74, "learning_rate": 8.90520198975934e-06, "loss": 0.3969, "step": 91 }, { "epoch": 0.75, "learning_rate": 8.877338637658074e-06, "loss": 0.3992, "step": 92 }, { "epoch": 0.76, "learning_rate": 8.849169917149532e-06, "loss": 0.4853, "step": 93 }, { "epoch": 0.76, "learning_rate": 8.820698046723796e-06, "loss": 0.4282, "step": 94 }, { "epoch": 0.77, "learning_rate": 8.791925268746193e-06, "loss": 0.3794, "step": 95 }, { "epoch": 0.78, "learning_rate": 8.762853849280692e-06, "loss": 0.4813, "step": 96 }, { "epoch": 0.79, "learning_rate": 8.73348607791144e-06, "loss": 0.437, "step": 97 }, { "epoch": 0.8, "learning_rate": 8.703824267562424e-06, "loss": 0.3978, "step": 98 }, { "epoch": 0.8, "learning_rate": 8.673870754315336e-06, "loss": 0.4679, "step": 99 }, { "epoch": 0.81, "learning_rate": 8.64362789722557e-06, "loss": 0.5102, "step": 100 }, { "epoch": 0.82, "learning_rate": 8.613098078136436e-06, "loss": 0.4321, "step": 101 }, { "epoch": 0.83, "learning_rate": 8.582283701491576e-06, "loss": 0.4686, "step": 102 }, { "epoch": 0.84, "learning_rate": 8.551187194145591e-06, "loss": 0.386, "step": 103 }, { "epoch": 0.85, "learning_rate": 8.519811005172916e-06, "loss": 0.4447, "step": 104 }, { "epoch": 0.85, "learning_rate": 8.488157605674924e-06, "loss": 0.3968, "step": 105 }, { "epoch": 0.86, "learning_rate": 8.456229488585328e-06, "loss": 0.4679, "step": 106 }, { "epoch": 0.87, "learning_rate": 8.424029168473829e-06, "loss": 0.3828, "step": 107 }, { "epoch": 0.88, "learning_rate": 8.391559181348081e-06, "loss": 0.4348, "step": 108 }, { "epoch": 0.89, "learning_rate": 8.358822084453964e-06, "loss": 0.3562, "step": 109 }, { "epoch": 0.89, "learning_rate": 8.325820456074181e-06, "loss": 0.4638, "step": 110 }, { "epoch": 0.9, "learning_rate": 8.292556895325195e-06, "loss": 0.4689, "step": 111 }, { "epoch": 0.91, "learning_rate": 8.259034021952537e-06, "loss": 0.3464, "step": 112 }, { "epoch": 0.92, "learning_rate": 8.225254476124479e-06, "loss": 0.4532, "step": 113 }, { "epoch": 0.93, "learning_rate": 8.191220918224102e-06, "loss": 0.4164, "step": 114 }, { "epoch": 0.93, "learning_rate": 8.156936028639768e-06, "loss": 0.4506, "step": 115 }, { "epoch": 0.94, "learning_rate": 8.12240250755403e-06, "loss": 0.4063, "step": 116 }, { "epoch": 0.95, "learning_rate": 8.08762307473096e-06, "loss": 0.5001, "step": 117 }, { "epoch": 0.96, "learning_rate": 8.052600469301958e-06, "loss": 0.3469, "step": 118 }, { "epoch": 0.97, "learning_rate": 8.01733744955002e-06, "loss": 0.3872, "step": 119 }, { "epoch": 0.98, "learning_rate": 7.981836792692508e-06, "loss": 0.4526, "step": 120 }, { "epoch": 0.98, "learning_rate": 7.946101294662418e-06, "loss": 0.395, "step": 121 }, { "epoch": 0.99, "learning_rate": 7.91013376988819e-06, "loss": 0.4131, "step": 122 }, { "epoch": 1.0, "learning_rate": 7.873937051072037e-06, "loss": 0.4579, "step": 123 }, { "epoch": 1.01, "learning_rate": 7.83751398896686e-06, "loss": 0.3213, "step": 124 }, { "epoch": 1.02, "learning_rate": 7.80086745215173e-06, "loss": 0.2939, "step": 125 }, { "epoch": 1.02, "learning_rate": 7.764000326805967e-06, "loss": 0.2849, "step": 126 }, { "epoch": 1.03, "learning_rate": 7.726915516481824e-06, "loss": 0.3404, "step": 127 }, { "epoch": 1.04, "learning_rate": 7.68961594187582e-06, "loss": 0.282, "step": 128 }, { "epoch": 1.05, "learning_rate": 7.652104540598712e-06, "loss": 0.2835, "step": 129 }, { "epoch": 1.06, "learning_rate": 7.614384266944139e-06, "loss": 0.3239, "step": 130 }, { "epoch": 1.07, "learning_rate": 7.5764580916559405e-06, "loss": 0.3164, "step": 131 }, { "epoch": 1.07, "learning_rate": 7.5383290016942e-06, "loss": 0.2774, "step": 132 }, { "epoch": 1.08, "learning_rate": 7.500000000000001e-06, "loss": 0.3302, "step": 133 }, { "epoch": 1.09, "learning_rate": 7.461474105258911e-06, "loss": 0.3427, "step": 134 }, { "epoch": 1.1, "learning_rate": 7.422754351663252e-06, "loss": 0.3113, "step": 135 }, { "epoch": 1.11, "learning_rate": 7.3838437886731264e-06, "loss": 0.3235, "step": 136 }, { "epoch": 1.11, "learning_rate": 7.3447454807762565e-06, "loss": 0.3108, "step": 137 }, { "epoch": 1.12, "learning_rate": 7.30546250724663e-06, "loss": 0.3307, "step": 138 }, { "epoch": 1.13, "learning_rate": 7.265997961901987e-06, "loss": 0.2831, "step": 139 }, { "epoch": 1.14, "learning_rate": 7.226354952860157e-06, "loss": 0.2836, "step": 140 }, { "epoch": 1.15, "learning_rate": 7.186536602294278e-06, "loss": 0.3313, "step": 141 }, { "epoch": 1.15, "learning_rate": 7.146546046186893e-06, "loss": 0.2578, "step": 142 }, { "epoch": 1.16, "learning_rate": 7.106386434082979e-06, "loss": 0.3012, "step": 143 }, { "epoch": 1.17, "learning_rate": 7.066060928841891e-06, "loss": 0.2756, "step": 144 }, { "epoch": 1.18, "learning_rate": 7.025572706388268e-06, "loss": 0.3011, "step": 145 }, { "epoch": 1.19, "learning_rate": 6.984924955461901e-06, "loss": 0.2848, "step": 146 }, { "epoch": 1.2, "learning_rate": 6.944120877366605e-06, "loss": 0.3199, "step": 147 }, { "epoch": 1.2, "learning_rate": 6.9031636857180795e-06, "loss": 0.2064, "step": 148 }, { "epoch": 1.21, "learning_rate": 6.86205660619083e-06, "loss": 0.2879, "step": 149 }, { "epoch": 1.22, "learning_rate": 6.820802876264112e-06, "loss": 0.2742, "step": 150 }, { "epoch": 1.23, "learning_rate": 6.7794057449669545e-06, "loss": 0.2949, "step": 151 }, { "epoch": 1.24, "learning_rate": 6.7378684726222875e-06, "loss": 0.3383, "step": 152 }, { "epoch": 1.24, "learning_rate": 6.6961943305901515e-06, "loss": 0.3225, "step": 153 }, { "epoch": 1.25, "learning_rate": 6.65438660101007e-06, "loss": 0.2556, "step": 154 }, { "epoch": 1.26, "learning_rate": 6.612448576542545e-06, "loss": 0.2907, "step": 155 }, { "epoch": 1.27, "learning_rate": 6.570383560109745e-06, "loss": 0.2352, "step": 156 }, { "epoch": 1.28, "learning_rate": 6.52819486463537e-06, "loss": 0.2937, "step": 157 }, { "epoch": 1.28, "learning_rate": 6.48588581278374e-06, "loss": 0.2745, "step": 158 }, { "epoch": 1.29, "learning_rate": 6.443459736698106e-06, "loss": 0.2803, "step": 159 }, { "epoch": 1.3, "learning_rate": 6.400919977738222e-06, "loss": 0.3071, "step": 160 }, { "epoch": 1.31, "learning_rate": 6.3582698862171945e-06, "loss": 0.2386, "step": 161 }, { "epoch": 1.32, "learning_rate": 6.315512821137606e-06, "loss": 0.3501, "step": 162 }, { "epoch": 1.33, "learning_rate": 6.272652149926989e-06, "loss": 0.2953, "step": 163 }, { "epoch": 1.33, "learning_rate": 6.229691248172599e-06, "loss": 0.3543, "step": 164 }, { "epoch": 1.34, "learning_rate": 6.186633499355576e-06, "loss": 0.3613, "step": 165 }, { "epoch": 1.35, "learning_rate": 6.143482294584459e-06, "loss": 0.2325, "step": 166 }, { "epoch": 1.36, "learning_rate": 6.100241032328125e-06, "loss": 0.2632, "step": 167 }, { "epoch": 1.37, "learning_rate": 6.056913118148122e-06, "loss": 0.2542, "step": 168 }, { "epoch": 1.37, "learning_rate": 6.013501964430468e-06, "loss": 0.2744, "step": 169 }, { "epoch": 1.38, "learning_rate": 5.970010990116892e-06, "loss": 0.3116, "step": 170 }, { "epoch": 1.39, "learning_rate": 5.926443620435572e-06, "loss": 0.2646, "step": 171 }, { "epoch": 1.4, "learning_rate": 5.8828032866313725e-06, "loss": 0.2782, "step": 172 }, { "epoch": 1.41, "learning_rate": 5.839093425695609e-06, "loss": 0.2478, "step": 173 }, { "epoch": 1.41, "learning_rate": 5.795317480095361e-06, "loss": 0.2735, "step": 174 }, { "epoch": 1.42, "learning_rate": 5.751478897502353e-06, "loss": 0.236, "step": 175 }, { "epoch": 1.43, "learning_rate": 5.707581130521424e-06, "loss": 0.1979, "step": 176 }, { "epoch": 1.44, "learning_rate": 5.663627636418611e-06, "loss": 0.3049, "step": 177 }, { "epoch": 1.45, "learning_rate": 5.619621876848864e-06, "loss": 0.257, "step": 178 }, { "epoch": 1.46, "learning_rate": 5.575567317583415e-06, "loss": 0.2911, "step": 179 }, { "epoch": 1.46, "learning_rate": 5.531467428236827e-06, "loss": 0.2907, "step": 180 }, { "epoch": 1.47, "learning_rate": 5.4873256819937325e-06, "loss": 0.2957, "step": 181 }, { "epoch": 1.48, "learning_rate": 5.443145555335296e-06, "loss": 0.3088, "step": 182 }, { "epoch": 1.49, "learning_rate": 5.398930527765416e-06, "loss": 0.3311, "step": 183 }, { "epoch": 1.5, "learning_rate": 5.354684081536693e-06, "loss": 0.2715, "step": 184 }, { "epoch": 1.5, "learning_rate": 5.31040970137617e-06, "loss": 0.3215, "step": 185 }, { "epoch": 1.51, "learning_rate": 5.266110874210893e-06, "loss": 0.2985, "step": 186 }, { "epoch": 1.52, "learning_rate": 5.221791088893282e-06, "loss": 0.2187, "step": 187 }, { "epoch": 1.53, "learning_rate": 5.177453835926366e-06, "loss": 0.2988, "step": 188 }, { "epoch": 1.54, "learning_rate": 5.133102607188875e-06, "loss": 0.2472, "step": 189 }, { "epoch": 1.54, "learning_rate": 5.0887408956602316e-06, "loss": 0.2629, "step": 190 }, { "epoch": 1.55, "learning_rate": 5.044372195145455e-06, "loss": 0.2923, "step": 191 }, { "epoch": 1.56, "learning_rate": 5e-06, "loss": 0.26, "step": 192 }, { "epoch": 1.57, "learning_rate": 4.9556278048545445e-06, "loss": 0.319, "step": 193 }, { "epoch": 1.58, "learning_rate": 4.911259104339771e-06, "loss": 0.3336, "step": 194 }, { "epoch": 1.59, "learning_rate": 4.866897392811127e-06, "loss": 0.2899, "step": 195 }, { "epoch": 1.59, "learning_rate": 4.822546164073635e-06, "loss": 0.2963, "step": 196 }, { "epoch": 1.6, "learning_rate": 4.778208911106718e-06, "loss": 0.3229, "step": 197 }, { "epoch": 1.61, "learning_rate": 4.7338891257891085e-06, "loss": 0.2387, "step": 198 }, { "epoch": 1.62, "learning_rate": 4.689590298623831e-06, "loss": 0.2407, "step": 199 }, { "epoch": 1.63, "learning_rate": 4.645315918463308e-06, "loss": 0.3072, "step": 200 }, { "epoch": 1.63, "learning_rate": 4.601069472234584e-06, "loss": 0.3005, "step": 201 }, { "epoch": 1.64, "learning_rate": 4.556854444664706e-06, "loss": 0.2535, "step": 202 }, { "epoch": 1.65, "learning_rate": 4.512674318006268e-06, "loss": 0.2647, "step": 203 }, { "epoch": 1.66, "learning_rate": 4.468532571763174e-06, "loss": 0.311, "step": 204 }, { "epoch": 1.67, "learning_rate": 4.424432682416585e-06, "loss": 0.2937, "step": 205 }, { "epoch": 1.67, "learning_rate": 4.380378123151139e-06, "loss": 0.2979, "step": 206 }, { "epoch": 1.68, "learning_rate": 4.336372363581391e-06, "loss": 0.2954, "step": 207 }, { "epoch": 1.69, "learning_rate": 4.292418869478577e-06, "loss": 0.2672, "step": 208 }, { "epoch": 1.7, "learning_rate": 4.248521102497649e-06, "loss": 0.2789, "step": 209 }, { "epoch": 1.71, "learning_rate": 4.204682519904641e-06, "loss": 0.2538, "step": 210 }, { "epoch": 1.72, "learning_rate": 4.160906574304392e-06, "loss": 0.3101, "step": 211 }, { "epoch": 1.72, "learning_rate": 4.117196713368629e-06, "loss": 0.2745, "step": 212 }, { "epoch": 1.73, "learning_rate": 4.073556379564429e-06, "loss": 0.3036, "step": 213 }, { "epoch": 1.74, "learning_rate": 4.0299890098831096e-06, "loss": 0.2783, "step": 214 }, { "epoch": 1.75, "learning_rate": 3.986498035569533e-06, "loss": 0.3256, "step": 215 }, { "epoch": 1.76, "learning_rate": 3.9430868818518786e-06, "loss": 0.2546, "step": 216 }, { "epoch": 1.76, "learning_rate": 3.899758967671879e-06, "loss": 0.2967, "step": 217 }, { "epoch": 1.77, "learning_rate": 3.856517705415543e-06, "loss": 0.2555, "step": 218 }, { "epoch": 1.78, "learning_rate": 3.813366500644426e-06, "loss": 0.3004, "step": 219 }, { "epoch": 1.79, "learning_rate": 3.770308751827402e-06, "loss": 0.2688, "step": 220 }, { "epoch": 1.8, "learning_rate": 3.727347850073012e-06, "loss": 0.2993, "step": 221 }, { "epoch": 1.8, "learning_rate": 3.6844871788623946e-06, "loss": 0.2555, "step": 222 }, { "epoch": 1.81, "learning_rate": 3.641730113782807e-06, "loss": 0.3196, "step": 223 }, { "epoch": 1.82, "learning_rate": 3.5990800222617774e-06, "loss": 0.312, "step": 224 }, { "epoch": 1.83, "learning_rate": 3.5565402633018963e-06, "loss": 0.2413, "step": 225 }, { "epoch": 1.84, "learning_rate": 3.5141141872162613e-06, "loss": 0.2841, "step": 226 }, { "epoch": 1.85, "learning_rate": 3.4718051353646304e-06, "loss": 0.2625, "step": 227 }, { "epoch": 1.85, "learning_rate": 3.4296164398902576e-06, "loss": 0.3172, "step": 228 }, { "epoch": 1.86, "learning_rate": 3.387551423457456e-06, "loss": 0.2592, "step": 229 }, { "epoch": 1.87, "learning_rate": 3.345613398989932e-06, "loss": 0.2597, "step": 230 }, { "epoch": 1.88, "learning_rate": 3.3038056694098485e-06, "loss": 0.2864, "step": 231 }, { "epoch": 1.89, "learning_rate": 3.262131527377715e-06, "loss": 0.2718, "step": 232 }, { "epoch": 1.89, "learning_rate": 3.220594255033046e-06, "loss": 0.2534, "step": 233 }, { "epoch": 1.9, "learning_rate": 3.1791971237358893e-06, "loss": 0.3039, "step": 234 }, { "epoch": 1.91, "learning_rate": 3.1379433938091695e-06, "loss": 0.2367, "step": 235 }, { "epoch": 1.92, "learning_rate": 3.0968363142819226e-06, "loss": 0.2944, "step": 236 }, { "epoch": 1.93, "learning_rate": 3.0558791226333974e-06, "loss": 0.309, "step": 237 }, { "epoch": 1.93, "learning_rate": 3.0150750445380995e-06, "loss": 0.2449, "step": 238 }, { "epoch": 1.94, "learning_rate": 2.9744272936117323e-06, "loss": 0.2566, "step": 239 }, { "epoch": 1.95, "learning_rate": 2.9339390711581105e-06, "loss": 0.3015, "step": 240 }, { "epoch": 1.96, "learning_rate": 2.8936135659170217e-06, "loss": 0.307, "step": 241 }, { "epoch": 1.97, "learning_rate": 2.853453953813108e-06, "loss": 0.2849, "step": 242 }, { "epoch": 1.98, "learning_rate": 2.8134633977057236e-06, "loss": 0.2484, "step": 243 }, { "epoch": 1.98, "learning_rate": 2.7736450471398435e-06, "loss": 0.2986, "step": 244 }, { "epoch": 1.99, "learning_rate": 2.734002038098015e-06, "loss": 0.2615, "step": 245 }, { "epoch": 2.0, "learning_rate": 2.69453749275337e-06, "loss": 0.2882, "step": 246 }, { "epoch": 2.01, "learning_rate": 2.655254519223746e-06, "loss": 0.1893, "step": 247 }, { "epoch": 2.02, "learning_rate": 2.616156211326875e-06, "loss": 0.2194, "step": 248 }, { "epoch": 2.02, "learning_rate": 2.57724564833675e-06, "loss": 0.2098, "step": 249 }, { "epoch": 2.03, "learning_rate": 2.5385258947410908e-06, "loss": 0.1978, "step": 250 }, { "epoch": 2.04, "learning_rate": 2.5000000000000015e-06, "loss": 0.1965, "step": 251 }, { "epoch": 2.05, "learning_rate": 2.461670998305802e-06, "loss": 0.1986, "step": 252 }, { "epoch": 2.06, "learning_rate": 2.4235419083440615e-06, "loss": 0.1991, "step": 253 }, { "epoch": 2.07, "learning_rate": 2.3856157330558625e-06, "loss": 0.1666, "step": 254 }, { "epoch": 2.07, "learning_rate": 2.3478954594012884e-06, "loss": 0.2013, "step": 255 }, { "epoch": 2.08, "learning_rate": 2.310384058124181e-06, "loss": 0.1857, "step": 256 }, { "epoch": 2.09, "learning_rate": 2.273084483518176e-06, "loss": 0.1885, "step": 257 }, { "epoch": 2.1, "learning_rate": 2.2359996731940348e-06, "loss": 0.1671, "step": 258 }, { "epoch": 2.11, "learning_rate": 2.1991325478482695e-06, "loss": 0.1984, "step": 259 }, { "epoch": 2.11, "learning_rate": 2.162486011033142e-06, "loss": 0.1859, "step": 260 }, { "epoch": 2.12, "learning_rate": 2.1260629489279662e-06, "loss": 0.1765, "step": 261 }, { "epoch": 2.13, "learning_rate": 2.089866230111813e-06, "loss": 0.2104, "step": 262 }, { "epoch": 2.14, "learning_rate": 2.053898705337583e-06, "loss": 0.2241, "step": 263 }, { "epoch": 2.15, "learning_rate": 2.0181632073074925e-06, "loss": 0.1425, "step": 264 }, { "epoch": 2.15, "learning_rate": 1.9826625504499807e-06, "loss": 0.1926, "step": 265 }, { "epoch": 2.16, "learning_rate": 1.947399530698043e-06, "loss": 0.2269, "step": 266 }, { "epoch": 2.17, "learning_rate": 1.912376925269041e-06, "loss": 0.1895, "step": 267 }, { "epoch": 2.18, "learning_rate": 1.8775974924459716e-06, "loss": 0.1641, "step": 268 }, { "epoch": 2.19, "learning_rate": 1.8430639713602317e-06, "loss": 0.1564, "step": 269 }, { "epoch": 2.2, "learning_rate": 1.808779081775901e-06, "loss": 0.1866, "step": 270 }, { "epoch": 2.2, "learning_rate": 1.7747455238755223e-06, "loss": 0.1548, "step": 271 }, { "epoch": 2.21, "learning_rate": 1.7409659780474652e-06, "loss": 0.2103, "step": 272 }, { "epoch": 2.22, "learning_rate": 1.7074431046748075e-06, "loss": 0.203, "step": 273 }, { "epoch": 2.23, "learning_rate": 1.6741795439258218e-06, "loss": 0.165, "step": 274 }, { "epoch": 2.24, "learning_rate": 1.641177915546036e-06, "loss": 0.1798, "step": 275 }, { "epoch": 2.24, "learning_rate": 1.6084408186519195e-06, "loss": 0.1806, "step": 276 }, { "epoch": 2.25, "learning_rate": 1.5759708315261724e-06, "loss": 0.1536, "step": 277 }, { "epoch": 2.26, "learning_rate": 1.5437705114146735e-06, "loss": 0.1984, "step": 278 }, { "epoch": 2.27, "learning_rate": 1.511842394325077e-06, "loss": 0.1925, "step": 279 }, { "epoch": 2.28, "learning_rate": 1.4801889948270852e-06, "loss": 0.189, "step": 280 }, { "epoch": 2.28, "learning_rate": 1.44881280585441e-06, "loss": 0.1666, "step": 281 }, { "epoch": 2.29, "learning_rate": 1.4177162985084242e-06, "loss": 0.1991, "step": 282 }, { "epoch": 2.3, "learning_rate": 1.3869019218635644e-06, "loss": 0.189, "step": 283 }, { "epoch": 2.31, "learning_rate": 1.3563721027744309e-06, "loss": 0.19, "step": 284 }, { "epoch": 2.32, "learning_rate": 1.3261292456846648e-06, "loss": 0.2029, "step": 285 }, { "epoch": 2.33, "learning_rate": 1.2961757324375768e-06, "loss": 0.1991, "step": 286 }, { "epoch": 2.33, "learning_rate": 1.2665139220885615e-06, "loss": 0.1352, "step": 287 }, { "epoch": 2.34, "learning_rate": 1.2371461507193077e-06, "loss": 0.1947, "step": 288 }, { "epoch": 2.35, "learning_rate": 1.2080747312538082e-06, "loss": 0.2072, "step": 289 }, { "epoch": 2.36, "learning_rate": 1.1793019532762057e-06, "loss": 0.1431, "step": 290 }, { "epoch": 2.37, "learning_rate": 1.1508300828504682e-06, "loss": 0.2012, "step": 291 }, { "epoch": 2.37, "learning_rate": 1.122661362341927e-06, "loss": 0.2178, "step": 292 }, { "epoch": 2.38, "learning_rate": 1.0947980102406597e-06, "loss": 0.1373, "step": 293 }, { "epoch": 2.39, "learning_rate": 1.0672422209867879e-06, "loss": 0.1813, "step": 294 }, { "epoch": 2.4, "learning_rate": 1.0399961647976315e-06, "loss": 0.1857, "step": 295 }, { "epoch": 2.41, "learning_rate": 1.0130619874967983e-06, "loss": 0.193, "step": 296 }, { "epoch": 2.41, "learning_rate": 9.86441810345183e-07, "loss": 0.1867, "step": 297 }, { "epoch": 2.42, "learning_rate": 9.60137729873898e-07, "loss": 0.1749, "step": 298 }, { "epoch": 2.43, "learning_rate": 9.34151817719166e-07, "loss": 0.2128, "step": 299 }, { "epoch": 2.44, "learning_rate": 9.08486120459155e-07, "loss": 0.2344, "step": 300 }, { "epoch": 2.45, "learning_rate": 8.831426594527976e-07, "loss": 0.1891, "step": 301 }, { "epoch": 2.46, "learning_rate": 8.581234306805969e-07, "loss": 0.197, "step": 302 }, { "epoch": 2.46, "learning_rate": 8.334304045874248e-07, "loss": 0.1708, "step": 303 }, { "epoch": 2.47, "learning_rate": 8.090655259273428e-07, "loss": 0.1736, "step": 304 }, { "epoch": 2.48, "learning_rate": 7.850307136104246e-07, "loss": 0.1595, "step": 305 }, { "epoch": 2.49, "learning_rate": 7.613278605516455e-07, "loss": 0.183, "step": 306 }, { "epoch": 2.5, "learning_rate": 7.379588335217875e-07, "loss": 0.1774, "step": 307 }, { "epoch": 2.5, "learning_rate": 7.149254730004246e-07, "loss": 0.1929, "step": 308 }, { "epoch": 2.51, "learning_rate": 6.922295930309691e-07, "loss": 0.2028, "step": 309 }, { "epoch": 2.52, "learning_rate": 6.698729810778065e-07, "loss": 0.1932, "step": 310 }, { "epoch": 2.53, "learning_rate": 6.478573978855146e-07, "loss": 0.1754, "step": 311 }, { "epoch": 2.54, "learning_rate": 6.261845773401936e-07, "loss": 0.1608, "step": 312 }, { "epoch": 2.54, "learning_rate": 6.048562263329139e-07, "loss": 0.1948, "step": 313 }, { "epoch": 2.55, "learning_rate": 5.838740246252794e-07, "loss": 0.1881, "step": 314 }, { "epoch": 2.56, "learning_rate": 5.632396247171429e-07, "loss": 0.1863, "step": 315 }, { "epoch": 2.57, "learning_rate": 5.429546517164486e-07, "loss": 0.179, "step": 316 }, { "epoch": 2.58, "learning_rate": 5.230207032112549e-07, "loss": 0.1465, "step": 317 }, { "epoch": 2.59, "learning_rate": 5.034393491439044e-07, "loss": 0.2105, "step": 318 }, { "epoch": 2.59, "learning_rate": 4.842121316873821e-07, "loss": 0.1953, "step": 319 }, { "epoch": 2.6, "learning_rate": 4.653405651238607e-07, "loss": 0.1462, "step": 320 }, { "epoch": 2.61, "learning_rate": 4.468261357254339e-07, "loss": 0.1707, "step": 321 }, { "epoch": 2.62, "learning_rate": 4.286703016370719e-07, "loss": 0.1866, "step": 322 }, { "epoch": 2.63, "learning_rate": 4.108744927617669e-07, "loss": 0.1773, "step": 323 }, { "epoch": 2.63, "learning_rate": 3.934401106479352e-07, "loss": 0.161, "step": 324 }, { "epoch": 2.64, "learning_rate": 3.763685283790208e-07, "loss": 0.1965, "step": 325 }, { "epoch": 2.65, "learning_rate": 3.596610904653652e-07, "loss": 0.1642, "step": 326 }, { "epoch": 2.66, "learning_rate": 3.433191127383079e-07, "loss": 0.1445, "step": 327 }, { "epoch": 2.67, "learning_rate": 3.2734388224656575e-07, "loss": 0.1796, "step": 328 }, { "epoch": 2.67, "learning_rate": 3.1173665715486076e-07, "loss": 0.2027, "step": 329 }, { "epoch": 2.68, "learning_rate": 2.9649866664483387e-07, "loss": 0.2391, "step": 330 }, { "epoch": 2.69, "learning_rate": 2.816311108182368e-07, "loss": 0.1802, "step": 331 }, { "epoch": 2.7, "learning_rate": 2.671351606024153e-07, "loss": 0.1727, "step": 332 }, { "epoch": 2.71, "learning_rate": 2.530119576580936e-07, "loss": 0.1808, "step": 333 }, { "epoch": 2.72, "learning_rate": 2.3926261428945386e-07, "loss": 0.1679, "step": 334 }, { "epoch": 2.72, "learning_rate": 2.2588821335654044e-07, "loss": 0.168, "step": 335 }, { "epoch": 2.73, "learning_rate": 2.1288980818997272e-07, "loss": 0.1577, "step": 336 }, { "epoch": 2.74, "learning_rate": 2.0026842250799038e-07, "loss": 0.1666, "step": 337 }, { "epoch": 2.75, "learning_rate": 1.8802505033582608e-07, "loss": 0.1381, "step": 338 }, { "epoch": 2.76, "learning_rate": 1.7616065592742038e-07, "loss": 0.201, "step": 339 }, { "epoch": 2.76, "learning_rate": 1.6467617368947918e-07, "loss": 0.1973, "step": 340 }, { "epoch": 2.77, "learning_rate": 1.5357250810788316e-07, "loss": 0.2446, "step": 341 }, { "epoch": 2.78, "learning_rate": 1.4285053367645074e-07, "loss": 0.195, "step": 342 }, { "epoch": 2.79, "learning_rate": 1.3251109482806667e-07, "loss": 0.2026, "step": 343 }, { "epoch": 2.8, "learning_rate": 1.2255500586818015e-07, "loss": 0.1694, "step": 344 }, { "epoch": 2.8, "learning_rate": 1.1298305091066664e-07, "loss": 0.191, "step": 345 }, { "epoch": 2.81, "learning_rate": 1.0379598381607681e-07, "loss": 0.2006, "step": 346 }, { "epoch": 2.82, "learning_rate": 9.499452813226284e-08, "loss": 0.178, "step": 347 }, { "epoch": 2.83, "learning_rate": 8.657937703739516e-08, "loss": 0.192, "step": 348 }, { "epoch": 2.84, "learning_rate": 7.855119328537109e-08, "loss": 0.1635, "step": 349 }, { "epoch": 2.85, "learning_rate": 7.09106091536127e-08, "loss": 0.1688, "step": 350 }, { "epoch": 2.85, "learning_rate": 6.365822639327724e-08, "loss": 0.1947, "step": 351 }, { "epoch": 2.86, "learning_rate": 5.679461618185944e-08, "loss": 0.1723, "step": 352 }, { "epoch": 2.87, "learning_rate": 5.032031907821089e-08, "loss": 0.1956, "step": 353 }, { "epoch": 2.88, "learning_rate": 4.423584497996458e-08, "loss": 0.1645, "step": 354 }, { "epoch": 2.89, "learning_rate": 3.8541673083377086e-08, "loss": 0.1953, "step": 355 }, { "epoch": 2.89, "learning_rate": 3.323825184559204e-08, "loss": 0.198, "step": 356 }, { "epoch": 2.9, "learning_rate": 2.8325998949314536e-08, "loss": 0.199, "step": 357 }, { "epoch": 2.91, "learning_rate": 2.3805301269920754e-08, "loss": 0.1848, "step": 358 }, { "epoch": 2.92, "learning_rate": 1.9676514844987338e-08, "loss": 0.1837, "step": 359 }, { "epoch": 2.93, "learning_rate": 1.593996484624938e-08, "loss": 0.2076, "step": 360 }, { "epoch": 2.93, "learning_rate": 1.2595945553992572e-08, "loss": 0.1755, "step": 361 }, { "epoch": 2.94, "learning_rate": 9.6447203338762e-09, "loss": 0.16, "step": 362 }, { "epoch": 2.95, "learning_rate": 7.0865216161902785e-09, "loss": 0.1927, "step": 363 }, { "epoch": 2.96, "learning_rate": 4.921550877550752e-09, "loss": 0.1608, "step": 364 }, { "epoch": 2.97, "learning_rate": 3.1499786250321904e-09, "loss": 0.148, "step": 365 }, { "epoch": 2.98, "learning_rate": 1.7719443827368677e-09, "loss": 0.2179, "step": 366 }, { "epoch": 2.98, "learning_rate": 7.875566808107638e-10, "loss": 0.21, "step": 367 }, { "epoch": 2.99, "learning_rate": 1.9689304688985667e-10, "loss": 0.1669, "step": 368 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.1506, "step": 369 }, { "epoch": 3.0, "step": 369, "total_flos": 4.2750491234402304e+17, "train_loss": 0.31762939703658344, "train_runtime": 4556.7844, "train_samples_per_second": 6.796, "train_steps_per_second": 0.081 } ], "max_steps": 369, "num_train_epochs": 3, "total_flos": 4.2750491234402304e+17, "trial_name": null, "trial_params": null }