{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3773940937824323, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 1.3771, "step": 1 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 1.6683, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.2e-05, "loss": 1.6333, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.6000000000000003e-05, "loss": 1.4646, "step": 4 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 1.4043, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.4e-05, "loss": 1.6339, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.8000000000000003e-05, "loss": 1.8558, "step": 7 }, { "epoch": 0.0, "learning_rate": 3.2000000000000005e-05, "loss": 1.4294, "step": 8 }, { "epoch": 0.0, "learning_rate": 3.6e-05, "loss": 1.5738, "step": 9 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 1.5463, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.4000000000000006e-05, "loss": 1.6751, "step": 11 }, { "epoch": 0.0, "learning_rate": 4.8e-05, "loss": 1.3243, "step": 12 }, { "epoch": 0.0, "learning_rate": 5.2000000000000004e-05, "loss": 1.6727, "step": 13 }, { "epoch": 0.01, "learning_rate": 5.6000000000000006e-05, "loss": 1.4421, "step": 14 }, { "epoch": 0.01, "learning_rate": 6e-05, "loss": 1.5361, "step": 15 }, { "epoch": 0.01, "learning_rate": 6.400000000000001e-05, "loss": 1.5129, "step": 16 }, { "epoch": 0.01, "learning_rate": 6.800000000000001e-05, "loss": 1.3696, "step": 17 }, { "epoch": 0.01, "learning_rate": 7.2e-05, "loss": 1.4057, "step": 18 }, { "epoch": 0.01, "learning_rate": 7.6e-05, "loss": 1.473, "step": 19 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 1.5723, "step": 20 }, { "epoch": 0.01, "learning_rate": 8.4e-05, "loss": 1.5101, "step": 21 }, { "epoch": 0.01, "learning_rate": 8.800000000000001e-05, "loss": 1.6298, "step": 22 }, { "epoch": 0.01, "learning_rate": 9.200000000000001e-05, "loss": 1.4543, "step": 23 }, { "epoch": 0.01, "learning_rate": 9.6e-05, "loss": 1.4296, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 1.529, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00010400000000000001, "loss": 1.6653, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.00010800000000000001, "loss": 1.6178, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.00011200000000000001, "loss": 1.4393, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.000116, "loss": 1.4561, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 1.5368, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.000124, "loss": 1.5757, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.00012800000000000002, "loss": 1.4725, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.000132, "loss": 1.6976, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.00013600000000000003, "loss": 1.4537, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.00014, "loss": 1.3886, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.000144, "loss": 1.4515, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.000148, "loss": 1.3569, "step": 37 }, { "epoch": 0.01, "learning_rate": 0.000152, "loss": 1.3558, "step": 38 }, { "epoch": 0.01, "learning_rate": 0.00015600000000000002, "loss": 1.5902, "step": 39 }, { "epoch": 0.02, "learning_rate": 0.00016, "loss": 1.3728, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.000164, "loss": 1.4576, "step": 41 }, { "epoch": 0.02, "learning_rate": 0.000168, "loss": 1.3009, "step": 42 }, { "epoch": 0.02, "learning_rate": 0.000172, "loss": 1.4652, "step": 43 }, { "epoch": 0.02, "learning_rate": 0.00017600000000000002, "loss": 1.4355, "step": 44 }, { "epoch": 0.02, "learning_rate": 0.00018, "loss": 1.6715, "step": 45 }, { "epoch": 0.02, "learning_rate": 0.00018400000000000003, "loss": 1.3025, "step": 46 }, { "epoch": 0.02, "learning_rate": 0.000188, "loss": 1.3885, "step": 47 }, { "epoch": 0.02, "learning_rate": 0.000192, "loss": 1.5249, "step": 48 }, { "epoch": 0.02, "learning_rate": 0.000196, "loss": 1.3855, "step": 49 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 1.7058, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.00019999945320801072, "loss": 1.4766, "step": 51 }, { "epoch": 0.02, "learning_rate": 0.0001999978128380225, "loss": 1.4239, "step": 52 }, { "epoch": 0.02, "learning_rate": 0.00019999507890797408, "loss": 1.527, "step": 53 }, { "epoch": 0.02, "learning_rate": 0.0001999912514477634, "loss": 1.4989, "step": 54 }, { "epoch": 0.02, "learning_rate": 0.0001999863304992469, "loss": 1.4985, "step": 55 }, { "epoch": 0.02, "learning_rate": 0.0001999803161162393, "loss": 1.4964, "step": 56 }, { "epoch": 0.02, "learning_rate": 0.0001999732083645129, "loss": 1.3822, "step": 57 }, { "epoch": 0.02, "learning_rate": 0.00019996500732179695, "loss": 1.4295, "step": 58 }, { "epoch": 0.02, "learning_rate": 0.0001999557130777767, "loss": 1.7715, "step": 59 }, { "epoch": 0.02, "learning_rate": 0.00019994532573409262, "loss": 1.3596, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.00019993384540433894, "loss": 1.5048, "step": 61 }, { "epoch": 0.02, "learning_rate": 0.00019992127221406275, "loss": 1.2653, "step": 62 }, { "epoch": 0.02, "learning_rate": 0.00019990760630076237, "loss": 1.3854, "step": 63 }, { "epoch": 0.02, "learning_rate": 0.00019989284781388617, "loss": 1.4032, "step": 64 }, { "epoch": 0.02, "learning_rate": 0.00019987699691483048, "loss": 1.4055, "step": 65 }, { "epoch": 0.02, "learning_rate": 0.00019986005377693825, "loss": 1.4952, "step": 66 }, { "epoch": 0.03, "learning_rate": 0.00019984201858549693, "loss": 1.5547, "step": 67 }, { "epoch": 0.03, "learning_rate": 0.00019982289153773646, "loss": 1.5564, "step": 68 }, { "epoch": 0.03, "learning_rate": 0.00019980267284282717, "loss": 1.5104, "step": 69 }, { "epoch": 0.03, "learning_rate": 0.00019978136272187747, "loss": 1.4518, "step": 70 }, { "epoch": 0.03, "learning_rate": 0.00019975896140793142, "loss": 1.5248, "step": 71 }, { "epoch": 0.03, "learning_rate": 0.00019973546914596623, "loss": 1.3548, "step": 72 }, { "epoch": 0.03, "learning_rate": 0.0001997108861928895, "loss": 1.4315, "step": 73 }, { "epoch": 0.03, "learning_rate": 0.00019968521281753642, "loss": 1.8023, "step": 74 }, { "epoch": 0.03, "learning_rate": 0.000199658449300667, "loss": 1.8757, "step": 75 }, { "epoch": 0.03, "learning_rate": 0.00019963059593496268, "loss": 1.55, "step": 76 }, { "epoch": 0.03, "learning_rate": 0.0001996016530250235, "loss": 1.9263, "step": 77 }, { "epoch": 0.03, "learning_rate": 0.0001995716208873644, "loss": 1.5269, "step": 78 }, { "epoch": 0.03, "learning_rate": 0.00019954049985041204, "loss": 1.6926, "step": 79 }, { "epoch": 0.03, "learning_rate": 0.00019950829025450114, "loss": 1.1818, "step": 80 }, { "epoch": 0.03, "learning_rate": 0.00019947499245187068, "loss": 1.2472, "step": 81 }, { "epoch": 0.03, "learning_rate": 0.00019944060680666002, "loss": 1.6255, "step": 82 }, { "epoch": 0.03, "learning_rate": 0.00019940513369490516, "loss": 1.5712, "step": 83 }, { "epoch": 0.03, "learning_rate": 0.0001993685735045343, "loss": 1.3684, "step": 84 }, { "epoch": 0.03, "learning_rate": 0.00019933092663536382, "loss": 1.462, "step": 85 }, { "epoch": 0.03, "learning_rate": 0.00019929219349909392, "loss": 1.5298, "step": 86 }, { "epoch": 0.03, "learning_rate": 0.0001992523745193039, "loss": 1.331, "step": 87 }, { "epoch": 0.03, "learning_rate": 0.0001992114701314478, "loss": 1.7856, "step": 88 }, { "epoch": 0.03, "learning_rate": 0.0001991694807828494, "loss": 1.5808, "step": 89 }, { "epoch": 0.03, "learning_rate": 0.00019912640693269752, "loss": 1.4136, "step": 90 }, { "epoch": 0.03, "learning_rate": 0.0001990822490520409, "loss": 1.4673, "step": 91 }, { "epoch": 0.03, "learning_rate": 0.000199037007623783, "loss": 1.6898, "step": 92 }, { "epoch": 0.04, "learning_rate": 0.00019899068314267688, "loss": 1.6261, "step": 93 }, { "epoch": 0.04, "learning_rate": 0.0001989432761153196, "loss": 1.4805, "step": 94 }, { "epoch": 0.04, "learning_rate": 0.00019889478706014687, "loss": 1.5018, "step": 95 }, { "epoch": 0.04, "learning_rate": 0.00019884521650742715, "loss": 1.6089, "step": 96 }, { "epoch": 0.04, "learning_rate": 0.00019879456499925614, "loss": 1.6487, "step": 97 }, { "epoch": 0.04, "learning_rate": 0.00019874283308955057, "loss": 1.517, "step": 98 }, { "epoch": 0.04, "learning_rate": 0.00019869002134404235, "loss": 1.5386, "step": 99 }, { "epoch": 0.04, "learning_rate": 0.00019863613034027224, "loss": 1.4188, "step": 100 }, { "epoch": 0.04, "learning_rate": 0.00019858116066758362, "loss": 1.4651, "step": 101 }, { "epoch": 0.04, "learning_rate": 0.00019852511292711608, "loss": 1.3594, "step": 102 }, { "epoch": 0.04, "learning_rate": 0.00019846798773179866, "loss": 1.4322, "step": 103 }, { "epoch": 0.04, "learning_rate": 0.0001984097857063434, "loss": 1.4874, "step": 104 }, { "epoch": 0.04, "learning_rate": 0.00019835050748723824, "loss": 1.3447, "step": 105 }, { "epoch": 0.04, "learning_rate": 0.00019829015372274038, "loss": 1.4409, "step": 106 }, { "epoch": 0.04, "learning_rate": 0.0001982287250728689, "loss": 1.4706, "step": 107 }, { "epoch": 0.04, "learning_rate": 0.0001981662222093976, "loss": 1.2554, "step": 108 }, { "epoch": 0.04, "learning_rate": 0.00019810264581584787, "loss": 1.4362, "step": 109 }, { "epoch": 0.04, "learning_rate": 0.00019803799658748094, "loss": 1.5449, "step": 110 }, { "epoch": 0.04, "learning_rate": 0.0001979722752312904, "loss": 1.5537, "step": 111 }, { "epoch": 0.04, "learning_rate": 0.00019790548246599447, "loss": 1.77, "step": 112 }, { "epoch": 0.04, "learning_rate": 0.00019783761902202813, "loss": 1.474, "step": 113 }, { "epoch": 0.04, "learning_rate": 0.00019776868564153516, "loss": 1.5617, "step": 114 }, { "epoch": 0.04, "learning_rate": 0.00019769868307835994, "loss": 1.1664, "step": 115 }, { "epoch": 0.04, "learning_rate": 0.00019762761209803927, "loss": 1.2867, "step": 116 }, { "epoch": 0.04, "learning_rate": 0.00019755547347779403, "loss": 1.5673, "step": 117 }, { "epoch": 0.04, "learning_rate": 0.0001974822680065206, "loss": 1.5614, "step": 118 }, { "epoch": 0.04, "learning_rate": 0.00019740799648478233, "loss": 1.5865, "step": 119 }, { "epoch": 0.05, "learning_rate": 0.0001973326597248006, "loss": 1.4008, "step": 120 }, { "epoch": 0.05, "learning_rate": 0.00019725625855044617, "loss": 1.7168, "step": 121 }, { "epoch": 0.05, "learning_rate": 0.00019717879379723012, "loss": 1.4881, "step": 122 }, { "epoch": 0.05, "learning_rate": 0.0001971002663122945, "loss": 1.7234, "step": 123 }, { "epoch": 0.05, "learning_rate": 0.00019702067695440332, "loss": 1.5824, "step": 124 }, { "epoch": 0.05, "learning_rate": 0.00019694002659393305, "loss": 1.4202, "step": 125 }, { "epoch": 0.05, "learning_rate": 0.0001968583161128631, "loss": 1.3443, "step": 126 }, { "epoch": 0.05, "learning_rate": 0.00019677554640476624, "loss": 1.4996, "step": 127 }, { "epoch": 0.05, "learning_rate": 0.00019669171837479873, "loss": 1.6795, "step": 128 }, { "epoch": 0.05, "learning_rate": 0.00019660683293969041, "loss": 1.4691, "step": 129 }, { "epoch": 0.05, "learning_rate": 0.00019652089102773488, "loss": 1.6054, "step": 130 }, { "epoch": 0.05, "learning_rate": 0.00019643389357877907, "loss": 1.5673, "step": 131 }, { "epoch": 0.05, "learning_rate": 0.00019634584154421317, "loss": 1.4519, "step": 132 }, { "epoch": 0.05, "learning_rate": 0.00019625673588696008, "loss": 1.6135, "step": 133 }, { "epoch": 0.05, "learning_rate": 0.00019616657758146503, "loss": 1.4928, "step": 134 }, { "epoch": 0.05, "learning_rate": 0.00019607536761368484, "loss": 1.4667, "step": 135 }, { "epoch": 0.05, "learning_rate": 0.00019598310698107702, "loss": 1.5076, "step": 136 }, { "epoch": 0.05, "learning_rate": 0.0001958897966925891, "loss": 1.6252, "step": 137 }, { "epoch": 0.05, "learning_rate": 0.0001957954377686475, "loss": 1.5067, "step": 138 }, { "epoch": 0.05, "learning_rate": 0.00019570003124114619, "loss": 1.4797, "step": 139 }, { "epoch": 0.05, "learning_rate": 0.00019560357815343577, "loss": 1.5738, "step": 140 }, { "epoch": 0.05, "learning_rate": 0.0001955060795603117, "loss": 1.5238, "step": 141 }, { "epoch": 0.05, "learning_rate": 0.000195407536528003, "loss": 1.713, "step": 142 }, { "epoch": 0.05, "learning_rate": 0.00019530795013416046, "loss": 1.6015, "step": 143 }, { "epoch": 0.05, "learning_rate": 0.00019520732146784491, "loss": 1.6067, "step": 144 }, { "epoch": 0.05, "learning_rate": 0.00019510565162951537, "loss": 1.6063, "step": 145 }, { "epoch": 0.06, "learning_rate": 0.00019500294173101687, "loss": 1.6428, "step": 146 }, { "epoch": 0.06, "learning_rate": 0.00019489919289556845, "loss": 1.4835, "step": 147 }, { "epoch": 0.06, "learning_rate": 0.0001947944062577507, "loss": 1.491, "step": 148 }, { "epoch": 0.06, "learning_rate": 0.0001946885829634935, "loss": 1.4183, "step": 149 }, { "epoch": 0.06, "learning_rate": 0.00019458172417006347, "loss": 1.4366, "step": 150 }, { "epoch": 0.06, "learning_rate": 0.00019447383104605125, "loss": 1.5319, "step": 151 }, { "epoch": 0.06, "learning_rate": 0.00019436490477135878, "loss": 1.4926, "step": 152 }, { "epoch": 0.06, "learning_rate": 0.0001942549465371863, "loss": 1.7489, "step": 153 }, { "epoch": 0.06, "learning_rate": 0.00019414395754601947, "loss": 1.4453, "step": 154 }, { "epoch": 0.06, "learning_rate": 0.00019403193901161613, "loss": 1.4814, "step": 155 }, { "epoch": 0.06, "learning_rate": 0.00019391889215899299, "loss": 1.6483, "step": 156 }, { "epoch": 0.06, "learning_rate": 0.00019380481822441235, "loss": 1.5119, "step": 157 }, { "epoch": 0.06, "learning_rate": 0.00019368971845536845, "loss": 1.318, "step": 158 }, { "epoch": 0.06, "learning_rate": 0.000193573594110574, "loss": 1.4537, "step": 159 }, { "epoch": 0.06, "learning_rate": 0.0001934564464599461, "loss": 1.5204, "step": 160 }, { "epoch": 0.06, "learning_rate": 0.0001933382767845928, "loss": 1.4601, "step": 161 }, { "epoch": 0.06, "learning_rate": 0.00019321908637679865, "loss": 1.5626, "step": 162 }, { "epoch": 0.06, "learning_rate": 0.00019309887654001096, "loss": 1.4477, "step": 163 }, { "epoch": 0.06, "learning_rate": 0.00019297764858882514, "loss": 1.5851, "step": 164 }, { "epoch": 0.06, "learning_rate": 0.00019285540384897073, "loss": 1.555, "step": 165 }, { "epoch": 0.06, "learning_rate": 0.00019273214365729655, "loss": 1.4885, "step": 166 }, { "epoch": 0.06, "learning_rate": 0.00019260786936175635, "loss": 1.6686, "step": 167 }, { "epoch": 0.06, "learning_rate": 0.00019248258232139388, "loss": 1.7091, "step": 168 }, { "epoch": 0.06, "learning_rate": 0.00019235628390632822, "loss": 1.549, "step": 169 }, { "epoch": 0.06, "learning_rate": 0.00019222897549773848, "loss": 1.5747, "step": 170 }, { "epoch": 0.06, "learning_rate": 0.00019210065848784913, "loss": 1.8573, "step": 171 }, { "epoch": 0.06, "learning_rate": 0.00019197133427991436, "loss": 1.5019, "step": 172 }, { "epoch": 0.07, "learning_rate": 0.000191841004288203, "loss": 1.5118, "step": 173 }, { "epoch": 0.07, "learning_rate": 0.000191709669937983, "loss": 1.5818, "step": 174 }, { "epoch": 0.07, "learning_rate": 0.00019157733266550575, "loss": 1.4686, "step": 175 }, { "epoch": 0.07, "learning_rate": 0.00019144399391799043, "loss": 1.3446, "step": 176 }, { "epoch": 0.07, "learning_rate": 0.0001913096551536083, "loss": 1.5599, "step": 177 }, { "epoch": 0.07, "learning_rate": 0.00019117431784146645, "loss": 1.3847, "step": 178 }, { "epoch": 0.07, "learning_rate": 0.00019103798346159213, "loss": 1.3977, "step": 179 }, { "epoch": 0.07, "learning_rate": 0.00019090065350491626, "loss": 1.4674, "step": 180 }, { "epoch": 0.07, "learning_rate": 0.00019076232947325722, "loss": 1.8562, "step": 181 }, { "epoch": 0.07, "learning_rate": 0.00019062301287930446, "loss": 1.4907, "step": 182 }, { "epoch": 0.07, "learning_rate": 0.00019048270524660196, "loss": 1.5626, "step": 183 }, { "epoch": 0.07, "learning_rate": 0.0001903414081095315, "loss": 1.8103, "step": 184 }, { "epoch": 0.07, "learning_rate": 0.00019019912301329592, "loss": 1.5239, "step": 185 }, { "epoch": 0.07, "learning_rate": 0.00019005585151390223, "loss": 1.8052, "step": 186 }, { "epoch": 0.07, "learning_rate": 0.0001899115951781446, "loss": 1.4232, "step": 187 }, { "epoch": 0.07, "learning_rate": 0.00018976635558358722, "loss": 1.6923, "step": 188 }, { "epoch": 0.07, "learning_rate": 0.00018962013431854702, "loss": 1.3907, "step": 189 }, { "epoch": 0.07, "learning_rate": 0.00018947293298207635, "loss": 1.5926, "step": 190 }, { "epoch": 0.07, "learning_rate": 0.0001893247531839454, "loss": 1.4904, "step": 191 }, { "epoch": 0.07, "learning_rate": 0.00018917559654462474, "loss": 1.3902, "step": 192 }, { "epoch": 0.07, "learning_rate": 0.00018902546469526743, "loss": 1.8517, "step": 193 }, { "epoch": 0.07, "learning_rate": 0.00018887435927769137, "loss": 1.5293, "step": 194 }, { "epoch": 0.07, "learning_rate": 0.0001887222819443612, "loss": 1.4155, "step": 195 }, { "epoch": 0.07, "learning_rate": 0.00018856923435837022, "loss": 1.4577, "step": 196 }, { "epoch": 0.07, "learning_rate": 0.00018841521819342236, "loss": 1.5775, "step": 197 }, { "epoch": 0.07, "learning_rate": 0.0001882602351338137, "loss": 1.4697, "step": 198 }, { "epoch": 0.08, "learning_rate": 0.00018810428687441414, "loss": 1.69, "step": 199 }, { "epoch": 0.08, "learning_rate": 0.0001879473751206489, "loss": 1.3792, "step": 200 }, { "epoch": 0.08, "learning_rate": 0.00018778950158847976, "loss": 1.3913, "step": 201 }, { "epoch": 0.08, "learning_rate": 0.00018763066800438636, "loss": 1.677, "step": 202 }, { "epoch": 0.08, "learning_rate": 0.00018747087610534736, "loss": 1.6473, "step": 203 }, { "epoch": 0.08, "learning_rate": 0.00018731012763882133, "loss": 1.5274, "step": 204 }, { "epoch": 0.08, "learning_rate": 0.00018714842436272773, "loss": 1.6764, "step": 205 }, { "epoch": 0.08, "learning_rate": 0.00018698576804542777, "loss": 1.4051, "step": 206 }, { "epoch": 0.08, "learning_rate": 0.00018682216046570475, "loss": 1.4622, "step": 207 }, { "epoch": 0.08, "learning_rate": 0.00018665760341274505, "loss": 1.7445, "step": 208 }, { "epoch": 0.08, "learning_rate": 0.0001864920986861182, "loss": 1.6491, "step": 209 }, { "epoch": 0.08, "learning_rate": 0.00018632564809575742, "loss": 1.8047, "step": 210 }, { "epoch": 0.08, "learning_rate": 0.0001861582534619396, "loss": 1.6011, "step": 211 }, { "epoch": 0.08, "learning_rate": 0.00018598991661526572, "loss": 1.4319, "step": 212 }, { "epoch": 0.08, "learning_rate": 0.0001858206393966405, "loss": 1.6013, "step": 213 }, { "epoch": 0.08, "learning_rate": 0.00018565042365725258, "loss": 1.5379, "step": 214 }, { "epoch": 0.08, "learning_rate": 0.0001854792712585539, "loss": 1.5043, "step": 215 }, { "epoch": 0.08, "learning_rate": 0.00018530718407223974, "loss": 1.8997, "step": 216 }, { "epoch": 0.08, "learning_rate": 0.00018513416398022802, "loss": 1.6282, "step": 217 }, { "epoch": 0.08, "learning_rate": 0.0001849602128746387, "loss": 1.6524, "step": 218 }, { "epoch": 0.08, "learning_rate": 0.00018478533265777318, "loss": 1.6837, "step": 219 }, { "epoch": 0.08, "learning_rate": 0.00018460952524209355, "loss": 1.3932, "step": 220 }, { "epoch": 0.08, "learning_rate": 0.00018443279255020152, "loss": 1.5441, "step": 221 }, { "epoch": 0.08, "learning_rate": 0.00018425513651481747, "loss": 1.4939, "step": 222 }, { "epoch": 0.08, "learning_rate": 0.0001840765590787594, "loss": 1.6861, "step": 223 }, { "epoch": 0.08, "learning_rate": 0.00018389706219492147, "loss": 1.6589, "step": 224 }, { "epoch": 0.08, "learning_rate": 0.00018371664782625287, "loss": 1.4755, "step": 225 }, { "epoch": 0.09, "learning_rate": 0.00018353531794573625, "loss": 1.5676, "step": 226 }, { "epoch": 0.09, "learning_rate": 0.0001833530745363661, "loss": 1.6569, "step": 227 }, { "epoch": 0.09, "learning_rate": 0.00018316991959112716, "loss": 1.6602, "step": 228 }, { "epoch": 0.09, "learning_rate": 0.0001829858551129726, "loss": 1.5835, "step": 229 }, { "epoch": 0.09, "learning_rate": 0.00018280088311480201, "loss": 1.4652, "step": 230 }, { "epoch": 0.09, "learning_rate": 0.00018261500561943955, "loss": 1.3955, "step": 231 }, { "epoch": 0.09, "learning_rate": 0.00018242822465961176, "loss": 1.7412, "step": 232 }, { "epoch": 0.09, "learning_rate": 0.00018224054227792524, "loss": 1.6381, "step": 233 }, { "epoch": 0.09, "learning_rate": 0.00018205196052684445, "loss": 1.6402, "step": 234 }, { "epoch": 0.09, "learning_rate": 0.00018186248146866927, "loss": 1.6209, "step": 235 }, { "epoch": 0.09, "learning_rate": 0.00018167210717551224, "loss": 1.5313, "step": 236 }, { "epoch": 0.09, "learning_rate": 0.00018148083972927616, "loss": 1.7225, "step": 237 }, { "epoch": 0.09, "learning_rate": 0.00018128868122163123, "loss": 1.615, "step": 238 }, { "epoch": 0.09, "learning_rate": 0.000181095633753992, "loss": 1.3971, "step": 239 }, { "epoch": 0.09, "learning_rate": 0.00018090169943749476, "loss": 1.6964, "step": 240 }, { "epoch": 0.09, "learning_rate": 0.00018070688039297403, "loss": 1.6681, "step": 241 }, { "epoch": 0.09, "learning_rate": 0.00018051117875093976, "loss": 1.674, "step": 242 }, { "epoch": 0.09, "learning_rate": 0.00018031459665155363, "loss": 1.3495, "step": 243 }, { "epoch": 0.09, "learning_rate": 0.00018011713624460608, "loss": 1.4248, "step": 244 }, { "epoch": 0.09, "learning_rate": 0.0001799187996894925, "loss": 1.7693, "step": 245 }, { "epoch": 0.09, "learning_rate": 0.0001797195891551896, "loss": 1.7993, "step": 246 }, { "epoch": 0.09, "learning_rate": 0.00017951950682023191, "loss": 1.5703, "step": 247 }, { "epoch": 0.09, "learning_rate": 0.00017931855487268782, "loss": 1.5827, "step": 248 }, { "epoch": 0.09, "learning_rate": 0.00017911673551013551, "loss": 1.4503, "step": 249 }, { "epoch": 0.09, "learning_rate": 0.00017891405093963938, "loss": 1.6982, "step": 250 }, { "epoch": 0.09, "learning_rate": 0.00017871050337772525, "loss": 1.5524, "step": 251 }, { "epoch": 0.1, "learning_rate": 0.0001785060950503568, "loss": 1.6975, "step": 252 }, { "epoch": 0.1, "learning_rate": 0.0001783008281929106, "loss": 1.6971, "step": 253 }, { "epoch": 0.1, "learning_rate": 0.0001780947050501522, "loss": 1.7433, "step": 254 }, { "epoch": 0.1, "learning_rate": 0.00017788772787621126, "loss": 1.587, "step": 255 }, { "epoch": 0.1, "learning_rate": 0.00017767989893455698, "loss": 1.4709, "step": 256 }, { "epoch": 0.1, "learning_rate": 0.00017747122049797335, "loss": 1.4217, "step": 257 }, { "epoch": 0.1, "learning_rate": 0.00017726169484853438, "loss": 1.7229, "step": 258 }, { "epoch": 0.1, "learning_rate": 0.00017705132427757895, "loss": 1.7149, "step": 259 }, { "epoch": 0.1, "learning_rate": 0.00017684011108568592, "loss": 1.6936, "step": 260 }, { "epoch": 0.1, "learning_rate": 0.00017662805758264893, "loss": 1.5013, "step": 261 }, { "epoch": 0.1, "learning_rate": 0.00017641516608745114, "loss": 1.6229, "step": 262 }, { "epoch": 0.1, "learning_rate": 0.00017620143892823977, "loss": 1.3457, "step": 263 }, { "epoch": 0.1, "learning_rate": 0.00017598687844230088, "loss": 1.5029, "step": 264 }, { "epoch": 0.1, "learning_rate": 0.0001757714869760335, "loss": 1.4726, "step": 265 }, { "epoch": 0.1, "learning_rate": 0.0001755552668849242, "loss": 1.7338, "step": 266 }, { "epoch": 0.1, "learning_rate": 0.00017533822053352128, "loss": 1.6824, "step": 267 }, { "epoch": 0.1, "learning_rate": 0.00017512035029540885, "loss": 1.7167, "step": 268 }, { "epoch": 0.1, "learning_rate": 0.00017490165855318094, "loss": 1.6222, "step": 269 }, { "epoch": 0.1, "learning_rate": 0.0001746821476984154, "loss": 1.5473, "step": 270 }, { "epoch": 0.1, "learning_rate": 0.00017446182013164778, "loss": 1.3884, "step": 271 }, { "epoch": 0.1, "learning_rate": 0.000174240678262345, "loss": 1.7926, "step": 272 }, { "epoch": 0.1, "learning_rate": 0.00017401872450887917, "loss": 1.5833, "step": 273 }, { "epoch": 0.1, "learning_rate": 0.00017379596129850098, "loss": 1.7576, "step": 274 }, { "epoch": 0.1, "learning_rate": 0.00017357239106731317, "loss": 1.7172, "step": 275 }, { "epoch": 0.1, "learning_rate": 0.000173348016260244, "loss": 1.4629, "step": 276 }, { "epoch": 0.1, "learning_rate": 0.00017312283933102038, "loss": 1.5903, "step": 277 }, { "epoch": 0.1, "learning_rate": 0.00017289686274214118, "loss": 1.8602, "step": 278 }, { "epoch": 0.11, "learning_rate": 0.0001726700889648501, "loss": 1.6213, "step": 279 }, { "epoch": 0.11, "learning_rate": 0.00017244252047910892, "loss": 1.7249, "step": 280 }, { "epoch": 0.11, "learning_rate": 0.00017221415977357007, "loss": 1.6502, "step": 281 }, { "epoch": 0.11, "learning_rate": 0.00017198500934554966, "loss": 1.5524, "step": 282 }, { "epoch": 0.11, "learning_rate": 0.0001717550717010001, "loss": 1.4555, "step": 283 }, { "epoch": 0.11, "learning_rate": 0.00017152434935448256, "loss": 1.5179, "step": 284 }, { "epoch": 0.11, "learning_rate": 0.00017129284482913972, "loss": 1.8455, "step": 285 }, { "epoch": 0.11, "learning_rate": 0.00017106056065666793, "loss": 1.3196, "step": 286 }, { "epoch": 0.11, "learning_rate": 0.00017082749937728973, "loss": 1.5119, "step": 287 }, { "epoch": 0.11, "learning_rate": 0.0001705936635397259, "loss": 1.5426, "step": 288 }, { "epoch": 0.11, "learning_rate": 0.0001703590557011677, "loss": 1.5863, "step": 289 }, { "epoch": 0.11, "learning_rate": 0.00017012367842724887, "loss": 1.6083, "step": 290 }, { "epoch": 0.11, "learning_rate": 0.00016988753429201755, "loss": 1.4486, "step": 291 }, { "epoch": 0.11, "learning_rate": 0.00016965062587790823, "loss": 1.5676, "step": 292 }, { "epoch": 0.11, "learning_rate": 0.0001694129557757133, "loss": 1.6153, "step": 293 }, { "epoch": 0.11, "learning_rate": 0.00016917452658455495, "loss": 1.8382, "step": 294 }, { "epoch": 0.11, "learning_rate": 0.0001689353409118566, "loss": 1.7203, "step": 295 }, { "epoch": 0.11, "learning_rate": 0.00016869540137331445, "loss": 1.8497, "step": 296 }, { "epoch": 0.11, "learning_rate": 0.00016845471059286887, "loss": 1.6448, "step": 297 }, { "epoch": 0.11, "learning_rate": 0.00016821327120267567, "loss": 1.7336, "step": 298 }, { "epoch": 0.11, "learning_rate": 0.00016797108584307732, "loss": 1.9354, "step": 299 }, { "epoch": 0.11, "learning_rate": 0.00016772815716257412, "loss": 1.388, "step": 300 }, { "epoch": 0.11, "learning_rate": 0.0001674844878177952, "loss": 1.7663, "step": 301 }, { "epoch": 0.11, "learning_rate": 0.00016724008047346947, "loss": 1.6741, "step": 302 }, { "epoch": 0.11, "learning_rate": 0.0001669949378023965, "loss": 1.4152, "step": 303 }, { "epoch": 0.11, "learning_rate": 0.00016674906248541726, "loss": 1.3268, "step": 304 }, { "epoch": 0.12, "learning_rate": 0.0001665024572113848, "loss": 1.7646, "step": 305 }, { "epoch": 0.12, "learning_rate": 0.000166255124677135, "loss": 1.4683, "step": 306 }, { "epoch": 0.12, "learning_rate": 0.00016600706758745668, "loss": 1.7198, "step": 307 }, { "epoch": 0.12, "learning_rate": 0.00016575828865506245, "loss": 1.5474, "step": 308 }, { "epoch": 0.12, "learning_rate": 0.00016550879060055895, "loss": 1.5883, "step": 309 }, { "epoch": 0.12, "learning_rate": 0.00016525857615241687, "loss": 1.7022, "step": 310 }, { "epoch": 0.12, "learning_rate": 0.0001650076480469413, "loss": 1.6334, "step": 311 }, { "epoch": 0.12, "learning_rate": 0.0001647560090282419, "loss": 1.6134, "step": 312 }, { "epoch": 0.12, "learning_rate": 0.00016450366184820255, "loss": 1.5379, "step": 313 }, { "epoch": 0.12, "learning_rate": 0.00016425060926645167, "loss": 1.644, "step": 314 }, { "epoch": 0.12, "learning_rate": 0.00016399685405033167, "loss": 1.7672, "step": 315 }, { "epoch": 0.12, "learning_rate": 0.000163742398974869, "loss": 1.6279, "step": 316 }, { "epoch": 0.12, "learning_rate": 0.00016348724682274353, "loss": 1.5003, "step": 317 }, { "epoch": 0.12, "learning_rate": 0.00016323140038425842, "loss": 1.4426, "step": 318 }, { "epoch": 0.12, "learning_rate": 0.00016297486245730927, "loss": 1.5677, "step": 319 }, { "epoch": 0.12, "learning_rate": 0.0001627176358473537, "loss": 1.6226, "step": 320 }, { "epoch": 0.12, "learning_rate": 0.0001624597233673808, "loss": 1.7272, "step": 321 }, { "epoch": 0.12, "learning_rate": 0.0001622011278378801, "loss": 1.6675, "step": 322 }, { "epoch": 0.12, "learning_rate": 0.00016194185208681083, "loss": 1.3257, "step": 323 }, { "epoch": 0.12, "learning_rate": 0.0001616818989495711, "loss": 1.777, "step": 324 }, { "epoch": 0.12, "learning_rate": 0.0001614212712689668, "loss": 1.5283, "step": 325 }, { "epoch": 0.12, "learning_rate": 0.00016115997189518043, "loss": 1.5958, "step": 326 }, { "epoch": 0.12, "learning_rate": 0.00016089800368574014, "loss": 1.5726, "step": 327 }, { "epoch": 0.12, "learning_rate": 0.00016063536950548826, "loss": 1.4865, "step": 328 }, { "epoch": 0.12, "learning_rate": 0.0001603720722265501, "loss": 1.4585, "step": 329 }, { "epoch": 0.12, "learning_rate": 0.00016010811472830252, "loss": 1.501, "step": 330 }, { "epoch": 0.12, "learning_rate": 0.00015984349989734247, "loss": 1.4846, "step": 331 }, { "epoch": 0.13, "learning_rate": 0.0001595782306274553, "loss": 1.5386, "step": 332 }, { "epoch": 0.13, "learning_rate": 0.00015931230981958326, "loss": 1.4934, "step": 333 }, { "epoch": 0.13, "learning_rate": 0.0001590457403817937, "loss": 1.7925, "step": 334 }, { "epoch": 0.13, "learning_rate": 0.00015877852522924732, "loss": 1.7928, "step": 335 }, { "epoch": 0.13, "learning_rate": 0.00015851066728416618, "loss": 1.6508, "step": 336 }, { "epoch": 0.13, "learning_rate": 0.00015824216947580183, "loss": 1.6663, "step": 337 }, { "epoch": 0.13, "learning_rate": 0.00015797303474040332, "loss": 1.8191, "step": 338 }, { "epoch": 0.13, "learning_rate": 0.000157703266021185, "loss": 1.5659, "step": 339 }, { "epoch": 0.13, "learning_rate": 0.00015743286626829437, "loss": 1.6147, "step": 340 }, { "epoch": 0.13, "learning_rate": 0.00015716183843877976, "loss": 1.6888, "step": 341 }, { "epoch": 0.13, "learning_rate": 0.00015689018549655813, "loss": 1.3531, "step": 342 }, { "epoch": 0.13, "learning_rate": 0.00015661791041238256, "loss": 1.4753, "step": 343 }, { "epoch": 0.13, "learning_rate": 0.00015634501616380967, "loss": 1.6138, "step": 344 }, { "epoch": 0.13, "learning_rate": 0.0001560715057351673, "loss": 1.4431, "step": 345 }, { "epoch": 0.13, "learning_rate": 0.00015579738211752165, "loss": 1.63, "step": 346 }, { "epoch": 0.13, "learning_rate": 0.00015552264830864468, "loss": 1.9748, "step": 347 }, { "epoch": 0.13, "learning_rate": 0.00015524730731298134, "loss": 1.5779, "step": 348 }, { "epoch": 0.13, "learning_rate": 0.00015497136214161664, "loss": 1.8374, "step": 349 }, { "epoch": 0.13, "learning_rate": 0.00015469481581224272, "loss": 1.4975, "step": 350 }, { "epoch": 0.13, "learning_rate": 0.00015441767134912596, "loss": 1.7641, "step": 351 }, { "epoch": 0.13, "learning_rate": 0.0001541399317830738, "loss": 1.4802, "step": 352 }, { "epoch": 0.13, "learning_rate": 0.00015386160015140168, "loss": 1.4818, "step": 353 }, { "epoch": 0.13, "learning_rate": 0.00015358267949789966, "loss": 1.6149, "step": 354 }, { "epoch": 0.13, "learning_rate": 0.0001533031728727994, "loss": 1.6037, "step": 355 }, { "epoch": 0.13, "learning_rate": 0.0001530230833327405, "loss": 1.555, "step": 356 }, { "epoch": 0.13, "learning_rate": 0.00015274241394073733, "loss": 1.8756, "step": 357 }, { "epoch": 0.14, "learning_rate": 0.00015246116776614538, "loss": 1.7481, "step": 358 }, { "epoch": 0.14, "learning_rate": 0.00015217934788462774, "loss": 1.7517, "step": 359 }, { "epoch": 0.14, "learning_rate": 0.00015189695737812152, "loss": 1.6353, "step": 360 }, { "epoch": 0.14, "learning_rate": 0.00015161399933480402, "loss": 1.5501, "step": 361 }, { "epoch": 0.14, "learning_rate": 0.00015133047684905916, "loss": 1.5374, "step": 362 }, { "epoch": 0.14, "learning_rate": 0.00015104639302144327, "loss": 1.5304, "step": 363 }, { "epoch": 0.14, "learning_rate": 0.0001507617509586517, "loss": 1.4024, "step": 364 }, { "epoch": 0.14, "learning_rate": 0.0001504765537734844, "loss": 1.6756, "step": 365 }, { "epoch": 0.14, "learning_rate": 0.00015019080458481202, "loss": 1.7731, "step": 366 }, { "epoch": 0.14, "learning_rate": 0.00014990450651754207, "loss": 1.7577, "step": 367 }, { "epoch": 0.14, "learning_rate": 0.00014961766270258422, "loss": 1.6514, "step": 368 }, { "epoch": 0.14, "learning_rate": 0.0001493302762768165, "loss": 1.5114, "step": 369 }, { "epoch": 0.14, "learning_rate": 0.00014904235038305083, "loss": 1.5046, "step": 370 }, { "epoch": 0.14, "learning_rate": 0.00014875388816999865, "loss": 1.6181, "step": 371 }, { "epoch": 0.14, "learning_rate": 0.00014846489279223652, "loss": 1.622, "step": 372 }, { "epoch": 0.14, "learning_rate": 0.00014817536741017152, "loss": 1.5497, "step": 373 }, { "epoch": 0.14, "learning_rate": 0.00014788531519000696, "loss": 1.4466, "step": 374 }, { "epoch": 0.14, "learning_rate": 0.00014759473930370736, "loss": 1.6712, "step": 375 }, { "epoch": 0.14, "learning_rate": 0.0001473036429289641, "loss": 1.6282, "step": 376 }, { "epoch": 0.14, "learning_rate": 0.0001470120292491605, "loss": 1.7498, "step": 377 }, { "epoch": 0.14, "learning_rate": 0.00014671990145333696, "loss": 1.7531, "step": 378 }, { "epoch": 0.14, "learning_rate": 0.0001464272627361564, "loss": 1.718, "step": 379 }, { "epoch": 0.14, "learning_rate": 0.0001461341162978688, "loss": 1.4252, "step": 380 }, { "epoch": 0.14, "learning_rate": 0.0001458404653442767, "loss": 1.6305, "step": 381 }, { "epoch": 0.14, "learning_rate": 0.00014554631308669994, "loss": 1.6279, "step": 382 }, { "epoch": 0.14, "learning_rate": 0.00014525166274194037, "loss": 1.6559, "step": 383 }, { "epoch": 0.14, "learning_rate": 0.00014495651753224705, "loss": 1.6125, "step": 384 }, { "epoch": 0.15, "learning_rate": 0.00014466088068528068, "loss": 1.3162, "step": 385 }, { "epoch": 0.15, "learning_rate": 0.00014436475543407843, "loss": 1.4924, "step": 386 }, { "epoch": 0.15, "learning_rate": 0.00014406814501701857, "loss": 1.7356, "step": 387 }, { "epoch": 0.15, "learning_rate": 0.00014377105267778518, "loss": 1.7235, "step": 388 }, { "epoch": 0.15, "learning_rate": 0.00014347348166533248, "loss": 1.6112, "step": 389 }, { "epoch": 0.15, "learning_rate": 0.00014317543523384928, "loss": 1.7354, "step": 390 }, { "epoch": 0.15, "learning_rate": 0.00014287691664272375, "loss": 1.7269, "step": 391 }, { "epoch": 0.15, "learning_rate": 0.00014257792915650728, "loss": 1.5006, "step": 392 }, { "epoch": 0.15, "learning_rate": 0.00014227847604487913, "loss": 1.5781, "step": 393 }, { "epoch": 0.15, "learning_rate": 0.0001419785605826106, "loss": 1.4548, "step": 394 }, { "epoch": 0.15, "learning_rate": 0.00014167818604952906, "loss": 1.63, "step": 395 }, { "epoch": 0.15, "learning_rate": 0.00014137735573048233, "loss": 1.5867, "step": 396 }, { "epoch": 0.15, "learning_rate": 0.00014107607291530256, "loss": 1.5673, "step": 397 }, { "epoch": 0.15, "learning_rate": 0.00014077434089877037, "loss": 1.6088, "step": 398 }, { "epoch": 0.15, "learning_rate": 0.00014047216298057873, "loss": 1.7011, "step": 399 }, { "epoch": 0.15, "learning_rate": 0.00014016954246529696, "loss": 1.6084, "step": 400 }, { "epoch": 0.15, "learning_rate": 0.00013986648266233452, "loss": 1.7252, "step": 401 }, { "epoch": 0.15, "learning_rate": 0.00013956298688590484, "loss": 1.6078, "step": 402 }, { "epoch": 0.15, "learning_rate": 0.00013925905845498914, "loss": 1.7508, "step": 403 }, { "epoch": 0.15, "learning_rate": 0.00013895470069330004, "loss": 1.3831, "step": 404 }, { "epoch": 0.15, "learning_rate": 0.00013864991692924523, "loss": 1.5327, "step": 405 }, { "epoch": 0.15, "learning_rate": 0.00013834471049589117, "loss": 1.4633, "step": 406 }, { "epoch": 0.15, "learning_rate": 0.00013803908473092647, "loss": 1.6618, "step": 407 }, { "epoch": 0.15, "learning_rate": 0.00013773304297662559, "loss": 1.7644, "step": 408 }, { "epoch": 0.15, "learning_rate": 0.00013742658857981204, "loss": 1.4573, "step": 409 }, { "epoch": 0.15, "learning_rate": 0.00013711972489182208, "loss": 1.5862, "step": 410 }, { "epoch": 0.16, "learning_rate": 0.00013681245526846783, "loss": 1.5642, "step": 411 }, { "epoch": 0.16, "learning_rate": 0.00013650478307000057, "loss": 1.6751, "step": 412 }, { "epoch": 0.16, "learning_rate": 0.0001361967116610743, "loss": 1.6486, "step": 413 }, { "epoch": 0.16, "learning_rate": 0.00013588824441070852, "loss": 1.6286, "step": 414 }, { "epoch": 0.16, "learning_rate": 0.00013557938469225167, "loss": 1.5833, "step": 415 }, { "epoch": 0.16, "learning_rate": 0.00013527013588334415, "loss": 1.6383, "step": 416 }, { "epoch": 0.16, "learning_rate": 0.00013496050136588134, "loss": 1.7988, "step": 417 }, { "epoch": 0.16, "learning_rate": 0.00013465048452597682, "loss": 1.5313, "step": 418 }, { "epoch": 0.16, "learning_rate": 0.000134340088753925, "loss": 1.7617, "step": 419 }, { "epoch": 0.16, "learning_rate": 0.00013402931744416433, "loss": 1.6456, "step": 420 }, { "epoch": 0.16, "learning_rate": 0.00013371817399524005, "loss": 1.548, "step": 421 }, { "epoch": 0.16, "learning_rate": 0.00013340666180976712, "loss": 1.6117, "step": 422 }, { "epoch": 0.16, "learning_rate": 0.00013309478429439283, "loss": 1.6574, "step": 423 }, { "epoch": 0.16, "learning_rate": 0.00013278254485975976, "loss": 1.5813, "step": 424 }, { "epoch": 0.16, "learning_rate": 0.00013246994692046836, "loss": 1.5934, "step": 425 }, { "epoch": 0.16, "learning_rate": 0.00013215699389503954, "loss": 1.624, "step": 426 }, { "epoch": 0.16, "learning_rate": 0.00013184368920587754, "loss": 1.4844, "step": 427 }, { "epoch": 0.16, "learning_rate": 0.00013153003627923218, "loss": 1.5961, "step": 428 }, { "epoch": 0.16, "learning_rate": 0.0001312160385451616, "loss": 1.7463, "step": 429 }, { "epoch": 0.16, "learning_rate": 0.00013090169943749476, "loss": 1.6307, "step": 430 }, { "epoch": 0.16, "learning_rate": 0.00013058702239379376, "loss": 1.7261, "step": 431 }, { "epoch": 0.16, "learning_rate": 0.00013027201085531634, "loss": 1.733, "step": 432 }, { "epoch": 0.16, "learning_rate": 0.00012995666826697819, "loss": 1.5627, "step": 433 }, { "epoch": 0.16, "learning_rate": 0.0001296409980773154, "loss": 1.4446, "step": 434 }, { "epoch": 0.16, "learning_rate": 0.0001293250037384465, "loss": 1.4851, "step": 435 }, { "epoch": 0.16, "learning_rate": 0.00012900868870603503, "loss": 1.8002, "step": 436 }, { "epoch": 0.16, "learning_rate": 0.0001286920564392514, "loss": 1.6335, "step": 437 }, { "epoch": 0.17, "learning_rate": 0.0001283751104007355, "loss": 1.8301, "step": 438 }, { "epoch": 0.17, "learning_rate": 0.00012805785405655833, "loss": 1.6114, "step": 439 }, { "epoch": 0.17, "learning_rate": 0.00012774029087618446, "loss": 1.8021, "step": 440 }, { "epoch": 0.17, "learning_rate": 0.00012742242433243396, "loss": 1.4632, "step": 441 }, { "epoch": 0.17, "learning_rate": 0.00012710425790144446, "loss": 1.5362, "step": 442 }, { "epoch": 0.17, "learning_rate": 0.00012678579506263297, "loss": 1.7287, "step": 443 }, { "epoch": 0.17, "learning_rate": 0.00012646703929865817, "loss": 1.8786, "step": 444 }, { "epoch": 0.17, "learning_rate": 0.00012614799409538198, "loss": 1.6777, "step": 445 }, { "epoch": 0.17, "learning_rate": 0.00012582866294183167, "loss": 1.6491, "step": 446 }, { "epoch": 0.17, "learning_rate": 0.00012550904933016153, "loss": 1.6196, "step": 447 }, { "epoch": 0.17, "learning_rate": 0.00012518915675561483, "loss": 1.5774, "step": 448 }, { "epoch": 0.17, "learning_rate": 0.0001248689887164855, "loss": 1.6325, "step": 449 }, { "epoch": 0.17, "learning_rate": 0.00012454854871407994, "loss": 1.3833, "step": 450 }, { "epoch": 0.17, "learning_rate": 0.00012422784025267864, "loss": 1.5203, "step": 451 }, { "epoch": 0.17, "learning_rate": 0.00012390686683949798, "loss": 1.5659, "step": 452 }, { "epoch": 0.17, "learning_rate": 0.00012358563198465182, "loss": 1.3192, "step": 453 }, { "epoch": 0.17, "learning_rate": 0.00012326413920111303, "loss": 1.5701, "step": 454 }, { "epoch": 0.17, "learning_rate": 0.00012294239200467516, "loss": 1.4334, "step": 455 }, { "epoch": 0.17, "learning_rate": 0.00012262039391391404, "loss": 1.729, "step": 456 }, { "epoch": 0.17, "learning_rate": 0.0001222981484501492, "loss": 1.6675, "step": 457 }, { "epoch": 0.17, "learning_rate": 0.00012197565913740531, "loss": 1.48, "step": 458 }, { "epoch": 0.17, "learning_rate": 0.00012165292950237399, "loss": 1.533, "step": 459 }, { "epoch": 0.17, "learning_rate": 0.0001213299630743747, "loss": 1.6758, "step": 460 }, { "epoch": 0.17, "learning_rate": 0.0001210067633853166, "loss": 1.5528, "step": 461 }, { "epoch": 0.17, "learning_rate": 0.00012068333396965968, "loss": 1.6283, "step": 462 }, { "epoch": 0.17, "learning_rate": 0.00012035967836437625, "loss": 1.812, "step": 463 }, { "epoch": 0.18, "learning_rate": 0.00012003580010891213, "loss": 1.6004, "step": 464 }, { "epoch": 0.18, "learning_rate": 0.00011971170274514802, "loss": 1.4833, "step": 465 }, { "epoch": 0.18, "learning_rate": 0.00011938738981736085, "loss": 1.5398, "step": 466 }, { "epoch": 0.18, "learning_rate": 0.0001190628648721847, "loss": 1.7085, "step": 467 }, { "epoch": 0.18, "learning_rate": 0.00011873813145857249, "loss": 1.5997, "step": 468 }, { "epoch": 0.18, "learning_rate": 0.00011841319312775671, "loss": 1.6156, "step": 469 }, { "epoch": 0.18, "learning_rate": 0.000118088053433211, "loss": 1.4217, "step": 470 }, { "epoch": 0.18, "learning_rate": 0.00011776271593061089, "loss": 1.5492, "step": 471 }, { "epoch": 0.18, "learning_rate": 0.00011743718417779517, "loss": 1.6231, "step": 472 }, { "epoch": 0.18, "learning_rate": 0.000117111461734727, "loss": 1.5459, "step": 473 }, { "epoch": 0.18, "learning_rate": 0.00011678555216345477, "loss": 1.6095, "step": 474 }, { "epoch": 0.18, "learning_rate": 0.00011645945902807341, "loss": 1.7221, "step": 475 }, { "epoch": 0.18, "learning_rate": 0.00011613318589468511, "loss": 1.7852, "step": 476 }, { "epoch": 0.18, "learning_rate": 0.00011580673633136065, "loss": 1.6094, "step": 477 }, { "epoch": 0.18, "learning_rate": 0.00011548011390810017, "loss": 1.6245, "step": 478 }, { "epoch": 0.18, "learning_rate": 0.00011515332219679404, "loss": 1.5804, "step": 479 }, { "epoch": 0.18, "learning_rate": 0.0001148263647711842, "loss": 1.6381, "step": 480 }, { "epoch": 0.18, "learning_rate": 0.0001144992452068246, "loss": 1.5489, "step": 481 }, { "epoch": 0.18, "learning_rate": 0.00011417196708104243, "loss": 1.5808, "step": 482 }, { "epoch": 0.18, "learning_rate": 0.00011384453397289876, "loss": 1.7598, "step": 483 }, { "epoch": 0.18, "learning_rate": 0.0001135169494631497, "loss": 1.631, "step": 484 }, { "epoch": 0.18, "learning_rate": 0.00011318921713420691, "loss": 1.4051, "step": 485 }, { "epoch": 0.18, "learning_rate": 0.00011286134057009863, "loss": 1.558, "step": 486 }, { "epoch": 0.18, "learning_rate": 0.00011253332335643043, "loss": 1.7557, "step": 487 }, { "epoch": 0.18, "learning_rate": 0.00011220516908034601, "loss": 1.3264, "step": 488 }, { "epoch": 0.18, "learning_rate": 0.00011187688133048801, "loss": 1.513, "step": 489 }, { "epoch": 0.18, "learning_rate": 0.00011154846369695863, "loss": 1.624, "step": 490 }, { "epoch": 0.19, "learning_rate": 0.00011121991977128045, "loss": 1.9427, "step": 491 }, { "epoch": 0.19, "learning_rate": 0.00011089125314635726, "loss": 1.4963, "step": 492 }, { "epoch": 0.19, "learning_rate": 0.0001105624674164346, "loss": 1.4605, "step": 493 }, { "epoch": 0.19, "learning_rate": 0.00011023356617706052, "loss": 1.5062, "step": 494 }, { "epoch": 0.19, "learning_rate": 0.0001099045530250463, "loss": 1.5084, "step": 495 }, { "epoch": 0.19, "learning_rate": 0.00010957543155842702, "loss": 1.7999, "step": 496 }, { "epoch": 0.19, "learning_rate": 0.00010924620537642236, "loss": 1.7851, "step": 497 }, { "epoch": 0.19, "learning_rate": 0.00010891687807939707, "loss": 1.7145, "step": 498 }, { "epoch": 0.19, "learning_rate": 0.00010858745326882171, "loss": 1.6778, "step": 499 }, { "epoch": 0.19, "learning_rate": 0.00010825793454723325, "loss": 1.6589, "step": 500 }, { "epoch": 0.19, "learning_rate": 0.00010792832551819558, "loss": 1.7932, "step": 501 }, { "epoch": 0.19, "learning_rate": 0.00010759862978626031, "loss": 1.4803, "step": 502 }, { "epoch": 0.19, "learning_rate": 0.00010726885095692712, "loss": 1.3864, "step": 503 }, { "epoch": 0.19, "learning_rate": 0.00010693899263660441, "loss": 1.6757, "step": 504 }, { "epoch": 0.19, "learning_rate": 0.00010660905843256994, "loss": 1.4477, "step": 505 }, { "epoch": 0.19, "learning_rate": 0.00010627905195293135, "loss": 1.5601, "step": 506 }, { "epoch": 0.19, "learning_rate": 0.00010594897680658658, "loss": 1.4949, "step": 507 }, { "epoch": 0.19, "learning_rate": 0.00010561883660318455, "loss": 1.3819, "step": 508 }, { "epoch": 0.19, "learning_rate": 0.00010528863495308567, "loss": 1.7058, "step": 509 }, { "epoch": 0.19, "learning_rate": 0.00010495837546732224, "loss": 1.8273, "step": 510 }, { "epoch": 0.19, "learning_rate": 0.0001046280617575591, "loss": 1.5141, "step": 511 }, { "epoch": 0.19, "learning_rate": 0.00010429769743605407, "loss": 1.529, "step": 512 }, { "epoch": 0.19, "learning_rate": 0.00010396728611561844, "loss": 1.6281, "step": 513 }, { "epoch": 0.19, "learning_rate": 0.00010363683140957745, "loss": 1.5206, "step": 514 }, { "epoch": 0.19, "learning_rate": 0.00010330633693173082, "loss": 1.7675, "step": 515 }, { "epoch": 0.19, "learning_rate": 0.00010297580629631325, "loss": 1.6477, "step": 516 }, { "epoch": 0.2, "learning_rate": 0.00010264524311795478, "loss": 1.5997, "step": 517 }, { "epoch": 0.2, "learning_rate": 0.00010231465101164139, "loss": 1.8573, "step": 518 }, { "epoch": 0.2, "learning_rate": 0.00010198403359267537, "loss": 1.4737, "step": 519 }, { "epoch": 0.2, "learning_rate": 0.00010165339447663587, "loss": 1.7002, "step": 520 }, { "epoch": 0.2, "learning_rate": 0.00010132273727933925, "loss": 1.5743, "step": 521 }, { "epoch": 0.2, "learning_rate": 0.00010099206561679963, "loss": 1.6136, "step": 522 }, { "epoch": 0.2, "learning_rate": 0.00010066138310518942, "loss": 1.5749, "step": 523 }, { "epoch": 0.2, "learning_rate": 0.00010033069336079952, "loss": 1.7259, "step": 524 }, { "epoch": 0.2, "learning_rate": 0.0001, "loss": 1.6527, "step": 525 }, { "epoch": 0.2, "learning_rate": 9.966930663920049e-05, "loss": 1.775, "step": 526 }, { "epoch": 0.2, "learning_rate": 9.93386168948106e-05, "loss": 1.5532, "step": 527 }, { "epoch": 0.2, "learning_rate": 9.900793438320037e-05, "loss": 1.7757, "step": 528 }, { "epoch": 0.2, "learning_rate": 9.86772627206608e-05, "loss": 1.6747, "step": 529 }, { "epoch": 0.2, "learning_rate": 9.834660552336415e-05, "loss": 1.6145, "step": 530 }, { "epoch": 0.2, "learning_rate": 9.801596640732465e-05, "loss": 1.7237, "step": 531 }, { "epoch": 0.2, "learning_rate": 9.768534898835862e-05, "loss": 1.8218, "step": 532 }, { "epoch": 0.2, "learning_rate": 9.735475688204521e-05, "loss": 1.7219, "step": 533 }, { "epoch": 0.2, "learning_rate": 9.702419370368676e-05, "loss": 1.6511, "step": 534 }, { "epoch": 0.2, "learning_rate": 9.669366306826919e-05, "loss": 1.4605, "step": 535 }, { "epoch": 0.2, "learning_rate": 9.636316859042259e-05, "loss": 1.8221, "step": 536 }, { "epoch": 0.2, "learning_rate": 9.603271388438159e-05, "loss": 1.4343, "step": 537 }, { "epoch": 0.2, "learning_rate": 9.570230256394596e-05, "loss": 1.7632, "step": 538 }, { "epoch": 0.2, "learning_rate": 9.537193824244091e-05, "loss": 1.6369, "step": 539 }, { "epoch": 0.2, "learning_rate": 9.504162453267777e-05, "loss": 1.8309, "step": 540 }, { "epoch": 0.2, "learning_rate": 9.471136504691436e-05, "loss": 1.4987, "step": 541 }, { "epoch": 0.2, "learning_rate": 9.438116339681545e-05, "loss": 1.4305, "step": 542 }, { "epoch": 0.2, "learning_rate": 9.405102319341344e-05, "loss": 1.6346, "step": 543 }, { "epoch": 0.21, "learning_rate": 9.372094804706867e-05, "loss": 1.5976, "step": 544 }, { "epoch": 0.21, "learning_rate": 9.339094156743007e-05, "loss": 1.8561, "step": 545 }, { "epoch": 0.21, "learning_rate": 9.30610073633956e-05, "loss": 1.6587, "step": 546 }, { "epoch": 0.21, "learning_rate": 9.273114904307289e-05, "loss": 1.6006, "step": 547 }, { "epoch": 0.21, "learning_rate": 9.24013702137397e-05, "loss": 1.681, "step": 548 }, { "epoch": 0.21, "learning_rate": 9.20716744818044e-05, "loss": 1.6353, "step": 549 }, { "epoch": 0.21, "learning_rate": 9.174206545276677e-05, "loss": 1.5338, "step": 550 }, { "epoch": 0.21, "learning_rate": 9.14125467311783e-05, "loss": 1.7111, "step": 551 }, { "epoch": 0.21, "learning_rate": 9.108312192060298e-05, "loss": 1.6232, "step": 552 }, { "epoch": 0.21, "learning_rate": 9.075379462357766e-05, "loss": 1.5872, "step": 553 }, { "epoch": 0.21, "learning_rate": 9.042456844157299e-05, "loss": 1.6202, "step": 554 }, { "epoch": 0.21, "learning_rate": 9.009544697495374e-05, "loss": 1.479, "step": 555 }, { "epoch": 0.21, "learning_rate": 8.97664338229395e-05, "loss": 1.5796, "step": 556 }, { "epoch": 0.21, "learning_rate": 8.943753258356546e-05, "loss": 1.7654, "step": 557 }, { "epoch": 0.21, "learning_rate": 8.910874685364275e-05, "loss": 1.5687, "step": 558 }, { "epoch": 0.21, "learning_rate": 8.878008022871958e-05, "loss": 1.7592, "step": 559 }, { "epoch": 0.21, "learning_rate": 8.845153630304139e-05, "loss": 1.6815, "step": 560 }, { "epoch": 0.21, "learning_rate": 8.812311866951198e-05, "loss": 1.6589, "step": 561 }, { "epoch": 0.21, "learning_rate": 8.7794830919654e-05, "loss": 1.565, "step": 562 }, { "epoch": 0.21, "learning_rate": 8.746667664356956e-05, "loss": 1.6184, "step": 563 }, { "epoch": 0.21, "learning_rate": 8.713865942990141e-05, "loss": 1.6804, "step": 564 }, { "epoch": 0.21, "learning_rate": 8.681078286579311e-05, "loss": 1.5697, "step": 565 }, { "epoch": 0.21, "learning_rate": 8.648305053685034e-05, "loss": 1.7056, "step": 566 }, { "epoch": 0.21, "learning_rate": 8.615546602710125e-05, "loss": 1.6924, "step": 567 }, { "epoch": 0.21, "learning_rate": 8.582803291895758e-05, "loss": 1.5869, "step": 568 }, { "epoch": 0.21, "learning_rate": 8.550075479317542e-05, "loss": 1.7831, "step": 569 }, { "epoch": 0.22, "learning_rate": 8.517363522881579e-05, "loss": 1.7121, "step": 570 }, { "epoch": 0.22, "learning_rate": 8.484667780320597e-05, "loss": 1.6502, "step": 571 }, { "epoch": 0.22, "learning_rate": 8.451988609189987e-05, "loss": 1.5674, "step": 572 }, { "epoch": 0.22, "learning_rate": 8.419326366863938e-05, "loss": 1.7858, "step": 573 }, { "epoch": 0.22, "learning_rate": 8.386681410531491e-05, "loss": 1.6807, "step": 574 }, { "epoch": 0.22, "learning_rate": 8.35405409719266e-05, "loss": 1.6116, "step": 575 }, { "epoch": 0.22, "learning_rate": 8.321444783654524e-05, "loss": 1.6685, "step": 576 }, { "epoch": 0.22, "learning_rate": 8.2888538265273e-05, "loss": 1.5393, "step": 577 }, { "epoch": 0.22, "learning_rate": 8.256281582220485e-05, "loss": 1.6817, "step": 578 }, { "epoch": 0.22, "learning_rate": 8.223728406938914e-05, "loss": 1.8488, "step": 579 }, { "epoch": 0.22, "learning_rate": 8.191194656678904e-05, "loss": 1.5492, "step": 580 }, { "epoch": 0.22, "learning_rate": 8.15868068722433e-05, "loss": 1.5958, "step": 581 }, { "epoch": 0.22, "learning_rate": 8.126186854142752e-05, "loss": 1.6587, "step": 582 }, { "epoch": 0.22, "learning_rate": 8.093713512781534e-05, "loss": 1.6371, "step": 583 }, { "epoch": 0.22, "learning_rate": 8.061261018263919e-05, "loss": 1.615, "step": 584 }, { "epoch": 0.22, "learning_rate": 8.028829725485199e-05, "loss": 1.6166, "step": 585 }, { "epoch": 0.22, "learning_rate": 7.996419989108789e-05, "loss": 1.5911, "step": 586 }, { "epoch": 0.22, "learning_rate": 7.964032163562378e-05, "loss": 1.5945, "step": 587 }, { "epoch": 0.22, "learning_rate": 7.931666603034033e-05, "loss": 1.5537, "step": 588 }, { "epoch": 0.22, "learning_rate": 7.899323661468343e-05, "loss": 1.4572, "step": 589 }, { "epoch": 0.22, "learning_rate": 7.867003692562534e-05, "loss": 1.6527, "step": 590 }, { "epoch": 0.22, "learning_rate": 7.834707049762603e-05, "loss": 1.7753, "step": 591 }, { "epoch": 0.22, "learning_rate": 7.80243408625947e-05, "loss": 1.5923, "step": 592 }, { "epoch": 0.22, "learning_rate": 7.770185154985085e-05, "loss": 1.6989, "step": 593 }, { "epoch": 0.22, "learning_rate": 7.7379606086086e-05, "loss": 1.4455, "step": 594 }, { "epoch": 0.22, "learning_rate": 7.705760799532485e-05, "loss": 1.654, "step": 595 }, { "epoch": 0.22, "learning_rate": 7.673586079888698e-05, "loss": 1.8404, "step": 596 }, { "epoch": 0.23, "learning_rate": 7.641436801534818e-05, "loss": 2.0157, "step": 597 }, { "epoch": 0.23, "learning_rate": 7.6093133160502e-05, "loss": 1.4572, "step": 598 }, { "epoch": 0.23, "learning_rate": 7.577215974732137e-05, "loss": 1.6459, "step": 599 }, { "epoch": 0.23, "learning_rate": 7.54514512859201e-05, "loss": 1.6336, "step": 600 }, { "epoch": 0.23, "learning_rate": 7.513101128351454e-05, "loss": 1.3708, "step": 601 }, { "epoch": 0.23, "learning_rate": 7.48108432443852e-05, "loss": 1.4441, "step": 602 }, { "epoch": 0.23, "learning_rate": 7.449095066983849e-05, "loss": 1.7811, "step": 603 }, { "epoch": 0.23, "learning_rate": 7.417133705816837e-05, "loss": 1.576, "step": 604 }, { "epoch": 0.23, "learning_rate": 7.385200590461803e-05, "loss": 1.674, "step": 605 }, { "epoch": 0.23, "learning_rate": 7.353296070134186e-05, "loss": 1.52, "step": 606 }, { "epoch": 0.23, "learning_rate": 7.321420493736705e-05, "loss": 1.6435, "step": 607 }, { "epoch": 0.23, "learning_rate": 7.289574209855559e-05, "loss": 1.6924, "step": 608 }, { "epoch": 0.23, "learning_rate": 7.257757566756605e-05, "loss": 1.714, "step": 609 }, { "epoch": 0.23, "learning_rate": 7.225970912381556e-05, "loss": 1.6064, "step": 610 }, { "epoch": 0.23, "learning_rate": 7.194214594344168e-05, "loss": 1.7011, "step": 611 }, { "epoch": 0.23, "learning_rate": 7.16248895992645e-05, "loss": 1.5956, "step": 612 }, { "epoch": 0.23, "learning_rate": 7.130794356074859e-05, "loss": 1.3912, "step": 613 }, { "epoch": 0.23, "learning_rate": 7.099131129396501e-05, "loss": 1.5384, "step": 614 }, { "epoch": 0.23, "learning_rate": 7.067499626155354e-05, "loss": 1.7346, "step": 615 }, { "epoch": 0.23, "learning_rate": 7.035900192268464e-05, "loss": 1.4425, "step": 616 }, { "epoch": 0.23, "learning_rate": 7.004333173302185e-05, "loss": 1.802, "step": 617 }, { "epoch": 0.23, "learning_rate": 6.972798914468369e-05, "loss": 1.5367, "step": 618 }, { "epoch": 0.23, "learning_rate": 6.941297760620627e-05, "loss": 1.5427, "step": 619 }, { "epoch": 0.23, "learning_rate": 6.909830056250527e-05, "loss": 1.5057, "step": 620 }, { "epoch": 0.23, "learning_rate": 6.878396145483841e-05, "loss": 1.5293, "step": 621 }, { "epoch": 0.23, "learning_rate": 6.846996372076786e-05, "loss": 1.4316, "step": 622 }, { "epoch": 0.24, "learning_rate": 6.815631079412248e-05, "loss": 1.6498, "step": 623 }, { "epoch": 0.24, "learning_rate": 6.784300610496048e-05, "loss": 1.6795, "step": 624 }, { "epoch": 0.24, "learning_rate": 6.753005307953167e-05, "loss": 1.4548, "step": 625 }, { "epoch": 0.24, "learning_rate": 6.721745514024022e-05, "loss": 1.5507, "step": 626 }, { "epoch": 0.24, "learning_rate": 6.690521570560717e-05, "loss": 1.4182, "step": 627 }, { "epoch": 0.24, "learning_rate": 6.65933381902329e-05, "loss": 1.7437, "step": 628 }, { "epoch": 0.24, "learning_rate": 6.628182600475999e-05, "loss": 1.5987, "step": 629 }, { "epoch": 0.24, "learning_rate": 6.59706825558357e-05, "loss": 1.6205, "step": 630 }, { "epoch": 0.24, "learning_rate": 6.565991124607507e-05, "loss": 1.6679, "step": 631 }, { "epoch": 0.24, "learning_rate": 6.534951547402322e-05, "loss": 1.8517, "step": 632 }, { "epoch": 0.24, "learning_rate": 6.503949863411865e-05, "loss": 1.6901, "step": 633 }, { "epoch": 0.24, "learning_rate": 6.47298641166559e-05, "loss": 1.5482, "step": 634 }, { "epoch": 0.24, "learning_rate": 6.442061530774834e-05, "loss": 1.6358, "step": 635 }, { "epoch": 0.24, "learning_rate": 6.411175558929152e-05, "loss": 1.7407, "step": 636 }, { "epoch": 0.24, "learning_rate": 6.38032883389257e-05, "loss": 1.7207, "step": 637 }, { "epoch": 0.24, "learning_rate": 6.349521692999945e-05, "loss": 1.7949, "step": 638 }, { "epoch": 0.24, "learning_rate": 6.318754473153221e-05, "loss": 1.4887, "step": 639 }, { "epoch": 0.24, "learning_rate": 6.28802751081779e-05, "loss": 1.4875, "step": 640 }, { "epoch": 0.24, "learning_rate": 6.257341142018798e-05, "loss": 1.6094, "step": 641 }, { "epoch": 0.24, "learning_rate": 6.226695702337442e-05, "loss": 1.5121, "step": 642 }, { "epoch": 0.24, "learning_rate": 6.196091526907355e-05, "loss": 1.7423, "step": 643 }, { "epoch": 0.24, "learning_rate": 6.165528950410884e-05, "loss": 1.6317, "step": 644 }, { "epoch": 0.24, "learning_rate": 6.135008307075481e-05, "loss": 1.6648, "step": 645 }, { "epoch": 0.24, "learning_rate": 6.10452993067e-05, "loss": 1.6393, "step": 646 }, { "epoch": 0.24, "learning_rate": 6.074094154501087e-05, "loss": 1.5263, "step": 647 }, { "epoch": 0.24, "learning_rate": 6.0437013114095195e-05, "loss": 1.4215, "step": 648 }, { "epoch": 0.24, "learning_rate": 6.01335173376655e-05, "loss": 1.5247, "step": 649 }, { "epoch": 0.25, "learning_rate": 5.983045753470308e-05, "loss": 1.6335, "step": 650 }, { "epoch": 0.25, "learning_rate": 5.95278370194213e-05, "loss": 1.6662, "step": 651 }, { "epoch": 0.25, "learning_rate": 5.922565910122967e-05, "loss": 1.6644, "step": 652 }, { "epoch": 0.25, "learning_rate": 5.8923927084697475e-05, "loss": 1.6358, "step": 653 }, { "epoch": 0.25, "learning_rate": 5.862264426951768e-05, "loss": 1.2212, "step": 654 }, { "epoch": 0.25, "learning_rate": 5.832181395047098e-05, "loss": 1.5045, "step": 655 }, { "epoch": 0.25, "learning_rate": 5.8021439417389444e-05, "loss": 1.5293, "step": 656 }, { "epoch": 0.25, "learning_rate": 5.772152395512087e-05, "loss": 1.6957, "step": 657 }, { "epoch": 0.25, "learning_rate": 5.7422070843492734e-05, "loss": 1.6455, "step": 658 }, { "epoch": 0.25, "learning_rate": 5.7123083357276283e-05, "loss": 1.7537, "step": 659 }, { "epoch": 0.25, "learning_rate": 5.6824564766150726e-05, "loss": 1.369, "step": 660 }, { "epoch": 0.25, "learning_rate": 5.652651833466755e-05, "loss": 1.5874, "step": 661 }, { "epoch": 0.25, "learning_rate": 5.622894732221482e-05, "loss": 1.5345, "step": 662 }, { "epoch": 0.25, "learning_rate": 5.593185498298141e-05, "loss": 1.4931, "step": 663 }, { "epoch": 0.25, "learning_rate": 5.563524456592163e-05, "loss": 1.8471, "step": 664 }, { "epoch": 0.25, "learning_rate": 5.533911931471936e-05, "loss": 1.512, "step": 665 }, { "epoch": 0.25, "learning_rate": 5.504348246775299e-05, "loss": 1.6631, "step": 666 }, { "epoch": 0.25, "learning_rate": 5.4748337258059626e-05, "loss": 1.5525, "step": 667 }, { "epoch": 0.25, "learning_rate": 5.4453686913300074e-05, "loss": 1.8276, "step": 668 }, { "epoch": 0.25, "learning_rate": 5.415953465572332e-05, "loss": 1.6255, "step": 669 }, { "epoch": 0.25, "learning_rate": 5.386588370213124e-05, "loss": 1.5748, "step": 670 }, { "epoch": 0.25, "learning_rate": 5.357273726384367e-05, "loss": 1.7415, "step": 671 }, { "epoch": 0.25, "learning_rate": 5.328009854666303e-05, "loss": 1.2476, "step": 672 }, { "epoch": 0.25, "learning_rate": 5.2987970750839555e-05, "loss": 1.4693, "step": 673 }, { "epoch": 0.25, "learning_rate": 5.269635707103593e-05, "loss": 1.5491, "step": 674 }, { "epoch": 0.25, "learning_rate": 5.240526069629265e-05, "loss": 1.5098, "step": 675 }, { "epoch": 0.26, "learning_rate": 5.2114684809993044e-05, "loss": 1.6815, "step": 676 }, { "epoch": 0.26, "learning_rate": 5.182463258982846e-05, "loss": 1.5905, "step": 677 }, { "epoch": 0.26, "learning_rate": 5.1535107207763534e-05, "loss": 1.5357, "step": 678 }, { "epoch": 0.26, "learning_rate": 5.124611183000138e-05, "loss": 1.7417, "step": 679 }, { "epoch": 0.26, "learning_rate": 5.095764961694922e-05, "loss": 1.7145, "step": 680 }, { "epoch": 0.26, "learning_rate": 5.0669723723183506e-05, "loss": 1.6636, "step": 681 }, { "epoch": 0.26, "learning_rate": 5.0382337297415773e-05, "loss": 1.4667, "step": 682 }, { "epoch": 0.26, "learning_rate": 5.0095493482457955e-05, "loss": 1.6005, "step": 683 }, { "epoch": 0.26, "learning_rate": 4.980919541518796e-05, "loss": 1.7212, "step": 684 }, { "epoch": 0.26, "learning_rate": 4.952344622651566e-05, "loss": 1.6054, "step": 685 }, { "epoch": 0.26, "learning_rate": 4.923824904134829e-05, "loss": 1.4423, "step": 686 }, { "epoch": 0.26, "learning_rate": 4.895360697855674e-05, "loss": 1.5751, "step": 687 }, { "epoch": 0.26, "learning_rate": 4.866952315094088e-05, "loss": 1.3508, "step": 688 }, { "epoch": 0.26, "learning_rate": 4.838600066519596e-05, "loss": 1.5055, "step": 689 }, { "epoch": 0.26, "learning_rate": 4.810304262187852e-05, "loss": 1.5928, "step": 690 }, { "epoch": 0.26, "learning_rate": 4.7820652115372253e-05, "loss": 1.5872, "step": 691 }, { "epoch": 0.26, "learning_rate": 4.753883223385467e-05, "loss": 1.6046, "step": 692 }, { "epoch": 0.26, "learning_rate": 4.72575860592627e-05, "loss": 1.5913, "step": 693 }, { "epoch": 0.26, "learning_rate": 4.697691666725955e-05, "loss": 1.6866, "step": 694 }, { "epoch": 0.26, "learning_rate": 4.669682712720065e-05, "loss": 1.5976, "step": 695 }, { "epoch": 0.26, "learning_rate": 4.6417320502100316e-05, "loss": 1.4559, "step": 696 }, { "epoch": 0.26, "learning_rate": 4.613839984859835e-05, "loss": 1.6007, "step": 697 }, { "epoch": 0.26, "learning_rate": 4.58600682169262e-05, "loss": 1.5832, "step": 698 }, { "epoch": 0.26, "learning_rate": 4.55823286508741e-05, "loss": 1.6939, "step": 699 }, { "epoch": 0.26, "learning_rate": 4.530518418775733e-05, "loss": 1.5553, "step": 700 }, { "epoch": 0.26, "learning_rate": 4.502863785838342e-05, "loss": 1.4034, "step": 701 }, { "epoch": 0.26, "learning_rate": 4.475269268701868e-05, "loss": 1.4394, "step": 702 }, { "epoch": 0.27, "learning_rate": 4.447735169135533e-05, "loss": 1.6641, "step": 703 }, { "epoch": 0.27, "learning_rate": 4.4202617882478405e-05, "loss": 1.4679, "step": 704 }, { "epoch": 0.27, "learning_rate": 4.392849426483274e-05, "loss": 1.5268, "step": 705 }, { "epoch": 0.27, "learning_rate": 4.365498383619036e-05, "loss": 1.8219, "step": 706 }, { "epoch": 0.27, "learning_rate": 4.338208958761747e-05, "loss": 1.4109, "step": 707 }, { "epoch": 0.27, "learning_rate": 4.310981450344189e-05, "loss": 1.4852, "step": 708 }, { "epoch": 0.27, "learning_rate": 4.2838161561220245e-05, "loss": 1.7155, "step": 709 }, { "epoch": 0.27, "learning_rate": 4.256713373170564e-05, "loss": 1.6242, "step": 710 }, { "epoch": 0.27, "learning_rate": 4.229673397881499e-05, "loss": 1.49, "step": 711 }, { "epoch": 0.27, "learning_rate": 4.2026965259596666e-05, "loss": 1.5365, "step": 712 }, { "epoch": 0.27, "learning_rate": 4.1757830524198195e-05, "loss": 1.5405, "step": 713 }, { "epoch": 0.27, "learning_rate": 4.148933271583385e-05, "loss": 1.7893, "step": 714 }, { "epoch": 0.27, "learning_rate": 4.12214747707527e-05, "loss": 1.6398, "step": 715 }, { "epoch": 0.27, "learning_rate": 4.0954259618206295e-05, "loss": 1.1292, "step": 716 }, { "epoch": 0.27, "learning_rate": 4.0687690180416736e-05, "loss": 1.7568, "step": 717 }, { "epoch": 0.27, "learning_rate": 4.0421769372544736e-05, "loss": 1.6105, "step": 718 }, { "epoch": 0.27, "learning_rate": 4.015650010265757e-05, "loss": 1.6282, "step": 719 }, { "epoch": 0.27, "learning_rate": 3.9891885271697496e-05, "loss": 1.4906, "step": 720 }, { "epoch": 0.27, "learning_rate": 3.962792777344992e-05, "loss": 1.6453, "step": 721 }, { "epoch": 0.27, "learning_rate": 3.936463049451179e-05, "loss": 1.5742, "step": 722 }, { "epoch": 0.27, "learning_rate": 3.910199631425989e-05, "loss": 1.7459, "step": 723 }, { "epoch": 0.27, "learning_rate": 3.884002810481958e-05, "loss": 1.4263, "step": 724 }, { "epoch": 0.27, "learning_rate": 3.857872873103322e-05, "loss": 1.6211, "step": 725 }, { "epoch": 0.27, "learning_rate": 3.8318101050428904e-05, "loss": 1.6104, "step": 726 }, { "epoch": 0.27, "learning_rate": 3.805814791318921e-05, "loss": 1.5656, "step": 727 }, { "epoch": 0.27, "learning_rate": 3.779887216211995e-05, "loss": 1.5755, "step": 728 }, { "epoch": 0.28, "learning_rate": 3.754027663261922e-05, "loss": 1.5348, "step": 729 }, { "epoch": 0.28, "learning_rate": 3.7282364152646297e-05, "loss": 1.5228, "step": 730 }, { "epoch": 0.28, "learning_rate": 3.702513754269076e-05, "loss": 1.2992, "step": 731 }, { "epoch": 0.28, "learning_rate": 3.676859961574162e-05, "loss": 1.7796, "step": 732 }, { "epoch": 0.28, "learning_rate": 3.6512753177256476e-05, "loss": 1.3485, "step": 733 }, { "epoch": 0.28, "learning_rate": 3.6257601025131026e-05, "loss": 1.4678, "step": 734 }, { "epoch": 0.28, "learning_rate": 3.600314594966834e-05, "loss": 1.244, "step": 735 }, { "epoch": 0.28, "learning_rate": 3.574939073354838e-05, "loss": 1.7183, "step": 736 }, { "epoch": 0.28, "learning_rate": 3.549633815179746e-05, "loss": 1.473, "step": 737 }, { "epoch": 0.28, "learning_rate": 3.5243990971758125e-05, "loss": 1.6696, "step": 738 }, { "epoch": 0.28, "learning_rate": 3.499235195305868e-05, "loss": 1.4558, "step": 739 }, { "epoch": 0.28, "learning_rate": 3.4741423847583134e-05, "loss": 1.5574, "step": 740 }, { "epoch": 0.28, "learning_rate": 3.449120939944107e-05, "loss": 1.7925, "step": 741 }, { "epoch": 0.28, "learning_rate": 3.424171134493756e-05, "loss": 1.5674, "step": 742 }, { "epoch": 0.28, "learning_rate": 3.399293241254336e-05, "loss": 1.689, "step": 743 }, { "epoch": 0.28, "learning_rate": 3.3744875322865034e-05, "loss": 1.3738, "step": 744 }, { "epoch": 0.28, "learning_rate": 3.349754278861517e-05, "loss": 1.4457, "step": 745 }, { "epoch": 0.28, "learning_rate": 3.325093751458276e-05, "loss": 1.7723, "step": 746 }, { "epoch": 0.28, "learning_rate": 3.3005062197603506e-05, "loss": 1.5807, "step": 747 }, { "epoch": 0.28, "learning_rate": 3.275991952653054e-05, "loss": 1.69, "step": 748 }, { "epoch": 0.28, "learning_rate": 3.25155121822048e-05, "loss": 1.5512, "step": 749 }, { "epoch": 0.28, "learning_rate": 3.227184283742591e-05, "loss": 1.4606, "step": 750 }, { "epoch": 0.28, "learning_rate": 3.2028914156922705e-05, "loss": 1.6702, "step": 751 }, { "epoch": 0.28, "learning_rate": 3.178672879732435e-05, "loss": 1.5935, "step": 752 }, { "epoch": 0.28, "learning_rate": 3.154528940713113e-05, "loss": 1.8427, "step": 753 }, { "epoch": 0.28, "learning_rate": 3.1304598626685545e-05, "loss": 1.5779, "step": 754 }, { "epoch": 0.28, "learning_rate": 3.106465908814342e-05, "loss": 1.4777, "step": 755 }, { "epoch": 0.29, "learning_rate": 3.0825473415445074e-05, "loss": 1.7201, "step": 756 }, { "epoch": 0.29, "learning_rate": 3.0587044224286746e-05, "loss": 1.557, "step": 757 }, { "epoch": 0.29, "learning_rate": 3.034937412209178e-05, "loss": 1.4025, "step": 758 }, { "epoch": 0.29, "learning_rate": 3.0112465707982417e-05, "loss": 1.4136, "step": 759 }, { "epoch": 0.29, "learning_rate": 2.9876321572751144e-05, "loss": 1.602, "step": 760 }, { "epoch": 0.29, "learning_rate": 2.964094429883231e-05, "loss": 1.4762, "step": 761 }, { "epoch": 0.29, "learning_rate": 2.940633646027414e-05, "loss": 1.5751, "step": 762 }, { "epoch": 0.29, "learning_rate": 2.9172500622710263e-05, "loss": 1.4926, "step": 763 }, { "epoch": 0.29, "learning_rate": 2.8939439343332086e-05, "loss": 1.5308, "step": 764 }, { "epoch": 0.29, "learning_rate": 2.87071551708603e-05, "loss": 1.2756, "step": 765 }, { "epoch": 0.29, "learning_rate": 2.8475650645517472e-05, "loss": 1.5544, "step": 766 }, { "epoch": 0.29, "learning_rate": 2.824492829899994e-05, "loss": 1.43, "step": 767 }, { "epoch": 0.29, "learning_rate": 2.8014990654450325e-05, "loss": 1.3823, "step": 768 }, { "epoch": 0.29, "learning_rate": 2.778584022642996e-05, "loss": 1.5595, "step": 769 }, { "epoch": 0.29, "learning_rate": 2.7557479520891104e-05, "loss": 1.6086, "step": 770 }, { "epoch": 0.29, "learning_rate": 2.7329911035149937e-05, "loss": 1.5887, "step": 771 }, { "epoch": 0.29, "learning_rate": 2.7103137257858868e-05, "loss": 1.5278, "step": 772 }, { "epoch": 0.29, "learning_rate": 2.687716066897964e-05, "loss": 1.7046, "step": 773 }, { "epoch": 0.29, "learning_rate": 2.6651983739756026e-05, "loss": 1.5074, "step": 774 }, { "epoch": 0.29, "learning_rate": 2.6427608932686843e-05, "loss": 1.5464, "step": 775 }, { "epoch": 0.29, "learning_rate": 2.6204038701499056e-05, "loss": 1.4047, "step": 776 }, { "epoch": 0.29, "learning_rate": 2.598127549112084e-05, "loss": 1.5001, "step": 777 }, { "epoch": 0.29, "learning_rate": 2.5759321737655017e-05, "loss": 1.7997, "step": 778 }, { "epoch": 0.29, "learning_rate": 2.5538179868352253e-05, "loss": 1.8855, "step": 779 }, { "epoch": 0.29, "learning_rate": 2.5317852301584643e-05, "loss": 1.5479, "step": 780 }, { "epoch": 0.29, "learning_rate": 2.5098341446819097e-05, "loss": 1.6239, "step": 781 }, { "epoch": 0.3, "learning_rate": 2.487964970459118e-05, "loss": 1.3434, "step": 782 }, { "epoch": 0.3, "learning_rate": 2.466177946647874e-05, "loss": 1.4003, "step": 783 }, { "epoch": 0.3, "learning_rate": 2.4444733115075823e-05, "loss": 1.3932, "step": 784 }, { "epoch": 0.3, "learning_rate": 2.422851302396655e-05, "loss": 1.4909, "step": 785 }, { "epoch": 0.3, "learning_rate": 2.4013121557699157e-05, "loss": 1.5588, "step": 786 }, { "epoch": 0.3, "learning_rate": 2.3798561071760238e-05, "loss": 1.6942, "step": 787 }, { "epoch": 0.3, "learning_rate": 2.3584833912548888e-05, "loss": 1.4435, "step": 788 }, { "epoch": 0.3, "learning_rate": 2.3371942417351077e-05, "loss": 1.8429, "step": 789 }, { "epoch": 0.3, "learning_rate": 2.315988891431412e-05, "loss": 1.5738, "step": 790 }, { "epoch": 0.3, "learning_rate": 2.2948675722421086e-05, "loss": 1.5925, "step": 791 }, { "epoch": 0.3, "learning_rate": 2.2738305151465645e-05, "loss": 1.6478, "step": 792 }, { "epoch": 0.3, "learning_rate": 2.2528779502026652e-05, "loss": 1.5971, "step": 793 }, { "epoch": 0.3, "learning_rate": 2.2320101065443056e-05, "loss": 1.3759, "step": 794 }, { "epoch": 0.3, "learning_rate": 2.2112272123788768e-05, "loss": 1.5112, "step": 795 }, { "epoch": 0.3, "learning_rate": 2.190529494984782e-05, "loss": 1.4325, "step": 796 }, { "epoch": 0.3, "learning_rate": 2.1699171807089412e-05, "loss": 1.3526, "step": 797 }, { "epoch": 0.3, "learning_rate": 2.149390494964323e-05, "loss": 1.4915, "step": 798 }, { "epoch": 0.3, "learning_rate": 2.1289496622274753e-05, "loss": 1.928, "step": 799 }, { "epoch": 0.3, "learning_rate": 2.1085949060360654e-05, "loss": 1.6015, "step": 800 }, { "epoch": 0.3, "learning_rate": 2.0883264489864475e-05, "loss": 1.7447, "step": 801 }, { "epoch": 0.3, "learning_rate": 2.0681445127312214e-05, "loss": 1.4804, "step": 802 }, { "epoch": 0.3, "learning_rate": 2.048049317976809e-05, "loss": 1.7259, "step": 803 }, { "epoch": 0.3, "learning_rate": 2.0280410844810428e-05, "loss": 1.507, "step": 804 }, { "epoch": 0.3, "learning_rate": 2.008120031050753e-05, "loss": 1.5448, "step": 805 }, { "epoch": 0.3, "learning_rate": 1.988286375539391e-05, "loss": 1.414, "step": 806 }, { "epoch": 0.3, "learning_rate": 1.9685403348446374e-05, "loss": 1.3942, "step": 807 }, { "epoch": 0.3, "learning_rate": 1.9488821249060297e-05, "loss": 1.4963, "step": 808 }, { "epoch": 0.31, "learning_rate": 1.9293119607025987e-05, "loss": 1.657, "step": 809 }, { "epoch": 0.31, "learning_rate": 1.9098300562505266e-05, "loss": 1.4615, "step": 810 }, { "epoch": 0.31, "learning_rate": 1.8904366246008e-05, "loss": 1.6224, "step": 811 }, { "epoch": 0.31, "learning_rate": 1.871131877836879e-05, "loss": 1.7225, "step": 812 }, { "epoch": 0.31, "learning_rate": 1.8519160270723857e-05, "loss": 1.3401, "step": 813 }, { "epoch": 0.31, "learning_rate": 1.8327892824487792e-05, "loss": 1.4441, "step": 814 }, { "epoch": 0.31, "learning_rate": 1.8137518531330767e-05, "loss": 1.7494, "step": 815 }, { "epoch": 0.31, "learning_rate": 1.7948039473155554e-05, "loss": 1.6001, "step": 816 }, { "epoch": 0.31, "learning_rate": 1.7759457722074768e-05, "loss": 1.5838, "step": 817 }, { "epoch": 0.31, "learning_rate": 1.7571775340388276e-05, "loss": 1.5504, "step": 818 }, { "epoch": 0.31, "learning_rate": 1.738499438056045e-05, "loss": 1.6978, "step": 819 }, { "epoch": 0.31, "learning_rate": 1.7199116885197995e-05, "loss": 1.6522, "step": 820 }, { "epoch": 0.31, "learning_rate": 1.7014144887027404e-05, "loss": 1.5064, "step": 821 }, { "epoch": 0.31, "learning_rate": 1.683008040887285e-05, "loss": 1.8634, "step": 822 }, { "epoch": 0.31, "learning_rate": 1.6646925463633922e-05, "loss": 1.4848, "step": 823 }, { "epoch": 0.31, "learning_rate": 1.646468205426377e-05, "loss": 1.7141, "step": 824 }, { "epoch": 0.31, "learning_rate": 1.6283352173747145e-05, "loss": 1.7473, "step": 825 }, { "epoch": 0.31, "learning_rate": 1.6102937805078544e-05, "loss": 1.3034, "step": 826 }, { "epoch": 0.31, "learning_rate": 1.592344092124064e-05, "loss": 1.4523, "step": 827 }, { "epoch": 0.31, "learning_rate": 1.5744863485182537e-05, "loss": 1.6129, "step": 828 }, { "epoch": 0.31, "learning_rate": 1.5567207449798515e-05, "loss": 1.4829, "step": 829 }, { "epoch": 0.31, "learning_rate": 1.5390474757906446e-05, "loss": 1.5858, "step": 830 }, { "epoch": 0.31, "learning_rate": 1.5214667342226818e-05, "loss": 1.5967, "step": 831 }, { "epoch": 0.31, "learning_rate": 1.5039787125361326e-05, "loss": 1.6712, "step": 832 }, { "epoch": 0.31, "learning_rate": 1.4865836019771995e-05, "loss": 1.6599, "step": 833 }, { "epoch": 0.31, "learning_rate": 1.4692815927760273e-05, "loss": 1.6611, "step": 834 }, { "epoch": 0.32, "learning_rate": 1.4520728741446089e-05, "loss": 1.5227, "step": 835 }, { "epoch": 0.32, "learning_rate": 1.4349576342747462e-05, "loss": 1.752, "step": 836 }, { "epoch": 0.32, "learning_rate": 1.4179360603359504e-05, "loss": 1.8085, "step": 837 }, { "epoch": 0.32, "learning_rate": 1.4010083384734308e-05, "loss": 1.5195, "step": 838 }, { "epoch": 0.32, "learning_rate": 1.384174653806044e-05, "loss": 1.4271, "step": 839 }, { "epoch": 0.32, "learning_rate": 1.3674351904242611e-05, "loss": 1.5761, "step": 840 }, { "epoch": 0.32, "learning_rate": 1.350790131388181e-05, "loss": 1.2839, "step": 841 }, { "epoch": 0.32, "learning_rate": 1.3342396587254958e-05, "loss": 1.6232, "step": 842 }, { "epoch": 0.32, "learning_rate": 1.3177839534295277e-05, "loss": 1.5391, "step": 843 }, { "epoch": 0.32, "learning_rate": 1.3014231954572287e-05, "loss": 1.5893, "step": 844 }, { "epoch": 0.32, "learning_rate": 1.2851575637272262e-05, "loss": 1.5226, "step": 845 }, { "epoch": 0.32, "learning_rate": 1.2689872361178701e-05, "loss": 1.3675, "step": 846 }, { "epoch": 0.32, "learning_rate": 1.2529123894652661e-05, "loss": 1.4554, "step": 847 }, { "epoch": 0.32, "learning_rate": 1.2369331995613665e-05, "loss": 1.2827, "step": 848 }, { "epoch": 0.32, "learning_rate": 1.2210498411520255e-05, "loss": 1.7629, "step": 849 }, { "epoch": 0.32, "learning_rate": 1.2052624879351104e-05, "loss": 1.3588, "step": 850 }, { "epoch": 0.32, "learning_rate": 1.189571312558585e-05, "loss": 1.4657, "step": 851 }, { "epoch": 0.32, "learning_rate": 1.173976486618631e-05, "loss": 1.7873, "step": 852 }, { "epoch": 0.32, "learning_rate": 1.158478180657766e-05, "loss": 1.6978, "step": 853 }, { "epoch": 0.32, "learning_rate": 1.143076564162977e-05, "loss": 1.4346, "step": 854 }, { "epoch": 0.32, "learning_rate": 1.1277718055638819e-05, "loss": 1.4188, "step": 855 }, { "epoch": 0.32, "learning_rate": 1.1125640722308628e-05, "loss": 1.4854, "step": 856 }, { "epoch": 0.32, "learning_rate": 1.097453530473258e-05, "loss": 1.5549, "step": 857 }, { "epoch": 0.32, "learning_rate": 1.0824403455375288e-05, "loss": 1.4872, "step": 858 }, { "epoch": 0.32, "learning_rate": 1.0675246816054586e-05, "loss": 1.45, "step": 859 }, { "epoch": 0.32, "learning_rate": 1.0527067017923654e-05, "loss": 1.4492, "step": 860 }, { "epoch": 0.32, "learning_rate": 1.0379865681452971e-05, "loss": 1.5337, "step": 861 }, { "epoch": 0.33, "learning_rate": 1.0233644416412791e-05, "loss": 1.6242, "step": 862 }, { "epoch": 0.33, "learning_rate": 1.0088404821855412e-05, "loss": 1.4762, "step": 863 }, { "epoch": 0.33, "learning_rate": 9.944148486097793e-06, "loss": 1.3621, "step": 864 }, { "epoch": 0.33, "learning_rate": 9.80087698670411e-06, "loss": 1.8927, "step": 865 }, { "epoch": 0.33, "learning_rate": 9.658591890468515e-06, "loss": 1.699, "step": 866 }, { "epoch": 0.33, "learning_rate": 9.517294753398064e-06, "loss": 1.5539, "step": 867 }, { "epoch": 0.33, "learning_rate": 9.376987120695545e-06, "loss": 1.4888, "step": 868 }, { "epoch": 0.33, "learning_rate": 9.237670526742793e-06, "loss": 1.5137, "step": 869 }, { "epoch": 0.33, "learning_rate": 9.09934649508375e-06, "loss": 1.3459, "step": 870 }, { "epoch": 0.33, "learning_rate": 8.96201653840788e-06, "loss": 1.5647, "step": 871 }, { "epoch": 0.33, "learning_rate": 8.825682158533554e-06, "loss": 1.5302, "step": 872 }, { "epoch": 0.33, "learning_rate": 8.690344846391729e-06, "loss": 1.3084, "step": 873 }, { "epoch": 0.33, "learning_rate": 8.55600608200956e-06, "loss": 1.4081, "step": 874 }, { "epoch": 0.33, "learning_rate": 8.422667334494249e-06, "loss": 1.4316, "step": 875 }, { "epoch": 0.33, "learning_rate": 8.290330062017016e-06, "loss": 1.5, "step": 876 }, { "epoch": 0.33, "learning_rate": 8.158995711797002e-06, "loss": 1.5807, "step": 877 }, { "epoch": 0.33, "learning_rate": 8.02866572008566e-06, "loss": 1.4631, "step": 878 }, { "epoch": 0.33, "learning_rate": 7.899341512150894e-06, "loss": 1.6158, "step": 879 }, { "epoch": 0.33, "learning_rate": 7.771024502261526e-06, "loss": 1.6828, "step": 880 }, { "epoch": 0.33, "learning_rate": 7.643716093671827e-06, "loss": 1.806, "step": 881 }, { "epoch": 0.33, "learning_rate": 7.51741767860612e-06, "loss": 1.4556, "step": 882 }, { "epoch": 0.33, "learning_rate": 7.392130638243666e-06, "loss": 1.7357, "step": 883 }, { "epoch": 0.33, "learning_rate": 7.267856342703461e-06, "loss": 1.5677, "step": 884 }, { "epoch": 0.33, "learning_rate": 7.144596151029303e-06, "loss": 1.5767, "step": 885 }, { "epoch": 0.33, "learning_rate": 7.022351411174866e-06, "loss": 1.483, "step": 886 }, { "epoch": 0.33, "learning_rate": 6.901123459989067e-06, "loss": 1.5725, "step": 887 }, { "epoch": 0.34, "learning_rate": 6.780913623201346e-06, "loss": 1.648, "step": 888 }, { "epoch": 0.34, "learning_rate": 6.661723215407223e-06, "loss": 1.3089, "step": 889 }, { "epoch": 0.34, "learning_rate": 6.543553540053926e-06, "loss": 1.3673, "step": 890 }, { "epoch": 0.34, "learning_rate": 6.426405889426046e-06, "loss": 1.7906, "step": 891 }, { "epoch": 0.34, "learning_rate": 6.310281544631546e-06, "loss": 1.4324, "step": 892 }, { "epoch": 0.34, "learning_rate": 6.195181775587655e-06, "loss": 1.5023, "step": 893 }, { "epoch": 0.34, "learning_rate": 6.081107841007006e-06, "loss": 1.3731, "step": 894 }, { "epoch": 0.34, "learning_rate": 5.968060988383883e-06, "loss": 1.3659, "step": 895 }, { "epoch": 0.34, "learning_rate": 5.856042453980526e-06, "loss": 1.4401, "step": 896 }, { "epoch": 0.34, "learning_rate": 5.745053462813699e-06, "loss": 1.2283, "step": 897 }, { "epoch": 0.34, "learning_rate": 5.63509522864123e-06, "loss": 1.6408, "step": 898 }, { "epoch": 0.34, "learning_rate": 5.5261689539487515e-06, "loss": 1.6938, "step": 899 }, { "epoch": 0.34, "learning_rate": 5.418275829936537e-06, "loss": 1.4477, "step": 900 }, { "epoch": 0.34, "learning_rate": 5.3114170365065164e-06, "loss": 1.5634, "step": 901 }, { "epoch": 0.34, "learning_rate": 5.205593742249326e-06, "loss": 1.4759, "step": 902 }, { "epoch": 0.34, "learning_rate": 5.100807104431571e-06, "loss": 1.6343, "step": 903 }, { "epoch": 0.34, "learning_rate": 4.997058268983135e-06, "loss": 1.5719, "step": 904 }, { "epoch": 0.34, "learning_rate": 4.8943483704846475e-06, "loss": 1.7144, "step": 905 }, { "epoch": 0.34, "learning_rate": 4.792678532155115e-06, "loss": 1.4101, "step": 906 }, { "epoch": 0.34, "learning_rate": 4.692049865839565e-06, "loss": 1.4293, "step": 907 }, { "epoch": 0.34, "learning_rate": 4.592463471997022e-06, "loss": 1.4332, "step": 908 }, { "epoch": 0.34, "learning_rate": 4.493920439688315e-06, "loss": 1.5606, "step": 909 }, { "epoch": 0.34, "learning_rate": 4.3964218465642355e-06, "loss": 1.5196, "step": 910 }, { "epoch": 0.34, "learning_rate": 4.299968758853812e-06, "loss": 1.7813, "step": 911 }, { "epoch": 0.34, "learning_rate": 4.204562231352516e-06, "loss": 1.5976, "step": 912 }, { "epoch": 0.34, "learning_rate": 4.1102033074108985e-06, "loss": 1.731, "step": 913 }, { "epoch": 0.34, "learning_rate": 4.016893018922996e-06, "loss": 1.4989, "step": 914 }, { "epoch": 0.35, "learning_rate": 3.924632386315186e-06, "loss": 1.4378, "step": 915 }, { "epoch": 0.35, "learning_rate": 3.83342241853496e-06, "loss": 1.3466, "step": 916 }, { "epoch": 0.35, "learning_rate": 3.7432641130399236e-06, "loss": 1.4538, "step": 917 }, { "epoch": 0.35, "learning_rate": 3.6541584557868604e-06, "loss": 1.5828, "step": 918 }, { "epoch": 0.35, "learning_rate": 3.5661064212209493e-06, "loss": 1.6145, "step": 919 }, { "epoch": 0.35, "learning_rate": 3.4791089722651436e-06, "loss": 1.6774, "step": 920 }, { "epoch": 0.35, "learning_rate": 3.393167060309588e-06, "loss": 1.4517, "step": 921 }, { "epoch": 0.35, "learning_rate": 3.3082816252012926e-06, "loss": 1.7202, "step": 922 }, { "epoch": 0.35, "learning_rate": 3.2244535952337562e-06, "loss": 1.6624, "step": 923 }, { "epoch": 0.35, "learning_rate": 3.1416838871368924e-06, "loss": 1.4744, "step": 924 }, { "epoch": 0.35, "learning_rate": 3.059973406066963e-06, "loss": 1.4983, "step": 925 }, { "epoch": 0.35, "learning_rate": 2.9793230455966937e-06, "loss": 1.6633, "step": 926 }, { "epoch": 0.35, "learning_rate": 2.8997336877055194e-06, "loss": 1.7303, "step": 927 }, { "epoch": 0.35, "learning_rate": 2.821206202769899e-06, "loss": 1.6822, "step": 928 }, { "epoch": 0.35, "learning_rate": 2.743741449553827e-06, "loss": 1.4701, "step": 929 }, { "epoch": 0.35, "learning_rate": 2.667340275199426e-06, "loss": 1.5291, "step": 930 }, { "epoch": 0.35, "learning_rate": 2.5920035152176892e-06, "loss": 1.4836, "step": 931 }, { "epoch": 0.35, "learning_rate": 2.5177319934794e-06, "loss": 1.5585, "step": 932 }, { "epoch": 0.35, "learning_rate": 2.44452652220597e-06, "loss": 1.5227, "step": 933 }, { "epoch": 0.35, "learning_rate": 2.3723879019607374e-06, "loss": 1.4404, "step": 934 }, { "epoch": 0.35, "learning_rate": 2.3013169216400733e-06, "loss": 1.602, "step": 935 }, { "epoch": 0.35, "learning_rate": 2.2313143584648423e-06, "loss": 1.5406, "step": 936 }, { "epoch": 0.35, "learning_rate": 2.162380977971867e-06, "loss": 1.4306, "step": 937 }, { "epoch": 0.35, "learning_rate": 2.0945175340055357e-06, "loss": 1.3999, "step": 938 }, { "epoch": 0.35, "learning_rate": 2.0277247687096155e-06, "loss": 1.3581, "step": 939 }, { "epoch": 0.35, "learning_rate": 1.9620034125190644e-06, "loss": 1.5773, "step": 940 }, { "epoch": 0.36, "learning_rate": 1.8973541841521335e-06, "loss": 1.3736, "step": 941 }, { "epoch": 0.36, "learning_rate": 1.8337777906023978e-06, "loss": 1.5649, "step": 942 }, { "epoch": 0.36, "learning_rate": 1.771274927131139e-06, "loss": 1.709, "step": 943 }, { "epoch": 0.36, "learning_rate": 1.7098462772596302e-06, "loss": 1.6171, "step": 944 }, { "epoch": 0.36, "learning_rate": 1.6494925127617634e-06, "loss": 1.4633, "step": 945 }, { "epoch": 0.36, "learning_rate": 1.5902142936566334e-06, "loss": 1.7157, "step": 946 }, { "epoch": 0.36, "learning_rate": 1.532012268201344e-06, "loss": 1.5205, "step": 947 }, { "epoch": 0.36, "learning_rate": 1.4748870728839347e-06, "loss": 1.5265, "step": 948 }, { "epoch": 0.36, "learning_rate": 1.4188393324163663e-06, "loss": 1.4795, "step": 949 }, { "epoch": 0.36, "learning_rate": 1.3638696597277679e-06, "loss": 1.4224, "step": 950 }, { "epoch": 0.36, "learning_rate": 1.3099786559576554e-06, "loss": 1.5683, "step": 951 }, { "epoch": 0.36, "learning_rate": 1.2571669104494256e-06, "loss": 1.3445, "step": 952 }, { "epoch": 0.36, "learning_rate": 1.2054350007438707e-06, "loss": 1.3331, "step": 953 }, { "epoch": 0.36, "learning_rate": 1.1547834925728528e-06, "loss": 1.4435, "step": 954 }, { "epoch": 0.36, "learning_rate": 1.1052129398531507e-06, "loss": 1.6942, "step": 955 }, { "epoch": 0.36, "learning_rate": 1.0567238846803996e-06, "loss": 1.471, "step": 956 }, { "epoch": 0.36, "learning_rate": 1.0093168573231393e-06, "loss": 1.3496, "step": 957 }, { "epoch": 0.36, "learning_rate": 9.62992376217009e-07, "loss": 1.4363, "step": 958 }, { "epoch": 0.36, "learning_rate": 9.177509479591173e-07, "loss": 1.2465, "step": 959 }, { "epoch": 0.36, "learning_rate": 8.735930673024806e-07, "loss": 1.4027, "step": 960 }, { "epoch": 0.36, "learning_rate": 8.305192171506049e-07, "loss": 1.6351, "step": 961 }, { "epoch": 0.36, "learning_rate": 7.885298685522235e-07, "loss": 1.4707, "step": 962 }, { "epoch": 0.36, "learning_rate": 7.476254806961014e-07, "loss": 1.6195, "step": 963 }, { "epoch": 0.36, "learning_rate": 7.078065009060941e-07, "loss": 1.5442, "step": 964 }, { "epoch": 0.36, "learning_rate": 6.690733646361857e-07, "loss": 1.492, "step": 965 }, { "epoch": 0.36, "learning_rate": 6.314264954657256e-07, "loss": 1.479, "step": 966 }, { "epoch": 0.36, "learning_rate": 5.948663050948767e-07, "loss": 1.6958, "step": 967 }, { "epoch": 0.37, "learning_rate": 5.593931933399854e-07, "loss": 1.6097, "step": 968 }, { "epoch": 0.37, "learning_rate": 5.250075481293526e-07, "loss": 1.5101, "step": 969 }, { "epoch": 0.37, "learning_rate": 4.917097454988584e-07, "loss": 1.5687, "step": 970 }, { "epoch": 0.37, "learning_rate": 4.5950014958795475e-07, "loss": 1.4106, "step": 971 }, { "epoch": 0.37, "learning_rate": 4.2837911263562404e-07, "loss": 1.4372, "step": 972 }, { "epoch": 0.37, "learning_rate": 3.983469749765267e-07, "loss": 1.5331, "step": 973 }, { "epoch": 0.37, "learning_rate": 3.694040650373154e-07, "loss": 1.816, "step": 974 }, { "epoch": 0.37, "learning_rate": 3.415506993330153e-07, "loss": 1.5552, "step": 975 }, { "epoch": 0.37, "learning_rate": 3.1478718246357173e-07, "loss": 1.4554, "step": 976 }, { "epoch": 0.37, "learning_rate": 2.891138071105193e-07, "loss": 1.4739, "step": 977 }, { "epoch": 0.37, "learning_rate": 2.645308540337843e-07, "loss": 1.5654, "step": 978 }, { "epoch": 0.37, "learning_rate": 2.410385920685765e-07, "loss": 1.4487, "step": 979 }, { "epoch": 0.37, "learning_rate": 2.1863727812254653e-07, "loss": 1.4449, "step": 980 }, { "epoch": 0.37, "learning_rate": 1.973271571728441e-07, "loss": 1.517, "step": 981 }, { "epoch": 0.37, "learning_rate": 1.7710846226355328e-07, "loss": 1.5971, "step": 982 }, { "epoch": 0.37, "learning_rate": 1.5798141450307225e-07, "loss": 1.6616, "step": 983 }, { "epoch": 0.37, "learning_rate": 1.3994622306173765e-07, "loss": 1.3977, "step": 984 }, { "epoch": 0.37, "learning_rate": 1.230030851695263e-07, "loss": 1.5483, "step": 985 }, { "epoch": 0.37, "learning_rate": 1.0715218611384581e-07, "loss": 1.9048, "step": 986 }, { "epoch": 0.37, "learning_rate": 9.239369923762508e-08, "loss": 1.5007, "step": 987 }, { "epoch": 0.37, "learning_rate": 7.872778593728258e-08, "loss": 1.5753, "step": 988 }, { "epoch": 0.37, "learning_rate": 6.615459566108317e-08, "loss": 1.4905, "step": 989 }, { "epoch": 0.37, "learning_rate": 5.467426590739511e-08, "loss": 1.525, "step": 990 }, { "epoch": 0.37, "learning_rate": 4.428692222329112e-08, "loss": 1.5879, "step": 991 }, { "epoch": 0.37, "learning_rate": 3.499267820307184e-08, "loss": 1.5155, "step": 992 }, { "epoch": 0.37, "learning_rate": 2.6791635487122268e-08, "loss": 1.4999, "step": 993 }, { "epoch": 0.38, "learning_rate": 1.9683883760723832e-08, "loss": 1.5192, "step": 994 }, { "epoch": 0.38, "learning_rate": 1.3669500753099585e-08, "loss": 1.6683, "step": 995 }, { "epoch": 0.38, "learning_rate": 8.748552236603757e-09, "loss": 1.6939, "step": 996 }, { "epoch": 0.38, "learning_rate": 4.921092025911289e-09, "loss": 1.4738, "step": 997 }, { "epoch": 0.38, "learning_rate": 2.187161977540431e-09, "loss": 1.3242, "step": 998 }, { "epoch": 0.38, "learning_rate": 5.467919892865325e-10, "loss": 1.4856, "step": 999 }, { "epoch": 0.38, "learning_rate": 0.0, "loss": 1.2842, "step": 1000 } ], "max_steps": 1000, "num_train_epochs": 1, "total_flos": 4.212995477615309e+16, "trial_name": null, "trial_params": null }