|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.4643978008769782, |
|
"global_step": 652, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.517241379310345e-06, |
|
"loss": 0.4754, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.103448275862069e-05, |
|
"loss": 0.497, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6551724137931037e-05, |
|
"loss": 0.5361, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.206896551724138e-05, |
|
"loss": 0.4945, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.7586206896551727e-05, |
|
"loss": 0.4847, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.310344827586207e-05, |
|
"loss": 0.4741, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.862068965517242e-05, |
|
"loss": 0.4742, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.413793103448276e-05, |
|
"loss": 0.4544, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9655172413793107e-05, |
|
"loss": 0.4779, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.517241379310345e-05, |
|
"loss": 0.4374, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.068965517241379e-05, |
|
"loss": 0.5045, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.620689655172415e-05, |
|
"loss": 0.46, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.17241379310345e-05, |
|
"loss": 0.4489, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.724137931034484e-05, |
|
"loss": 0.4535, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.999989544228434e-05, |
|
"loss": 0.4802, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.999905898383868e-05, |
|
"loss": 0.5038, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.999738608443897e-05, |
|
"loss": 0.4804, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.999487677906807e-05, |
|
"loss": 0.4752, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.999153112019939e-05, |
|
"loss": 0.4681, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.998734917779568e-05, |
|
"loss": 0.456, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.998233103930771e-05, |
|
"loss": 0.4724, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.99764768096724e-05, |
|
"loss": 0.4514, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.99697866113105e-05, |
|
"loss": 0.4601, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.996226058412426e-05, |
|
"loss": 0.4754, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.995389888549428e-05, |
|
"loss": 0.4395, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.994470169027636e-05, |
|
"loss": 0.4795, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.993466919079781e-05, |
|
"loss": 0.4506, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.992380159685338e-05, |
|
"loss": 0.5032, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.991209913570094e-05, |
|
"loss": 0.4629, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.989956205205669e-05, |
|
"loss": 0.4653, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.988619060809005e-05, |
|
"loss": 0.4462, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.98719850834182e-05, |
|
"loss": 0.434, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.985694577510017e-05, |
|
"loss": 0.4845, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.98410729976307e-05, |
|
"loss": 0.4586, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.982436708293364e-05, |
|
"loss": 0.4515, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.980682838035498e-05, |
|
"loss": 0.4621, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.978845725665557e-05, |
|
"loss": 0.4524, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.976925409600346e-05, |
|
"loss": 0.4458, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.974921929996585e-05, |
|
"loss": 0.4992, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.97283532875007e-05, |
|
"loss": 0.4931, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.970665649494793e-05, |
|
"loss": 0.469, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.968412937602039e-05, |
|
"loss": 0.5017, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.966077240179428e-05, |
|
"loss": 0.4753, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.963658606069931e-05, |
|
"loss": 0.4618, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.961157085850858e-05, |
|
"loss": 0.4356, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.958572731832784e-05, |
|
"loss": 0.5153, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.955905598058471e-05, |
|
"loss": 0.4933, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.95315574030173e-05, |
|
"loss": 0.4749, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.950323216066252e-05, |
|
"loss": 0.4162, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.947408084584417e-05, |
|
"loss": 0.4894, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.944410406816039e-05, |
|
"loss": 0.4281, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.941330245447103e-05, |
|
"loss": 0.4245, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.938167664888458e-05, |
|
"loss": 0.4468, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.934922731274452e-05, |
|
"loss": 0.4765, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.931595512461566e-05, |
|
"loss": 0.4411, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.928186078026991e-05, |
|
"loss": 0.4727, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.924694499267169e-05, |
|
"loss": 0.4123, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.921120849196303e-05, |
|
"loss": 0.4703, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.917465202544833e-05, |
|
"loss": 0.4674, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.913727635757873e-05, |
|
"loss": 0.4812, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.909908226993609e-05, |
|
"loss": 0.4304, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.906007056121667e-05, |
|
"loss": 0.4449, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.902024204721445e-05, |
|
"loss": 0.4433, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.897959756080402e-05, |
|
"loss": 0.4602, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.89381379519232e-05, |
|
"loss": 0.4698, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.889586408755526e-05, |
|
"loss": 0.3831, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.885277685171078e-05, |
|
"loss": 0.4394, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.880887714540917e-05, |
|
"loss": 0.4302, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.876416588665984e-05, |
|
"loss": 0.4079, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.871864401044297e-05, |
|
"loss": 0.4639, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.867231246869002e-05, |
|
"loss": 0.4498, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.862517223026373e-05, |
|
"loss": 0.4289, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.857722428093797e-05, |
|
"loss": 0.4515, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.8528469623377e-05, |
|
"loss": 0.4322, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.847890927711464e-05, |
|
"loss": 0.434, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.842854427853285e-05, |
|
"loss": 0.4431, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.83773756808401e-05, |
|
"loss": 0.4089, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.83254045540493e-05, |
|
"loss": 0.488, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.827263198495554e-05, |
|
"loss": 0.4526, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 7.821905907711319e-05, |
|
"loss": 0.4184, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.816468695081297e-05, |
|
"loss": 0.4176, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.810951674305845e-05, |
|
"loss": 0.4562, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.805354960754233e-05, |
|
"loss": 0.4403, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.799678671462222e-05, |
|
"loss": 0.4238, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.793922925129626e-05, |
|
"loss": 0.4911, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.788087842117826e-05, |
|
"loss": 0.4704, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.782173544447253e-05, |
|
"loss": 0.4355, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.776180155794836e-05, |
|
"loss": 0.4341, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.770107801491414e-05, |
|
"loss": 0.4198, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.763956608519123e-05, |
|
"loss": 0.4756, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.757726705508732e-05, |
|
"loss": 0.4315, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.751418222736954e-05, |
|
"loss": 0.4167, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.745031292123727e-05, |
|
"loss": 0.4316, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.738566047229454e-05, |
|
"loss": 0.4103, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.732022623252203e-05, |
|
"loss": 0.4527, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.725401157024887e-05, |
|
"loss": 0.4587, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.718701787012406e-05, |
|
"loss": 0.457, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.711924653308737e-05, |
|
"loss": 0.4463, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.705069897634022e-05, |
|
"loss": 0.3972, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.69813766333159e-05, |
|
"loss": 0.4375, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.691128095364969e-05, |
|
"loss": 0.4472, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.684041340314852e-05, |
|
"loss": 0.4198, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.676877546376028e-05, |
|
"loss": 0.4365, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.669636863354287e-05, |
|
"loss": 0.4393, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.662319442663286e-05, |
|
"loss": 0.373, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.654925437321386e-05, |
|
"loss": 0.4099, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.647455001948443e-05, |
|
"loss": 0.4473, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.639908292762585e-05, |
|
"loss": 0.4578, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.632285467576943e-05, |
|
"loss": 0.4263, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.624586685796343e-05, |
|
"loss": 0.402, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.616812108413986e-05, |
|
"loss": 0.4347, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.608961898008066e-05, |
|
"loss": 0.45, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.601036218738382e-05, |
|
"loss": 0.4445, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.593035236342903e-05, |
|
"loss": 0.421, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.5849591181343e-05, |
|
"loss": 0.4048, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.576808032996444e-05, |
|
"loss": 0.4372, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.568582151380881e-05, |
|
"loss": 0.3985, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.560281645303266e-05, |
|
"loss": 0.4387, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.551906688339762e-05, |
|
"loss": 0.3997, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.543457455623413e-05, |
|
"loss": 0.3969, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.534934123840486e-05, |
|
"loss": 0.4212, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.526336871226763e-05, |
|
"loss": 0.4026, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.51766587756383e-05, |
|
"loss": 0.424, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.50892132417531e-05, |
|
"loss": 0.4037, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.500103393923068e-05, |
|
"loss": 0.395, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.491212271203391e-05, |
|
"loss": 0.385, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.482248141943133e-05, |
|
"loss": 0.4292, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.473211193595825e-05, |
|
"loss": 0.4002, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.464101615137756e-05, |
|
"loss": 0.4208, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.454919597064017e-05, |
|
"loss": 0.3817, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.445665331384526e-05, |
|
"loss": 0.3668, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.436339011620007e-05, |
|
"loss": 0.4409, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.426940832797941e-05, |
|
"loss": 0.3886, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.417470991448494e-05, |
|
"loss": 0.4372, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.407929685600403e-05, |
|
"loss": 0.4362, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.398317114776836e-05, |
|
"loss": 0.4164, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.388633479991219e-05, |
|
"loss": 0.4498, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.378878983743037e-05, |
|
"loss": 0.3623, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.369053830013588e-05, |
|
"loss": 0.4217, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.35915822426173e-05, |
|
"loss": 0.4554, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.349192373419583e-05, |
|
"loss": 0.4505, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.339156485888191e-05, |
|
"loss": 0.3735, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.329050771533176e-05, |
|
"loss": 0.4257, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.31887544168034e-05, |
|
"loss": 0.469, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.308630709111261e-05, |
|
"loss": 0.3918, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.29831678805882e-05, |
|
"loss": 0.4332, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.287933894202743e-05, |
|
"loss": 0.4651, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.277482244665079e-05, |
|
"loss": 0.3642, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.266962058005659e-05, |
|
"loss": 0.4475, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.256373554217536e-05, |
|
"loss": 0.4169, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.245716954722374e-05, |
|
"loss": 0.4228, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.234992482365821e-05, |
|
"loss": 0.4214, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.22420036141285e-05, |
|
"loss": 0.4339, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.21334081754307e-05, |
|
"loss": 0.4068, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.202414077846003e-05, |
|
"loss": 0.4048, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.191420370816339e-05, |
|
"loss": 0.4318, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.180359926349158e-05, |
|
"loss": 0.3897, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.169232975735115e-05, |
|
"loss": 0.4575, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.158039751655619e-05, |
|
"loss": 0.442, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.146780488177948e-05, |
|
"loss": 0.3831, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.135455420750371e-05, |
|
"loss": 0.3654, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.124064786197213e-05, |
|
"loss": 0.4198, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.112608822713908e-05, |
|
"loss": 0.4102, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.10108776986202e-05, |
|
"loss": 0.375, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.089501868564224e-05, |
|
"loss": 0.4253, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.077851361099281e-05, |
|
"loss": 0.4087, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.066136491096961e-05, |
|
"loss": 0.4172, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.054357503532953e-05, |
|
"loss": 0.4306, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.042514644723741e-05, |
|
"loss": 0.3946, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.030608162321455e-05, |
|
"loss": 0.4295, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.018638305308693e-05, |
|
"loss": 0.4249, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.006605323993301e-05, |
|
"loss": 0.3931, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.994509470003167e-05, |
|
"loss": 0.3852, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.982350996280926e-05, |
|
"loss": 0.3973, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.970130157078697e-05, |
|
"loss": 0.422, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.957847207952752e-05, |
|
"loss": 0.3904, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.945502405758174e-05, |
|
"loss": 0.3854, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.933096008643489e-05, |
|
"loss": 0.3958, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.92062827604527e-05, |
|
"loss": 0.4068, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.9080994686827e-05, |
|
"loss": 0.3632, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.895509848552134e-05, |
|
"loss": 0.4087, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.882859678921612e-05, |
|
"loss": 0.3684, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.870149224325358e-05, |
|
"loss": 0.4083, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.85737875055824e-05, |
|
"loss": 0.4025, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.844548524670226e-05, |
|
"loss": 0.3936, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.831658814960786e-05, |
|
"loss": 0.4363, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.818709890973289e-05, |
|
"loss": 0.4119, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.805702023489361e-05, |
|
"loss": 0.3917, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.792635484523231e-05, |
|
"loss": 0.4127, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.779510547316034e-05, |
|
"loss": 0.3575, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.766327486330103e-05, |
|
"loss": 0.3646, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.753086577243225e-05, |
|
"loss": 0.4003, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.739788096942879e-05, |
|
"loss": 0.4276, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.726432323520447e-05, |
|
"loss": 0.3895, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.713019536265395e-05, |
|
"loss": 0.392, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.699550015659437e-05, |
|
"loss": 0.3909, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.686024043370667e-05, |
|
"loss": 0.39, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.672441902247666e-05, |
|
"loss": 0.3835, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.658803876313592e-05, |
|
"loss": 0.4331, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.645110250760243e-05, |
|
"loss": 0.3894, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.631361311942085e-05, |
|
"loss": 0.3964, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.617557347370266e-05, |
|
"loss": 0.3844, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.603698645706614e-05, |
|
"loss": 0.3761, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.589785496757587e-05, |
|
"loss": 0.399, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.575818191468217e-05, |
|
"loss": 0.4046, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.561797021916031e-05, |
|
"loss": 0.402, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.547722281304935e-05, |
|
"loss": 0.3824, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.533594263959095e-05, |
|
"loss": 0.4048, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.519413265316762e-05, |
|
"loss": 0.382, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.50517958192412e-05, |
|
"loss": 0.3952, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.49089351142906e-05, |
|
"loss": 0.3711, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.476555352574973e-05, |
|
"loss": 0.3902, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.462165405194492e-05, |
|
"loss": 0.4164, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.447723970203232e-05, |
|
"loss": 0.4021, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.433231349593487e-05, |
|
"loss": 0.3853, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.418687846427922e-05, |
|
"loss": 0.385, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.404093764833235e-05, |
|
"loss": 0.4017, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.389449409993791e-05, |
|
"loss": 0.3751, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.374755088145253e-05, |
|
"loss": 0.3937, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.360011106568158e-05, |
|
"loss": 0.3854, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.345217773581512e-05, |
|
"loss": 0.4066, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.330375398536333e-05, |
|
"loss": 0.4047, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.315484291809181e-05, |
|
"loss": 0.3834, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.300544764795669e-05, |
|
"loss": 0.4043, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.285557129903954e-05, |
|
"loss": 0.3703, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.270521700548196e-05, |
|
"loss": 0.3748, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.255438791142017e-05, |
|
"loss": 0.4021, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.240308717091913e-05, |
|
"loss": 0.3973, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.22513179479067e-05, |
|
"loss": 0.3821, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.209908341610736e-05, |
|
"loss": 0.4095, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.194638675897591e-05, |
|
"loss": 0.4244, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.179323116963095e-05, |
|
"loss": 0.3658, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.163961985078798e-05, |
|
"loss": 0.3782, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.148555601469252e-05, |
|
"loss": 0.4021, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.133104288305293e-05, |
|
"loss": 0.3933, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.1176083686973e-05, |
|
"loss": 0.3814, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.1020681666884406e-05, |
|
"loss": 0.3991, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.086484007247894e-05, |
|
"loss": 0.4124, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.070856216264057e-05, |
|
"loss": 0.369, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.055185120537728e-05, |
|
"loss": 0.379, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.039471047775273e-05, |
|
"loss": 0.4135, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.023714326581772e-05, |
|
"loss": 0.3477, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.0079152864541506e-05, |
|
"loss": 0.4139, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.992074257774285e-05, |
|
"loss": 0.3711, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.976191571802097e-05, |
|
"loss": 0.4052, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.960267560668625e-05, |
|
"loss": 0.402, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.9443025573690795e-05, |
|
"loss": 0.3992, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.928296895755882e-05, |
|
"loss": 0.3895, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.9122509105316736e-05, |
|
"loss": 0.4004, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.896164937242331e-05, |
|
"loss": 0.3736, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.880039312269939e-05, |
|
"loss": 0.3452, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.863874372825757e-05, |
|
"loss": 0.3806, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.847670456943174e-05, |
|
"loss": 0.3684, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.831427903470631e-05, |
|
"loss": 0.3891, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.815147052064543e-05, |
|
"loss": 0.3882, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.798828243182191e-05, |
|
"loss": 0.3889, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.7824718180746034e-05, |
|
"loss": 0.3882, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.766078118779424e-05, |
|
"loss": 0.3863, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.749647488113755e-05, |
|
"loss": 0.3629, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.7331802696669885e-05, |
|
"loss": 0.3666, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.716676807793624e-05, |
|
"loss": 0.3499, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.7001374476060646e-05, |
|
"loss": 0.4004, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5.683562534967401e-05, |
|
"loss": 0.3468, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.666952416484183e-05, |
|
"loss": 0.3729, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.650307439499164e-05, |
|
"loss": 0.3763, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.6336279520840445e-05, |
|
"loss": 0.3543, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.61691430303219e-05, |
|
"loss": 0.4355, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.600166841851333e-05, |
|
"loss": 0.3764, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.583385918756277e-05, |
|
"loss": 0.3594, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.5665718846615584e-05, |
|
"loss": 0.3589, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.549725091174117e-05, |
|
"loss": 0.3726, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.532845890585941e-05, |
|
"loss": 0.3803, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.5159346358667e-05, |
|
"loss": 0.3661, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.4989916806563655e-05, |
|
"loss": 0.3902, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.482017379257812e-05, |
|
"loss": 0.3953, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.465012086629411e-05, |
|
"loss": 0.3633, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 5.447976158377611e-05, |
|
"loss": 0.3559, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.430909950749491e-05, |
|
"loss": 0.3633, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.413813820625324e-05, |
|
"loss": 0.3553, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.3966881255111024e-05, |
|
"loss": 0.3908, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.3795332235310744e-05, |
|
"loss": 0.3674, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.3623494734202415e-05, |
|
"loss": 0.4049, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.345137234516864e-05, |
|
"loss": 0.351, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.327896866754953e-05, |
|
"loss": 0.3518, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.310628730656726e-05, |
|
"loss": 0.3458, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.293333187325086e-05, |
|
"loss": 0.3756, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.276010598436061e-05, |
|
"loss": 0.3508, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.2586613262312414e-05, |
|
"loss": 0.3665, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.241285733510209e-05, |
|
"loss": 0.3502, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.223884183622943e-05, |
|
"loss": 0.3846, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.20645704046223e-05, |
|
"loss": 0.3475, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.189004668456051e-05, |
|
"loss": 0.4049, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.171527432559957e-05, |
|
"loss": 0.3801, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.1540256982494425e-05, |
|
"loss": 0.4386, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.1364998315123024e-05, |
|
"loss": 0.3385, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.118950198840977e-05, |
|
"loss": 0.3704, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.101377167224885e-05, |
|
"loss": 0.3842, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.0837811041427534e-05, |
|
"loss": 0.3807, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.0661623775549376e-05, |
|
"loss": 0.377, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.048521355895714e-05, |
|
"loss": 0.3896, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.030858408065587e-05, |
|
"loss": 0.3721, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.01317390342357e-05, |
|
"loss": 0.3744, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.995468211779459e-05, |
|
"loss": 0.4038, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.977741703386107e-05, |
|
"loss": 0.3491, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.9599947489316725e-05, |
|
"loss": 0.3655, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.942227719531875e-05, |
|
"loss": 0.3962, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.924440986722231e-05, |
|
"loss": 0.3642, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.906634922450285e-05, |
|
"loss": 0.3703, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.888809899067829e-05, |
|
"loss": 0.3628, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.870966289323123e-05, |
|
"loss": 0.3639, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.8531044663530944e-05, |
|
"loss": 0.3929, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.835224803675537e-05, |
|
"loss": 0.3438, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.817327675181297e-05, |
|
"loss": 0.3349, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.799413455126461e-05, |
|
"loss": 0.3395, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7814825181245254e-05, |
|
"loss": 0.3754, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.763535239138562e-05, |
|
"loss": 0.3264, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.74557199347338e-05, |
|
"loss": 0.3946, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7275931567676735e-05, |
|
"loss": 0.3699, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.709599104986172e-05, |
|
"loss": 0.3708, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.691590214411772e-05, |
|
"loss": 0.3557, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6735668616376734e-05, |
|
"loss": 0.3537, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6555294235595047e-05, |
|
"loss": 0.3601, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.637478277367436e-05, |
|
"loss": 0.366, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6194138005382945e-05, |
|
"loss": 0.3928, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6013363708276754e-05, |
|
"loss": 0.3209, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.5832463662620353e-05, |
|
"loss": 0.3796, |
|
"step": 652 |
|
} |
|
], |
|
"max_steps": 1403, |
|
"num_train_epochs": 1, |
|
"total_flos": 3.846934282119414e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|