|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 10000, |
|
"global_step": 268932, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.990703969776747e-05, |
|
"loss": 8.1971, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9814079395534934e-05, |
|
"loss": 7.3697, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.97211190933024e-05, |
|
"loss": 6.9478, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.962815879106986e-05, |
|
"loss": 6.5908, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.953519848883733e-05, |
|
"loss": 6.3102, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.94422381866048e-05, |
|
"loss": 6.0716, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9349277884372256e-05, |
|
"loss": 5.859, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.925631758213973e-05, |
|
"loss": 5.6554, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.916335727990719e-05, |
|
"loss": 5.4972, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.907039697767465e-05, |
|
"loss": 5.3606, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8977436675442126e-05, |
|
"loss": 5.2518, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8884476373209585e-05, |
|
"loss": 5.1562, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.879151607097705e-05, |
|
"loss": 5.0784, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8698555768744516e-05, |
|
"loss": 5.0087, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.860559546651198e-05, |
|
"loss": 4.9465, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.851263516427945e-05, |
|
"loss": 4.8951, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.841967486204691e-05, |
|
"loss": 4.8451, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.832671455981438e-05, |
|
"loss": 4.8033, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8233754257581845e-05, |
|
"loss": 4.772, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.814079395534931e-05, |
|
"loss": 4.729, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.2836232206800241, |
|
"eval_loss": 4.607733249664307, |
|
"eval_runtime": 5046.5458, |
|
"eval_samples_per_second": 89.595, |
|
"eval_steps_per_second": 1.4, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8047833653116776e-05, |
|
"loss": 4.698, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7954873350884235e-05, |
|
"loss": 4.6666, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.786191304865171e-05, |
|
"loss": 4.6425, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7768952746419174e-05, |
|
"loss": 4.6117, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.767599244418663e-05, |
|
"loss": 4.5848, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7583032141954105e-05, |
|
"loss": 4.5665, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7490071839721564e-05, |
|
"loss": 4.5407, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.739711153748903e-05, |
|
"loss": 4.5278, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.73041512352565e-05, |
|
"loss": 4.501, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.721119093302396e-05, |
|
"loss": 4.4807, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.711823063079143e-05, |
|
"loss": 4.468, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.702527032855889e-05, |
|
"loss": 4.4464, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.693231002632636e-05, |
|
"loss": 4.4371, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.6839349724093824e-05, |
|
"loss": 4.4197, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.674638942186129e-05, |
|
"loss": 4.4046, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6653429119628756e-05, |
|
"loss": 4.3892, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.656046881739622e-05, |
|
"loss": 4.3779, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.646750851516369e-05, |
|
"loss": 4.3627, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.637454821293115e-05, |
|
"loss": 4.3501, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.628158791069861e-05, |
|
"loss": 4.3383, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.31616356516397504, |
|
"eval_loss": 4.231759548187256, |
|
"eval_runtime": 5043.1658, |
|
"eval_samples_per_second": 89.655, |
|
"eval_steps_per_second": 1.401, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.6188627608466085e-05, |
|
"loss": 4.3304, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.609566730623355e-05, |
|
"loss": 4.3221, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.600270700400101e-05, |
|
"loss": 4.3029, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.590974670176848e-05, |
|
"loss": 4.2954, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.581678639953594e-05, |
|
"loss": 4.2818, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5723826097303407e-05, |
|
"loss": 4.2812, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.563086579507088e-05, |
|
"loss": 4.2749, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.553790549283834e-05, |
|
"loss": 4.2555, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5444945190605804e-05, |
|
"loss": 4.2467, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.535198488837327e-05, |
|
"loss": 4.242, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5259024586140735e-05, |
|
"loss": 4.2395, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.51660642839082e-05, |
|
"loss": 4.2208, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.507310398167567e-05, |
|
"loss": 4.2181, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.498014367944313e-05, |
|
"loss": 4.2108, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.48871833772106e-05, |
|
"loss": 4.2053, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4794223074978064e-05, |
|
"loss": 4.1924, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.470126277274553e-05, |
|
"loss": 4.187, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.460830247051299e-05, |
|
"loss": 4.1839, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.451534216828046e-05, |
|
"loss": 4.1739, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.442238186604793e-05, |
|
"loss": 4.1706, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.33159489024981786, |
|
"eval_loss": 4.065094947814941, |
|
"eval_runtime": 5049.5785, |
|
"eval_samples_per_second": 89.541, |
|
"eval_steps_per_second": 1.399, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.432942156381539e-05, |
|
"loss": 4.1606, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.423646126158286e-05, |
|
"loss": 4.1613, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.414350095935032e-05, |
|
"loss": 4.1533, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.405054065711779e-05, |
|
"loss": 4.1449, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.3957580354885256e-05, |
|
"loss": 4.1376, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.3864620052652715e-05, |
|
"loss": 4.1307, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.377165975042019e-05, |
|
"loss": 4.1225, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3678699448187646e-05, |
|
"loss": 4.1147, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.358573914595511e-05, |
|
"loss": 4.1113, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3492778843722584e-05, |
|
"loss": 4.1075, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3399818541490043e-05, |
|
"loss": 4.1023, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.330685823925751e-05, |
|
"loss": 4.0986, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3213897937024975e-05, |
|
"loss": 4.0933, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.312093763479244e-05, |
|
"loss": 4.0899, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3027977332559906e-05, |
|
"loss": 4.0842, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.293501703032737e-05, |
|
"loss": 4.0783, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.284205672809484e-05, |
|
"loss": 4.0721, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2749096425862304e-05, |
|
"loss": 4.0692, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.265613612362977e-05, |
|
"loss": 4.0613, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2563175821397235e-05, |
|
"loss": 4.0594, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.34162108157696924, |
|
"eval_loss": 3.9599251747131348, |
|
"eval_runtime": 5055.4019, |
|
"eval_samples_per_second": 89.438, |
|
"eval_steps_per_second": 1.398, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2470215519164694e-05, |
|
"loss": 4.0552, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.237725521693217e-05, |
|
"loss": 4.0538, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.228429491469963e-05, |
|
"loss": 4.049, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.219133461246709e-05, |
|
"loss": 4.04, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2098374310234564e-05, |
|
"loss": 4.0417, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.200541400800202e-05, |
|
"loss": 4.0398, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.191245370576949e-05, |
|
"loss": 4.0348, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.181949340353696e-05, |
|
"loss": 4.0271, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.172653310130442e-05, |
|
"loss": 4.0187, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1633572799071886e-05, |
|
"loss": 4.0195, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.154061249683935e-05, |
|
"loss": 4.0134, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.144765219460682e-05, |
|
"loss": 4.012, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.135469189237428e-05, |
|
"loss": 4.0103, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.126173159014175e-05, |
|
"loss": 4.0061, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1168771287909215e-05, |
|
"loss": 3.9976, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.107581098567668e-05, |
|
"loss": 4.0012, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.0982850683444146e-05, |
|
"loss": 4.0002, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.088989038121161e-05, |
|
"loss": 3.9899, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.079693007897907e-05, |
|
"loss": 3.9881, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.070396977674654e-05, |
|
"loss": 3.9842, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.3486961893069126, |
|
"eval_loss": 3.8825323581695557, |
|
"eval_runtime": 5041.9196, |
|
"eval_samples_per_second": 89.677, |
|
"eval_steps_per_second": 1.401, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.0611009474514e-05, |
|
"loss": 3.9765, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.051804917228147e-05, |
|
"loss": 3.974, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.042508887004894e-05, |
|
"loss": 3.9809, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.03321285678164e-05, |
|
"loss": 3.9751, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.0239168265583865e-05, |
|
"loss": 3.9714, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.014620796335133e-05, |
|
"loss": 3.967, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.00532476611188e-05, |
|
"loss": 3.9642, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.996028735888626e-05, |
|
"loss": 3.9586, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.986732705665373e-05, |
|
"loss": 3.9591, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.9774366754421194e-05, |
|
"loss": 3.9532, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.968140645218866e-05, |
|
"loss": 3.9432, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9588446149956126e-05, |
|
"loss": 3.9482, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.949548584772359e-05, |
|
"loss": 3.9414, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.940252554549105e-05, |
|
"loss": 3.9397, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.930956524325852e-05, |
|
"loss": 3.9329, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.921660494102599e-05, |
|
"loss": 3.9361, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.912364463879345e-05, |
|
"loss": 3.9322, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.903068433656092e-05, |
|
"loss": 3.929, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.893772403432838e-05, |
|
"loss": 3.9308, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.8844763732095845e-05, |
|
"loss": 3.9298, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.3545327865706106, |
|
"eval_loss": 3.824397087097168, |
|
"eval_runtime": 5028.3225, |
|
"eval_samples_per_second": 89.92, |
|
"eval_steps_per_second": 1.405, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.875180342986332e-05, |
|
"loss": 3.9259, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8658843127630776e-05, |
|
"loss": 3.9188, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.856588282539824e-05, |
|
"loss": 3.9109, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.847292252316571e-05, |
|
"loss": 3.9193, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8379962220933174e-05, |
|
"loss": 3.9078, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.828700191870064e-05, |
|
"loss": 3.913, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8194041616468105e-05, |
|
"loss": 3.9015, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.810108131423557e-05, |
|
"loss": 3.9075, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8008121012003037e-05, |
|
"loss": 3.9038, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.79151607097705e-05, |
|
"loss": 3.8996, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.782220040753797e-05, |
|
"loss": 3.8987, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.772924010530543e-05, |
|
"loss": 3.8999, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.76362798030729e-05, |
|
"loss": 3.8889, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7543319500840365e-05, |
|
"loss": 3.8927, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7450359198607824e-05, |
|
"loss": 3.8974, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.73573988963753e-05, |
|
"loss": 3.8895, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7264438594142756e-05, |
|
"loss": 3.8908, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.717147829191022e-05, |
|
"loss": 3.8816, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7078517989677694e-05, |
|
"loss": 3.8718, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.698555768744515e-05, |
|
"loss": 3.8777, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.35923910731713965, |
|
"eval_loss": 3.7790777683258057, |
|
"eval_runtime": 5041.8181, |
|
"eval_samples_per_second": 89.679, |
|
"eval_steps_per_second": 1.401, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.689259738521262e-05, |
|
"loss": 3.8821, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.6799637082980085e-05, |
|
"loss": 3.8743, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.670667678074755e-05, |
|
"loss": 3.8735, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.6613716478515016e-05, |
|
"loss": 3.8722, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.652075617628248e-05, |
|
"loss": 3.8644, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.642779587404995e-05, |
|
"loss": 3.867, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.633483557181741e-05, |
|
"loss": 3.8666, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.624187526958488e-05, |
|
"loss": 3.8615, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6148914967352345e-05, |
|
"loss": 3.8662, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6055954665119804e-05, |
|
"loss": 3.8645, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.5962994362887276e-05, |
|
"loss": 3.8559, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.587003406065474e-05, |
|
"loss": 3.8536, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.57770737584222e-05, |
|
"loss": 3.8527, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.5684113456189673e-05, |
|
"loss": 3.848, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.559115315395713e-05, |
|
"loss": 3.8495, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.54981928517246e-05, |
|
"loss": 3.8523, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.540523254949207e-05, |
|
"loss": 3.8505, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.531227224725953e-05, |
|
"loss": 3.8439, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5219311945026995e-05, |
|
"loss": 3.8387, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.512635164279446e-05, |
|
"loss": 3.8455, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.36290677735789373, |
|
"eval_loss": 3.7436022758483887, |
|
"eval_runtime": 5030.6589, |
|
"eval_samples_per_second": 89.878, |
|
"eval_steps_per_second": 1.404, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.503339134056193e-05, |
|
"loss": 3.8383, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.494043103832939e-05, |
|
"loss": 3.8445, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.484747073609686e-05, |
|
"loss": 3.8439, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.4754510433864324e-05, |
|
"loss": 3.8357, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.466155013163179e-05, |
|
"loss": 3.8362, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4568589829399256e-05, |
|
"loss": 3.8311, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.447562952716672e-05, |
|
"loss": 3.8263, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.438266922493418e-05, |
|
"loss": 3.8328, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.428970892270165e-05, |
|
"loss": 3.8303, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.419674862046912e-05, |
|
"loss": 3.8202, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.410378831823658e-05, |
|
"loss": 3.8243, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.401082801600405e-05, |
|
"loss": 3.8284, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.391786771377151e-05, |
|
"loss": 3.8215, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.3824907411538975e-05, |
|
"loss": 3.8179, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.373194710930645e-05, |
|
"loss": 3.821, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3638986807073906e-05, |
|
"loss": 3.816, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.354602650484138e-05, |
|
"loss": 3.8129, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.345306620260884e-05, |
|
"loss": 3.8203, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3360105900376304e-05, |
|
"loss": 3.818, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3267145598143776e-05, |
|
"loss": 3.8104, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.36599874258539994, |
|
"eval_loss": 3.7120308876037598, |
|
"eval_runtime": 5023.4047, |
|
"eval_samples_per_second": 90.008, |
|
"eval_steps_per_second": 1.406, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3174185295911235e-05, |
|
"loss": 3.8046, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.30812249936787e-05, |
|
"loss": 3.8116, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.298826469144617e-05, |
|
"loss": 3.8063, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.289530438921363e-05, |
|
"loss": 3.8029, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.28023440869811e-05, |
|
"loss": 3.8023, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2709383784748564e-05, |
|
"loss": 3.799, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.261642348251603e-05, |
|
"loss": 3.7974, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2523463180283495e-05, |
|
"loss": 3.7981, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.243050287805096e-05, |
|
"loss": 3.7979, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.233754257581843e-05, |
|
"loss": 3.7971, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.2244582273585886e-05, |
|
"loss": 3.7956, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.215162197135336e-05, |
|
"loss": 3.7966, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.2058661669120824e-05, |
|
"loss": 3.7903, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.196570136688828e-05, |
|
"loss": 3.7944, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.1872741064655756e-05, |
|
"loss": 3.7942, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.1779780762423215e-05, |
|
"loss": 3.7871, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.168682046019068e-05, |
|
"loss": 3.7912, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1593860157958146e-05, |
|
"loss": 3.7909, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.150089985572561e-05, |
|
"loss": 3.7798, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.140793955349308e-05, |
|
"loss": 3.7908, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.36866443948556843, |
|
"eval_loss": 3.686194896697998, |
|
"eval_runtime": 5036.4153, |
|
"eval_samples_per_second": 89.775, |
|
"eval_steps_per_second": 1.403, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.131497925126054e-05, |
|
"loss": 3.7867, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.122201894902801e-05, |
|
"loss": 3.7815, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1129058646795475e-05, |
|
"loss": 3.781, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.103609834456294e-05, |
|
"loss": 3.7821, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.0943138042330406e-05, |
|
"loss": 3.7731, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.0850177740097865e-05, |
|
"loss": 3.7803, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.075721743786534e-05, |
|
"loss": 3.7747, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0664257135632804e-05, |
|
"loss": 3.776, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.057129683340026e-05, |
|
"loss": 3.7718, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0478336531167735e-05, |
|
"loss": 3.7762, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0385376228935197e-05, |
|
"loss": 3.7731, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.029241592670266e-05, |
|
"loss": 3.7678, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.019945562447013e-05, |
|
"loss": 3.7706, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0106495322237595e-05, |
|
"loss": 3.7676, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0013535020005057e-05, |
|
"loss": 3.7703, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9920574717772526e-05, |
|
"loss": 3.7657, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.982761441553999e-05, |
|
"loss": 3.7717, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9734654113307454e-05, |
|
"loss": 3.7669, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9641693811074923e-05, |
|
"loss": 3.7598, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9548733508842386e-05, |
|
"loss": 3.7613, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.37119206097292273, |
|
"eval_loss": 3.6628386974334717, |
|
"eval_runtime": 5026.6773, |
|
"eval_samples_per_second": 89.949, |
|
"eval_steps_per_second": 1.406, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9455773206609848e-05, |
|
"loss": 3.7575, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9362812904377317e-05, |
|
"loss": 3.7598, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9269852602144783e-05, |
|
"loss": 3.7473, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9176892299912245e-05, |
|
"loss": 3.7578, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9083931997679715e-05, |
|
"loss": 3.7512, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8990971695447177e-05, |
|
"loss": 3.7517, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8898011393214643e-05, |
|
"loss": 3.7585, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8805051090982112e-05, |
|
"loss": 3.7595, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8712090788749574e-05, |
|
"loss": 3.7493, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8619130486517037e-05, |
|
"loss": 3.7554, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8526170184284506e-05, |
|
"loss": 3.7515, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.843320988205197e-05, |
|
"loss": 3.7483, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8340249579819434e-05, |
|
"loss": 3.7577, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8247289277586903e-05, |
|
"loss": 3.753, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8154328975354365e-05, |
|
"loss": 3.7472, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.806136867312183e-05, |
|
"loss": 3.742, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.79684083708893e-05, |
|
"loss": 3.7445, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7875448068656763e-05, |
|
"loss": 3.7493, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7782487766424225e-05, |
|
"loss": 3.7427, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7689527464191694e-05, |
|
"loss": 3.7492, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.3731269306324982, |
|
"eval_loss": 3.643425464630127, |
|
"eval_runtime": 5034.359, |
|
"eval_samples_per_second": 89.812, |
|
"eval_steps_per_second": 1.403, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.759656716195916e-05, |
|
"loss": 3.7424, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7503606859726622e-05, |
|
"loss": 3.7326, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.741064655749409e-05, |
|
"loss": 3.7368, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7317686255261554e-05, |
|
"loss": 3.7387, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.722472595302902e-05, |
|
"loss": 3.7392, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.713176565079649e-05, |
|
"loss": 3.7371, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.703880534856395e-05, |
|
"loss": 3.7302, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6945845046331413e-05, |
|
"loss": 3.7336, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6852884744098882e-05, |
|
"loss": 3.7401, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6759924441866345e-05, |
|
"loss": 3.7264, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.666696413963381e-05, |
|
"loss": 3.7328, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.657400383740128e-05, |
|
"loss": 3.7239, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6481043535168742e-05, |
|
"loss": 3.727, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6388083232936204e-05, |
|
"loss": 3.7448, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6295122930703673e-05, |
|
"loss": 3.7296, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.620216262847114e-05, |
|
"loss": 3.7226, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.61092023262386e-05, |
|
"loss": 3.7289, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.601624202400607e-05, |
|
"loss": 3.7117, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5923281721773533e-05, |
|
"loss": 3.7261, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5830321419541e-05, |
|
"loss": 3.7228, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.3750882777539584, |
|
"eval_loss": 3.6245925426483154, |
|
"eval_runtime": 5034.4807, |
|
"eval_samples_per_second": 89.81, |
|
"eval_steps_per_second": 1.403, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5737361117308468e-05, |
|
"loss": 3.7316, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.564440081507593e-05, |
|
"loss": 3.7187, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5551440512843393e-05, |
|
"loss": 3.7147, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5458480210610862e-05, |
|
"loss": 3.7185, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5365519908378328e-05, |
|
"loss": 3.7186, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.527255960614579e-05, |
|
"loss": 3.7204, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.517959930391326e-05, |
|
"loss": 3.7154, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.508663900168072e-05, |
|
"loss": 3.7149, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.499367869944819e-05, |
|
"loss": 3.7138, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4900718397215656e-05, |
|
"loss": 3.7194, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.480775809498312e-05, |
|
"loss": 3.7161, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4714797792750584e-05, |
|
"loss": 3.7113, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.462183749051805e-05, |
|
"loss": 3.7124, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4528877188285516e-05, |
|
"loss": 3.7113, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.443591688605298e-05, |
|
"loss": 3.7079, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4342956583820447e-05, |
|
"loss": 3.7111, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.424999628158791e-05, |
|
"loss": 3.7081, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.415703597935538e-05, |
|
"loss": 3.7061, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4064075677122845e-05, |
|
"loss": 3.7066, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.3971115374890307e-05, |
|
"loss": 3.7127, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.3766884162574383, |
|
"eval_loss": 3.609013319015503, |
|
"eval_runtime": 5024.1177, |
|
"eval_samples_per_second": 89.995, |
|
"eval_steps_per_second": 1.406, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.3878155072657773e-05, |
|
"loss": 3.7094, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.378519477042524e-05, |
|
"loss": 3.6994, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3692234468192704e-05, |
|
"loss": 3.7066, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.359927416596017e-05, |
|
"loss": 3.705, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3506313863727636e-05, |
|
"loss": 3.7026, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3413353561495098e-05, |
|
"loss": 3.7035, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3320393259262567e-05, |
|
"loss": 3.7052, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3227432957030033e-05, |
|
"loss": 3.7018, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3134472654797495e-05, |
|
"loss": 3.7042, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.304151235256496e-05, |
|
"loss": 3.7033, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.2948552050332427e-05, |
|
"loss": 3.7057, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.2855591748099893e-05, |
|
"loss": 3.7018, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.276263144586736e-05, |
|
"loss": 3.7007, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2669671143634824e-05, |
|
"loss": 3.6892, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2576710841402286e-05, |
|
"loss": 3.7029, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2483750539169756e-05, |
|
"loss": 3.6992, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.239079023693722e-05, |
|
"loss": 3.6961, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2297829934704684e-05, |
|
"loss": 3.6916, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.220486963247215e-05, |
|
"loss": 3.6983, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2111909330239615e-05, |
|
"loss": 3.694, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.3782522490519747, |
|
"eval_loss": 3.5962026119232178, |
|
"eval_runtime": 5027.3439, |
|
"eval_samples_per_second": 89.937, |
|
"eval_steps_per_second": 1.405, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.201894902800708e-05, |
|
"loss": 3.6916, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1925988725774547e-05, |
|
"loss": 3.69, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1833028423542012e-05, |
|
"loss": 3.6858, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1740068121309475e-05, |
|
"loss": 3.7009, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.164710781907694e-05, |
|
"loss": 3.6886, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.155414751684441e-05, |
|
"loss": 3.6869, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1461187214611872e-05, |
|
"loss": 3.6898, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1368226912379338e-05, |
|
"loss": 3.695, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1275266610146804e-05, |
|
"loss": 3.6882, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.118230630791427e-05, |
|
"loss": 3.6909, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1089346005681735e-05, |
|
"loss": 3.6862, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.09963857034492e-05, |
|
"loss": 3.6878, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0903425401216663e-05, |
|
"loss": 3.6882, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.081046509898413e-05, |
|
"loss": 3.6841, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0717504796751598e-05, |
|
"loss": 3.6828, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.062454449451906e-05, |
|
"loss": 3.6819, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0531584192286526e-05, |
|
"loss": 3.6854, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0438623890053992e-05, |
|
"loss": 3.6759, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0345663587821458e-05, |
|
"loss": 3.6811, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0252703285588923e-05, |
|
"loss": 3.6871, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.37971800692359575, |
|
"eval_loss": 3.583056688308716, |
|
"eval_runtime": 5027.862, |
|
"eval_samples_per_second": 89.928, |
|
"eval_steps_per_second": 1.405, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.015974298335639e-05, |
|
"loss": 3.6786, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0066782681123855e-05, |
|
"loss": 3.6793, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9973822378891317e-05, |
|
"loss": 3.6791, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9880862076658786e-05, |
|
"loss": 3.6806, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.9787901774426252e-05, |
|
"loss": 3.6757, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9694941472193715e-05, |
|
"loss": 3.6772, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.960198116996118e-05, |
|
"loss": 3.6803, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9509020867728646e-05, |
|
"loss": 3.674, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9416060565496112e-05, |
|
"loss": 3.6761, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9323100263263578e-05, |
|
"loss": 3.6718, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9230139961031043e-05, |
|
"loss": 3.675, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9137179658798506e-05, |
|
"loss": 3.6734, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9044219356565975e-05, |
|
"loss": 3.6716, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.895125905433344e-05, |
|
"loss": 3.6724, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8858298752100903e-05, |
|
"loss": 3.6742, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.876533844986837e-05, |
|
"loss": 3.6758, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8672378147635834e-05, |
|
"loss": 3.6704, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.85794178454033e-05, |
|
"loss": 3.6657, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8486457543170766e-05, |
|
"loss": 3.6662, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.839349724093823e-05, |
|
"loss": 3.6784, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.3810095955450411, |
|
"eval_loss": 3.5707802772521973, |
|
"eval_runtime": 5019.3993, |
|
"eval_samples_per_second": 90.08, |
|
"eval_steps_per_second": 1.408, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8300536938705694e-05, |
|
"loss": 3.672, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8207576636473163e-05, |
|
"loss": 3.6694, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.811461633424063e-05, |
|
"loss": 3.6686, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.802165603200809e-05, |
|
"loss": 3.6712, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7928695729775557e-05, |
|
"loss": 3.6743, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7835735427543023e-05, |
|
"loss": 3.6632, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.774277512531049e-05, |
|
"loss": 3.6609, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7649814823077954e-05, |
|
"loss": 3.668, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.755685452084542e-05, |
|
"loss": 3.6703, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7463894218612882e-05, |
|
"loss": 3.6671, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7370933916380348e-05, |
|
"loss": 3.66, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7277973614147817e-05, |
|
"loss": 3.6648, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.718501331191528e-05, |
|
"loss": 3.6681, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7092053009682745e-05, |
|
"loss": 3.6685, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.699909270745021e-05, |
|
"loss": 3.6669, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6906132405217677e-05, |
|
"loss": 3.6691, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6813172102985143e-05, |
|
"loss": 3.6575, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6720211800752608e-05, |
|
"loss": 3.6688, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.662725149852007e-05, |
|
"loss": 3.6558, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6534291196287536e-05, |
|
"loss": 3.6606, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.38233432261147465, |
|
"eval_loss": 3.559264898300171, |
|
"eval_runtime": 5018.0904, |
|
"eval_samples_per_second": 90.103, |
|
"eval_steps_per_second": 1.408, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6441330894055006e-05, |
|
"loss": 3.6577, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6348370591822468e-05, |
|
"loss": 3.6628, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6255410289589934e-05, |
|
"loss": 3.6661, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.61624499873574e-05, |
|
"loss": 3.6582, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6069489685124865e-05, |
|
"loss": 3.6554, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.597652938289233e-05, |
|
"loss": 3.6577, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5883569080659797e-05, |
|
"loss": 3.6586, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.579060877842726e-05, |
|
"loss": 3.6565, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5697648476194725e-05, |
|
"loss": 3.6623, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5604688173962194e-05, |
|
"loss": 3.6617, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5511727871729656e-05, |
|
"loss": 3.6555, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5418767569497122e-05, |
|
"loss": 3.6524, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5325807267264588e-05, |
|
"loss": 3.6602, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5232846965032052e-05, |
|
"loss": 3.6563, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5139886662799518e-05, |
|
"loss": 3.6551, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5046926360566985e-05, |
|
"loss": 3.6567, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4953966058334447e-05, |
|
"loss": 3.6475, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4861005756101915e-05, |
|
"loss": 3.654, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.476804545386938e-05, |
|
"loss": 3.6559, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4675085151636846e-05, |
|
"loss": 3.646, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.3834675999220702, |
|
"eval_loss": 3.549065351486206, |
|
"eval_runtime": 5019.1074, |
|
"eval_samples_per_second": 90.085, |
|
"eval_steps_per_second": 1.408, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.458212484940431e-05, |
|
"loss": 3.6544, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4489164547171776e-05, |
|
"loss": 3.6484, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4396204244939244e-05, |
|
"loss": 3.6467, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4303243942706706e-05, |
|
"loss": 3.6558, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4210283640474173e-05, |
|
"loss": 3.6461, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4117323338241639e-05, |
|
"loss": 3.6532, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4024363036009103e-05, |
|
"loss": 3.6551, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3931402733776569e-05, |
|
"loss": 3.6476, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3838442431544035e-05, |
|
"loss": 3.6466, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3745482129311499e-05, |
|
"loss": 3.6518, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3652521827078964e-05, |
|
"loss": 3.6413, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3559561524846432e-05, |
|
"loss": 3.6432, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3466601222613894e-05, |
|
"loss": 3.6464, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3373640920381362e-05, |
|
"loss": 3.644, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3280680618148827e-05, |
|
"loss": 3.6496, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3187720315916292e-05, |
|
"loss": 3.6407, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3094760013683757e-05, |
|
"loss": 3.6412, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3001799711451223e-05, |
|
"loss": 3.646, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2908839409218687e-05, |
|
"loss": 3.6432, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2815879106986153e-05, |
|
"loss": 3.6453, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.38430268095273507, |
|
"eval_loss": 3.5410099029541016, |
|
"eval_runtime": 5035.174, |
|
"eval_samples_per_second": 89.797, |
|
"eval_steps_per_second": 1.403, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.272291880475362e-05, |
|
"loss": 3.6374, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2629958502521083e-05, |
|
"loss": 3.6464, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.253699820028855e-05, |
|
"loss": 3.6488, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2444037898056014e-05, |
|
"loss": 3.6405, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.235107759582348e-05, |
|
"loss": 3.6354, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2258117293590946e-05, |
|
"loss": 3.6386, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.216515699135841e-05, |
|
"loss": 3.6349, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2072196689125877e-05, |
|
"loss": 3.6377, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1979236386893341e-05, |
|
"loss": 3.6374, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1886276084660807e-05, |
|
"loss": 3.6403, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1793315782428273e-05, |
|
"loss": 3.6397, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1700355480195738e-05, |
|
"loss": 3.6388, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1607395177963202e-05, |
|
"loss": 3.6413, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1514434875730668e-05, |
|
"loss": 3.6353, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1421474573498134e-05, |
|
"loss": 3.6395, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1328514271265598e-05, |
|
"loss": 3.6378, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1235553969033065e-05, |
|
"loss": 3.6394, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.114259366680053e-05, |
|
"loss": 3.6402, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1049633364567995e-05, |
|
"loss": 3.6421, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0956673062335461e-05, |
|
"loss": 3.6393, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.38505967217174975, |
|
"eval_loss": 3.5341877937316895, |
|
"eval_runtime": 5017.9224, |
|
"eval_samples_per_second": 90.106, |
|
"eval_steps_per_second": 1.408, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0863712760102927e-05, |
|
"loss": 3.6379, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0770752457870393e-05, |
|
"loss": 3.6319, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0677792155637857e-05, |
|
"loss": 3.6318, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0584831853405322e-05, |
|
"loss": 3.6345, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0491871551172788e-05, |
|
"loss": 3.6346, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0398911248940254e-05, |
|
"loss": 3.6304, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0305950946707718e-05, |
|
"loss": 3.6273, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0212990644475184e-05, |
|
"loss": 3.6346, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.012003034224265e-05, |
|
"loss": 3.6329, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0027070040010113e-05, |
|
"loss": 3.6349, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.934109737777581e-06, |
|
"loss": 3.6369, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.841149435545045e-06, |
|
"loss": 3.6272, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.74818913331251e-06, |
|
"loss": 3.6283, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.655228831079976e-06, |
|
"loss": 3.6353, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.562268528847442e-06, |
|
"loss": 3.6251, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.469308226614906e-06, |
|
"loss": 3.6345, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.376347924382372e-06, |
|
"loss": 3.6256, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.283387622149838e-06, |
|
"loss": 3.6259, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.190427319917302e-06, |
|
"loss": 3.6288, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.09746701768477e-06, |
|
"loss": 3.6207, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.3857340442835604, |
|
"eval_loss": 3.5280263423919678, |
|
"eval_runtime": 5028.331, |
|
"eval_samples_per_second": 89.919, |
|
"eval_steps_per_second": 1.405, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.004506715452233e-06, |
|
"loss": 3.6311, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.911546413219699e-06, |
|
"loss": 3.6254, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.818586110987165e-06, |
|
"loss": 3.6307, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.72562580875463e-06, |
|
"loss": 3.629, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.632665506522095e-06, |
|
"loss": 3.6264, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.53970520428956e-06, |
|
"loss": 3.6215, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.446744902057026e-06, |
|
"loss": 3.6181, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.35378459982449e-06, |
|
"loss": 3.6253, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.260824297591958e-06, |
|
"loss": 3.6204, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.167863995359422e-06, |
|
"loss": 3.6264, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.074903693126887e-06, |
|
"loss": 3.6258, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.981943390894353e-06, |
|
"loss": 3.6279, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.888983088661817e-06, |
|
"loss": 3.6294, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.796022786429285e-06, |
|
"loss": 3.621, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.703062484196749e-06, |
|
"loss": 3.6315, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.610102181964214e-06, |
|
"loss": 3.6199, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.51714187973168e-06, |
|
"loss": 3.6182, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.424181577499145e-06, |
|
"loss": 3.6196, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.33122127526661e-06, |
|
"loss": 3.6216, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.2382609730340766e-06, |
|
"loss": 3.6288, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.386489052989915, |
|
"eval_loss": 3.5217700004577637, |
|
"eval_runtime": 5019.3728, |
|
"eval_samples_per_second": 90.08, |
|
"eval_steps_per_second": 1.408, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.1453006708015415e-06, |
|
"loss": 3.6201, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.052340368569006e-06, |
|
"loss": 3.6216, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.959380066336472e-06, |
|
"loss": 3.6214, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.866419764103937e-06, |
|
"loss": 3.6208, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.773459461871402e-06, |
|
"loss": 3.6116, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.6804991596388685e-06, |
|
"loss": 3.6187, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.587538857406333e-06, |
|
"loss": 3.6179, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.494578555173798e-06, |
|
"loss": 3.6112, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.401618252941265e-06, |
|
"loss": 3.6226, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.30865795070873e-06, |
|
"loss": 3.6148, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.2156976484761956e-06, |
|
"loss": 3.6107, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.1227373462436605e-06, |
|
"loss": 3.6129, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.029777044011125e-06, |
|
"loss": 3.6178, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.936816741778591e-06, |
|
"loss": 3.624, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.843856439546057e-06, |
|
"loss": 3.6232, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.750896137313522e-06, |
|
"loss": 3.6185, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.6579358350809875e-06, |
|
"loss": 3.6195, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.564975532848453e-06, |
|
"loss": 3.6236, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.472015230615918e-06, |
|
"loss": 3.6091, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.379054928383383e-06, |
|
"loss": 3.6176, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.3872213352261681, |
|
"eval_loss": 3.5150856971740723, |
|
"eval_runtime": 5028.7528, |
|
"eval_samples_per_second": 89.912, |
|
"eval_steps_per_second": 1.405, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.286094626150849e-06, |
|
"loss": 3.6167, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.193134323918314e-06, |
|
"loss": 3.62, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.1001740216857795e-06, |
|
"loss": 3.612, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.007213719453245e-06, |
|
"loss": 3.614, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.91425341722071e-06, |
|
"loss": 3.6181, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.821293114988176e-06, |
|
"loss": 3.6146, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.728332812755641e-06, |
|
"loss": 3.6145, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.6353725105231065e-06, |
|
"loss": 3.6156, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.542412208290571e-06, |
|
"loss": 3.6149, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.449451906058037e-06, |
|
"loss": 3.613, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.356491603825503e-06, |
|
"loss": 3.6158, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.263531301592968e-06, |
|
"loss": 3.6231, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.1705709993604336e-06, |
|
"loss": 3.6094, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.077610697127899e-06, |
|
"loss": 3.6135, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.984650394895364e-06, |
|
"loss": 3.6115, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.891690092662829e-06, |
|
"loss": 3.6093, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.798729790430295e-06, |
|
"loss": 3.6147, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7057694881977598e-06, |
|
"loss": 3.6174, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6128091859652255e-06, |
|
"loss": 3.6175, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5198488837326913e-06, |
|
"loss": 3.6099, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.3877557812917544, |
|
"eval_loss": 3.510763168334961, |
|
"eval_runtime": 5030.6308, |
|
"eval_samples_per_second": 89.878, |
|
"eval_steps_per_second": 1.404, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4268885815001566e-06, |
|
"loss": 3.6052, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.3339282792676215e-06, |
|
"loss": 3.6146, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2409679770350872e-06, |
|
"loss": 3.6118, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1480076748025526e-06, |
|
"loss": 3.6099, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.055047372570018e-06, |
|
"loss": 3.6089, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.9620870703374832e-06, |
|
"loss": 3.6042, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.8691267681049485e-06, |
|
"loss": 3.6121, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7761664658724143e-06, |
|
"loss": 3.6041, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.683206163639879e-06, |
|
"loss": 3.6135, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.5902458614073445e-06, |
|
"loss": 3.6043, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4972855591748103e-06, |
|
"loss": 3.6029, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4043252569422756e-06, |
|
"loss": 3.6112, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.311364954709741e-06, |
|
"loss": 3.6021, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2184046524772062e-06, |
|
"loss": 3.6078, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1254443502446716e-06, |
|
"loss": 3.6037, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.032484048012137e-06, |
|
"loss": 3.6054, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9395237457796022e-06, |
|
"loss": 3.6063, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.8465634435470675e-06, |
|
"loss": 3.6049, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.753603141314533e-06, |
|
"loss": 3.6056, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6606428390819984e-06, |
|
"loss": 3.6093, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.3880700733260521, |
|
"eval_loss": 3.507866144180298, |
|
"eval_runtime": 5028.8015, |
|
"eval_samples_per_second": 89.911, |
|
"eval_steps_per_second": 1.405, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.567682536849464e-06, |
|
"loss": 3.6062, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4747222346169293e-06, |
|
"loss": 3.6027, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3817619323843946e-06, |
|
"loss": 3.6005, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.28880163015186e-06, |
|
"loss": 3.6119, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1958413279193255e-06, |
|
"loss": 3.6036, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1028810256867908e-06, |
|
"loss": 3.6089, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.009920723454256e-06, |
|
"loss": 3.6052, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.169604212217216e-07, |
|
"loss": 3.6063, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.240001189891869e-07, |
|
"loss": 3.6115, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.310398167566523e-07, |
|
"loss": 3.6035, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.380795145241177e-07, |
|
"loss": 3.5975, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.45119212291583e-07, |
|
"loss": 3.6047, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.521589100590484e-07, |
|
"loss": 3.6039, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.5919860782651375e-07, |
|
"loss": 3.6111, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.6623830559397913e-07, |
|
"loss": 3.6062, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.7327800336144456e-07, |
|
"loss": 3.6008, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.031770112890991e-08, |
|
"loss": 3.6071, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 268932, |
|
"total_flos": 4.497266479988736e+18, |
|
"train_loss": 3.8604874901614594, |
|
"train_runtime": 258151.997, |
|
"train_samples_per_second": 33.336, |
|
"train_steps_per_second": 1.042 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 268932, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"total_flos": 4.497266479988736e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|