|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3867640739149119, |
|
"global_step": 90000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.0004211081201405e-05, |
|
"loss": 3.9197, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.0016844250917146e-05, |
|
"loss": 3.9108, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.003789928748371e-05, |
|
"loss": 3.8655, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.006737582146571e-05, |
|
"loss": 3.8602, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.010527333566261e-05, |
|
"loss": 3.8478, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0151591165118474e-05, |
|
"loss": 3.8608, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.020632849713237e-05, |
|
"loss": 3.8431, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0269484371273996e-05, |
|
"loss": 3.8447, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.034105767939918e-05, |
|
"loss": 3.8422, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0421047165670686e-05, |
|
"loss": 3.8019, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.050945142657905e-05, |
|
"loss": 3.8109, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0606268910968035e-05, |
|
"loss": 3.8192, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.071149792006154e-05, |
|
"loss": 3.8037, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.082513660749298e-05, |
|
"loss": 3.8074, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0947182979338874e-05, |
|
"loss": 3.801, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.1077634894152375e-05, |
|
"loss": 3.786, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.121649006300163e-05, |
|
"loss": 3.7799, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.136374604951046e-05, |
|
"loss": 3.7721, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1519400269899535e-05, |
|
"loss": 3.7829, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1683449993033545e-05, |
|
"loss": 3.7922, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.185589234046783e-05, |
|
"loss": 3.7686, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.203672428649923e-05, |
|
"loss": 3.7742, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2225942658219505e-05, |
|
"loss": 3.753, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.242354413557057e-05, |
|
"loss": 3.7663, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.262952525140289e-05, |
|
"loss": 3.7589, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2843882391536666e-05, |
|
"loss": 3.7652, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.306661179482435e-05, |
|
"loss": 3.7618, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.329770955321802e-05, |
|
"loss": 3.7441, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.3537171611836355e-05, |
|
"loss": 3.7364, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.3784993769037384e-05, |
|
"loss": 3.7501, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.404117167649071e-05, |
|
"loss": 3.7363, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.43057008392547e-05, |
|
"loss": 3.7279, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.457857661585541e-05, |
|
"loss": 3.7158, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.485979421836779e-05, |
|
"loss": 3.74, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.514934871249906e-05, |
|
"loss": 3.7441, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.544723501767687e-05, |
|
"loss": 3.7141, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.575344790713656e-05, |
|
"loss": 3.7264, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.6067982008014406e-05, |
|
"loss": 3.7221, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.639083180144098e-05, |
|
"loss": 3.7363, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.672199162263839e-05, |
|
"loss": 3.73, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.706145566101981e-05, |
|
"loss": 3.7205, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.740921796029065e-05, |
|
"loss": 3.7192, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.776527241855393e-05, |
|
"loss": 3.7095, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.812961278841707e-05, |
|
"loss": 3.7311, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.85022326771014e-05, |
|
"loss": 3.7031, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.888312554655438e-05, |
|
"loss": 3.6934, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.92722847135643e-05, |
|
"loss": 3.7218, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9669703349877704e-05, |
|
"loss": 3.711, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.0075374482318863e-05, |
|
"loss": 3.7002, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.0489290992912625e-05, |
|
"loss": 3.698, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.0911445619008545e-05, |
|
"loss": 3.7097, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.1341830953409266e-05, |
|
"loss": 3.7012, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.1780439444499746e-05, |
|
"loss": 3.6847, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.2227263396380295e-05, |
|
"loss": 3.7072, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.2682294969000907e-05, |
|
"loss": 3.7052, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.31455261782996e-05, |
|
"loss": 3.6917, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.361694889634207e-05, |
|
"loss": 3.6951, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.409655485146426e-05, |
|
"loss": 3.6798, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.458433562841795e-05, |
|
"loss": 3.6888, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.50802826685176e-05, |
|
"loss": 3.685, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.558438726979166e-05, |
|
"loss": 3.6653, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.6096640587133956e-05, |
|
"loss": 3.6933, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.661703363246001e-05, |
|
"loss": 3.685, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.714555727486404e-05, |
|
"loss": 3.6799, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.768220224077955e-05, |
|
"loss": 3.656, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.822695911414171e-05, |
|
"loss": 3.675, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.877981833655294e-05, |
|
"loss": 3.6582, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.934077020745062e-05, |
|
"loss": 3.7009, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.990980488427666e-05, |
|
"loss": 3.6732, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.048691238265099e-05, |
|
"loss": 3.6509, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.107208257654653e-05, |
|
"loss": 3.6353, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.166530519846635e-05, |
|
"loss": 3.6662, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.226656983962476e-05, |
|
"loss": 3.6634, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.287586595012898e-05, |
|
"loss": 3.6607, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.349318283916478e-05, |
|
"loss": 3.654, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.411850967518416e-05, |
|
"loss": 3.6465, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.475183548609517e-05, |
|
"loss": 3.6527, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.539314915945428e-05, |
|
"loss": 3.6401, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.604243944266183e-05, |
|
"loss": 3.6453, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.669969494315882e-05, |
|
"loss": 3.6364, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.736490412862753e-05, |
|
"loss": 3.6379, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.803805532719332e-05, |
|
"loss": 3.6245, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.871913672763005e-05, |
|
"loss": 3.6333, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.940813637956603e-05, |
|
"loss": 3.6452, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.010504219369547e-05, |
|
"loss": 3.6487, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.080984194198902e-05, |
|
"loss": 3.6442, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.152252325790952e-05, |
|
"loss": 3.6384, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.22430736366282e-05, |
|
"loss": 3.6199, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.297148043524434e-05, |
|
"loss": 3.6341, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.370773087300741e-05, |
|
"loss": 3.6322, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.445181203154054e-05, |
|
"loss": 3.629, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.52037108550682e-05, |
|
"loss": 3.6105, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.596341415064452e-05, |
|
"loss": 3.6412, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.673090858838487e-05, |
|
"loss": 3.6355, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.750618070170043e-05, |
|
"loss": 3.6114, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.828921688753326e-05, |
|
"loss": 3.6142, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.908000340659633e-05, |
|
"loss": 3.6209, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.987852638361337e-05, |
|
"loss": 3.5979, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.06847718075632e-05, |
|
"loss": 3.6031, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.149872553192528e-05, |
|
"loss": 3.606, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.232037327492777e-05, |
|
"loss": 3.6232, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.314970061979832e-05, |
|
"loss": 3.6182, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.398669301501699e-05, |
|
"loss": 3.5904, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.483133577457161e-05, |
|
"loss": 3.6162, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.568361407821506e-05, |
|
"loss": 3.6084, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.65435129717262e-05, |
|
"loss": 3.598, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.74110173671712e-05, |
|
"loss": 3.6229, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.828611204316915e-05, |
|
"loss": 3.6101, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.916878164515847e-05, |
|
"loss": 3.5859, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.005901068566706e-05, |
|
"loss": 3.6207, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.095678354458317e-05, |
|
"loss": 3.6058, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.18620844694301e-05, |
|
"loss": 3.5935, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.27748975756424e-05, |
|
"loss": 3.5917, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.369520684684475e-05, |
|
"loss": 3.5861, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.462299613513243e-05, |
|
"loss": 3.5976, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.555824916135554e-05, |
|
"loss": 3.5914, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.650094951540393e-05, |
|
"loss": 3.5778, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.745108065649507e-05, |
|
"loss": 3.6049, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.840862591346507e-05, |
|
"loss": 3.5878, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.93735684850606e-05, |
|
"loss": 3.5918, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00010034589144023332, |
|
"loss": 3.59, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00010132557771843796, |
|
"loss": 3.5815, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010231261012993076, |
|
"loss": 3.5806, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001033069713560718, |
|
"loss": 3.5927, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010430864394962811, |
|
"loss": 3.595, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010531761033508039, |
|
"loss": 3.5751, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010633385280893134, |
|
"loss": 3.5741, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010735735354001593, |
|
"loss": 3.5752, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010838809456981471, |
|
"loss": 3.5666, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010942605781276871, |
|
"loss": 3.5594, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00011047122505659652, |
|
"loss": 3.5652, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00011152357796261427, |
|
"loss": 3.5546, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00011258309806605742, |
|
"loss": 3.5427, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011364976677640404, |
|
"loss": 3.5766, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011472356537770196, |
|
"loss": 3.5791, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011580447502889649, |
|
"loss": 3.5722, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011689247676416152, |
|
"loss": 3.5775, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011798755149323179, |
|
"loss": 3.5579, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011908968000173793, |
|
"loss": 3.5618, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012019884295154414, |
|
"loss": 3.5614, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012131502088108667, |
|
"loss": 3.5444, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012243819420571607, |
|
"loss": 3.5463, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012356834321804048, |
|
"loss": 3.5502, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012470544808827115, |
|
"loss": 3.5287, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012584948886457082, |
|
"loss": 3.5414, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00012700044547340377, |
|
"loss": 3.5504, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00012815829771988744, |
|
"loss": 3.5381, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00012932302528814797, |
|
"loss": 3.5551, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013049460774167522, |
|
"loss": 3.5331, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013167302452368236, |
|
"loss": 3.5359, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001328582549574664, |
|
"loss": 3.5252, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013405027824677038, |
|
"loss": 3.5104, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013524907347614926, |
|
"loss": 3.5253, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013645461961133603, |
|
"loss": 3.5249, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013766689549961136, |
|
"loss": 3.5374, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013888587987017427, |
|
"loss": 3.5093, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00014011155133451586, |
|
"loss": 3.5465, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014134388838679408, |
|
"loss": 3.5362, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014258286940421164, |
|
"loss": 3.5233, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014382847264739456, |
|
"loss": 3.5425, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014508067626077482, |
|
"loss": 3.5178, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014633945827297273, |
|
"loss": 3.5288, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014760479659718304, |
|
"loss": 3.52, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014887666903156218, |
|
"loss": 3.5121, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001501550532596183, |
|
"loss": 3.5191, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00015143992685060208, |
|
"loss": 3.5097, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00015273126725990098, |
|
"loss": 3.5034, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00015402905182943438, |
|
"loss": 3.484, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00015533325778805166, |
|
"loss": 3.5111, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00015664386225193092, |
|
"loss": 3.5041, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00015796084222498145, |
|
"loss": 3.5126, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001592841745992464, |
|
"loss": 3.4939, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00016061383615530893, |
|
"loss": 3.4728, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001619498035626989, |
|
"loss": 3.4671, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00016329205338030289, |
|
"loss": 3.5058, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00016464056205677522, |
|
"loss": 3.4973, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001659953059309513, |
|
"loss": 3.488, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00016735626123226218, |
|
"loss": 3.4953, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00016872340408115283, |
|
"loss": 3.4772, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017009671048950003, |
|
"loss": 3.4866, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017147615636103365, |
|
"loss": 3.4752, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017286171749175986, |
|
"loss": 3.4878, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001742533695703849, |
|
"loss": 3.468, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001756510881787427, |
|
"loss": 3.4708, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017705484879222265, |
|
"loss": 3.4879, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001784646267801997, |
|
"loss": 3.4787, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00017988039740646764, |
|
"loss": 3.4673, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018130213582967188, |
|
"loss": 3.4794, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018272981710374596, |
|
"loss": 3.4561, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018416341617834915, |
|
"loss": 3.4599, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018560290789930596, |
|
"loss": 3.454, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018704826700904756, |
|
"loss": 3.4628, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018849946814705483, |
|
"loss": 3.4557, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001899564858503036, |
|
"loss": 3.4584, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019141929455371092, |
|
"loss": 3.4492, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019288786859058442, |
|
"loss": 3.4641, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019436218219307173, |
|
"loss": 3.4665, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019584220949261325, |
|
"loss": 3.4503, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019732792452039607, |
|
"loss": 3.4438, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019881930120780906, |
|
"loss": 3.4454, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00020031631338690114, |
|
"loss": 3.4357, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00020181893479083945, |
|
"loss": 3.4561, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00020332713905437056, |
|
"loss": 3.4535, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020484089971428406, |
|
"loss": 3.4523, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020636019020987535, |
|
"loss": 3.4449, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020788498388341244, |
|
"loss": 3.4488, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020941525398060392, |
|
"loss": 3.431, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00021095097365106798, |
|
"loss": 3.4441, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002124921159488034, |
|
"loss": 3.4197, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002140386538326632, |
|
"loss": 3.4281, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00021559056016682795, |
|
"loss": 3.4229, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00021714780772128265, |
|
"loss": 3.4486, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00021871036917229462, |
|
"loss": 3.4449, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00022027821710289205, |
|
"loss": 3.4099, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002218513240033463, |
|
"loss": 3.4309, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00022342966227165346, |
|
"loss": 3.4188, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00022501320421401943, |
|
"loss": 3.4238, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00022660192204534534, |
|
"loss": 3.4067, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00022819578788971543, |
|
"loss": 3.428, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000229794773780886, |
|
"loss": 3.4282, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00023139885166277606, |
|
"loss": 3.411, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00023300799338995962, |
|
"loss": 3.4144, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00023462217072816, |
|
"loss": 3.3912, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002362413553547441, |
|
"loss": 3.4082, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00023786551885922075, |
|
"loss": 3.4071, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002394946327437383, |
|
"loss": 3.3977, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024112866842358467, |
|
"loss": 3.4155, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024276759722768946, |
|
"loss": 3.4112, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002444113903991265, |
|
"loss": 3.4177, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002460600190956186, |
|
"loss": 3.3751, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002477134543900436, |
|
"loss": 3.4168, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024937166727094203, |
|
"loss": 3.3941, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002510346286430259, |
|
"loss": 3.3995, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002527023093276896, |
|
"loss": 3.401, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002543746800635215, |
|
"loss": 3.3874, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002560517115068175, |
|
"loss": 3.3856, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00025773337423209606, |
|
"loss": 3.3732, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002594196387326143, |
|
"loss": 3.3986, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00026111047542088587, |
|
"loss": 3.3909, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00026280585462919984, |
|
"loss": 3.3851, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002645057466101418, |
|
"loss": 3.3819, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000266210121537115, |
|
"loss": 3.3786, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002679189495048644, |
|
"loss": 3.3629, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00026963220053000114, |
|
"loss": 3.3762, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002713498445515287, |
|
"loss": 3.3814, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00027307185143137023, |
|
"loss": 3.3762, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002747981909548971, |
|
"loss": 3.3616, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002765288328314602, |
|
"loss": 3.3614, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002782637466949195, |
|
"loss": 3.3758, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002800029021041788, |
|
"loss": 3.367, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028174626854371866, |
|
"loss": 3.3536, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002834938154241324, |
|
"loss": 3.3545, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028524551208266224, |
|
"loss": 3.369, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002870013277837379, |
|
"loss": 3.3578, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028876123171951576, |
|
"loss": 3.3439, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002905251930104192, |
|
"loss": 3.3568, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00029229318070568056, |
|
"loss": 3.3605, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002940651637838844, |
|
"loss": 3.358, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002958411111535111, |
|
"loss": 3.3603, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00029762099165348357, |
|
"loss": 3.3527, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002994047740537128, |
|
"loss": 3.3409, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00030119242705564664, |
|
"loss": 3.3495, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00030298391929281857, |
|
"loss": 3.328, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00030477921933139823, |
|
"loss": 3.3592, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00030657829567074305, |
|
"loss": 3.36, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0003083811167439507, |
|
"loss": 3.3558, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0003101876509184131, |
|
"loss": 3.3206, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00031199786649637145, |
|
"loss": 3.3376, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0003138117317154723, |
|
"loss": 3.3242, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0003156292147493255, |
|
"loss": 3.3368, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00031745028370806165, |
|
"loss": 3.3366, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00031927490663889203, |
|
"loss": 3.3267, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00032110305152666953, |
|
"loss": 3.302, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00032293468629445007, |
|
"loss": 3.3237, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00032476977880405546, |
|
"loss": 3.3207, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00032660829685663773, |
|
"loss": 3.3215, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00032845020819324334, |
|
"loss": 3.3107, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0003302954804953797, |
|
"loss": 3.3153, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00033214408138558256, |
|
"loss": 3.341, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0003339959784279831, |
|
"loss": 3.3183, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00033585113912887776, |
|
"loss": 3.3295, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0003377095309372985, |
|
"loss": 3.3293, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0003395711212455839, |
|
"loss": 3.3181, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0003414358773899506, |
|
"loss": 3.3075, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00034330376665106695, |
|
"loss": 3.3213, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0003451747562546278, |
|
"loss": 3.3105, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00034704881337192784, |
|
"loss": 3.2828, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00034892590512043947, |
|
"loss": 3.3044, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00035080599856438877, |
|
"loss": 3.306, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00035268906071533304, |
|
"loss": 3.3226, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0003545750585327406, |
|
"loss": 3.3227, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0003564639589245703, |
|
"loss": 3.2888, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0003583557287478512, |
|
"loss": 3.2918, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0003602503348092654, |
|
"loss": 3.2857, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0003621477438657296, |
|
"loss": 3.324, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00036404792262497885, |
|
"loss": 3.2991, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00036595083774615053, |
|
"loss": 3.2798, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003678564558403689, |
|
"loss": 3.3164, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003697647434713321, |
|
"loss": 3.2885, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003716756671558975, |
|
"loss": 3.3157, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003735891933646703, |
|
"loss": 3.2991, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00037550528852259106, |
|
"loss": 3.2628, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00037742391900952516, |
|
"loss": 3.2928, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003793450511608526, |
|
"loss": 3.2828, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00038126865126805905, |
|
"loss": 3.2863, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003831946855793267, |
|
"loss": 3.2951, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00038512312030012676, |
|
"loss": 3.2583, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003870539215938128, |
|
"loss": 3.2872, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00038898705558221367, |
|
"loss": 3.2748, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00039092248834622883, |
|
"loss": 3.3026, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00039286018592642224, |
|
"loss": 3.2734, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00039480011432362007, |
|
"loss": 3.2849, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00039674223949950514, |
|
"loss": 3.2889, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0003986865273772159, |
|
"loss": 3.2938, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00040063294384194367, |
|
"loss": 3.2755, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004025814547415307, |
|
"loss": 3.2774, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00040453202588707036, |
|
"loss": 3.2819, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004064846230535067, |
|
"loss": 3.2867, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00040843921198023417, |
|
"loss": 3.2856, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004103957583717001, |
|
"loss": 3.2932, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004123542278980058, |
|
"loss": 3.272, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00041431458619550874, |
|
"loss": 3.2697, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00041627679886742527, |
|
"loss": 3.276, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004182408314844355, |
|
"loss": 3.2732, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00042020664958528574, |
|
"loss": 3.2535, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004221742186773941, |
|
"loss": 3.2791, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004241435042374555, |
|
"loss": 3.2718, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004261144717120477, |
|
"loss": 3.2672, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00042808708651823654, |
|
"loss": 3.2555, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00043006131404418424, |
|
"loss": 3.2717, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00043203711964975595, |
|
"loss": 3.2497, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00043401446866712684, |
|
"loss": 3.2525, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000435993326401392, |
|
"loss": 3.2523, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004379736581311737, |
|
"loss": 3.2431, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00043995542910923167, |
|
"loss": 3.2571, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004419386045630716, |
|
"loss": 3.2445, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004439231496955571, |
|
"loss": 3.2508, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00044590902968551834, |
|
"loss": 3.2435, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00044789620968836404, |
|
"loss": 3.2436, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004498846548366927, |
|
"loss": 3.2533, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004518743302409047, |
|
"loss": 3.2523, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00045386520098981335, |
|
"loss": 3.2284, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004558572321512592, |
|
"loss": 3.2285, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00045785038877272114, |
|
"loss": 3.2395, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00045984463588193104, |
|
"loss": 3.2592, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00046183993848748675, |
|
"loss": 3.2478, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004638362615794662, |
|
"loss": 3.2303, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00046583357013004194, |
|
"loss": 3.242, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00046783182909409496, |
|
"loss": 3.2582, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00046983100340983056, |
|
"loss": 3.2461, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00047183105799939297, |
|
"loss": 3.2495, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004738319577694809, |
|
"loss": 3.2325, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004758336676119636, |
|
"loss": 3.2335, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004778361524044967, |
|
"loss": 3.2499, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00047983937701113794, |
|
"loss": 3.2323, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00048184330628296484, |
|
"loss": 3.221, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004838479050586898, |
|
"loss": 3.2344, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004858531381652792, |
|
"loss": 3.241, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004878589704185682, |
|
"loss": 3.2434, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00048986536662388, |
|
"loss": 3.2375, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004918722915766426, |
|
"loss": 3.2237, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004938797100630057, |
|
"loss": 3.2665, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004958875868604602, |
|
"loss": 3.218, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004978958867384551, |
|
"loss": 3.2474, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000499904574459016, |
|
"loss": 3.2423, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000501913614777363, |
|
"loss": 3.2288, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005039229724425303, |
|
"loss": 3.2148, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005059326121979831, |
|
"loss": 3.2383, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005079424987822374, |
|
"loss": 3.236, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005099525969294778, |
|
"loss": 3.2231, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005119628713701773, |
|
"loss": 3.2215, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005139732868317155, |
|
"loss": 3.227, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005159838080389977, |
|
"loss": 3.2006, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005179943997150736, |
|
"loss": 3.2426, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005200050265817561, |
|
"loss": 3.2194, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005220156533602416, |
|
"loss": 3.223, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005240262447717271, |
|
"loss": 3.224, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005260367655380307, |
|
"loss": 3.2138, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005280471803822096, |
|
"loss": 3.2123, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005300574540291803, |
|
"loss": 3.2208, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000532067551206336, |
|
"loss": 3.2199, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005340774366441665, |
|
"loss": 3.2367, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005360870750768769, |
|
"loss": 3.21, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005380964312430063, |
|
"loss": 3.2221, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005401054698860466, |
|
"loss": 3.2021, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005421141557550603, |
|
"loss": 3.2352, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005441224536053012, |
|
"loss": 3.1962, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0005461303281988298, |
|
"loss": 3.2127, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000548137744305134, |
|
"loss": 3.2307, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005501446667017461, |
|
"loss": 3.2053, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005521510601748613, |
|
"loss": 3.1936, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005541568895199552, |
|
"loss": 3.2019, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005561621195424016, |
|
"loss": 3.203, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005581667150580907, |
|
"loss": 3.2125, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005601706408940451, |
|
"loss": 3.2205, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005621738618890382, |
|
"loss": 3.2215, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005641763428942106, |
|
"loss": 3.2052, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005661780487736866, |
|
"loss": 3.2249, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005681789444051913, |
|
"loss": 3.1952, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005701789946806666, |
|
"loss": 3.1995, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0005721781645068867, |
|
"loss": 3.2116, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.000574176418806075, |
|
"loss": 3.2256, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005761737225165182, |
|
"loss": 3.2221, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005781700405931827, |
|
"loss": 3.1956, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005801653380083288, |
|
"loss": 3.2031, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005821595797521253, |
|
"loss": 3.2029, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005841527308332645, |
|
"loss": 3.2065, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005861447562795751, |
|
"loss": 3.1783, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005881356211386371, |
|
"loss": 3.2181, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005901252904783932, |
|
"loss": 3.1991, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005921137293877644, |
|
"loss": 3.2011, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0005941009029772594, |
|
"loss": 3.1852, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00059608677637959, |
|
"loss": 3.1911, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00059807131475028, |
|
"loss": 3.1999, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000600054483268279, |
|
"loss": 3.1853, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006020362471365711, |
|
"loss": 3.1928, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006040165715827878, |
|
"loss": 3.1833, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006059954218598161, |
|
"loss": 3.1972, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006079727632464092, |
|
"loss": 3.2052, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000609948561047796, |
|
"loss": 3.1881, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006119227805962891, |
|
"loss": 3.174, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006138953872518932, |
|
"loss": 3.1831, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006158663464029133, |
|
"loss": 3.1961, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006178356234665618, |
|
"loss": 3.1759, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006198031838895652, |
|
"loss": 3.1728, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006217689931487707, |
|
"loss": 3.194, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006237330167517514, |
|
"loss": 3.1747, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006256952202374121, |
|
"loss": 3.1805, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006276555691765935, |
|
"loss": 3.1781, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006296140291726768, |
|
"loss": 3.1829, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006315705658621865, |
|
"loss": 3.1832, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006335251449153937, |
|
"loss": 3.1634, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006354777320369192, |
|
"loss": 3.1664, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006374282929663341, |
|
"loss": 3.1576, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006393767934787615, |
|
"loss": 3.1667, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0006413231993854767, |
|
"loss": 3.1718, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006432674765345077, |
|
"loss": 3.1645, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006452095908112336, |
|
"loss": 3.2063, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006471495081389843, |
|
"loss": 3.2638, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006490871944796369, |
|
"loss": 3.2685, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006510226158342146, |
|
"loss": 3.2519, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006529557382434818, |
|
"loss": 3.2101, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006548865277885411, |
|
"loss": 3.2109, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006568149505914273, |
|
"loss": 3.2291, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006587409728157031, |
|
"loss": 3.2282, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000660664560667052, |
|
"loss": 3.2404, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0006625856803938711, |
|
"loss": 3.2334, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.000664504298287864, |
|
"loss": 3.243, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006664203806846319, |
|
"loss": 3.2475, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0006683338939642638, |
|
"loss": 3.3556, |
|
"step": 90000 |
|
} |
|
], |
|
"max_steps": 500000, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.4344469839872e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|