{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9549530730091467,
  "global_step": 320000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 4.9925394291171165e-05, "loss": 8.8395, "step": 500 },
    { "epoch": 0.0, "learning_rate": 4.985078858234232e-05, "loss": 8.3374, "step": 1000 },
    { "epoch": 0.0, "learning_rate": 4.9776182873513485e-05, "loss": 8.1726, "step": 1500 },
    { "epoch": 0.01, "learning_rate": 4.970157716468464e-05, "loss": 8.0495, "step": 2000 },
    { "epoch": 0.01, "learning_rate": 4.9626971455855804e-05, "loss": 7.9605, "step": 2500 },
    { "epoch": 0.01, "learning_rate": 4.955236574702696e-05, "loss": 7.9116, "step": 3000 },
    { "epoch": 0.01, "learning_rate": 4.947776003819812e-05, "loss": 7.8568, "step": 3500 },
    { "epoch": 0.01, "learning_rate": 4.9403154329369286e-05, "loss": 7.8353, "step": 4000 },
    { "epoch": 0.01, "learning_rate": 4.932854862054044e-05, "loss": 7.7883, "step": 4500 },
    { "epoch": 0.01, "learning_rate": 4.925394291171161e-05, "loss": 7.7753, "step": 5000 },
    { "epoch": 0.02, "learning_rate": 4.917933720288277e-05, "loss": 7.7461, "step": 5500 },
    { "epoch": 0.02, "learning_rate": 4.910473149405393e-05, "loss": 7.7202, "step": 6000 },
    { "epoch": 0.02, "learning_rate": 4.903012578522509e-05, "loss": 7.7102, "step": 6500 },
    { "epoch": 0.02, "learning_rate": 4.895552007639625e-05, "loss": 7.6642, "step": 7000 },
    { "epoch": 0.02, "learning_rate": 4.888091436756741e-05, "loss": 7.6294, "step": 7500 },
    { "epoch": 0.02, "learning_rate": 4.880630865873857e-05, "loss": 7.6012, "step": 8000 },
    { "epoch": 0.03, "learning_rate": 4.873170294990973e-05, "loss": 7.5369, "step": 8500 },
    { "epoch": 0.03, "learning_rate": 4.865709724108089e-05, "loss": 7.4688, "step": 9000 },
    { "epoch": 0.03, "learning_rate": 4.858249153225205e-05, "loss": 7.3744, "step": 9500 },
    { "epoch": 0.03, "learning_rate": 4.850788582342321e-05, "loss": 7.2764, "step": 10000 },
    { "epoch": 0.03, "learning_rate": 4.843328011459437e-05, "loss": 7.1243, "step": 10500 },
    { "epoch": 0.03, "learning_rate": 4.835867440576553e-05, "loss": 7.0236, "step": 11000 },
    { "epoch": 0.03, "learning_rate": 4.828406869693669e-05, "loss": 6.8878, "step": 11500 },
    { "epoch": 0.04, "learning_rate": 4.820946298810785e-05, "loss": 6.795, "step": 12000 },
    { "epoch": 0.04, "learning_rate": 4.813485727927901e-05, "loss": 6.694, "step": 12500 },
    { "epoch": 0.04, "learning_rate": 4.806025157045017e-05, "loss": 6.6033, "step": 13000 },
    { "epoch": 0.04, "learning_rate": 4.798564586162133e-05, "loss": 6.492, "step": 13500 },
    { "epoch": 0.04, "learning_rate": 4.791104015279249e-05, "loss": 6.4154, "step": 14000 },
    { "epoch": 0.04, "learning_rate": 4.7836434443963654e-05, "loss": 6.3564, "step": 14500 },
    { "epoch": 0.04, "learning_rate": 4.776182873513482e-05, "loss": 6.2773, "step": 15000 },
    { "epoch": 0.05, "learning_rate": 4.768722302630598e-05, "loss": 6.2294, "step": 15500 },
    { "epoch": 0.05, "learning_rate": 4.7612617317477136e-05, "loss": 6.1543, "step": 16000 },
    { "epoch": 0.05, "learning_rate": 4.75380116086483e-05, "loss": 6.1043, "step": 16500 },
    { "epoch": 0.05, "learning_rate": 4.7463405899819455e-05, "loss": 6.0266, "step": 17000 },
    { "epoch": 0.05, "learning_rate": 4.738880019099062e-05, "loss": 5.9925, "step": 17500 },
    { "epoch": 0.05, "learning_rate": 4.731419448216178e-05, "loss": 5.955, "step": 18000 },
    { "epoch": 0.06, "learning_rate": 4.723958877333294e-05, "loss": 5.8802, "step": 18500 },
    { "epoch": 0.06, "learning_rate": 4.71649830645041e-05, "loss": 5.8472, "step": 19000 },
    { "epoch": 0.06, "learning_rate": 4.7090377355675256e-05, "loss": 5.7843, "step": 19500 },
    { "epoch": 0.06, "learning_rate": 4.701577164684642e-05, "loss": 5.7739, "step": 20000 },
    { "epoch": 0.06, "learning_rate": 4.6941165938017575e-05, "loss": 5.6922, "step": 20500 },
    { "epoch": 0.06, "learning_rate": 4.686656022918874e-05, "loss": 5.6593, "step": 21000 },
    { "epoch": 0.06, "learning_rate": 4.67919545203599e-05, "loss": 5.6313, "step": 21500 },
    { "epoch": 0.07, "learning_rate": 4.671734881153106e-05, "loss": 5.593, "step": 22000 },
    { "epoch": 0.07, "learning_rate": 4.664274310270222e-05, "loss": 5.5471, "step": 22500 },
    { "epoch": 0.07, "learning_rate": 4.656813739387338e-05, "loss": 5.49, "step": 23000 },
    { "epoch": 0.07, "learning_rate": 4.6493531685044546e-05, "loss": 5.4655, "step": 23500 },
    { "epoch": 0.07, "learning_rate": 4.64189259762157e-05, "loss": 5.4349, "step": 24000 },
    { "epoch": 0.07, "learning_rate": 4.6344320267386866e-05, "loss": 5.3876, "step": 24500 },
    { "epoch": 0.07, "learning_rate": 4.626971455855802e-05, "loss": 5.378, "step": 25000 },
    { "epoch": 0.08, "learning_rate": 4.6195108849729185e-05, "loss": 5.3231, "step": 25500 },
    { "epoch": 0.08, "learning_rate": 4.612050314090035e-05, "loss": 5.2968, "step": 26000 },
    { "epoch": 0.08, "learning_rate": 4.6045897432071504e-05, "loss": 5.2649, "step": 26500 },
    { "epoch": 0.08, "learning_rate": 4.597129172324267e-05, "loss": 5.2329, "step": 27000 },
    { "epoch": 0.08, "learning_rate": 4.589668601441382e-05, "loss": 5.2015, "step": 27500 },
    { "epoch": 0.08, "learning_rate": 4.5822080305584986e-05, "loss": 5.1858, "step": 28000 },
    { "epoch": 0.09, "learning_rate": 4.574747459675615e-05, "loss": 5.1435, "step": 28500 },
    { "epoch": 0.09, "learning_rate": 4.5672868887927305e-05, "loss": 5.1239, "step": 29000 },
    { "epoch": 0.09, "learning_rate": 4.559826317909847e-05, "loss": 5.0947, "step": 29500 },
    { "epoch": 0.09, "learning_rate": 4.5523657470269624e-05, "loss": 5.0852, "step": 30000 },
    { "epoch": 0.09, "learning_rate": 4.544905176144079e-05, "loss": 5.0373, "step": 30500 },
    { "epoch": 0.09, "learning_rate": 4.5374446052611943e-05, "loss": 5.0134, "step": 31000 },
    { "epoch": 0.09, "learning_rate": 4.5299840343783106e-05, "loss": 5.0094, "step": 31500 },
    { "epoch": 0.1, "learning_rate": 4.522523463495427e-05, "loss": 4.9855, "step": 32000 },
    { "epoch": 0.1, "learning_rate": 4.5150628926125426e-05, "loss": 4.943, "step": 32500 },
    { "epoch": 0.1, "learning_rate": 4.5076023217296595e-05, "loss": 4.9091, "step": 33000 },
    { "epoch": 0.1, "learning_rate": 4.500141750846775e-05, "loss": 4.8988, "step": 33500 },
    { "epoch": 0.1, "learning_rate": 4.4926811799638914e-05, "loss": 4.8703, "step": 34000 },
    { "epoch": 0.1, "learning_rate": 4.485220609081007e-05, "loss": 4.8395, "step": 34500 },
    { "epoch": 0.1, "learning_rate": 4.4777600381981234e-05, "loss": 4.8175, "step": 35000 },
    { "epoch": 0.11, "learning_rate": 4.470299467315239e-05, "loss": 4.8181, "step": 35500 },
    { "epoch": 0.11, "learning_rate": 4.462838896432355e-05, "loss": 4.8026, "step": 36000 },
    { "epoch": 0.11, "learning_rate": 4.4553783255494716e-05, "loss": 4.7798, "step": 36500 },
    { "epoch": 0.11, "learning_rate": 4.447917754666587e-05, "loss": 4.741, "step": 37000 },
    { "epoch": 0.11, "learning_rate": 4.4404571837837035e-05, "loss": 4.7337, "step": 37500 },
    { "epoch": 0.11, "learning_rate": 4.432996612900819e-05, "loss": 4.7278, "step": 38000 },
    { "epoch": 0.11, "learning_rate": 4.4255360420179354e-05, "loss": 4.7057, "step": 38500 },
    { "epoch": 0.12, "learning_rate": 4.418075471135052e-05, "loss": 4.6702, "step": 39000 },
    { "epoch": 0.12, "learning_rate": 4.410614900252167e-05, "loss": 4.6704, "step": 39500 },
    { "epoch": 0.12, "learning_rate": 4.4031543293692836e-05, "loss": 4.647, "step": 40000 },
    { "epoch": 0.12, "learning_rate": 4.395693758486399e-05, "loss": 4.6325, "step": 40500 },
    { "epoch": 0.12, "learning_rate": 4.3882331876035155e-05, "loss": 4.6142, "step": 41000 },
    { "epoch": 0.12, "learning_rate": 4.380772616720631e-05, "loss": 4.5979, "step": 41500 },
    { "epoch": 0.13, "learning_rate": 4.3733120458377474e-05, "loss": 4.5934, "step": 42000 },
    { "epoch": 0.13, "learning_rate": 4.365851474954864e-05, "loss": 4.558, "step": 42500 },
    { "epoch": 0.13, "learning_rate": 4.35839090407198e-05, "loss": 4.5537, "step": 43000 },
    { "epoch": 0.13, "learning_rate": 4.350930333189096e-05, "loss": 4.5202, "step": 43500 },
    { "epoch": 0.13, "learning_rate": 4.343469762306212e-05, "loss": 4.5238, "step": 44000 },
    { "epoch": 0.13, "learning_rate": 4.336009191423328e-05, "loss": 4.5052, "step": 44500 },
    { "epoch": 0.13, "learning_rate": 4.328548620540444e-05, "loss": 4.4972, "step": 45000 },
    { "epoch": 0.14, "learning_rate": 4.32108804965756e-05, "loss": 4.4751, "step": 45500 },
    { "epoch": 0.14, "learning_rate": 4.313627478774676e-05, "loss": 4.4441, "step": 46000 },
    { "epoch": 0.14, "learning_rate": 4.306166907891792e-05, "loss": 4.4482, "step": 46500 },
    { "epoch": 0.14, "learning_rate": 4.2987063370089084e-05, "loss": 4.4365, "step": 47000 },
    { "epoch": 0.14, "learning_rate": 4.291245766126024e-05, "loss": 4.4211, "step": 47500 },
    { "epoch": 0.14, "learning_rate": 4.28378519524314e-05, "loss": 4.4135, "step": 48000 },
    { "epoch": 0.14, "learning_rate": 4.276324624360256e-05, "loss": 4.3697, "step": 48500 },
    { "epoch": 0.15, "learning_rate": 4.268864053477372e-05, "loss": 4.4008, "step": 49000 },
    { "epoch": 0.15, "learning_rate": 4.2614034825944885e-05, "loss": 4.3786, "step": 49500 },
    { "epoch": 0.15, "learning_rate": 4.253942911711604e-05, "loss": 4.3466, "step": 50000 },
    { "epoch": 0.15, "learning_rate": 4.2464823408287204e-05, "loss": 4.3375, "step": 50500 },
    { "epoch": 0.15, "learning_rate": 4.239021769945836e-05, "loss": 4.3199, "step": 51000 },
    { "epoch": 0.15, "learning_rate": 4.231561199062952e-05, "loss": 4.3171, "step": 51500 },
    { "epoch": 0.16, "learning_rate": 4.2241006281800686e-05, "loss": 4.3012, "step": 52000 },
    { "epoch": 0.16, "learning_rate": 4.216640057297185e-05, "loss": 4.273, "step": 52500 },
    { "epoch": 0.16, "learning_rate": 4.2091794864143005e-05, "loss": 4.287, "step": 53000 },
    { "epoch": 0.16, "learning_rate": 4.201718915531417e-05, "loss": 4.2666, "step": 53500 },
    { "epoch": 0.16, "learning_rate": 4.194258344648533e-05, "loss": 4.2536, "step": 54000 },
    { "epoch": 0.16, "learning_rate": 4.186797773765649e-05, "loss": 4.2345, "step": 54500 },
    { "epoch": 0.16, "learning_rate": 4.179337202882765e-05, "loss": 4.2495, "step": 55000 },
    { "epoch": 0.17, "learning_rate": 4.1718766319998807e-05, "loss": 4.2232, "step": 55500 },
    { "epoch": 0.17, "learning_rate": 4.164416061116997e-05, "loss": 4.219, "step": 56000 },
    { "epoch": 0.17, "learning_rate": 4.1569554902341126e-05, "loss": 4.203, "step": 56500 },
    { "epoch": 0.17, "learning_rate": 4.149494919351229e-05, "loss": 4.208, "step": 57000 },
    { "epoch": 0.17, "learning_rate": 4.142034348468345e-05, "loss": 4.1815, "step": 57500 },
    { "epoch": 0.17, "learning_rate": 4.134573777585461e-05, "loss": 4.1828, "step": 58000 },
    { "epoch": 0.17, "learning_rate": 4.127113206702577e-05, "loss": 4.1539, "step": 58500 },
    { "epoch": 0.18, "learning_rate": 4.119652635819693e-05, "loss": 4.1365, "step": 59000 },
    { "epoch": 0.18, "learning_rate": 4.112192064936809e-05, "loss": 4.1362, "step": 59500 },
    { "epoch": 0.18, "learning_rate": 4.104731494053925e-05, "loss": 4.1366, "step": 60000 },
    { "epoch": 0.18, "learning_rate": 4.097270923171041e-05, "loss": 4.1265, "step": 60500 },
    { "epoch": 0.18, "learning_rate": 4.089810352288157e-05, "loss": 4.1251, "step": 61000 },
    { "epoch": 0.18, "learning_rate": 4.0823497814052735e-05, "loss": 4.1033, "step": 61500 },
    { "epoch": 0.19, "learning_rate": 4.07488921052239e-05, "loss": 4.0914, "step": 62000 },
    { "epoch": 0.19, "learning_rate": 4.0674286396395054e-05, "loss": 4.064, "step": 62500 },
    { "epoch": 0.19, "learning_rate": 4.059968068756622e-05, "loss": 4.0919, "step": 63000 },
    { "epoch": 0.19, "learning_rate": 4.052507497873737e-05, "loss": 4.0719, "step": 63500 },
    { "epoch": 0.19, "learning_rate": 4.0450469269908536e-05, "loss": 4.0527, "step": 64000 },
    { "epoch": 0.19, "learning_rate": 4.03758635610797e-05, "loss": 4.0483, "step": 64500 },
    { "epoch": 0.19, "learning_rate": 4.0301257852250855e-05, "loss": 4.0287, "step": 65000 },
    { "epoch": 0.2, "learning_rate": 4.022665214342202e-05, "loss": 4.0354, "step": 65500 },
    { "epoch": 0.2, "learning_rate": 4.0152046434593175e-05, "loss": 4.0273, "step": 66000 },
    { "epoch": 0.2, "learning_rate": 4.007744072576434e-05, "loss": 4.0207, "step": 66500 },
    { "epoch": 0.2, "learning_rate": 4.0002835016935494e-05, "loss": 4.0063, "step": 67000 },
    { "epoch": 0.2, "learning_rate": 3.992822930810666e-05, "loss": 3.9946, "step": 67500 },
    { "epoch": 0.2, "learning_rate": 3.985362359927782e-05, "loss": 3.9905, "step": 68000 },
    { "epoch": 0.2, "learning_rate": 3.9779017890448976e-05, "loss": 3.9737, "step": 68500 },
    { "epoch": 0.21, "learning_rate": 3.970441218162014e-05, "loss": 3.965, "step": 69000 },
    { "epoch": 0.21, "learning_rate": 3.9629806472791295e-05, "loss": 3.9595, "step": 69500 },
    { "epoch": 0.21, "learning_rate": 3.955520076396246e-05, "loss": 3.9496, "step": 70000 },
    { "epoch": 0.21, "learning_rate": 3.948059505513362e-05, "loss": 3.9545, "step": 70500 },
    { "epoch": 0.21, "learning_rate": 3.9405989346304784e-05, "loss": 3.9179, "step": 71000 },
    { "epoch": 0.21, "learning_rate": 3.933138363747595e-05, "loss": 3.9365, "step": 71500 },
    { "epoch": 0.21, "learning_rate": 3.92567779286471e-05, "loss": 3.9161, "step": 72000 },
    { "epoch": 0.22, "learning_rate": 3.9182172219818266e-05, "loss": 3.8996, "step": 72500 },
    { "epoch": 0.22, "learning_rate": 3.910756651098942e-05, "loss": 3.8962, "step": 73000 },
    { "epoch": 0.22, "learning_rate": 3.9032960802160585e-05, "loss": 3.8996, "step": 73500 },
    { "epoch": 0.22, "learning_rate": 3.895835509333174e-05, "loss": 3.8986, "step": 74000 },
    { "epoch": 0.22, "learning_rate": 3.8883749384502904e-05, "loss": 3.8726, "step": 74500 },
    { "epoch": 0.22, "learning_rate": 3.880914367567407e-05, "loss": 3.8836, "step": 75000 },
    { "epoch": 0.23, "learning_rate": 3.8734537966845223e-05, "loss": 3.8637, "step": 75500 },
    { "epoch": 0.23, "learning_rate": 3.8659932258016386e-05, "loss": 3.8579, "step": 76000 },
    { "epoch": 0.23, "learning_rate": 3.858532654918754e-05, "loss": 3.8529, "step": 76500 },
    { "epoch": 0.23, "learning_rate": 3.8510720840358706e-05, "loss": 3.8406, "step": 77000 },
    { "epoch": 0.23, "learning_rate": 3.843611513152986e-05, "loss": 3.8452, "step": 77500 },
    { "epoch": 0.23, "learning_rate": 3.8361509422701025e-05, "loss": 3.8343, "step": 78000 },
    { "epoch": 0.23, "learning_rate": 3.828690371387219e-05, "loss": 3.8337, "step": 78500 },
    { "epoch": 0.24, "learning_rate": 3.8212298005043344e-05, "loss": 3.82, "step": 79000 },
    { "epoch": 0.24, "learning_rate": 3.813769229621451e-05, "loss": 3.8208, "step": 79500 },
    { "epoch": 0.24, "learning_rate": 3.806308658738567e-05, "loss": 3.8178, "step": 80000 },
    { "epoch": 0.24, "learning_rate": 3.798848087855683e-05, "loss": 3.7932, "step": 80500 },
    { "epoch": 0.24, "learning_rate": 3.791387516972799e-05, "loss": 3.7966, "step": 81000 },
    { "epoch": 0.24, "learning_rate": 3.783926946089915e-05, "loss": 3.7977, "step": 81500 },
    { "epoch": 0.24, "learning_rate": 3.7764663752070315e-05, "loss": 3.7823, "step": 82000 },
    { "epoch": 0.25, "learning_rate": 3.769005804324147e-05, "loss": 3.7802, "step": 82500 },
    { "epoch": 0.25, "learning_rate": 3.7615452334412634e-05, "loss": 3.7686, "step": 83000 },
    { "epoch": 0.25, "learning_rate": 3.754084662558379e-05, "loss": 3.7709, "step": 83500 },
    { "epoch": 0.25, "learning_rate": 3.746624091675495e-05, "loss": 3.7671, "step": 84000 },
    { "epoch": 0.25, "learning_rate": 3.739163520792611e-05, "loss": 3.7559, "step": 84500 },
    { "epoch": 0.25, "learning_rate": 3.731702949909727e-05, "loss": 3.7743, "step": 85000 },
    { "epoch": 0.26, "learning_rate": 3.7242423790268435e-05, "loss": 3.7467, "step": 85500 },
    { "epoch": 0.26, "learning_rate": 3.716781808143959e-05, "loss": 3.745, "step": 86000 },
    { "epoch": 0.26, "learning_rate": 3.7093212372610754e-05, "loss": 3.7379, "step": 86500 },
    { "epoch": 0.26, "learning_rate": 3.701860666378191e-05, "loss": 3.7105, "step": 87000 },
    { "epoch": 0.26, "learning_rate": 3.6944000954953074e-05, "loss": 3.7341, "step": 87500 },
    { "epoch": 0.26, "learning_rate": 3.6869395246124237e-05, "loss": 3.7121, "step": 88000 },
    { "epoch": 0.26, "learning_rate": 3.679478953729539e-05, "loss": 3.7154, "step": 88500 },
    { "epoch": 0.27, "learning_rate": 3.6720183828466556e-05, "loss": 3.6959, "step": 89000 },
    { "epoch": 0.27, "learning_rate": 3.664557811963772e-05, "loss": 3.6943, "step": 89500 },
    { "epoch": 0.27, "learning_rate": 3.657097241080888e-05, "loss": 3.6909, "step": 90000 },
    { "epoch": 0.27, "learning_rate": 3.649636670198004e-05, "loss": 3.6937, "step": 90500 },
    { "epoch": 0.27, "learning_rate": 3.64217609931512e-05, "loss": 3.6994, "step": 91000 },
    { "epoch": 0.27, "learning_rate": 3.634715528432236e-05, "loss": 3.6758, "step": 91500 },
    { "epoch": 0.27, "learning_rate": 3.627254957549352e-05, "loss": 3.6812, "step": 92000 },
    { "epoch": 0.28, "learning_rate": 3.619794386666468e-05, "loss": 3.6782, "step": 92500 },
    { "epoch": 0.28, "learning_rate": 3.612333815783584e-05, "loss": 3.6732, "step": 93000 },
    { "epoch": 0.28, "learning_rate": 3.6048732449007e-05, "loss": 3.6677, "step": 93500 },
    { "epoch": 0.28, "learning_rate": 3.597412674017816e-05, "loss": 3.6845, "step": 94000 },
    { "epoch": 0.28, "learning_rate": 3.589952103134932e-05, "loss": 3.6512, "step": 94500 },
    { "epoch": 0.28, "learning_rate": 3.582491532252048e-05, "loss": 3.6636, "step": 95000 },
    { "epoch": 0.28, "learning_rate": 3.575030961369164e-05, "loss": 3.6355, "step": 95500 },
    { "epoch": 0.29, "learning_rate": 3.56757039048628e-05, "loss": 3.6325, "step": 96000 },
    { "epoch": 0.29, "learning_rate": 3.560109819603396e-05, "loss": 3.6498, "step": 96500 },
    { "epoch": 0.29, "learning_rate": 3.552649248720512e-05, "loss": 3.6384, "step": 97000 },
    { "epoch": 0.29, "learning_rate": 3.545188677837628e-05, "loss": 3.6325, "step": 97500 },
    { "epoch": 0.29, "learning_rate": 3.537728106954744e-05, "loss": 3.6313, "step": 98000 },
    { "epoch": 0.29, "learning_rate": 3.5302675360718605e-05, "loss": 3.6097, "step": 98500 },
    { "epoch": 0.3, "learning_rate": 3.522806965188977e-05, "loss": 3.6196, "step": 99000 },
    { "epoch": 0.3, "learning_rate": 3.5153463943060924e-05, "loss": 3.6109, "step": 99500 },
    { "epoch": 0.3, "learning_rate": 3.507885823423209e-05, "loss": 3.6011, "step": 100000 },
    { "epoch": 0.3, "learning_rate": 3.500425252540325e-05, "loss": 3.6124, "step": 100500 },
    { "epoch": 0.3, "learning_rate": 3.4929646816574406e-05, "loss": 3.5833, "step": 101000 },
    { "epoch": 0.3, "learning_rate": 3.485504110774557e-05, "loss": 3.6067, "step": 101500 },
    { "epoch": 0.3, "learning_rate": 3.4780435398916725e-05, "loss": 3.5863, "step": 102000 },
    { "epoch": 0.31, "learning_rate": 3.470582969008789e-05, "loss": 3.6031, "step": 102500 },
    { "epoch": 0.31, "learning_rate": 3.463122398125905e-05, "loss": 3.571, "step": 103000 },
    { "epoch": 0.31, "learning_rate": 3.455661827243021e-05, "loss": 3.5863, "step": 103500 },
    { "epoch": 0.31, "learning_rate": 3.448201256360137e-05, "loss": 3.5817, "step": 104000 },
    { "epoch": 0.31, "learning_rate": 3.4407406854772526e-05, "loss": 3.5754, "step": 104500 },
    { "epoch": 0.31, "learning_rate": 3.433280114594369e-05, "loss": 3.5687, "step": 105000 },
    { "epoch": 0.31, "learning_rate": 3.4258195437114845e-05, "loss": 3.5607, "step": 105500 },
    { "epoch": 0.32, "learning_rate": 3.418358972828601e-05, "loss": 3.5635, "step": 106000 },
    { "epoch": 0.32, "learning_rate": 3.410898401945717e-05, "loss": 3.5597, "step": 106500 },
    { "epoch": 0.32, "learning_rate": 3.403437831062833e-05, "loss": 3.5514, "step": 107000 },
    { "epoch": 0.32, "learning_rate": 3.395977260179949e-05, "loss": 3.5589, "step": 107500 },
    { "epoch": 0.32, "learning_rate": 3.388516689297065e-05, "loss": 3.5548, "step": 108000 },
    { "epoch": 0.32, "learning_rate": 3.3810561184141816e-05, "loss": 3.5499, "step": 108500 },
    { "epoch": 0.33, "learning_rate": 3.373595547531297e-05, "loss": 3.5236, "step": 109000 },
    { "epoch": 0.33, "learning_rate": 3.3661349766484135e-05, "loss": 3.5433, "step": 109500 },
    { "epoch": 0.33, "learning_rate": 3.358674405765529e-05, "loss": 3.5355, "step": 110000 },
    { "epoch": 0.33, "learning_rate": 3.3512138348826455e-05, "loss": 3.5246, "step": 110500 },
    { "epoch": 0.33, "learning_rate": 3.343753263999762e-05, "loss": 3.5352, "step": 111000 },
    { "epoch": 0.33, "learning_rate": 3.3362926931168774e-05, "loss": 3.5268, "step": 111500 },
    { "epoch": 0.33, "learning_rate": 3.328832122233994e-05, "loss": 3.5209, "step": 112000 },
    { "epoch": 0.34, "learning_rate": 3.321371551351109e-05, "loss": 3.4999, "step": 112500 },
    { "epoch": 0.34, "learning_rate": 3.3139109804682256e-05, "loss": 3.5197, "step": 113000 },
    { "epoch": 0.34, "learning_rate": 3.306450409585342e-05, "loss": 3.5116, "step": 113500 },
    { "epoch": 0.34, "learning_rate": 3.2989898387024575e-05, "loss": 3.5009, "step": 114000 },
    { "epoch": 0.34, "learning_rate": 3.291529267819574e-05, "loss": 3.5102, "step": 114500 },
    { "epoch": 0.34, "learning_rate": 3.2840686969366894e-05, "loss": 3.5053, "step": 115000 },
    { "epoch": 0.34, "learning_rate": 3.276608126053806e-05, "loss": 3.4856, "step": 115500 },
    { "epoch": 0.35, "learning_rate": 3.269147555170921e-05, "loss": 3.4952, "step": 116000 },
    { "epoch": 0.35, "learning_rate": 3.2616869842880376e-05, "loss": 3.4888, "step": 116500 },
    { "epoch": 0.35, "learning_rate": 3.254226413405154e-05, "loss": 3.493, "step": 117000 },
    { "epoch": 0.35, "learning_rate": 3.24676584252227e-05, "loss": 3.4838, "step": 117500 },
    { "epoch": 0.35, "learning_rate": 3.2393052716393865e-05, "loss": 3.4716, "step": 118000 },
    { "epoch": 0.35, "learning_rate": 3.231844700756502e-05, "loss": 3.4816, "step": 118500 },
    { "epoch": 0.36, "learning_rate": 3.2243841298736184e-05, "loss": 3.4613, "step": 119000 },
    { "epoch": 0.36, "learning_rate": 3.216923558990734e-05, "loss": 3.4446, "step": 119500 },
    { "epoch": 0.36, "learning_rate": 3.2094629881078503e-05, "loss": 3.4699, "step": 120000 },
    { "epoch": 0.36, "learning_rate": 3.202002417224966e-05, "loss": 3.4482, "step": 120500 },
    { "epoch": 0.36, "learning_rate": 3.194541846342082e-05, "loss": 3.464, "step": 121000 },
    { "epoch": 0.36, "learning_rate": 3.1870812754591986e-05, "loss": 3.4557, "step": 121500 },
    { "epoch": 0.36, "learning_rate": 3.179620704576314e-05, "loss": 3.4628, "step": 122000 },
    { "epoch": 0.37, "learning_rate": 3.1721601336934305e-05, "loss": 3.4543, "step": 122500 },
    { "epoch": 0.37, "learning_rate": 3.164699562810546e-05, "loss": 3.4459, "step": 123000 },
    { "epoch": 0.37, "learning_rate": 3.1572389919276624e-05, "loss": 3.4314, "step": 123500 },
    { "epoch": 0.37, "learning_rate": 3.149778421044779e-05, "loss": 3.4374, "step": 124000 },
    { "epoch": 0.37, "learning_rate": 3.142317850161894e-05, "loss": 3.4492, "step": 124500 },
    { "epoch": 0.37, "learning_rate": 3.1348572792790106e-05, "loss": 3.4237, "step": 125000 },
    { "epoch": 0.37, "learning_rate": 3.127396708396126e-05, "loss": 3.4221, "step": 125500 },
    { "epoch": 0.38, "learning_rate": 3.1199361375132425e-05, "loss": 3.4291, "step": 126000 },
    { "epoch": 0.38, "learning_rate": 3.112475566630358e-05, "loss": 3.4286, "step": 126500 },
    { "epoch": 0.38, "learning_rate": 3.105014995747475e-05, "loss": 3.4244, "step": 127000 },
    { "epoch": 0.38, "learning_rate": 3.097554424864591e-05, "loss": 3.4044, "step": 127500 },
    { "epoch": 0.38, "learning_rate": 3.090093853981707e-05, "loss": 3.414, "step": 128000 },
    { "epoch": 0.38, "learning_rate": 3.082633283098823e-05, "loss": 3.3993, "step": 128500 },
    { "epoch": 0.38, "learning_rate": 3.075172712215939e-05, "loss": 3.3988, "step": 129000 },
    { "epoch": 0.39, "learning_rate": 3.067712141333055e-05, "loss": 3.4115, "step": 129500 },
    { "epoch": 0.39, "learning_rate": 3.060251570450171e-05, "loss": 3.4092, "step": 130000 },
    { "epoch": 0.39, "learning_rate": 3.052790999567287e-05, "loss": 3.4079, "step": 130500 },
    { "epoch": 0.39, "learning_rate": 3.045330428684403e-05, "loss": 3.4052, "step": 131000 },
    { "epoch": 0.39, "learning_rate": 3.037869857801519e-05, "loss": 3.4042, "step": 131500 },
    { "epoch": 0.39, "learning_rate": 3.030409286918635e-05, "loss": 3.4048, "step": 132000 },
    { "epoch": 0.4, "learning_rate": 3.022948716035751e-05, "loss": 3.394, "step": 132500 },
    { "epoch": 0.4, "learning_rate": 3.0154881451528673e-05, "loss": 3.3992, "step": 133000 },
    { "epoch": 0.4, "learning_rate": 3.0080275742699832e-05, "loss": 3.3902, "step": 133500 },
    { "epoch": 0.4, "learning_rate": 3.0005670033870992e-05, "loss": 3.3865, "step": 134000 },
    { "epoch": 0.4, "learning_rate": 2.993106432504215e-05, "loss": 3.3717, "step": 134500 },
    { "epoch": 0.4, "learning_rate": 2.985645861621331e-05, "loss": 3.3644, "step": 135000 },
    { "epoch": 0.4, "learning_rate": 2.978185290738447e-05, "loss": 3.3742, "step": 135500 },
    { "epoch": 0.41, "learning_rate": 2.9707247198555634e-05, "loss": 3.3768, "step": 136000 },
    { "epoch": 0.41, "learning_rate": 2.9632641489726797e-05, "loss": 3.3777, "step": 136500 },
    { "epoch": 0.41, "learning_rate": 2.955803578089796e-05, "loss": 3.3829, "step": 137000 },
    { "epoch": 0.41, "learning_rate": 2.948343007206912e-05, "loss": 3.3844, "step": 137500 },
    { "epoch": 0.41, "learning_rate": 2.940882436324028e-05, "loss": 3.3808, "step": 138000 },
    { "epoch": 0.41, "learning_rate": 2.9334218654411438e-05, "loss": 3.383, "step": 138500 },
    { "epoch": 0.41, "learning_rate": 2.9259612945582598e-05, "loss": 3.3472, "step": 139000 },
    { "epoch": 0.42, "learning_rate": 2.9185007236753757e-05, "loss": 3.3513, "step": 139500 },
    { "epoch": 0.42, "learning_rate": 2.911040152792492e-05, "loss": 3.3708, "step": 140000 },
    { "epoch": 0.42, "learning_rate": 2.903579581909608e-05, "loss": 3.3629, "step": 140500 },
    { "epoch": 0.42, "learning_rate": 2.896119011026724e-05, "loss": 3.347, "step": 141000 },
    { "epoch": 0.42, "learning_rate": 2.88865844014384e-05, "loss": 3.3549, "step": 141500 },
    { "epoch": 0.42, "learning_rate": 2.881197869260956e-05, "loss": 3.3466, "step": 142000 },
    { "epoch": 0.43, "learning_rate": 2.8737372983780718e-05, "loss": 3.3347, "step": 142500 },
    { "epoch": 0.43, "learning_rate": 2.8662767274951878e-05, "loss": 3.3401, "step": 143000 },
    { "epoch": 0.43, "learning_rate": 2.858816156612304e-05, "loss": 3.3469, "step": 143500 },
    { "epoch": 0.43, "learning_rate": 2.85135558572942e-05, "loss": 3.338, "step": 144000 },
    { "epoch": 0.43, "learning_rate": 2.843895014846536e-05, "loss": 3.3338, "step": 144500 },
    { "epoch": 0.43, "learning_rate": 2.836434443963652e-05, "loss": 3.3499, "step": 145000 },
    { "epoch": 0.43, "learning_rate": 2.828973873080768e-05, "loss": 3.3313, "step": 145500 },
    { "epoch": 0.44, "learning_rate": 2.8215133021978845e-05, "loss": 3.3263, "step": 146000 },
    { "epoch": 0.44, "learning_rate": 2.8140527313150005e-05, "loss": 3.3386, "step": 146500 },
    { "epoch": 0.44, "learning_rate": 2.8065921604321165e-05, "loss": 3.3229, "step": 147000 },
    { "epoch": 0.44, "learning_rate": 2.7991315895492327e-05, "loss": 3.3186, "step": 147500 },
    { "epoch": 0.44, "learning_rate": 2.7916710186663487e-05, "loss": 3.3272, "step": 148000 },
    { "epoch": 0.44, "learning_rate": 2.7842104477834647e-05, "loss": 3.3052, "step": 148500 },
    { "epoch": 0.44, "learning_rate": 2.7767498769005806e-05, "loss": 3.321, "step": 149000 },
    { "epoch": 0.45, "learning_rate": 2.7692893060176966e-05, "loss": 3.3188, "step": 149500 },
    { "epoch": 0.45, "learning_rate": 2.7618287351348125e-05, "loss": 3.3118, "step": 150000 },
    { "epoch": 0.45, "learning_rate": 2.7543681642519288e-05, "loss": 3.3176, "step": 150500 },
    { "epoch": 0.45, "learning_rate": 2.7469075933690448e-05, "loss": 3.3039, "step": 151000 },
    { "epoch": 0.45, "learning_rate": 2.7394470224861607e-05, "loss": 3.2896, "step": 151500 },
    { "epoch": 0.45, "learning_rate": 2.7319864516032767e-05, "loss": 3.2964, "step": 152000 },
    { "epoch": 0.46, "learning_rate": 2.7245258807203927e-05, "loss": 3.2906, "step": 152500 },
    { "epoch": 0.46, "learning_rate": 2.7170653098375086e-05, "loss": 3.2886, "step": 153000 },
    { "epoch": 0.46, "learning_rate": 2.709604738954625e-05, "loss": 3.2987, "step": 153500 },
    { "epoch": 0.46, "learning_rate": 2.702144168071741e-05, "loss": 3.272, "step": 154000 },
    { "epoch": 0.46, "learning_rate": 2.6946835971888568e-05, "loss": 3.2735, "step": 154500 },
    { "epoch": 0.46, "learning_rate": 2.6872230263059735e-05, "loss": 3.2632, "step": 155000 },
    { "epoch": 0.46, "learning_rate": 2.6797624554230894e-05, "loss": 3.2933, "step": 155500 },
    { "epoch": 0.47, "learning_rate": 2.6723018845402054e-05, "loss": 3.2911, "step": 156000 },
    { "epoch": 0.47, "learning_rate": 2.6648413136573213e-05, "loss": 3.3155, "step": 156500 },
    { "epoch": 0.47, "learning_rate": 2.6573807427744373e-05, "loss": 3.2821, "step": 157000 },
    { "epoch": 0.47, "learning_rate": 2.6499201718915532e-05, "loss": 3.287, "step": 157500 },
    { "epoch": 0.47, "learning_rate": 2.6424596010086695e-05, "loss": 3.2701, "step": 158000 },
    { "epoch": 0.47, "learning_rate": 2.6349990301257855e-05, "loss": 3.2554, "step": 158500 },
    { "epoch": 0.47, "learning_rate": 2.6275384592429015e-05, "loss": 3.2938, "step": 159000 },
    { "epoch": 0.48, "learning_rate": 2.6200778883600174e-05, "loss": 3.2812, "step": 159500 },
    { "epoch": 0.48, "learning_rate": 2.6126173174771334e-05, "loss": 3.2739, "step": 160000 },
    { "epoch": 0.48, "learning_rate": 2.6051567465942493e-05, "loss": 3.2512, "step": 160500 },
    { "epoch": 0.48, "learning_rate": 2.5976961757113656e-05, "loss": 3.2567, "step": 161000 },
    { "epoch": 0.48, "learning_rate": 2.5902356048284816e-05, "loss": 3.2687, "step": 161500 },
    { "epoch": 0.48, "learning_rate": 2.5827750339455975e-05, "loss": 3.2876, "step": 162000 },
    { "epoch": 0.48, "learning_rate": 2.5753144630627135e-05, "loss": 3.2741, "step": 162500 },
    { "epoch": 0.49, "learning_rate": 2.5678538921798295e-05, "loss": 3.2556, "step": 163000 },
    { "epoch": 0.49, "learning_rate": 2.5603933212969454e-05, "loss": 3.2568, "step": 163500 },
    { "epoch": 0.49, "learning_rate": 2.5529327504140617e-05, "loss": 3.2432, "step": 164000 },
    { "epoch": 0.49, "learning_rate": 2.545472179531178e-05, "loss": 3.2467, "step": 164500 },
    { "epoch": 0.49, "learning_rate": 2.538011608648294e-05, "loss": 3.2575, "step": 165000 },
    { "epoch": 0.49, "learning_rate": 2.5305510377654103e-05, "loss": 3.248, "step": 165500 },
    { "epoch": 0.5, "learning_rate": 2.5230904668825262e-05, "loss": 3.2438, "step": 166000 },
    { "epoch": 0.5, "learning_rate": 2.5156298959996422e-05, "loss": 3.2516, "step": 166500 },
    { "epoch": 0.5, "learning_rate": 2.508169325116758e-05, "loss": 3.2483, "step": 167000 },
    { "epoch": 0.5, "learning_rate": 2.500708754233874e-05, "loss": 3.2313, "step": 167500 },
    { "epoch": 0.5, "learning_rate": 2.49324818335099e-05, "loss": 3.2331, "step": 168000 },
    { "epoch": 0.5, "learning_rate": 2.4857876124681063e-05, "loss": 3.2462, "step": 168500 },
    { "epoch": 0.5, "learning_rate": 2.4783270415852223e-05, "loss": 3.2219, "step": 169000 },
    { "epoch": 0.51, "learning_rate": 2.4708664707023383e-05, "loss": 3.2406, "step": 169500 },
    { "epoch": 0.51, "learning_rate": 2.4634058998194542e-05, "loss": 3.2086, "step": 170000 },
    { "epoch": 0.51, "learning_rate": 2.4559453289365702e-05, "loss": 3.2183, "step": 170500 },
    { "epoch": 0.51, "learning_rate": 2.448484758053686e-05, "loss": 3.222, "step": 171000 },
    { "epoch": 0.51, "learning_rate": 2.4410241871708024e-05, "loss": 3.2144, "step": 171500 },
    { "epoch": 0.51, "learning_rate": 2.4335636162879187e-05, "loss": 3.2212, "step": 172000 },
    { "epoch": 0.51, "learning_rate": 2.4261030454050347e-05, "loss": 3.2072, "step": 172500 },
    { "epoch": 0.52, "learning_rate": 2.4186424745221506e-05, "loss": 3.2082, "step": 173000 },
    { "epoch": 0.52, "learning_rate": 2.4111819036392666e-05, "loss": 3.2211, "step": 173500 },
    { "epoch": 0.52, "learning_rate": 2.4037213327563826e-05, "loss": 3.1991, "step": 174000 },
    { "epoch": 0.52, "learning_rate": 2.3962607618734985e-05, "loss": 3.222, "step": 174500 },
    { "epoch": 0.52, "learning_rate": 2.3888001909906148e-05, "loss": 3.1956, "step": 175000 },
    { "epoch": 0.52, "learning_rate": 2.3813396201077308e-05, "loss": 3.2101, "step": 175500 },
    { "epoch": 0.53, "learning_rate": 2.3738790492248467e-05, "loss": 3.197, "step": 176000 },
    { "epoch": 0.53, "learning_rate": 2.366418478341963e-05, "loss": 3.2057, "step": 176500 },
    { "epoch": 0.53, "learning_rate": 2.358957907459079e-05, "loss": 3.1931, "step": 177000 },
    { "epoch": 0.53, "learning_rate": 2.351497336576195e-05, "loss": 3.2, "step": 177500 },
    { "epoch": 0.53, "learning_rate": 2.344036765693311e-05, "loss": 3.1849, "step": 178000 },
    { "epoch": 0.53, "learning_rate": 2.336576194810427e-05, "loss": 3.2024, "step": 178500 },
    { "epoch": 0.53, "learning_rate": 2.329115623927543e-05, "loss": 3.1923, "step": 179000 },
    { "epoch": 0.54, "learning_rate": 2.321655053044659e-05, "loss": 3.1988, "step": 179500 },
    { "epoch": 0.54, "learning_rate": 2.314194482161775e-05, "loss": 3.1788, "step": 180000 },
    { "epoch": 0.54, "learning_rate": 2.306733911278891e-05, "loss": 3.1927, "step": 180500 },
    { "epoch": 0.54, "learning_rate": 2.2992733403960073e-05, "loss": 3.1854, "step": 181000 },
    { "epoch": 0.54, "learning_rate": 2.2918127695131233e-05, "loss": 3.2029, "step": 181500 },
    { "epoch": 0.54, "learning_rate": 2.2843521986302392e-05, "loss": 3.1958, "step": 182000 },
    { "epoch": 0.54, "learning_rate": 2.2768916277473555e-05, "loss": 3.1791, "step": 182500 },
    { "epoch": 0.55, "learning_rate": 2.2694310568644715e-05, "loss": 3.1791, "step": 183000 },
    { "epoch": 0.55, "learning_rate": 2.2619704859815874e-05, "loss": 3.1921, "step": 183500 },
    { "epoch": 0.55, "learning_rate": 2.2545099150987034e-05, "loss": 3.193, "step": 184000 },
    { "epoch": 0.55, "learning_rate": 2.2470493442158194e-05, "loss": 3.2009, "step": 184500 },
    { "epoch": 0.55, "learning_rate": 2.2395887733329353e-05, "loss": 3.1873, "step": 185000 },
    { "epoch": 0.55, "learning_rate": 2.2321282024500516e-05, "loss": 3.1703, "step": 185500 },
    { "epoch": 0.56, "learning_rate": 2.2246676315671676e-05, "loss": 3.1822, "step": 186000 },
    { "epoch": 0.56, "learning_rate": 2.217207060684284e-05, "loss": 3.1651, "step": 186500 },
    { "epoch": 0.56, "learning_rate": 2.2097464898013998e-05, "loss": 3.161, "step": 187000 },
    { "epoch": 0.56, "learning_rate": 2.2022859189185158e-05, "loss": 3.1794, "step": 187500 },
    { "epoch": 0.56, "learning_rate": 2.1948253480356317e-05, "loss": 3.166, "step": 188000 },
    { "epoch": 0.56, "learning_rate": 2.1873647771527477e-05, "loss": 3.1738, "step": 188500 },
    { "epoch": 0.56, "learning_rate": 2.1799042062698636e-05, "loss": 3.1563, "step": 189000 },
    { "epoch": 0.57, "learning_rate": 2.17244363538698e-05, "loss": 3.1549, "step": 189500 },
    { "epoch": 0.57, "learning_rate": 2.164983064504096e-05, "loss": 3.152, "step": 190000 },
    { "epoch": 0.57, "learning_rate": 2.1575224936212122e-05, "loss": 3.1475, "step": 190500 },
    { "epoch": 0.57, "learning_rate": 2.150061922738328e-05, "loss": 3.1561, "step": 191000 },
    { "epoch": 0.57, "learning_rate": 2.142601351855444e-05, "loss": 3.1452, "step": 191500 },
    { "epoch": 0.57, "learning_rate": 2.13514078097256e-05, "loss": 3.1559, "step": 192000 },
    { "epoch": 0.57, "learning_rate": 2.127680210089676e-05, "loss": 3.1523, "step": 192500 },
    { "epoch": 0.58, "learning_rate": 2.1202196392067923e-05, "loss": 3.1547, "step": 193000 },
    { "epoch": 0.58, "learning_rate": 2.1127590683239083e-05, "loss": 3.1415, "step": 193500 },
    { "epoch": 0.58, "learning_rate": 2.1052984974410242e-05, "loss": 3.1577, "step": 194000 },
    { "epoch": 0.58, "learning_rate": 2.0978379265581402e-05, "loss": 3.1476, "step": 194500 },
    { "epoch": 0.58, "learning_rate": 2.0903773556752565e-05, "loss": 3.1473, "step": 195000 },
    { "epoch": 0.58, "learning_rate": 2.0829167847923725e-05, "loss": 3.1553, "step": 195500 },
    { "epoch": 0.58, "learning_rate": 2.0754562139094884e-05, "loss": 3.1435, "step": 196000 },
    { "epoch": 0.59, "learning_rate": 2.0679956430266047e-05, "loss": 3.1384, "step": 196500 },
    { "epoch": 0.59, "learning_rate": 2.0605350721437207e-05, "loss": 3.1253, "step": 197000 },
    { "epoch": 0.59, "learning_rate": 2.0530745012608366e-05, "loss": 3.1279, "step": 197500 },
    { "epoch": 0.59, "learning_rate": 2.0456139303779526e-05, "loss": 3.1418, "step": 198000 },
    { "epoch": 0.59, "learning_rate": 2.0381533594950685e-05, "loss": 3.1448, "step": 198500 },
    { "epoch": 0.59, "learning_rate": 2.0306927886121845e-05, "loss": 3.1398, "step": 199000 },
    { "epoch": 0.6, "learning_rate": 2.0232322177293004e-05, "loss": 3.1293, "step": 199500 },
    { "epoch": 0.6, "learning_rate": 2.0157716468464167e-05, "loss": 3.1252, "step": 200000 },
    { "epoch": 0.6, "learning_rate": 2.008311075963533e-05, "loss": 3.1121, "step": 200500 },
    { "epoch": 0.6, "learning_rate": 2.000850505080649e-05, "loss": 3.1314, "step": 201000 },
    { "epoch": 0.6, "learning_rate": 1.993389934197765e-05, "loss": 3.1345, "step": 201500 },
    { "epoch": 0.6, "learning_rate": 1.985929363314881e-05, "loss": 3.1291, "step": 202000 },
    { "epoch": 0.6, "learning_rate": 1.978468792431997e-05, "loss": 3.1316, "step": 202500 },
    { "epoch": 0.61, "learning_rate": 1.9710082215491128e-05, "loss": 3.1365, "step": 203000 },
    { "epoch": 0.61, "learning_rate": 1.963547650666229e-05, "loss": 3.1363, "step": 203500 },
    { "epoch": 0.61, "learning_rate": 1.956087079783345e-05, "loss": 3.1304, "step": 204000 },
    { "epoch": 0.61, "learning_rate": 1.9486265089004614e-05, "loss": 3.1117, "step": 204500 },
    { "epoch": 0.61, "learning_rate": 1.9411659380175773e-05, "loss": 3.116, "step": 205000 },
    { "epoch": 0.61, "learning_rate": 1.9337053671346933e-05, "loss": 3.1004, "step": 205500 },
    { "epoch": 0.61, "learning_rate": 1.9262447962518092e-05, "loss": 3.1145, "step": 206000 },
    { "epoch": 0.62, "learning_rate": 1.9187842253689252e-05, "loss": 3.1189, "step": 206500 },
    { "epoch": 0.62, "learning_rate": 1.9113236544860415e-05, "loss": 3.1107, "step": 207000 },
    { "epoch": 0.62, "learning_rate": 1.9038630836031575e-05, "loss": 3.1045, "step": 207500 },
    { "epoch": 0.62, "learning_rate": 1.8964025127202734e-05, "loss": 3.1394, "step": 208000 },
    { "epoch": 0.62, "learning_rate": 1.8889419418373894e-05, "loss": 3.1042, "step": 208500 },
    { "epoch": 0.62, "learning_rate": 1.8814813709545057e-05, "loss": 3.1121, "step": 209000 },
    { "epoch": 0.63, "learning_rate": 1.8740208000716216e-05, "loss": 3.1009, "step": 209500 },
    { "epoch": 0.63, "learning_rate": 1.8665602291887376e-05, "loss": 3.1056, "step": 210000 },
    { "epoch": 0.63, "learning_rate": 1.8590996583058535e-05, "loss": 3.1007, "step": 210500 },
    { "epoch": 0.63, "learning_rate": 1.85163908742297e-05, "loss": 3.0885, "step": 211000 },
    { "epoch": 0.63, "learning_rate": 1.8441785165400858e-05, "loss": 3.0931, "step": 211500 },
    { "epoch": 0.63, "learning_rate": 1.8367179456572018e-05, "loss": 3.0962, "step": 212000 },
    { "epoch": 0.63, "learning_rate": 1.8292573747743177e-05, "loss": 3.0888, "step": 212500 },
    { "epoch": 0.64, "learning_rate": 1.8217968038914337e-05, "loss": 3.0904, "step": 213000 },
    { "epoch": 0.64, "learning_rate": 1.8143362330085496e-05, "loss": 3.1059, "step": 213500 },
    { "epoch": 0.64, "learning_rate": 1.806875662125666e-05, "loss": 3.1071, "step": 214000 },
    { "epoch": 0.64, "learning_rate": 1.7994150912427822e-05, "loss": 3.0838, "step": 214500 },
    { "epoch": 0.64, "learning_rate": 1.7919545203598982e-05, "loss": 3.1019, "step": 215000 },
    { "epoch": 0.64, "learning_rate": 1.784493949477014e-05, "loss": 3.0909, "step": 215500 },
    { "epoch": 0.64, "learning_rate": 1.77703337859413e-05, "loss": 3.0948, "step": 216000 },
    { "epoch": 0.65, "learning_rate": 1.769572807711246e-05, "loss": 3.0653, "step": 216500 },
    { "epoch": 0.65, "learning_rate": 1.762112236828362e-05, "loss": 3.1038, "step": 217000 },
    { "epoch": 0.65, "learning_rate": 1.7546516659454783e-05, "loss": 3.0913, "step": 217500 },
    { "epoch": 0.65, "learning_rate": 1.7471910950625943e-05, "loss": 3.0811, "step": 218000 },
    { "epoch": 0.65, "learning_rate": 1.7397305241797106e-05, "loss": 3.0839, "step": 218500 },
    { "epoch": 0.65, "learning_rate": 1.7322699532968265e-05, "loss": 3.0852, "step": 219000 },
    { "epoch": 0.66, "learning_rate": 1.7248093824139425e-05, "loss": 3.0573, "step": 219500 },
    { "epoch": 0.66, "learning_rate": 1.7173488115310584e-05, "loss": 3.0985, "step": 220000 },
    { "epoch": 0.66, "learning_rate": 1.7098882406481744e-05, "loss": 3.0628, "step": 220500 },
    { "epoch": 0.66, "learning_rate": 1.7024276697652903e-05, "loss": 3.076, "step": 221000 },
    { "epoch": 0.66, "learning_rate": 1.6949670988824066e-05, "loss": 3.0765, "step": 221500 },
    { "epoch": 0.66, "learning_rate": 1.6875065279995226e-05, "loss": 3.0835, "step": 222000 },
    { "epoch": 0.66, "learning_rate": 1.6800459571166386e-05, "loss": 3.0633, "step": 222500 },
    { "epoch": 0.67, "learning_rate": 1.672585386233755e-05, "loss": 3.0676, "step": 223000 },
    { "epoch": 0.67, "learning_rate": 1.6651248153508708e-05, "loss": 3.0719, "step": 223500 },
    { "epoch": 0.67, "learning_rate": 1.6576642444679868e-05, "loss": 3.0795, "step": 224000 },
    { "epoch": 0.67, "learning_rate": 1.6502036735851027e-05, "loss": 3.0631, "step": 224500 },
    { "epoch": 0.67, "learning_rate": 1.642743102702219e-05, "loss": 3.0544, "step": 225000 },
    { "epoch": 0.67, "learning_rate": 1.635282531819335e-05, "loss": 3.0803, "step": 225500 },
    { "epoch": 0.67, "learning_rate": 1.627821960936451e-05, "loss": 3.0572, "step": 226000 },
    { "epoch": 0.68, "learning_rate": 1.620361390053567e-05, "loss": 3.0625, "step": 226500 },
    { "epoch": 0.68, "learning_rate": 1.612900819170683e-05, "loss": 3.076, "step": 227000 },
    { "epoch": 0.68, "learning_rate": 1.6054402482877988e-05, "loss": 3.059, "step": 227500 },
    { "epoch": 0.68, "learning_rate": 1.597979677404915e-05, "loss": 3.0651, "step": 228000 },
    { "epoch": 0.68, "learning_rate": 1.5905191065220314e-05, "loss": 3.0331, "step": 228500 },
    { "epoch": 0.68, "learning_rate": 1.5830585356391474e-05, "loss": 3.073, "step": 229000 },
    { "epoch": 0.68, "learning_rate": 1.5755979647562633e-05, "loss": 3.0653, "step": 229500 },
    { "epoch": 0.69, "learning_rate": 1.5681373938733793e-05, "loss": 3.0541, "step": 230000 },
    { "epoch": 0.69, "learning_rate": 1.5606768229904952e-05, "loss": 3.0459, "step": 230500 },
    { "epoch": 0.69, "learning_rate": 1.5532162521076112e-05, "loss": 3.0559, "step": 231000 },
    { "epoch": 0.69, "learning_rate": 1.545755681224727e-05, "loss": 3.0597, "step": 231500 },
    { "epoch": 0.69, "learning_rate": 1.5382951103418434e-05, "loss": 3.0524, "step": 232000 },
    { "epoch": 0.69, "learning_rate": 1.5308345394589597e-05, "loss": 3.0592, "step": 232500 },
    { "epoch": 0.7, "learning_rate": 1.5233739685760757e-05, "loss": 3.0682, "step": 233000 },
    { "epoch": 0.7, "learning_rate": 1.5159133976931917e-05, "loss": 3.0332, "step": 233500 },
    { "epoch": 0.7, "learning_rate": 1.5084528268103076e-05, "loss": 3.0509, "step": 234000 },
    { "epoch": 0.7, "learning_rate": 1.5009922559274237e-05, "loss": 3.0395, "step": 234500 },
    { "epoch": 0.7, "learning_rate": 1.4935316850445397e-05, "loss": 3.063, "step": 235000 },
    { "epoch": 0.7, "learning_rate": 1.4860711141616556e-05, "loss": 3.0484, "step": 235500 },
    { "epoch": 0.7, "learning_rate": 1.4786105432787716e-05, "loss": 3.0505, "step": 236000 },
    { "epoch": 0.71, "learning_rate": 1.4711499723958877e-05, "loss": 3.0375, "step": 236500 },
    { "epoch": 0.71, "learning_rate": 1.4636894015130037e-05, "loss": 3.0443, "step": 237000 },
    { "epoch": 0.71, "learning_rate": 1.45622883063012e-05, "loss": 3.0334, "step": 237500 },
    { "epoch": 0.71, "learning_rate": 1.448768259747236e-05, "loss": 3.0349, "step": 238000 },
    { "epoch": 0.71, "learning_rate": 1.441307688864352e-05, "loss": 3.0277, "step": 238500 },
    { "epoch": 0.71, "learning_rate": 1.433847117981468e-05, "loss": 3.0345, "step": 239000 },
    { "epoch": 0.71, "learning_rate": 1.426386547098584e-05, "loss": 3.0494, "step": 239500 },
    { "epoch": 0.72, "learning_rate": 1.4189259762157001e-05, "loss": 3.0363, "step": 240000 },
    { "epoch": 0.72, "learning_rate": 1.411465405332816e-05, "loss": 3.0398, "step": 240500 },
    { "epoch": 0.72, "learning_rate": 1.404004834449932e-05, "loss": 3.0372, "step": 241000 },
    { "epoch": 0.72, "learning_rate": 1.3965442635670482e-05, "loss": 3.0286, "step": 241500 },
    { "epoch": 0.72, "learning_rate": 1.3890836926841645e-05, "loss": 3.034, "step": 242000 },
    { "epoch": 0.72, "learning_rate": 1.3816231218012804e-05, "loss": 3.0132, "step": 242500 },
    { "epoch": 0.73, "learning_rate": 1.3741625509183964e-05, "loss": 3.0232, "step": 243000 },
    { "epoch": 0.73, "learning_rate": 1.3667019800355125e-05, "loss": 3.0399, "step": 243500 },
    { "epoch": 0.73, "learning_rate": 1.3592414091526284e-05, "loss": 3.029, "step": 244000 },
    { "epoch": 0.73, "learning_rate": 1.3517808382697444e-05, "loss": 3.0386, "step": 244500 },
    { "epoch": 0.73, "learning_rate": 1.3443202673868605e-05, "loss": 3.0186, "step": 245000 },
    { "epoch": 0.73, "learning_rate": 1.3368596965039765e-05, "loss": 3.0282, "step": 245500 },
    { "epoch": 0.73, "learning_rate": 1.3293991256210924e-05, "loss": 3.0252, "step": 246000 },
    { "epoch": 0.74, "learning_rate": 1.3219385547382087e-05, "loss": 3.0175, "step": 246500 },
    { "epoch": 0.74, "learning_rate": 1.3144779838553247e-05, "loss": 3.0221, "step": 247000 },
    { "epoch": 0.74, "learning_rate": 1.3070174129724408e-05, "loss": 3.0187, "step": 247500 },
    { "epoch": 0.74, "learning_rate": 1.2995568420895568e-05, "loss": 3.0149, "step": 248000 },
    { "epoch": 0.74, "learning_rate": 1.2920962712066727e-05, "loss": 3.021, "step": 248500 },
    { "epoch": 0.74, "learning_rate": 1.2846357003237889e-05, "loss": 3.0241, "step": 249000 },
    { "epoch": 0.74, "learning_rate": 1.2771751294409048e-05, "loss": 3.005, "step": 249500 },
    { "epoch": 0.75, "learning_rate": 1.2697145585580208e-05, "loss": 3.0068, "step": 250000 },
    { "epoch": 0.75, "learning_rate": 1.2622539876751369e-05, "loss": 3.0047, "step": 250500 },
    { "epoch": 0.75, "learning_rate": 1.2547934167922529e-05, "loss": 3.0196, "step": 251000 },
    { "epoch": 0.75, "learning_rate": 1.247332845909369e-05, "loss": 3.0043, "step": 251500 },
    { "epoch": 0.75, "learning_rate": 1.239872275026485e-05, "loss": 3.0094, "step": 252000 },
    { "epoch": 0.75, "learning_rate": 1.2324117041436013e-05, "loss": 3.0014, "step": 252500 },
    { "epoch": 0.76, "learning_rate": 1.2249511332607172e-05, "loss": 2.9893, "step": 253000 },
    { "epoch": 0.76, "learning_rate": 1.2174905623778332e-05, "loss": 2.9995, "step": 253500 },
    { "epoch": 0.76, "learning_rate": 1.2100299914949493e-05, "loss": 3.0127, "step": 254000 },
    { "epoch": 0.76, "learning_rate": 1.2025694206120652e-05, "loss": 3.0093, "step": 254500 },
    { "epoch": 0.76, "learning_rate": 1.1951088497291814e-05, "loss": 3.002, "step": 255000 },
    { "epoch": 0.76, "learning_rate": 1.1876482788462973e-05, "loss": 3.0212, "step": 255500 },
    { "epoch": 0.76, "learning_rate": 1.1801877079634135e-05, "loss": 3.0069, "step": 256000 },
    { "epoch": 0.77, "learning_rate": 1.1727271370805294e-05, "loss": 3.0155, "step": 256500 },
    { "epoch": 0.77, "learning_rate": 1.1652665661976454e-05, "loss": 2.9904, "step": 257000 },
    { "epoch": 0.77, "learning_rate": 1.1578059953147615e-05, "loss": 2.9839, "step": 257500 },
    { "epoch": 0.77, "learning_rate": 1.1503454244318776e-05, "loss": 3.0073, "step": 258000 },
    { "epoch": 0.77, "learning_rate": 1.1428848535489936e-05, "loss": 2.9917, "step": 258500 },
    { "epoch": 0.77, "learning_rate": 1.1354242826661095e-05, "loss": 3.0195, "step": 259000 },
    { "epoch": 0.77, "learning_rate": 1.1279637117832258e-05, "loss": 3.0205, "step": 259500 },
    { "epoch": 0.78, "learning_rate": 1.1205031409003418e-05, "loss": 2.9868, "step": 260000 },
    { "epoch": 0.78, "learning_rate": 1.1130425700174578e-05, "loss": 2.9808, "step": 260500 },
    { "epoch": 0.78, "learning_rate": 1.1055819991345739e-05, "loss": 3.0043, "step": 261000 },
    { "epoch": 0.78, "learning_rate": 1.0981214282516898e-05, "loss": 3.0056, "step": 261500 },
    { "epoch": 0.78, "learning_rate": 1.090660857368806e-05, "loss": 2.995, "step": 262000 },
    { "epoch": 0.78, "learning_rate": 1.083200286485922e-05,
|
"loss": 2.997, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.075739715603038e-05, |
|
"loss": 2.9913, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.068279144720154e-05, |
|
"loss": 2.9802, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.06081857383727e-05, |
|
"loss": 2.9788, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0533580029543861e-05, |
|
"loss": 2.9925, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0458974320715022e-05, |
|
"loss": 3.0028, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0384368611886182e-05, |
|
"loss": 2.9988, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0309762903057341e-05, |
|
"loss": 2.9944, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0235157194228504e-05, |
|
"loss": 2.9768, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0160551485399664e-05, |
|
"loss": 2.9891, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0085945776570823e-05, |
|
"loss": 2.9815, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0011340067741983e-05, |
|
"loss": 2.9839, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.936734358913144e-06, |
|
"loss": 2.9852, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.862128650084306e-06, |
|
"loss": 2.9759, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.787522941255465e-06, |
|
"loss": 2.985, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.712917232426626e-06, |
|
"loss": 2.9627, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.638311523597786e-06, |
|
"loss": 2.9925, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.563705814768946e-06, |
|
"loss": 2.9701, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.489100105940107e-06, |
|
"loss": 2.9856, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.414494397111268e-06, |
|
"loss": 2.9915, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.339888688282428e-06, |
|
"loss": 2.9746, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.265282979453587e-06, |
|
"loss": 2.9878, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.190677270624748e-06, |
|
"loss": 2.9855, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.11607156179591e-06, |
|
"loss": 2.96, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.04146585296707e-06, |
|
"loss": 2.9613, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.966860144138229e-06, |
|
"loss": 2.9575, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.89225443530939e-06, |
|
"loss": 2.9729, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.817648726480551e-06, |
|
"loss": 2.9773, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.743043017651711e-06, |
|
"loss": 2.9643, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.668437308822872e-06, |
|
"loss": 2.9676, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.593831599994032e-06, |
|
"loss": 2.953, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.519225891165191e-06, |
|
"loss": 2.9523, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.444620182336353e-06, |
|
"loss": 2.9833, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.370014473507514e-06, |
|
"loss": 2.971, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.295408764678674e-06, |
|
"loss": 2.9665, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.220803055849833e-06, |
|
"loss": 2.9677, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.146197347020994e-06, |
|
"loss": 2.9644, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.071591638192156e-06, |
|
"loss": 2.9662, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.996985929363315e-06, |
|
"loss": 2.9606, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.922380220534475e-06, |
|
"loss": 2.9538, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.847774511705636e-06, |
|
"loss": 2.956, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.773168802876797e-06, |
|
"loss": 2.9607, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.698563094047957e-06, |
|
"loss": 2.9597, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.623957385219117e-06, |
|
"loss": 2.959, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.549351676390278e-06, |
|
"loss": 2.9556, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.474745967561437e-06, |
|
"loss": 2.956, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.4001402587325994e-06, |
|
"loss": 2.9547, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.325534549903759e-06, |
|
"loss": 2.9661, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.2509288410749194e-06, |
|
"loss": 2.9524, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.176323132246079e-06, |
|
"loss": 2.9557, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.1017174234172394e-06, |
|
"loss": 2.9675, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.027111714588401e-06, |
|
"loss": 2.9587, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.952506005759561e-06, |
|
"loss": 2.9318, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.8779002969307216e-06, |
|
"loss": 2.9583, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.803294588101881e-06, |
|
"loss": 2.9556, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.728688879273043e-06, |
|
"loss": 2.9436, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.654083170444203e-06, |
|
"loss": 2.954, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.579477461615363e-06, |
|
"loss": 2.9691, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.504871752786524e-06, |
|
"loss": 2.9443, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.430266043957683e-06, |
|
"loss": 2.9236, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.3556603351288445e-06, |
|
"loss": 2.9331, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.281054626300005e-06, |
|
"loss": 2.9342, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.206448917471165e-06, |
|
"loss": 2.9598, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.131843208642326e-06, |
|
"loss": 2.9562, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.057237499813486e-06, |
|
"loss": 2.9264, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.982631790984647e-06, |
|
"loss": 2.9349, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.908026082155807e-06, |
|
"loss": 2.9384, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.8334203733269674e-06, |
|
"loss": 2.9622, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.758814664498128e-06, |
|
"loss": 2.9589, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.684208955669288e-06, |
|
"loss": 2.9395, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.609603246840448e-06, |
|
"loss": 2.9342, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.534997538011609e-06, |
|
"loss": 2.9262, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.460391829182769e-06, |
|
"loss": 2.9391, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.38578612035393e-06, |
|
"loss": 2.9451, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.31118041152509e-06, |
|
"loss": 2.9182, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.236574702696251e-06, |
|
"loss": 2.9519, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.161968993867411e-06, |
|
"loss": 2.9299, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.087363285038571e-06, |
|
"loss": 2.9176, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.012757576209732e-06, |
|
"loss": 2.9499, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.938151867380892e-06, |
|
"loss": 2.9258, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.863546158552053e-06, |
|
"loss": 2.9396, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.788940449723213e-06, |
|
"loss": 2.941, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.714334740894374e-06, |
|
"loss": 2.9308, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.639729032065534e-06, |
|
"loss": 2.9213, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.565123323236694e-06, |
|
"loss": 2.9097, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.490517614407855e-06, |
|
"loss": 2.9218, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.415911905579015e-06, |
|
"loss": 2.9276, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.341306196750176e-06, |
|
"loss": 2.9296, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.2667004879213354e-06, |
|
"loss": 2.9332, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.192094779092497e-06, |
|
"loss": 2.9266, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.117489070263657e-06, |
|
"loss": 2.9239, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.042883361434817e-06, |
|
"loss": 2.9279, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.968277652605978e-06, |
|
"loss": 2.9062, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8936719437771375e-06, |
|
"loss": 2.9261, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.819066234948299e-06, |
|
"loss": 2.9265, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.744460526119459e-06, |
|
"loss": 2.9235, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6698548172906197e-06, |
|
"loss": 2.9121, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5952491084617797e-06, |
|
"loss": 2.9207, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5206433996329397e-06, |
|
"loss": 2.9282, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4460376908041005e-06, |
|
"loss": 2.9205, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.371431981975261e-06, |
|
"loss": 2.9228, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.2968262731464213e-06, |
|
"loss": 2.9271, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.2222205643175818e-06, |
|
"loss": 2.9108, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1476148554887426e-06, |
|
"loss": 2.9248, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.0730091466599026e-06, |
|
"loss": 2.9111, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.998403437831063e-06, |
|
"loss": 2.9206, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.9237977290022234e-06, |
|
"loss": 2.9175, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.8491920201733834e-06, |
|
"loss": 2.9326, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7745863113445443e-06, |
|
"loss": 2.9216, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6999806025157047e-06, |
|
"loss": 2.9272, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.625374893686865e-06, |
|
"loss": 2.903, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.5507691848580256e-06, |
|
"loss": 2.8962, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.476163476029186e-06, |
|
"loss": 2.9097, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4015577672003464e-06, |
|
"loss": 2.9369, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.3269520583715064e-06, |
|
"loss": 2.9132, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.252346349542667e-06, |
|
"loss": 2.9185, |
|
"step": 320000 |
|
} |
|
], |
|
"max_steps": 335095, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.1086164784256e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|