|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.12129203840172702, |
|
"eval_steps": 200, |
|
"global_step": 87200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00021075215336012696, |
|
"grad_norm": 0.5226424932479858, |
|
"learning_rate": 1.9999997880573555e-05, |
|
"loss": 2.036, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0004215043067202539, |
|
"grad_norm": 0.38226085901260376, |
|
"learning_rate": 1.9999991377996364e-05, |
|
"loss": 1.7571, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0006322564600803809, |
|
"grad_norm": 0.563613772392273, |
|
"learning_rate": 1.9999980491662426e-05, |
|
"loss": 1.8127, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0008430086134405078, |
|
"grad_norm": 0.5952104330062866, |
|
"learning_rate": 1.999996522157651e-05, |
|
"loss": 1.7384, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0010537607668006349, |
|
"grad_norm": 0.5547693967819214, |
|
"learning_rate": 1.999994556774531e-05, |
|
"loss": 1.7468, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0012645129201607618, |
|
"grad_norm": 0.43776935338974, |
|
"learning_rate": 1.9999921530177446e-05, |
|
"loss": 1.758, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.0014752650735208887, |
|
"grad_norm": 0.5462590456008911, |
|
"learning_rate": 1.999989310888345e-05, |
|
"loss": 1.7594, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.0016860172268810157, |
|
"grad_norm": 0.8200376033782959, |
|
"learning_rate": 1.9999860303875793e-05, |
|
"loss": 1.7043, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0018967693802411426, |
|
"grad_norm": 0.7284532189369202, |
|
"learning_rate": 1.9999823115168838e-05, |
|
"loss": 1.7438, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.0021075215336012697, |
|
"grad_norm": 0.33967018127441406, |
|
"learning_rate": 1.9999781542778898e-05, |
|
"loss": 1.714, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0023182736869613967, |
|
"grad_norm": 0.5259618759155273, |
|
"learning_rate": 1.9999735586724198e-05, |
|
"loss": 1.7203, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.0025290258403215236, |
|
"grad_norm": 0.43303459882736206, |
|
"learning_rate": 1.9999685247024884e-05, |
|
"loss": 1.727, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.0027397779936816505, |
|
"grad_norm": 0.46599942445755005, |
|
"learning_rate": 1.999963052370302e-05, |
|
"loss": 1.7606, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.0029505301470417774, |
|
"grad_norm": 0.3022066652774811, |
|
"learning_rate": 1.9999571416782594e-05, |
|
"loss": 1.6982, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.0031612823004019044, |
|
"grad_norm": 0.6201561093330383, |
|
"learning_rate": 1.999950792628952e-05, |
|
"loss": 1.7679, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0033720344537620313, |
|
"grad_norm": 0.49575334787368774, |
|
"learning_rate": 1.9999440052251636e-05, |
|
"loss": 1.7536, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.0035827866071221582, |
|
"grad_norm": 0.5179804563522339, |
|
"learning_rate": 1.999936779469869e-05, |
|
"loss": 1.6947, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.003793538760482285, |
|
"grad_norm": 0.7314251661300659, |
|
"learning_rate": 1.9999291153662357e-05, |
|
"loss": 1.778, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.004004290913842412, |
|
"grad_norm": 0.6501708626747131, |
|
"learning_rate": 1.999921012917624e-05, |
|
"loss": 1.7251, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.0042150430672025395, |
|
"grad_norm": 0.4130661189556122, |
|
"learning_rate": 1.9999124721275855e-05, |
|
"loss": 1.7343, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.004425795220562666, |
|
"grad_norm": 0.8185787200927734, |
|
"learning_rate": 1.999903492999864e-05, |
|
"loss": 1.7199, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.004636547373922793, |
|
"grad_norm": 0.8078156113624573, |
|
"learning_rate": 1.999894075538396e-05, |
|
"loss": 1.6994, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.00484729952728292, |
|
"grad_norm": 0.5087378621101379, |
|
"learning_rate": 1.99988421974731e-05, |
|
"loss": 1.7405, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.005058051680643047, |
|
"grad_norm": 0.41504907608032227, |
|
"learning_rate": 1.9998739256309265e-05, |
|
"loss": 1.7625, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.005268803834003174, |
|
"grad_norm": 0.6914900541305542, |
|
"learning_rate": 1.9998631931937582e-05, |
|
"loss": 1.7243, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.005479555987363301, |
|
"grad_norm": 0.7715939283370972, |
|
"learning_rate": 1.99985202244051e-05, |
|
"loss": 1.7325, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.0056903081407234275, |
|
"grad_norm": 0.7590686678886414, |
|
"learning_rate": 1.9998404133760786e-05, |
|
"loss": 1.7087, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.005901060294083555, |
|
"grad_norm": 0.4283329248428345, |
|
"learning_rate": 1.999828366005553e-05, |
|
"loss": 1.6673, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.006111812447443681, |
|
"grad_norm": 0.340751051902771, |
|
"learning_rate": 1.9998158803342154e-05, |
|
"loss": 1.7053, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.006322564600803809, |
|
"grad_norm": 0.691828191280365, |
|
"learning_rate": 1.999802956367538e-05, |
|
"loss": 1.6969, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.006533316754163935, |
|
"grad_norm": 0.690230131149292, |
|
"learning_rate": 1.9997895941111877e-05, |
|
"loss": 1.6877, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.006744068907524063, |
|
"grad_norm": 0.493589848279953, |
|
"learning_rate": 1.999775793571021e-05, |
|
"loss": 1.7322, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.00695482106088419, |
|
"grad_norm": 0.3544859290122986, |
|
"learning_rate": 1.9997615547530883e-05, |
|
"loss": 1.6785, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.0071655732142443165, |
|
"grad_norm": 0.385469913482666, |
|
"learning_rate": 1.9997468776636312e-05, |
|
"loss": 1.6838, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.007376325367604444, |
|
"grad_norm": 0.5788832902908325, |
|
"learning_rate": 1.9997317623090845e-05, |
|
"loss": 1.6544, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.00758707752096457, |
|
"grad_norm": 0.5985785722732544, |
|
"learning_rate": 1.9997162086960737e-05, |
|
"loss": 1.7223, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.007797829674324698, |
|
"grad_norm": 0.316755473613739, |
|
"learning_rate": 1.999700216831417e-05, |
|
"loss": 1.69, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.008008581827684824, |
|
"grad_norm": 0.38338717818260193, |
|
"learning_rate": 1.999683786722126e-05, |
|
"loss": 1.6609, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.00821933398104495, |
|
"grad_norm": 0.5168259143829346, |
|
"learning_rate": 1.999666918375402e-05, |
|
"loss": 1.6581, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.008430086134405079, |
|
"grad_norm": 0.534095823764801, |
|
"learning_rate": 1.9996496117986404e-05, |
|
"loss": 1.6349, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.008640838287765205, |
|
"grad_norm": 0.3490186333656311, |
|
"learning_rate": 1.9996318669994275e-05, |
|
"loss": 1.6453, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.008851590441125332, |
|
"grad_norm": 0.8970847129821777, |
|
"learning_rate": 1.9996136839855426e-05, |
|
"loss": 1.7177, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.009062342594485458, |
|
"grad_norm": 0.26147884130477905, |
|
"learning_rate": 1.9995950627649567e-05, |
|
"loss": 1.7142, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.009273094747845587, |
|
"grad_norm": 0.2811965048313141, |
|
"learning_rate": 1.9995760033458323e-05, |
|
"loss": 1.6708, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.009483846901205713, |
|
"grad_norm": 0.38605690002441406, |
|
"learning_rate": 1.9995565057365255e-05, |
|
"loss": 1.7134, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.00969459905456584, |
|
"grad_norm": 0.3690793514251709, |
|
"learning_rate": 1.999536569945583e-05, |
|
"loss": 1.7195, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.009905351207925968, |
|
"grad_norm": 0.6093018054962158, |
|
"learning_rate": 1.9995161959817442e-05, |
|
"loss": 1.7219, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.010116103361286094, |
|
"grad_norm": 0.5218012928962708, |
|
"learning_rate": 1.9994953838539408e-05, |
|
"loss": 1.7018, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.01032685551464622, |
|
"grad_norm": 0.49458399415016174, |
|
"learning_rate": 1.9994741335712963e-05, |
|
"loss": 1.6825, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.010537607668006347, |
|
"grad_norm": 0.559432327747345, |
|
"learning_rate": 1.999452445143126e-05, |
|
"loss": 1.7167, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.010748359821366476, |
|
"grad_norm": 0.30228278040885925, |
|
"learning_rate": 1.999430318578938e-05, |
|
"loss": 1.6662, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.010959111974726602, |
|
"grad_norm": 0.7081576585769653, |
|
"learning_rate": 1.9994077538884315e-05, |
|
"loss": 1.7021, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.011169864128086729, |
|
"grad_norm": 0.35427266359329224, |
|
"learning_rate": 1.9993847510814986e-05, |
|
"loss": 1.7281, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.011380616281446855, |
|
"grad_norm": 0.5683895349502563, |
|
"learning_rate": 1.9993613101682236e-05, |
|
"loss": 1.7239, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.011591368434806983, |
|
"grad_norm": 0.38721588253974915, |
|
"learning_rate": 1.999337431158882e-05, |
|
"loss": 1.6956, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.01180212058816711, |
|
"grad_norm": 0.5694710612297058, |
|
"learning_rate": 1.9993131140639417e-05, |
|
"loss": 1.7068, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.012012872741527236, |
|
"grad_norm": 0.6137599945068359, |
|
"learning_rate": 1.9992883588940636e-05, |
|
"loss": 1.6573, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.012223624894887363, |
|
"grad_norm": 0.40165194869041443, |
|
"learning_rate": 1.999263165660098e-05, |
|
"loss": 1.7253, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.012434377048247491, |
|
"grad_norm": 0.6021196246147156, |
|
"learning_rate": 1.9992375343730913e-05, |
|
"loss": 1.7137, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.012645129201607617, |
|
"grad_norm": 0.26746639609336853, |
|
"learning_rate": 1.9992114650442776e-05, |
|
"loss": 1.6879, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.012855881354967744, |
|
"grad_norm": 0.2976267635822296, |
|
"learning_rate": 1.999184957685086e-05, |
|
"loss": 1.7046, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.01306663350832787, |
|
"grad_norm": 0.7765467166900635, |
|
"learning_rate": 1.999158012307137e-05, |
|
"loss": 1.659, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.013277385661687999, |
|
"grad_norm": 0.3900175392627716, |
|
"learning_rate": 1.999130628922242e-05, |
|
"loss": 1.6311, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.013488137815048125, |
|
"grad_norm": 0.5595497488975525, |
|
"learning_rate": 1.9991028075424058e-05, |
|
"loss": 1.7031, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.013698889968408252, |
|
"grad_norm": 0.4682876765727997, |
|
"learning_rate": 1.999074548179824e-05, |
|
"loss": 1.678, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.01390964212176838, |
|
"grad_norm": 0.29408198595046997, |
|
"learning_rate": 1.999045850846886e-05, |
|
"loss": 1.6933, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.014120394275128506, |
|
"grad_norm": 0.5688738226890564, |
|
"learning_rate": 1.999016715556171e-05, |
|
"loss": 1.7094, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.014331146428488633, |
|
"grad_norm": 0.33922871947288513, |
|
"learning_rate": 1.9989871423204515e-05, |
|
"loss": 1.7241, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.01454189858184876, |
|
"grad_norm": 0.5533545613288879, |
|
"learning_rate": 1.9989571311526917e-05, |
|
"loss": 1.6947, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.014752650735208888, |
|
"grad_norm": 0.4148263931274414, |
|
"learning_rate": 1.9989266820660477e-05, |
|
"loss": 1.6968, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.014963402888569014, |
|
"grad_norm": 0.41911232471466064, |
|
"learning_rate": 1.9988957950738678e-05, |
|
"loss": 1.6583, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.01517415504192914, |
|
"grad_norm": 0.6024166941642761, |
|
"learning_rate": 1.9988644701896922e-05, |
|
"loss": 1.7029, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.015384907195289267, |
|
"grad_norm": 1.0043667554855347, |
|
"learning_rate": 1.9988327074272528e-05, |
|
"loss": 1.6589, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.015595659348649395, |
|
"grad_norm": 0.2308570146560669, |
|
"learning_rate": 1.9988005068004734e-05, |
|
"loss": 1.6768, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.01580641150200952, |
|
"grad_norm": 0.37441328167915344, |
|
"learning_rate": 1.9987678683234707e-05, |
|
"loss": 1.6844, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.01601716365536965, |
|
"grad_norm": 0.5360813736915588, |
|
"learning_rate": 1.9987347920105517e-05, |
|
"loss": 1.6552, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.016227915808729777, |
|
"grad_norm": 0.6203900575637817, |
|
"learning_rate": 1.9987012778762173e-05, |
|
"loss": 1.6582, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.0164386679620899, |
|
"grad_norm": 0.44901394844055176, |
|
"learning_rate": 1.9986673259351584e-05, |
|
"loss": 1.6519, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.01664942011545003, |
|
"grad_norm": 0.35824868083000183, |
|
"learning_rate": 1.998632936202259e-05, |
|
"loss": 1.6868, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.016860172268810158, |
|
"grad_norm": 0.2986483871936798, |
|
"learning_rate": 1.998598108692595e-05, |
|
"loss": 1.681, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.017070924422170283, |
|
"grad_norm": 0.24179548025131226, |
|
"learning_rate": 1.9985628434214334e-05, |
|
"loss": 1.6609, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.01728167657553041, |
|
"grad_norm": 0.3813832402229309, |
|
"learning_rate": 1.9985271404042343e-05, |
|
"loss": 1.7062, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.01749242872889054, |
|
"grad_norm": 0.729052722454071, |
|
"learning_rate": 1.9984909996566487e-05, |
|
"loss": 1.6842, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.017703180882250664, |
|
"grad_norm": 0.4519130289554596, |
|
"learning_rate": 1.99845442119452e-05, |
|
"loss": 1.6985, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.017913933035610792, |
|
"grad_norm": 0.8739224672317505, |
|
"learning_rate": 1.998417405033883e-05, |
|
"loss": 1.7079, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.018124685188970917, |
|
"grad_norm": 0.6653211712837219, |
|
"learning_rate": 1.998379951190965e-05, |
|
"loss": 1.7096, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.018335437342331045, |
|
"grad_norm": 0.284675270318985, |
|
"learning_rate": 1.9983420596821848e-05, |
|
"loss": 1.6631, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.018546189495691173, |
|
"grad_norm": 0.3598421514034271, |
|
"learning_rate": 1.998303730524153e-05, |
|
"loss": 1.675, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.018756941649051298, |
|
"grad_norm": 0.4144167900085449, |
|
"learning_rate": 1.9982649637336722e-05, |
|
"loss": 1.6568, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.018967693802411426, |
|
"grad_norm": 0.3359282314777374, |
|
"learning_rate": 1.998225759327737e-05, |
|
"loss": 1.7094, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.019178445955771554, |
|
"grad_norm": 0.513903021812439, |
|
"learning_rate": 1.9981861173235337e-05, |
|
"loss": 1.7124, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.01938919810913168, |
|
"grad_norm": 0.6693827509880066, |
|
"learning_rate": 1.9981460377384402e-05, |
|
"loss": 1.7058, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.019599950262491807, |
|
"grad_norm": 0.5340267419815063, |
|
"learning_rate": 1.9981055205900263e-05, |
|
"loss": 1.6654, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.019810702415851936, |
|
"grad_norm": 0.5445379018783569, |
|
"learning_rate": 1.9980645658960543e-05, |
|
"loss": 1.7163, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.02002145456921206, |
|
"grad_norm": 0.4673166275024414, |
|
"learning_rate": 1.9980231736744772e-05, |
|
"loss": 1.6885, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.02023220672257219, |
|
"grad_norm": 0.3964649438858032, |
|
"learning_rate": 1.997981343943441e-05, |
|
"loss": 1.7349, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.020442958875932313, |
|
"grad_norm": 0.2978706955909729, |
|
"learning_rate": 1.9979390767212818e-05, |
|
"loss": 1.7043, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.02065371102929244, |
|
"grad_norm": 0.5393288135528564, |
|
"learning_rate": 1.9978963720265297e-05, |
|
"loss": 1.6713, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.02086446318265257, |
|
"grad_norm": 0.5377795100212097, |
|
"learning_rate": 1.9978532298779047e-05, |
|
"loss": 1.7386, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.021075215336012695, |
|
"grad_norm": 0.9713721871376038, |
|
"learning_rate": 1.997809650294319e-05, |
|
"loss": 1.666, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.021285967489372823, |
|
"grad_norm": 0.5377500057220459, |
|
"learning_rate": 1.9977656332948783e-05, |
|
"loss": 1.6333, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.02149671964273295, |
|
"grad_norm": 0.7222330570220947, |
|
"learning_rate": 1.997721178898877e-05, |
|
"loss": 1.647, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.021707471796093076, |
|
"grad_norm": 0.7166306972503662, |
|
"learning_rate": 1.9976762871258036e-05, |
|
"loss": 1.6702, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.021918223949453204, |
|
"grad_norm": 0.5046064257621765, |
|
"learning_rate": 1.9976309579953374e-05, |
|
"loss": 1.6834, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.02212897610281333, |
|
"grad_norm": 0.45860910415649414, |
|
"learning_rate": 1.9975851915273495e-05, |
|
"loss": 1.6752, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.022339728256173457, |
|
"grad_norm": 0.29681679606437683, |
|
"learning_rate": 1.9975389877419033e-05, |
|
"loss": 1.6773, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.022550480409533585, |
|
"grad_norm": 0.3104653060436249, |
|
"learning_rate": 1.9974923466592528e-05, |
|
"loss": 1.6612, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.02276123256289371, |
|
"grad_norm": 0.3063599467277527, |
|
"learning_rate": 1.9974452682998446e-05, |
|
"loss": 1.6728, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.02297198471625384, |
|
"grad_norm": 0.5225020051002502, |
|
"learning_rate": 1.9973977526843173e-05, |
|
"loss": 1.6858, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.023182736869613967, |
|
"grad_norm": 0.5143675208091736, |
|
"learning_rate": 1.9973497998334993e-05, |
|
"loss": 1.6589, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.02339348902297409, |
|
"grad_norm": 0.38197314739227295, |
|
"learning_rate": 1.9973014097684134e-05, |
|
"loss": 1.6792, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.02360424117633422, |
|
"grad_norm": 0.3367295563220978, |
|
"learning_rate": 1.9972525825102716e-05, |
|
"loss": 1.699, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.023814993329694348, |
|
"grad_norm": 0.7394578456878662, |
|
"learning_rate": 1.997203318080479e-05, |
|
"loss": 1.6465, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.024025745483054473, |
|
"grad_norm": 0.474181205034256, |
|
"learning_rate": 1.9971536165006323e-05, |
|
"loss": 1.6735, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.0242364976364146, |
|
"grad_norm": 0.23586097359657288, |
|
"learning_rate": 1.997103477792519e-05, |
|
"loss": 1.673, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.024447249789774726, |
|
"grad_norm": 0.6340083479881287, |
|
"learning_rate": 1.9970529019781188e-05, |
|
"loss": 1.6962, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.024658001943134854, |
|
"grad_norm": 0.6163159608840942, |
|
"learning_rate": 1.997001889079603e-05, |
|
"loss": 1.7392, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.024868754096494982, |
|
"grad_norm": 0.493683397769928, |
|
"learning_rate": 1.996950439119334e-05, |
|
"loss": 1.69, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.025079506249855107, |
|
"grad_norm": 0.5327634215354919, |
|
"learning_rate": 1.996898552119867e-05, |
|
"loss": 1.6646, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.025290258403215235, |
|
"grad_norm": 0.31081724166870117, |
|
"learning_rate": 1.9968462281039477e-05, |
|
"loss": 1.6577, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.025501010556575363, |
|
"grad_norm": 0.35480156540870667, |
|
"learning_rate": 1.9967934670945133e-05, |
|
"loss": 1.7186, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.025711762709935488, |
|
"grad_norm": 0.6122684478759766, |
|
"learning_rate": 1.996740269114694e-05, |
|
"loss": 1.6977, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.025922514863295616, |
|
"grad_norm": 0.5439947843551636, |
|
"learning_rate": 1.996686634187809e-05, |
|
"loss": 1.707, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.02613326701665574, |
|
"grad_norm": 0.2414426952600479, |
|
"learning_rate": 1.996632562337372e-05, |
|
"loss": 1.685, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.02634401917001587, |
|
"grad_norm": 0.5094660520553589, |
|
"learning_rate": 1.9965780535870857e-05, |
|
"loss": 1.7265, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.026554771323375997, |
|
"grad_norm": 0.47010356187820435, |
|
"learning_rate": 1.996523107960846e-05, |
|
"loss": 1.7038, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.026765523476736122, |
|
"grad_norm": 0.8133333325386047, |
|
"learning_rate": 1.99646772548274e-05, |
|
"loss": 1.6871, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.02697627563009625, |
|
"grad_norm": 0.38821008801460266, |
|
"learning_rate": 1.9964119061770457e-05, |
|
"loss": 1.708, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.02718702778345638, |
|
"grad_norm": 0.4741418957710266, |
|
"learning_rate": 1.9963556500682326e-05, |
|
"loss": 1.6105, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.027397779936816503, |
|
"grad_norm": 0.29607221484184265, |
|
"learning_rate": 1.9962989571809627e-05, |
|
"loss": 1.6973, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.02760853209017663, |
|
"grad_norm": 0.6345324516296387, |
|
"learning_rate": 1.9962418275400887e-05, |
|
"loss": 1.6509, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.02781928424353676, |
|
"grad_norm": 0.45032599568367004, |
|
"learning_rate": 1.996184261170654e-05, |
|
"loss": 1.7075, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.028030036396896885, |
|
"grad_norm": 0.5050271153450012, |
|
"learning_rate": 1.9961262580978954e-05, |
|
"loss": 1.7109, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.028240788550257013, |
|
"grad_norm": 0.41781535744667053, |
|
"learning_rate": 1.9960678183472398e-05, |
|
"loss": 1.6649, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.028451540703617138, |
|
"grad_norm": 0.26590654253959656, |
|
"learning_rate": 1.9960089419443054e-05, |
|
"loss": 1.6784, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.028662292856977266, |
|
"grad_norm": 0.2450639009475708, |
|
"learning_rate": 1.9959496289149025e-05, |
|
"loss": 1.6668, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.028873045010337394, |
|
"grad_norm": 0.4967813789844513, |
|
"learning_rate": 1.9958898792850324e-05, |
|
"loss": 1.6743, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.02908379716369752, |
|
"grad_norm": 0.6676028966903687, |
|
"learning_rate": 1.995829693080888e-05, |
|
"loss": 1.7498, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.029294549317057647, |
|
"grad_norm": 0.5373163819313049, |
|
"learning_rate": 1.995769070328854e-05, |
|
"loss": 1.6637, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.029505301470417775, |
|
"grad_norm": 0.519572377204895, |
|
"learning_rate": 1.9957080110555046e-05, |
|
"loss": 1.6743, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.0297160536237779, |
|
"grad_norm": 0.4347212612628937, |
|
"learning_rate": 1.9956465152876076e-05, |
|
"loss": 1.6588, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.02992680577713803, |
|
"grad_norm": 1.0741196870803833, |
|
"learning_rate": 1.9955845830521215e-05, |
|
"loss": 1.6594, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.030137557930498157, |
|
"grad_norm": 0.29438555240631104, |
|
"learning_rate": 1.9955222143761954e-05, |
|
"loss": 1.6742, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.03034831008385828, |
|
"grad_norm": 0.2416359782218933, |
|
"learning_rate": 1.995459409287171e-05, |
|
"loss": 1.6244, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.03055906223721841, |
|
"grad_norm": 0.3811536431312561, |
|
"learning_rate": 1.995396167812579e-05, |
|
"loss": 1.6768, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.030769814390578534, |
|
"grad_norm": 0.52689528465271, |
|
"learning_rate": 1.995332489980145e-05, |
|
"loss": 1.6966, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.030980566543938663, |
|
"grad_norm": 0.7789379358291626, |
|
"learning_rate": 1.9952683758177822e-05, |
|
"loss": 1.7243, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.03119131869729879, |
|
"grad_norm": 0.5525240898132324, |
|
"learning_rate": 1.9952038253535974e-05, |
|
"loss": 1.6921, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.031402070850658916, |
|
"grad_norm": 0.3712736666202545, |
|
"learning_rate": 1.995138838615888e-05, |
|
"loss": 1.7013, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.03161282300401904, |
|
"grad_norm": 0.9973317980766296, |
|
"learning_rate": 1.9950734156331425e-05, |
|
"loss": 1.6312, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.03182357515737917, |
|
"grad_norm": 0.47756654024124146, |
|
"learning_rate": 1.9950075564340406e-05, |
|
"loss": 1.6871, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.0320343273107393, |
|
"grad_norm": 0.6799851655960083, |
|
"learning_rate": 1.9949412610474533e-05, |
|
"loss": 1.6863, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.03224507946409942, |
|
"grad_norm": 0.4427473843097687, |
|
"learning_rate": 1.9948745295024436e-05, |
|
"loss": 1.6623, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.03245583161745955, |
|
"grad_norm": 0.39299216866493225, |
|
"learning_rate": 1.9948073618282646e-05, |
|
"loss": 1.6714, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.03266658377081968, |
|
"grad_norm": 0.49504077434539795, |
|
"learning_rate": 1.994739758054361e-05, |
|
"loss": 1.6637, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.0328773359241798, |
|
"grad_norm": 0.8702055215835571, |
|
"learning_rate": 1.9946717182103684e-05, |
|
"loss": 1.6592, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.033088088077539934, |
|
"grad_norm": 0.4576627314090729, |
|
"learning_rate": 1.994603242326114e-05, |
|
"loss": 1.6766, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.03329884023090006, |
|
"grad_norm": 0.4096079170703888, |
|
"learning_rate": 1.9945343304316168e-05, |
|
"loss": 1.6465, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.033509592384260184, |
|
"grad_norm": 0.20980533957481384, |
|
"learning_rate": 1.9944649825570846e-05, |
|
"loss": 1.642, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.033720344537620316, |
|
"grad_norm": 0.2815381586551666, |
|
"learning_rate": 1.994395198732919e-05, |
|
"loss": 1.6655, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.03393109669098044, |
|
"grad_norm": 0.3039368987083435, |
|
"learning_rate": 1.9943249789897115e-05, |
|
"loss": 1.6321, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.034141848844340565, |
|
"grad_norm": 0.3904463052749634, |
|
"learning_rate": 1.9942543233582442e-05, |
|
"loss": 1.7152, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.0343526009977007, |
|
"grad_norm": 0.44439107179641724, |
|
"learning_rate": 1.994183231869491e-05, |
|
"loss": 1.6447, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.03456335315106082, |
|
"grad_norm": 0.49679499864578247, |
|
"learning_rate": 1.9941117045546172e-05, |
|
"loss": 1.7256, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.034774105304420946, |
|
"grad_norm": 0.5180505514144897, |
|
"learning_rate": 1.994039741444978e-05, |
|
"loss": 1.6376, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.03498485745778108, |
|
"grad_norm": 0.425346314907074, |
|
"learning_rate": 1.9939673425721203e-05, |
|
"loss": 1.7076, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.0351956096111412, |
|
"grad_norm": 0.6802691221237183, |
|
"learning_rate": 1.9938945079677827e-05, |
|
"loss": 1.7051, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.03540636176450133, |
|
"grad_norm": 0.3185325562953949, |
|
"learning_rate": 1.9938212376638937e-05, |
|
"loss": 1.6973, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.03561711391786145, |
|
"grad_norm": 0.493886262178421, |
|
"learning_rate": 1.9937475316925734e-05, |
|
"loss": 1.7147, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.035827866071221584, |
|
"grad_norm": 0.3434235155582428, |
|
"learning_rate": 1.9936733900861324e-05, |
|
"loss": 1.7198, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.03603861822458171, |
|
"grad_norm": 0.40676742792129517, |
|
"learning_rate": 1.993598812877073e-05, |
|
"loss": 1.7005, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.036249370377941834, |
|
"grad_norm": 0.311731219291687, |
|
"learning_rate": 1.993523800098088e-05, |
|
"loss": 1.6024, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.036460122531301965, |
|
"grad_norm": 0.3956526517868042, |
|
"learning_rate": 1.993448351782061e-05, |
|
"loss": 1.6885, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.03667087468466209, |
|
"grad_norm": 0.5735802054405212, |
|
"learning_rate": 1.993372467962067e-05, |
|
"loss": 1.6835, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.036881626838022215, |
|
"grad_norm": 0.35788044333457947, |
|
"learning_rate": 1.993296148671371e-05, |
|
"loss": 1.641, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.037092378991382347, |
|
"grad_norm": 0.5725888013839722, |
|
"learning_rate": 1.9932193939434304e-05, |
|
"loss": 1.6075, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.03686056482904, |
|
"grad_norm": 0.37512072920799255, |
|
"learning_rate": 1.993303783123765e-05, |
|
"loss": 1.6621, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.03699966130009299, |
|
"grad_norm": 0.46840205788612366, |
|
"learning_rate": 1.993253202515022e-05, |
|
"loss": 1.6933, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.03713875777114597, |
|
"grad_norm": 0.6212956309318542, |
|
"learning_rate": 1.9932024322378897e-05, |
|
"loss": 1.6746, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.03727785424219895, |
|
"grad_norm": 0.44953858852386475, |
|
"learning_rate": 1.9931514723020624e-05, |
|
"loss": 1.6428, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.03741695071325193, |
|
"grad_norm": 0.30623960494995117, |
|
"learning_rate": 1.993100322717272e-05, |
|
"loss": 1.5969, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.037556047184304914, |
|
"grad_norm": 0.2042389065027237, |
|
"learning_rate": 1.9930489834932853e-05, |
|
"loss": 1.6739, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.03769514365535789, |
|
"grad_norm": 1.5293948650360107, |
|
"learning_rate": 1.9929974546399056e-05, |
|
"loss": 1.5911, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.03783424012641087, |
|
"grad_norm": 0.2645045220851898, |
|
"learning_rate": 1.9929457361669735e-05, |
|
"loss": 1.7516, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.03797333659746385, |
|
"grad_norm": 0.47690504789352417, |
|
"learning_rate": 1.992893828084365e-05, |
|
"loss": 1.667, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.03811243306851683, |
|
"grad_norm": 0.683849573135376, |
|
"learning_rate": 1.9928417304019915e-05, |
|
"loss": 1.6974, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.03825152953956982, |
|
"grad_norm": 0.5895049571990967, |
|
"learning_rate": 1.9927894431298016e-05, |
|
"loss": 1.6562, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.0383906260106228, |
|
"grad_norm": 0.41334617137908936, |
|
"learning_rate": 1.9927369662777806e-05, |
|
"loss": 1.6587, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.03852972248167578, |
|
"grad_norm": 0.4091622531414032, |
|
"learning_rate": 1.992684299855949e-05, |
|
"loss": 1.6477, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.03866881895272876, |
|
"grad_norm": 0.28317388892173767, |
|
"learning_rate": 1.992631443874363e-05, |
|
"loss": 1.6445, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.038807915423781744, |
|
"grad_norm": 0.4005356431007385, |
|
"learning_rate": 1.992578398343117e-05, |
|
"loss": 1.6825, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.038947011894834724, |
|
"grad_norm": 0.44340595602989197, |
|
"learning_rate": 1.9925251632723396e-05, |
|
"loss": 1.6805, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.0390861083658877, |
|
"grad_norm": 0.2671707570552826, |
|
"learning_rate": 1.9924717386721964e-05, |
|
"loss": 1.6489, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.03922520483694068, |
|
"grad_norm": 0.5683106780052185, |
|
"learning_rate": 1.9924181245528898e-05, |
|
"loss": 1.6548, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.03936430130799366, |
|
"grad_norm": 0.3212202787399292, |
|
"learning_rate": 1.9923643209246575e-05, |
|
"loss": 1.6416, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.03950339777904665, |
|
"grad_norm": 0.2874471843242645, |
|
"learning_rate": 1.9923103277977735e-05, |
|
"loss": 1.6848, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.03964249425009963, |
|
"grad_norm": 0.38741543889045715, |
|
"learning_rate": 1.992256145182548e-05, |
|
"loss": 1.7068, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.03978159072115261, |
|
"grad_norm": 1.436324954032898, |
|
"learning_rate": 1.9922017730893278e-05, |
|
"loss": 1.6259, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.03992068719220559, |
|
"grad_norm": 0.3459469676017761, |
|
"learning_rate": 1.9921472115284957e-05, |
|
"loss": 1.7207, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.040059783663258575, |
|
"grad_norm": 1.227866768836975, |
|
"learning_rate": 1.9920924605104708e-05, |
|
"loss": 1.6709, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.040198880134311554, |
|
"grad_norm": 0.35972604155540466, |
|
"learning_rate": 1.9920375200457074e-05, |
|
"loss": 1.6679, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.040337976605364534, |
|
"grad_norm": 0.32493817806243896, |
|
"learning_rate": 1.9919823901446976e-05, |
|
"loss": 1.6326, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.040477073076417514, |
|
"grad_norm": 0.4151223599910736, |
|
"learning_rate": 1.9919270708179682e-05, |
|
"loss": 1.709, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.04061616954747049, |
|
"grad_norm": 0.4418564438819885, |
|
"learning_rate": 1.991871562076083e-05, |
|
"loss": 1.6295, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.04075526601852348, |
|
"grad_norm": 0.826331377029419, |
|
"learning_rate": 1.991815863929642e-05, |
|
"loss": 1.6223, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.04089436248957646, |
|
"grad_norm": 0.592551589012146, |
|
"learning_rate": 1.991759976389281e-05, |
|
"loss": 1.6334, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.04103345896062944, |
|
"grad_norm": 0.5953264832496643, |
|
"learning_rate": 1.9917038994656715e-05, |
|
"loss": 1.7214, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.04117255543168242, |
|
"grad_norm": 0.37218430638313293, |
|
"learning_rate": 1.9916476331695228e-05, |
|
"loss": 1.6912, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.041311651902735405, |
|
"grad_norm": 0.5334930419921875, |
|
"learning_rate": 1.9915911775115785e-05, |
|
"loss": 1.6132, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.041450748373788385, |
|
"grad_norm": 0.2905101180076599, |
|
"learning_rate": 1.9915345325026195e-05, |
|
"loss": 1.6618, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.041589844844841364, |
|
"grad_norm": 0.3982454240322113, |
|
"learning_rate": 1.9914776981534633e-05, |
|
"loss": 1.6848, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.041728941315894344, |
|
"grad_norm": 0.4498521387577057, |
|
"learning_rate": 1.9914206744749612e-05, |
|
"loss": 1.6858, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.041868037786947324, |
|
"grad_norm": 0.4296523630619049, |
|
"learning_rate": 1.991363461478004e-05, |
|
"loss": 1.6255, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.04200713425800031, |
|
"grad_norm": 0.35572549700737, |
|
"learning_rate": 1.991306059173515e-05, |
|
"loss": 1.7085, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.04214623072905329, |
|
"grad_norm": 0.45710158348083496, |
|
"learning_rate": 1.9912484675724575e-05, |
|
"loss": 1.6886, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.04228532720010627, |
|
"grad_norm": 0.6244034171104431, |
|
"learning_rate": 1.9911906866858276e-05, |
|
"loss": 1.6668, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.04242442367115925, |
|
"grad_norm": 0.3790908753871918, |
|
"learning_rate": 1.9911327165246598e-05, |
|
"loss": 1.6264, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.04256352014221223, |
|
"grad_norm": 0.37423181533813477, |
|
"learning_rate": 1.991074557100023e-05, |
|
"loss": 1.6972, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.042702616613265215, |
|
"grad_norm": 0.3610687255859375, |
|
"learning_rate": 1.991016208423024e-05, |
|
"loss": 1.7148, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.042841713084318195, |
|
"grad_norm": 0.36563995480537415, |
|
"learning_rate": 1.9909576705048048e-05, |
|
"loss": 1.7165, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.042980809555371174, |
|
"grad_norm": 0.6822441816329956, |
|
"learning_rate": 1.990898943356543e-05, |
|
"loss": 1.6602, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.043119906026424154, |
|
"grad_norm": 0.5291351675987244, |
|
"learning_rate": 1.9908400269894534e-05, |
|
"loss": 1.6277, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.04325900249747714, |
|
"grad_norm": 0.5071974992752075, |
|
"learning_rate": 1.9907809214147863e-05, |
|
"loss": 1.6528, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.04339809896853012, |
|
"grad_norm": 0.3188241422176361, |
|
"learning_rate": 1.990721626643828e-05, |
|
"loss": 1.6864, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.0435371954395831, |
|
"grad_norm": 0.323962926864624, |
|
"learning_rate": 1.9906621426879026e-05, |
|
"loss": 1.7026, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.04367629191063608, |
|
"grad_norm": 0.3724607527256012, |
|
"learning_rate": 1.9906024695583674e-05, |
|
"loss": 1.6503, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.04381538838168906, |
|
"grad_norm": 0.6592810750007629, |
|
"learning_rate": 1.990542607266618e-05, |
|
"loss": 1.6498, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.043954484852742046, |
|
"grad_norm": 0.7135905027389526, |
|
"learning_rate": 1.9904825558240853e-05, |
|
"loss": 1.6419, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.044093581323795025, |
|
"grad_norm": 0.5441883206367493, |
|
"learning_rate": 1.990422315242237e-05, |
|
"loss": 1.6208, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.044232677794848005, |
|
"grad_norm": 0.6606343984603882, |
|
"learning_rate": 1.9903618855325762e-05, |
|
"loss": 1.7041, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.044371774265900985, |
|
"grad_norm": 0.26841533184051514, |
|
"learning_rate": 1.990301266706642e-05, |
|
"loss": 1.6535, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.04451087073695397, |
|
"grad_norm": 0.5965221524238586, |
|
"learning_rate": 1.9902404587760108e-05, |
|
"loss": 1.6738, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.04464996720800695, |
|
"grad_norm": 0.7239671349525452, |
|
"learning_rate": 1.9901794617522933e-05, |
|
"loss": 1.6598, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.04478906367905993, |
|
"grad_norm": 0.505463182926178, |
|
"learning_rate": 1.990118275647138e-05, |
|
"loss": 1.6669, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.08985625780678401, |
|
"grad_norm": 0.28192538022994995, |
|
"learning_rate": 1.9604251136197576e-05, |
|
"loss": 1.6758, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.09013445055541182, |
|
"grad_norm": 0.2823716402053833, |
|
"learning_rate": 1.9601813114871574e-05, |
|
"loss": 1.6962, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.09041264330403964, |
|
"grad_norm": 0.19964300096035004, |
|
"learning_rate": 1.9599367759381548e-05, |
|
"loss": 1.6809, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.09069083605266745, |
|
"grad_norm": 0.3048658072948456, |
|
"learning_rate": 1.9596915071595334e-05, |
|
"loss": 1.6526, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.09096902880129526, |
|
"grad_norm": 0.21376414597034454, |
|
"learning_rate": 1.9594455053386376e-05, |
|
"loss": 1.6969, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.09124722154992308, |
|
"grad_norm": 0.46542856097221375, |
|
"learning_rate": 1.9591987706633712e-05, |
|
"loss": 1.7277, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.09152541429855089, |
|
"grad_norm": 0.35781970620155334, |
|
"learning_rate": 1.9589513033221976e-05, |
|
"loss": 1.674, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.09180360704717871, |
|
"grad_norm": 0.36667361855506897, |
|
"learning_rate": 1.9587031035041403e-05, |
|
"loss": 1.6689, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.09208179979580652, |
|
"grad_norm": 0.28317973017692566, |
|
"learning_rate": 1.9584541713987823e-05, |
|
"loss": 1.6812, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.09235999254443433, |
|
"grad_norm": 0.3756510615348816, |
|
"learning_rate": 1.9582045071962648e-05, |
|
"loss": 1.712, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.09263818529306216, |
|
"grad_norm": 0.29829922318458557, |
|
"learning_rate": 1.9579541110872903e-05, |
|
"loss": 1.7115, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.09291637804168996, |
|
"grad_norm": 0.2587492763996124, |
|
"learning_rate": 1.957702983263118e-05, |
|
"loss": 1.7049, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.09319457079031777, |
|
"grad_norm": 0.220120370388031, |
|
"learning_rate": 1.9574511239155677e-05, |
|
"loss": 1.6979, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.0934727635389456, |
|
"grad_norm": 0.4158194959163666, |
|
"learning_rate": 1.9571985332370176e-05, |
|
"loss": 1.715, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.0937509562875734, |
|
"grad_norm": 0.3217870593070984, |
|
"learning_rate": 1.956945211420404e-05, |
|
"loss": 1.6753, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.09402914903620123, |
|
"grad_norm": 0.3095886707305908, |
|
"learning_rate": 1.956691158659222e-05, |
|
"loss": 1.6879, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.09430734178482904, |
|
"grad_norm": 0.31036627292633057, |
|
"learning_rate": 1.956436375147525e-05, |
|
"loss": 1.6815, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.09458553453345685, |
|
"grad_norm": 0.2732967138290405, |
|
"learning_rate": 1.9561808610799255e-05, |
|
"loss": 1.7006, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.09486372728208467, |
|
"grad_norm": 0.465425044298172, |
|
"learning_rate": 1.9559246166515917e-05, |
|
"loss": 1.6962, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.09514192003071248, |
|
"grad_norm": 0.2841686010360718, |
|
"learning_rate": 1.9556676420582517e-05, |
|
"loss": 1.6894, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.09542011277934029, |
|
"grad_norm": 0.30286672711372375, |
|
"learning_rate": 1.955409937496191e-05, |
|
"loss": 1.6704, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.09569830552796811, |
|
"grad_norm": 0.2298436164855957, |
|
"learning_rate": 1.955151503162252e-05, |
|
"loss": 1.7096, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.09597649827659592, |
|
"grad_norm": 0.24763423204421997, |
|
"learning_rate": 1.9548923392538346e-05, |
|
"loss": 1.7543, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.09625469102522374, |
|
"grad_norm": 0.31084269285202026, |
|
"learning_rate": 1.9546324459688967e-05, |
|
"loss": 1.6615, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.09653288377385155, |
|
"grad_norm": 0.43397852778434753, |
|
"learning_rate": 1.954371823505953e-05, |
|
"loss": 1.7036, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.09681107652247936, |
|
"grad_norm": 0.31707778573036194, |
|
"learning_rate": 1.954110472064075e-05, |
|
"loss": 1.6716, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.09708926927110718, |
|
"grad_norm": 0.7093697190284729, |
|
"learning_rate": 1.953848391842891e-05, |
|
"loss": 1.7113, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.097367462019735, |
|
"grad_norm": 0.2799917161464691, |
|
"learning_rate": 1.9535855830425857e-05, |
|
"loss": 1.704, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.0976456547683628, |
|
"grad_norm": 0.3946918845176697, |
|
"learning_rate": 1.9533220458639013e-05, |
|
"loss": 1.7351, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.09792384751699063, |
|
"grad_norm": 0.2851293981075287, |
|
"learning_rate": 1.953057780508135e-05, |
|
"loss": 1.686, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.09820204026561843, |
|
"grad_norm": 0.32784244418144226, |
|
"learning_rate": 1.9527927871771416e-05, |
|
"loss": 1.7294, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.09848023301424626, |
|
"grad_norm": 0.3254486918449402, |
|
"learning_rate": 1.9525270660733304e-05, |
|
"loss": 1.7089, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.09875842576287407, |
|
"grad_norm": 0.35317400097846985, |
|
"learning_rate": 1.9522606173996683e-05, |
|
"loss": 1.7065, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.09903661851150188, |
|
"grad_norm": 0.43726846575737, |
|
"learning_rate": 1.9519934413596768e-05, |
|
"loss": 1.6938, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.0993148112601297, |
|
"grad_norm": 0.3208189904689789, |
|
"learning_rate": 1.9517255381574337e-05, |
|
"loss": 1.6987, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.09959300400875751, |
|
"grad_norm": 0.5465477108955383, |
|
"learning_rate": 1.9514569079975705e-05, |
|
"loss": 1.6968, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.09987119675738532, |
|
"grad_norm": 0.3042430877685547, |
|
"learning_rate": 1.951187551085277e-05, |
|
"loss": 1.6851, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.10014938950601314, |
|
"grad_norm": 1.3345911502838135, |
|
"learning_rate": 1.950917467626295e-05, |
|
"loss": 1.747, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.10042758225464095, |
|
"grad_norm": 0.37963107228279114, |
|
"learning_rate": 1.9506466578269238e-05, |
|
"loss": 1.6539, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.10070577500326877, |
|
"grad_norm": 0.2997443974018097, |
|
"learning_rate": 1.9503751218940152e-05, |
|
"loss": 1.6752, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.10098396775189658, |
|
"grad_norm": 0.29101336002349854, |
|
"learning_rate": 1.9501028600349775e-05, |
|
"loss": 1.6912, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.10126216050052439, |
|
"grad_norm": 0.32952240109443665, |
|
"learning_rate": 1.949829872457773e-05, |
|
"loss": 1.6751, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.10154035324915221, |
|
"grad_norm": 0.6925126910209656, |
|
"learning_rate": 1.9495561593709174e-05, |
|
"loss": 1.6948, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.10181854599778002, |
|
"grad_norm": 0.3063753545284271, |
|
"learning_rate": 1.949281720983482e-05, |
|
"loss": 1.6583, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.10209673874640783, |
|
"grad_norm": 0.1839706301689148, |
|
"learning_rate": 1.949006557505091e-05, |
|
"loss": 1.7303, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.10237493149503565, |
|
"grad_norm": 0.3848772943019867, |
|
"learning_rate": 1.948730669145923e-05, |
|
"loss": 1.6642, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.10265312424366346, |
|
"grad_norm": 0.3976518213748932, |
|
"learning_rate": 1.94845405611671e-05, |
|
"loss": 1.7244, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.10293131699229129, |
|
"grad_norm": 0.4992425739765167, |
|
"learning_rate": 1.9481767186287377e-05, |
|
"loss": 1.7002, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.1032095097409191, |
|
"grad_norm": 0.36821067333221436, |
|
"learning_rate": 1.947898656893845e-05, |
|
"loss": 1.7047, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.1034877024895469, |
|
"grad_norm": 0.5008091926574707, |
|
"learning_rate": 1.9476198711244245e-05, |
|
"loss": 1.6921, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.10376589523817473, |
|
"grad_norm": 0.4330016076564789, |
|
"learning_rate": 1.947340361533421e-05, |
|
"loss": 1.7168, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.10404408798680254, |
|
"grad_norm": 0.342879980802536, |
|
"learning_rate": 1.9470601283343328e-05, |
|
"loss": 1.7438, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.10432228073543034, |
|
"grad_norm": 0.4086737632751465, |
|
"learning_rate": 1.9467791717412107e-05, |
|
"loss": 1.6795, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.10460047348405817, |
|
"grad_norm": 0.3051975965499878, |
|
"learning_rate": 1.9464974919686585e-05, |
|
"loss": 1.715, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.10487866623268598, |
|
"grad_norm": 0.40126869082450867, |
|
"learning_rate": 1.9462150892318315e-05, |
|
"loss": 1.7332, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.1051568589813138, |
|
"grad_norm": 0.2848926782608032, |
|
"learning_rate": 1.9459319637464376e-05, |
|
"loss": 1.715, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.10543505172994161, |
|
"grad_norm": 0.40455424785614014, |
|
"learning_rate": 1.945648115728737e-05, |
|
"loss": 1.7237, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.10571324447856942, |
|
"grad_norm": 0.40959736704826355, |
|
"learning_rate": 1.9453635453955425e-05, |
|
"loss": 1.7048, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.10599143722719724, |
|
"grad_norm": 0.30568796396255493, |
|
"learning_rate": 1.9450782529642164e-05, |
|
"loss": 1.7276, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.10626962997582505, |
|
"grad_norm": 0.29840388894081116, |
|
"learning_rate": 1.9447922386526752e-05, |
|
"loss": 1.7084, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.10654782272445286, |
|
"grad_norm": 0.2814345359802246, |
|
"learning_rate": 1.9445055026793846e-05, |
|
"loss": 1.6686, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.10682601547308068, |
|
"grad_norm": 0.5545098185539246, |
|
"learning_rate": 1.9442180452633628e-05, |
|
"loss": 1.6811, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.10710420822170849, |
|
"grad_norm": 0.18126177787780762, |
|
"learning_rate": 1.943929866624179e-05, |
|
"loss": 1.6961, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.10738240097033631, |
|
"grad_norm": 0.5672417879104614, |
|
"learning_rate": 1.9436409669819527e-05, |
|
"loss": 1.7022, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.10766059371896412, |
|
"grad_norm": 0.26212358474731445, |
|
"learning_rate": 1.9433513465573545e-05, |
|
"loss": 1.6836, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.10793878646759193, |
|
"grad_norm": 0.24932098388671875, |
|
"learning_rate": 1.9430610055716056e-05, |
|
"loss": 1.7245, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.10821697921621976, |
|
"grad_norm": 0.26785990595817566, |
|
"learning_rate": 1.9427699442464774e-05, |
|
"loss": 1.7242, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.10849517196484756, |
|
"grad_norm": 0.310847669839859, |
|
"learning_rate": 1.9424781628042915e-05, |
|
"loss": 1.736, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.10877336471347537, |
|
"grad_norm": 0.2551518380641937, |
|
"learning_rate": 1.94218566146792e-05, |
|
"loss": 1.706, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.1090515574621032, |
|
"grad_norm": 0.3088129162788391, |
|
"learning_rate": 1.9418924404607842e-05, |
|
"loss": 1.7089, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.109329750210731, |
|
"grad_norm": 0.3498319983482361, |
|
"learning_rate": 1.9415985000068556e-05, |
|
"loss": 1.6846, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.10960794295935881, |
|
"grad_norm": 0.2993554174900055, |
|
"learning_rate": 1.941303840330655e-05, |
|
"loss": 1.7197, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.10988613570798664, |
|
"grad_norm": 0.23916451632976532, |
|
"learning_rate": 1.941008461657253e-05, |
|
"loss": 1.6994, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.11016432845661445, |
|
"grad_norm": 0.5682498216629028, |
|
"learning_rate": 1.9407123642122686e-05, |
|
"loss": 1.6827, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.11044252120524227, |
|
"grad_norm": 0.25092509388923645, |
|
"learning_rate": 1.9404155482218703e-05, |
|
"loss": 1.6798, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.11072071395387008, |
|
"grad_norm": 0.4485166072845459, |
|
"learning_rate": 1.9401180139127755e-05, |
|
"loss": 1.6785, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.11099890670249789, |
|
"grad_norm": 0.33716893196105957, |
|
"learning_rate": 1.9398197615122504e-05, |
|
"loss": 1.6885, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.11127709945112571, |
|
"grad_norm": 0.2410348504781723, |
|
"learning_rate": 1.939520791248109e-05, |
|
"loss": 1.7109, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.11155529219975352, |
|
"grad_norm": 0.37753862142562866, |
|
"learning_rate": 1.939221103348714e-05, |
|
"loss": 1.6944, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.11183348494838133, |
|
"grad_norm": 0.31450897455215454, |
|
"learning_rate": 1.938920698042977e-05, |
|
"loss": 1.6899, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.11211167769700915, |
|
"grad_norm": 0.31832966208457947, |
|
"learning_rate": 1.9386195755603568e-05, |
|
"loss": 1.6958, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.11238987044563696, |
|
"grad_norm": 0.24022148549556732, |
|
"learning_rate": 1.9383177361308597e-05, |
|
"loss": 1.7042, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.11266806319426478, |
|
"grad_norm": 0.3811296820640564, |
|
"learning_rate": 1.9380151799850404e-05, |
|
"loss": 1.699, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.11294625594289259, |
|
"grad_norm": 0.23990687727928162, |
|
"learning_rate": 1.9377119073540007e-05, |
|
"loss": 1.7091, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.1132244486915204, |
|
"grad_norm": 0.265990287065506, |
|
"learning_rate": 1.9374079184693898e-05, |
|
"loss": 1.6931, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.11350264144014822, |
|
"grad_norm": 0.2768077850341797, |
|
"learning_rate": 1.9371032135634033e-05, |
|
"loss": 1.7058, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.11378083418877603, |
|
"grad_norm": 0.6246830821037292, |
|
"learning_rate": 1.936797792868785e-05, |
|
"loss": 1.7078, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.11405902693740384, |
|
"grad_norm": 0.28057006001472473, |
|
"learning_rate": 1.9364916566188242e-05, |
|
"loss": 1.712, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.11433721968603167, |
|
"grad_norm": 0.31619200110435486, |
|
"learning_rate": 1.9361848050473578e-05, |
|
"loss": 1.7087, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.11461541243465947, |
|
"grad_norm": 0.45818835496902466, |
|
"learning_rate": 1.9358772383887683e-05, |
|
"loss": 1.672, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.1148936051832873, |
|
"grad_norm": 0.2376299947500229, |
|
"learning_rate": 1.9355689568779844e-05, |
|
"loss": 1.678, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.11517179793191511, |
|
"grad_norm": 0.28687921166419983, |
|
"learning_rate": 1.935259960750482e-05, |
|
"loss": 1.6904, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.11544999068054292, |
|
"grad_norm": 0.3678719997406006, |
|
"learning_rate": 1.934950250242281e-05, |
|
"loss": 1.6848, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.11572818342917074, |
|
"grad_norm": 0.3978765606880188, |
|
"learning_rate": 1.9346398255899485e-05, |
|
"loss": 1.6557, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.11600637617779855, |
|
"grad_norm": 0.34782731533050537, |
|
"learning_rate": 1.9343286870305964e-05, |
|
"loss": 1.6658, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.11628456892642636, |
|
"grad_norm": 0.384232759475708, |
|
"learning_rate": 1.9340168348018822e-05, |
|
"loss": 1.7144, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.11656276167505418, |
|
"grad_norm": 0.6080924272537231, |
|
"learning_rate": 1.933704269142008e-05, |
|
"loss": 1.6828, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.11684095442368199, |
|
"grad_norm": 0.3429834246635437, |
|
"learning_rate": 1.9333909902897212e-05, |
|
"loss": 1.7374, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.11711914717230981, |
|
"grad_norm": 0.34908148646354675, |
|
"learning_rate": 1.9330769984843144e-05, |
|
"loss": 1.7273, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.11739733992093762, |
|
"grad_norm": 0.47220101952552795, |
|
"learning_rate": 1.932762293965624e-05, |
|
"loss": 1.6758, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.11767553266956543, |
|
"grad_norm": 0.5649632215499878, |
|
"learning_rate": 1.9324468769740307e-05, |
|
"loss": 1.6967, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.11795372541819325, |
|
"grad_norm": 0.3771503269672394, |
|
"learning_rate": 1.932130747750461e-05, |
|
"loss": 1.7156, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.11823191816682106, |
|
"grad_norm": 0.3423559367656708, |
|
"learning_rate": 1.9318139065363826e-05, |
|
"loss": 1.6854, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.11851011091544887, |
|
"grad_norm": 0.4594859182834625, |
|
"learning_rate": 1.93149635357381e-05, |
|
"loss": 1.7195, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.1187883036640767, |
|
"grad_norm": 0.29249799251556396, |
|
"learning_rate": 1.9311780891052998e-05, |
|
"loss": 1.6567, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.1190664964127045, |
|
"grad_norm": 0.23755620419979095, |
|
"learning_rate": 1.930859113373952e-05, |
|
"loss": 1.7054, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.11934468916133233, |
|
"grad_norm": 0.29518914222717285, |
|
"learning_rate": 1.9305394266234104e-05, |
|
"loss": 1.6406, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.11962288190996014, |
|
"grad_norm": 0.5197004675865173, |
|
"learning_rate": 1.9302190290978622e-05, |
|
"loss": 1.6807, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.11990107465858794, |
|
"grad_norm": 0.2740679979324341, |
|
"learning_rate": 1.929897921042036e-05, |
|
"loss": 1.6977, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.12017926740721577, |
|
"grad_norm": 0.33021771907806396, |
|
"learning_rate": 1.9295761027012046e-05, |
|
"loss": 1.6943, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.12045746015584358, |
|
"grad_norm": 0.32778891921043396, |
|
"learning_rate": 1.929253574321183e-05, |
|
"loss": 1.6941, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.12073565290447139, |
|
"grad_norm": 0.3531610369682312, |
|
"learning_rate": 1.9289303361483284e-05, |
|
"loss": 1.7031, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.12101384565309921, |
|
"grad_norm": 0.4716193377971649, |
|
"learning_rate": 1.9286063884295397e-05, |
|
"loss": 1.668, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.12129203840172702, |
|
"grad_norm": 0.35484832525253296, |
|
"learning_rate": 1.928281731412259e-05, |
|
"loss": 1.7128, |
|
"step": 87200 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 718926, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7288134689589985e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|