|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 563148, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0026635982015384943, |
|
"grad_norm": 0.2271278351545334, |
|
"learning_rate": 0.0001996, |
|
"loss": 8.7148, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.005327196403076989, |
|
"grad_norm": 0.448383092880249, |
|
"learning_rate": 0.0003996, |
|
"loss": 7.4094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.007990794604615483, |
|
"grad_norm": 0.46370673179626465, |
|
"learning_rate": 0.0005996, |
|
"loss": 7.1049, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.010654392806153977, |
|
"grad_norm": 0.7845134735107422, |
|
"learning_rate": 0.0007996, |
|
"loss": 6.8619, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.013317991007692471, |
|
"grad_norm": 0.7677924036979675, |
|
"learning_rate": 0.0009996, |
|
"loss": 6.7206, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.015981589209230967, |
|
"grad_norm": 0.7272828817367554, |
|
"learning_rate": 0.0009991099584766199, |
|
"loss": 6.6171, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01864518741076946, |
|
"grad_norm": 0.7266383171081543, |
|
"learning_rate": 0.0009982181333028923, |
|
"loss": 6.4961, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.021308785612307955, |
|
"grad_norm": 0.8149316310882568, |
|
"learning_rate": 0.0009973263081291647, |
|
"loss": 6.3995, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02397238381384645, |
|
"grad_norm": 0.8527867794036865, |
|
"learning_rate": 0.0009964344829554372, |
|
"loss": 6.3342, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.026635982015384942, |
|
"grad_norm": 1.2359241247177124, |
|
"learning_rate": 0.0009955444414320573, |
|
"loss": 6.2305, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.029299580216923436, |
|
"grad_norm": 1.1131370067596436, |
|
"learning_rate": 0.0009946526162583297, |
|
"loss": 6.0731, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.031963178418461934, |
|
"grad_norm": 1.185133457183838, |
|
"learning_rate": 0.0009937607910846021, |
|
"loss": 5.9349, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.034626776620000424, |
|
"grad_norm": 1.201166033744812, |
|
"learning_rate": 0.0009928689659108746, |
|
"loss": 5.7587, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.03729037482153892, |
|
"grad_norm": 1.2446848154067993, |
|
"learning_rate": 0.0009919789243874944, |
|
"loss": 5.6453, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03995397302307741, |
|
"grad_norm": 1.2813904285430908, |
|
"learning_rate": 0.0009910870992137668, |
|
"loss": 5.5547, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.04261757122461591, |
|
"grad_norm": 0.9883731007575989, |
|
"learning_rate": 0.0009901952740400395, |
|
"loss": 5.3078, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.045281169426154406, |
|
"grad_norm": 0.9527985453605652, |
|
"learning_rate": 0.000989303448866312, |
|
"loss": 5.1301, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.0479447676276929, |
|
"grad_norm": 0.9772309064865112, |
|
"learning_rate": 0.0009884134073429318, |
|
"loss": 5.0381, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.050608365829231394, |
|
"grad_norm": 1.0352524518966675, |
|
"learning_rate": 0.0009875215821692042, |
|
"loss": 4.9814, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.053271964030769885, |
|
"grad_norm": 0.8517736196517944, |
|
"learning_rate": 0.0009866297569954767, |
|
"loss": 4.9238, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.05593556223230838, |
|
"grad_norm": 0.9034407138824463, |
|
"learning_rate": 0.000985737931821749, |
|
"loss": 4.8745, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.05859916043384687, |
|
"grad_norm": 0.8332895636558533, |
|
"learning_rate": 0.0009848461066480215, |
|
"loss": 4.845, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.06126275863538537, |
|
"grad_norm": 0.8637209534645081, |
|
"learning_rate": 0.0009839560651246416, |
|
"loss": 4.8014, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.06392635683692387, |
|
"grad_norm": 0.8696839213371277, |
|
"learning_rate": 0.000983064239950914, |
|
"loss": 4.7803, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.06658995503846236, |
|
"grad_norm": 0.8878291249275208, |
|
"learning_rate": 0.0009821724147771865, |
|
"loss": 4.7629, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.06925355324000085, |
|
"grad_norm": 0.8268778324127197, |
|
"learning_rate": 0.000981280589603459, |
|
"loss": 4.7312, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.07191715144153935, |
|
"grad_norm": 0.884635329246521, |
|
"learning_rate": 0.0009803887644297313, |
|
"loss": 4.7146, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.07458074964307784, |
|
"grad_norm": 0.7639057636260986, |
|
"learning_rate": 0.0009794969392560038, |
|
"loss": 4.6961, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.07724434784461634, |
|
"grad_norm": 0.8192263245582581, |
|
"learning_rate": 0.0009786051140822762, |
|
"loss": 4.6766, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.07990794604615482, |
|
"grad_norm": 0.8075643181800842, |
|
"learning_rate": 0.0009777132889085486, |
|
"loss": 4.6582, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.08257154424769332, |
|
"grad_norm": 0.7193809151649475, |
|
"learning_rate": 0.0009768232473851685, |
|
"loss": 4.655, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.08523514244923182, |
|
"grad_norm": 0.8761749267578125, |
|
"learning_rate": 0.000975931422211441, |
|
"loss": 4.6378, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.08789874065077032, |
|
"grad_norm": 0.8616175055503845, |
|
"learning_rate": 0.0009750395970377135, |
|
"loss": 4.6265, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.09056233885230881, |
|
"grad_norm": 0.8099841475486755, |
|
"learning_rate": 0.000974147771863986, |
|
"loss": 4.6079, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.0932259370538473, |
|
"grad_norm": 0.811244010925293, |
|
"learning_rate": 0.000973257730340606, |
|
"loss": 4.5949, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.0958895352553858, |
|
"grad_norm": 0.8826119303703308, |
|
"learning_rate": 0.0009723659051668784, |
|
"loss": 4.589, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.09855313345692429, |
|
"grad_norm": 0.8135235905647278, |
|
"learning_rate": 0.0009714740799931508, |
|
"loss": 4.5715, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.10121673165846279, |
|
"grad_norm": 0.8390595316886902, |
|
"learning_rate": 0.0009705822548194233, |
|
"loss": 4.5581, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.10388032986000127, |
|
"grad_norm": 0.7602077126502991, |
|
"learning_rate": 0.0009696922132960431, |
|
"loss": 4.5527, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.10654392806153977, |
|
"grad_norm": 0.8945237994194031, |
|
"learning_rate": 0.0009688003881223157, |
|
"loss": 4.5301, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.10920752626307827, |
|
"grad_norm": 0.6963039040565491, |
|
"learning_rate": 0.0009679085629485881, |
|
"loss": 4.5186, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.11187112446461676, |
|
"grad_norm": 0.7871098518371582, |
|
"learning_rate": 0.0009670167377748605, |
|
"loss": 4.5069, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.11453472266615526, |
|
"grad_norm": 0.7853402495384216, |
|
"learning_rate": 0.000966124912601133, |
|
"loss": 4.4966, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.11719832086769374, |
|
"grad_norm": 0.7557271718978882, |
|
"learning_rate": 0.0009652348710777528, |
|
"loss": 4.4857, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.11986191906923224, |
|
"grad_norm": 0.7256771326065063, |
|
"learning_rate": 0.0009643430459040254, |
|
"loss": 4.4756, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.12252551727077074, |
|
"grad_norm": 0.7980550527572632, |
|
"learning_rate": 0.0009634512207302978, |
|
"loss": 4.4726, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.12518911547230924, |
|
"grad_norm": 0.7480477690696716, |
|
"learning_rate": 0.0009625593955565702, |
|
"loss": 4.4558, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.12785271367384773, |
|
"grad_norm": 0.7309882044792175, |
|
"learning_rate": 0.0009616675703828427, |
|
"loss": 4.4546, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13051631187538623, |
|
"grad_norm": 0.8072414398193359, |
|
"learning_rate": 0.0009607775288594626, |
|
"loss": 4.4408, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.13317991007692473, |
|
"grad_norm": 0.7929727435112, |
|
"learning_rate": 0.0009598857036857352, |
|
"loss": 4.4436, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.1358435082784632, |
|
"grad_norm": 0.7073729038238525, |
|
"learning_rate": 0.0009589938785120076, |
|
"loss": 4.4261, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.1385071064800017, |
|
"grad_norm": 0.7210267782211304, |
|
"learning_rate": 0.00095810205333828, |
|
"loss": 4.425, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.1411707046815402, |
|
"grad_norm": 0.6783360838890076, |
|
"learning_rate": 0.0009572102281645525, |
|
"loss": 4.4123, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.1438343028830787, |
|
"grad_norm": 0.7039027214050293, |
|
"learning_rate": 0.0009563184029908249, |
|
"loss": 4.414, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.1464979010846172, |
|
"grad_norm": 0.7899590730667114, |
|
"learning_rate": 0.0009554265778170974, |
|
"loss": 4.3951, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.14916149928615569, |
|
"grad_norm": 0.7651330828666687, |
|
"learning_rate": 0.0009545347526433699, |
|
"loss": 4.3997, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.15182509748769418, |
|
"grad_norm": 0.8091022372245789, |
|
"learning_rate": 0.0009536447111199897, |
|
"loss": 4.3865, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.15448869568923268, |
|
"grad_norm": 0.7238765954971313, |
|
"learning_rate": 0.0009527528859462622, |
|
"loss": 4.3845, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.15715229389077118, |
|
"grad_norm": 0.7803590893745422, |
|
"learning_rate": 0.0009518610607725346, |
|
"loss": 4.3805, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.15981589209230965, |
|
"grad_norm": 0.778491735458374, |
|
"learning_rate": 0.0009509692355988071, |
|
"loss": 4.3794, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.16247949029384814, |
|
"grad_norm": 0.7399048209190369, |
|
"learning_rate": 0.000950079194075427, |
|
"loss": 4.3795, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.16514308849538664, |
|
"grad_norm": 0.7823745012283325, |
|
"learning_rate": 0.0009491873689016994, |
|
"loss": 4.3782, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.16780668669692514, |
|
"grad_norm": 0.7693122029304504, |
|
"learning_rate": 0.0009482955437279719, |
|
"loss": 4.3612, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.17047028489846364, |
|
"grad_norm": 0.7326549887657166, |
|
"learning_rate": 0.0009474037185542443, |
|
"loss": 4.3658, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.17313388310000213, |
|
"grad_norm": 0.6827363967895508, |
|
"learning_rate": 0.0009465136770308644, |
|
"loss": 4.3621, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.17579748130154063, |
|
"grad_norm": 0.7000982761383057, |
|
"learning_rate": 0.0009456218518571368, |
|
"loss": 4.3566, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.17846107950307913, |
|
"grad_norm": 0.7949216365814209, |
|
"learning_rate": 0.0009447300266834092, |
|
"loss": 4.349, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.18112467770461763, |
|
"grad_norm": 0.7766338586807251, |
|
"learning_rate": 0.0009438382015096817, |
|
"loss": 4.3564, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.1837882759061561, |
|
"grad_norm": 0.7235038876533508, |
|
"learning_rate": 0.0009429481599863015, |
|
"loss": 4.3434, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.1864518741076946, |
|
"grad_norm": 0.7254591584205627, |
|
"learning_rate": 0.0009420563348125741, |
|
"loss": 4.3352, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.1891154723092331, |
|
"grad_norm": 0.6868504285812378, |
|
"learning_rate": 0.0009411645096388465, |
|
"loss": 4.34, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.1917790705107716, |
|
"grad_norm": 0.7674193978309631, |
|
"learning_rate": 0.0009402726844651189, |
|
"loss": 4.3333, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.19444266871231008, |
|
"grad_norm": 0.778035581111908, |
|
"learning_rate": 0.0009393826429417389, |
|
"loss": 4.3314, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.19710626691384858, |
|
"grad_norm": 0.7400960922241211, |
|
"learning_rate": 0.0009384908177680113, |
|
"loss": 4.3319, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.19976986511538708, |
|
"grad_norm": 0.7500663995742798, |
|
"learning_rate": 0.0009375989925942838, |
|
"loss": 4.328, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.20243346331692558, |
|
"grad_norm": 0.683749794960022, |
|
"learning_rate": 0.0009367071674205563, |
|
"loss": 4.3268, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.20509706151846407, |
|
"grad_norm": 0.7642583250999451, |
|
"learning_rate": 0.0009358171258971762, |
|
"loss": 4.3269, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.20776065972000254, |
|
"grad_norm": 0.6992856860160828, |
|
"learning_rate": 0.0009349253007234486, |
|
"loss": 4.3218, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.21042425792154104, |
|
"grad_norm": 0.7553698420524597, |
|
"learning_rate": 0.000934033475549721, |
|
"loss": 4.3209, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.21308785612307954, |
|
"grad_norm": 0.6873403787612915, |
|
"learning_rate": 0.0009331416503759935, |
|
"loss": 4.3157, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.21575145432461804, |
|
"grad_norm": 0.7638967633247375, |
|
"learning_rate": 0.0009322516088526134, |
|
"loss": 4.3163, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.21841505252615653, |
|
"grad_norm": 0.6896612048149109, |
|
"learning_rate": 0.0009313597836788859, |
|
"loss": 4.3123, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.22107865072769503, |
|
"grad_norm": 0.7294336557388306, |
|
"learning_rate": 0.0009304679585051583, |
|
"loss": 4.3142, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.22374224892923353, |
|
"grad_norm": 0.7498676776885986, |
|
"learning_rate": 0.0009295761333314307, |
|
"loss": 4.3038, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.22640584713077203, |
|
"grad_norm": 0.7050178647041321, |
|
"learning_rate": 0.0009286860918080507, |
|
"loss": 4.2978, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.22906944533231052, |
|
"grad_norm": 0.7527032494544983, |
|
"learning_rate": 0.0009277942666343233, |
|
"loss": 4.3067, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.231733043533849, |
|
"grad_norm": 0.6919755935668945, |
|
"learning_rate": 0.0009269024414605957, |
|
"loss": 4.295, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.2343966417353875, |
|
"grad_norm": 0.7255104184150696, |
|
"learning_rate": 0.0009260106162868681, |
|
"loss": 4.2946, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.237060239936926, |
|
"grad_norm": 0.6978445649147034, |
|
"learning_rate": 0.000925120574763488, |
|
"loss": 4.2937, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.23972383813846448, |
|
"grad_norm": 0.7008663415908813, |
|
"learning_rate": 0.0009242287495897604, |
|
"loss": 4.2974, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.24238743634000298, |
|
"grad_norm": 0.704937756061554, |
|
"learning_rate": 0.000923336924416033, |
|
"loss": 4.2857, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.24505103454154148, |
|
"grad_norm": 0.7343337535858154, |
|
"learning_rate": 0.0009224450992423054, |
|
"loss": 4.2891, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.24771463274307998, |
|
"grad_norm": 0.7263538241386414, |
|
"learning_rate": 0.0009215550577189252, |
|
"loss": 4.2895, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.2503782309446185, |
|
"grad_norm": 0.7095937728881836, |
|
"learning_rate": 0.0009206632325451977, |
|
"loss": 4.2853, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.25304182914615697, |
|
"grad_norm": 0.7221779823303223, |
|
"learning_rate": 0.0009197714073714701, |
|
"loss": 4.2858, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.25570542734769547, |
|
"grad_norm": 0.7522983551025391, |
|
"learning_rate": 0.0009188795821977425, |
|
"loss": 4.2795, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.25836902554923397, |
|
"grad_norm": 0.7212731838226318, |
|
"learning_rate": 0.0009179895406743626, |
|
"loss": 4.2749, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.26103262375077246, |
|
"grad_norm": 0.75824373960495, |
|
"learning_rate": 0.000917097715500635, |
|
"loss": 4.2738, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.26369622195231096, |
|
"grad_norm": 0.7861409783363342, |
|
"learning_rate": 0.0009162058903269075, |
|
"loss": 4.2781, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.26635982015384946, |
|
"grad_norm": 0.7585176229476929, |
|
"learning_rate": 0.0009153140651531799, |
|
"loss": 4.2742, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.2690234183553879, |
|
"grad_norm": 0.7468889951705933, |
|
"learning_rate": 0.0009144240236297998, |
|
"loss": 4.2779, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.2716870165569264, |
|
"grad_norm": 0.7378383278846741, |
|
"learning_rate": 0.0009135321984560723, |
|
"loss": 4.2724, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.2743506147584649, |
|
"grad_norm": 0.6867294907569885, |
|
"learning_rate": 0.0009126403732823447, |
|
"loss": 4.2753, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.2770142129600034, |
|
"grad_norm": 0.6850928068161011, |
|
"learning_rate": 0.0009117485481086172, |
|
"loss": 4.2718, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.2796778111615419, |
|
"grad_norm": 0.7450153827667236, |
|
"learning_rate": 0.000910858506585237, |
|
"loss": 4.2711, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.2823414093630804, |
|
"grad_norm": 0.7175604104995728, |
|
"learning_rate": 0.0009099666814115095, |
|
"loss": 4.2636, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.2850050075646189, |
|
"grad_norm": 0.7004239559173584, |
|
"learning_rate": 0.000909074856237782, |
|
"loss": 4.273, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.2876686057661574, |
|
"grad_norm": 0.7755109667778015, |
|
"learning_rate": 0.0009081830310640544, |
|
"loss": 4.262, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.2903322039676959, |
|
"grad_norm": 0.7420957684516907, |
|
"learning_rate": 0.0009072929895406744, |
|
"loss": 4.2703, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.2929958021692344, |
|
"grad_norm": 0.7163523435592651, |
|
"learning_rate": 0.0009064011643669468, |
|
"loss": 4.265, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.2956594003707729, |
|
"grad_norm": 0.7003483176231384, |
|
"learning_rate": 0.0009055093391932193, |
|
"loss": 4.2529, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.29832299857231137, |
|
"grad_norm": 0.7118489742279053, |
|
"learning_rate": 0.0009046175140194918, |
|
"loss": 4.2556, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.30098659677384987, |
|
"grad_norm": 0.7034066319465637, |
|
"learning_rate": 0.0009037274724961117, |
|
"loss": 4.2547, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.30365019497538837, |
|
"grad_norm": 0.6700213551521301, |
|
"learning_rate": 0.0009028356473223841, |
|
"loss": 4.2561, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.30631379317692686, |
|
"grad_norm": 0.738164484500885, |
|
"learning_rate": 0.0009019438221486565, |
|
"loss": 4.26, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.30897739137846536, |
|
"grad_norm": 0.7396353483200073, |
|
"learning_rate": 0.000901051996974929, |
|
"loss": 4.2562, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.31164098958000386, |
|
"grad_norm": 0.7478146553039551, |
|
"learning_rate": 0.0009001619554515488, |
|
"loss": 4.25, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.31430458778154235, |
|
"grad_norm": 0.7298335433006287, |
|
"learning_rate": 0.0008992701302778215, |
|
"loss": 4.2562, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.3169681859830808, |
|
"grad_norm": 0.7685016989707947, |
|
"learning_rate": 0.0008983783051040939, |
|
"loss": 4.2551, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.3196317841846193, |
|
"grad_norm": 0.8017458915710449, |
|
"learning_rate": 0.0008974864799303664, |
|
"loss": 4.2481, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.3222953823861578, |
|
"grad_norm": 0.7588088512420654, |
|
"learning_rate": 0.0008965964384069862, |
|
"loss": 4.2537, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.3249589805876963, |
|
"grad_norm": 0.7897168397903442, |
|
"learning_rate": 0.0008957046132332586, |
|
"loss": 4.2427, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.3276225787892348, |
|
"grad_norm": 0.7311574220657349, |
|
"learning_rate": 0.0008948127880595312, |
|
"loss": 4.2518, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.3302861769907733, |
|
"grad_norm": 0.7892371416091919, |
|
"learning_rate": 0.0008939209628858036, |
|
"loss": 4.234, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.3329497751923118, |
|
"grad_norm": 0.6944438815116882, |
|
"learning_rate": 0.0008930309213624235, |
|
"loss": 4.2382, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.3356133733938503, |
|
"grad_norm": 0.7701837420463562, |
|
"learning_rate": 0.0008921390961886959, |
|
"loss": 4.2474, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.3382769715953888, |
|
"grad_norm": 0.7789635062217712, |
|
"learning_rate": 0.0008912472710149683, |
|
"loss": 4.2379, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.3409405697969273, |
|
"grad_norm": 0.7212055921554565, |
|
"learning_rate": 0.0008903554458412409, |
|
"loss": 4.2407, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.34360416799846577, |
|
"grad_norm": 0.7439520359039307, |
|
"learning_rate": 0.0008894654043178609, |
|
"loss": 4.2386, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.34626776620000427, |
|
"grad_norm": 0.6747229695320129, |
|
"learning_rate": 0.0008885735791441333, |
|
"loss": 4.2391, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.34893136440154277, |
|
"grad_norm": 0.7761566638946533, |
|
"learning_rate": 0.0008876817539704057, |
|
"loss": 4.2337, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.35159496260308126, |
|
"grad_norm": 0.7024859189987183, |
|
"learning_rate": 0.0008867899287966782, |
|
"loss": 4.2299, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.35425856080461976, |
|
"grad_norm": 0.7179946303367615, |
|
"learning_rate": 0.000885899887273298, |
|
"loss": 4.2379, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.35692215900615826, |
|
"grad_norm": 0.699834942817688, |
|
"learning_rate": 0.0008850080620995706, |
|
"loss": 4.2321, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.35958575720769675, |
|
"grad_norm": 0.6902332901954651, |
|
"learning_rate": 0.000884116236925843, |
|
"loss": 4.2376, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.36224935540923525, |
|
"grad_norm": 0.7003384232521057, |
|
"learning_rate": 0.0008832244117521154, |
|
"loss": 4.2261, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.36491295361077375, |
|
"grad_norm": 0.7879477739334106, |
|
"learning_rate": 0.0008823343702287353, |
|
"loss": 4.2292, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.3675765518123122, |
|
"grad_norm": 0.6793246269226074, |
|
"learning_rate": 0.0008814425450550077, |
|
"loss": 4.2342, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.3702401500138507, |
|
"grad_norm": 0.7284209728240967, |
|
"learning_rate": 0.0008805507198812803, |
|
"loss": 4.2276, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.3729037482153892, |
|
"grad_norm": 0.7192456722259521, |
|
"learning_rate": 0.0008796588947075527, |
|
"loss": 4.2248, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.3755673464169277, |
|
"grad_norm": 0.7695698738098145, |
|
"learning_rate": 0.0008787688531841727, |
|
"loss": 4.2276, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.3782309446184662, |
|
"grad_norm": 0.740368664264679, |
|
"learning_rate": 0.0008778770280104451, |
|
"loss": 4.2286, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.3808945428200047, |
|
"grad_norm": 0.7393242716789246, |
|
"learning_rate": 0.0008769852028367175, |
|
"loss": 4.2239, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.3835581410215432, |
|
"grad_norm": 0.7269551157951355, |
|
"learning_rate": 0.0008760933776629901, |
|
"loss": 4.2196, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.3862217392230817, |
|
"grad_norm": 0.6773830056190491, |
|
"learning_rate": 0.0008752033361396099, |
|
"loss": 4.2283, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.38888533742462017, |
|
"grad_norm": 0.7091046571731567, |
|
"learning_rate": 0.0008743115109658824, |
|
"loss": 4.2252, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.39154893562615867, |
|
"grad_norm": 0.7202826738357544, |
|
"learning_rate": 0.0008734196857921548, |
|
"loss": 4.2102, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.39421253382769716, |
|
"grad_norm": 0.6965381503105164, |
|
"learning_rate": 0.0008725278606184272, |
|
"loss": 4.222, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.39687613202923566, |
|
"grad_norm": 0.7711541652679443, |
|
"learning_rate": 0.0008716378190950471, |
|
"loss": 4.2138, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.39953973023077416, |
|
"grad_norm": 0.6982942223548889, |
|
"learning_rate": 0.0008707459939213196, |
|
"loss": 4.2209, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.40220332843231266, |
|
"grad_norm": 0.700356662273407, |
|
"learning_rate": 0.0008698541687475921, |
|
"loss": 4.2153, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.40486692663385115, |
|
"grad_norm": 0.7417271137237549, |
|
"learning_rate": 0.0008689623435738645, |
|
"loss": 4.216, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.40753052483538965, |
|
"grad_norm": 0.7237849235534668, |
|
"learning_rate": 0.0008680723020504845, |
|
"loss": 4.2172, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.41019412303692815, |
|
"grad_norm": 0.7940893769264221, |
|
"learning_rate": 0.0008671804768767569, |
|
"loss": 4.2224, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.41285772123846665, |
|
"grad_norm": 0.7201411724090576, |
|
"learning_rate": 0.0008662886517030294, |
|
"loss": 4.2203, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.4155213194400051, |
|
"grad_norm": 0.7360599637031555, |
|
"learning_rate": 0.0008653968265293019, |
|
"loss": 4.2208, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.4181849176415436, |
|
"grad_norm": 0.7827675938606262, |
|
"learning_rate": 0.0008645067850059217, |
|
"loss": 4.2095, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.4208485158430821, |
|
"grad_norm": 0.7322735786437988, |
|
"learning_rate": 0.0008636149598321942, |
|
"loss": 4.2085, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.4235121140446206, |
|
"grad_norm": 0.6896507740020752, |
|
"learning_rate": 0.0008627231346584666, |
|
"loss": 4.2045, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.4261757122461591, |
|
"grad_norm": 0.780642569065094, |
|
"learning_rate": 0.0008618313094847391, |
|
"loss": 4.2157, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.4288393104476976, |
|
"grad_norm": 0.717087984085083, |
|
"learning_rate": 0.000860941267961359, |
|
"loss": 4.208, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.43150290864923607, |
|
"grad_norm": 0.7145330309867859, |
|
"learning_rate": 0.0008600494427876314, |
|
"loss": 4.2128, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.43416650685077457, |
|
"grad_norm": 0.7336823344230652, |
|
"learning_rate": 0.0008591576176139039, |
|
"loss": 4.2124, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.43683010505231307, |
|
"grad_norm": 0.6869795322418213, |
|
"learning_rate": 0.0008582657924401764, |
|
"loss": 4.2103, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.43949370325385156, |
|
"grad_norm": 0.7188379168510437, |
|
"learning_rate": 0.0008573757509167964, |
|
"loss": 4.2084, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.44215730145539006, |
|
"grad_norm": 0.7271597981452942, |
|
"learning_rate": 0.0008564839257430688, |
|
"loss": 4.2087, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.44482089965692856, |
|
"grad_norm": 0.7935476303100586, |
|
"learning_rate": 0.0008555921005693412, |
|
"loss": 4.199, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.44748449785846706, |
|
"grad_norm": 0.732509195804596, |
|
"learning_rate": 0.0008547002753956137, |
|
"loss": 4.2014, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.45014809606000555, |
|
"grad_norm": 0.7381872534751892, |
|
"learning_rate": 0.0008538102338722335, |
|
"loss": 4.2078, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.45281169426154405, |
|
"grad_norm": 0.697894811630249, |
|
"learning_rate": 0.0008529184086985061, |
|
"loss": 4.1978, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.45547529246308255, |
|
"grad_norm": 0.715933084487915, |
|
"learning_rate": 0.0008520265835247785, |
|
"loss": 4.205, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.45813889066462105, |
|
"grad_norm": 0.7199248671531677, |
|
"learning_rate": 0.0008511347583510509, |
|
"loss": 4.201, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.46080248886615954, |
|
"grad_norm": 0.7358156442642212, |
|
"learning_rate": 0.0008502447168276709, |
|
"loss": 4.2025, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.463466087067698, |
|
"grad_norm": 0.8218105435371399, |
|
"learning_rate": 0.0008493528916539433, |
|
"loss": 4.2017, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.4661296852692365, |
|
"grad_norm": 0.77776700258255, |
|
"learning_rate": 0.0008484610664802158, |
|
"loss": 4.1905, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.468793283470775, |
|
"grad_norm": 0.6795767545700073, |
|
"learning_rate": 0.0008475692413064883, |
|
"loss": 4.1913, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.4714568816723135, |
|
"grad_norm": 0.7476922869682312, |
|
"learning_rate": 0.0008466791997831082, |
|
"loss": 4.1935, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.474120479873852, |
|
"grad_norm": 0.7420318722724915, |
|
"learning_rate": 0.0008457873746093806, |
|
"loss": 4.1989, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.47678407807539047, |
|
"grad_norm": 0.677543044090271, |
|
"learning_rate": 0.000844895549435653, |
|
"loss": 4.1921, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.47944767627692897, |
|
"grad_norm": 0.7159215211868286, |
|
"learning_rate": 0.0008440037242619255, |
|
"loss": 4.1935, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.48211127447846747, |
|
"grad_norm": 0.7259414792060852, |
|
"learning_rate": 0.0008431136827385454, |
|
"loss": 4.2041, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.48477487268000596, |
|
"grad_norm": 0.6838536262512207, |
|
"learning_rate": 0.0008422218575648179, |
|
"loss": 4.1954, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.48743847088154446, |
|
"grad_norm": 0.6978190541267395, |
|
"learning_rate": 0.0008413300323910903, |
|
"loss": 4.1944, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.49010206908308296, |
|
"grad_norm": 0.7434132695198059, |
|
"learning_rate": 0.0008404382072173627, |
|
"loss": 4.1932, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.49276566728462146, |
|
"grad_norm": 0.6992717981338501, |
|
"learning_rate": 0.0008395481656939827, |
|
"loss": 4.1963, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.49542926548615995, |
|
"grad_norm": 0.7276673316955566, |
|
"learning_rate": 0.0008386563405202552, |
|
"loss": 4.1967, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.49809286368769845, |
|
"grad_norm": 0.7243706583976746, |
|
"learning_rate": 0.0008377645153465277, |
|
"loss": 4.1938, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.500756461889237, |
|
"grad_norm": 0.7238306999206543, |
|
"learning_rate": 0.0008368726901728001, |
|
"loss": 4.1944, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.5034200600907754, |
|
"grad_norm": 0.7251293063163757, |
|
"learning_rate": 0.00083598264864942, |
|
"loss": 4.187, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.5060836582923139, |
|
"grad_norm": 0.6981387734413147, |
|
"learning_rate": 0.0008350908234756924, |
|
"loss": 4.1942, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.5087472564938524, |
|
"grad_norm": 0.7512865662574768, |
|
"learning_rate": 0.0008341989983019649, |
|
"loss": 4.1896, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.5114108546953909, |
|
"grad_norm": 0.76689213514328, |
|
"learning_rate": 0.0008333071731282374, |
|
"loss": 4.1895, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.5140744528969294, |
|
"grad_norm": 0.7794478535652161, |
|
"learning_rate": 0.0008324171316048572, |
|
"loss": 4.1877, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.5167380510984679, |
|
"grad_norm": 0.7624120712280273, |
|
"learning_rate": 0.0008315253064311297, |
|
"loss": 4.1905, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.5194016493000064, |
|
"grad_norm": 0.812703549861908, |
|
"learning_rate": 0.0008306334812574021, |
|
"loss": 4.1918, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.5220652475015449, |
|
"grad_norm": 0.7445054054260254, |
|
"learning_rate": 0.0008297416560836745, |
|
"loss": 4.1932, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.5247288457030834, |
|
"grad_norm": 0.6916468143463135, |
|
"learning_rate": 0.0008288498309099471, |
|
"loss": 4.1927, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.5273924439046219, |
|
"grad_norm": 0.7391178011894226, |
|
"learning_rate": 0.000827959789386567, |
|
"loss": 4.1822, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.5300560421061604, |
|
"grad_norm": 0.7245861887931824, |
|
"learning_rate": 0.0008270679642128395, |
|
"loss": 4.1897, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.5327196403076989, |
|
"grad_norm": 0.7156808376312256, |
|
"learning_rate": 0.0008261761390391119, |
|
"loss": 4.186, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.5353832385092374, |
|
"grad_norm": 0.7185246348381042, |
|
"learning_rate": 0.0008252843138653843, |
|
"loss": 4.182, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.5380468367107758, |
|
"grad_norm": 0.7230123281478882, |
|
"learning_rate": 0.0008243942723420043, |
|
"loss": 4.1888, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.5407104349123143, |
|
"grad_norm": 0.6807687282562256, |
|
"learning_rate": 0.0008235024471682767, |
|
"loss": 4.1757, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.5433740331138528, |
|
"grad_norm": 0.6942833065986633, |
|
"learning_rate": 0.0008226106219945492, |
|
"loss": 4.1818, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.5460376313153913, |
|
"grad_norm": 0.7553761601448059, |
|
"learning_rate": 0.0008217187968208216, |
|
"loss": 4.1876, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.5487012295169298, |
|
"grad_norm": 0.8295273184776306, |
|
"learning_rate": 0.0008208287552974415, |
|
"loss": 4.1763, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.5513648277184683, |
|
"grad_norm": 0.7182528972625732, |
|
"learning_rate": 0.000819936930123714, |
|
"loss": 4.1867, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.5540284259200068, |
|
"grad_norm": 0.7191228270530701, |
|
"learning_rate": 0.0008190451049499864, |
|
"loss": 4.1822, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.5566920241215453, |
|
"grad_norm": 0.7880285382270813, |
|
"learning_rate": 0.0008181532797762589, |
|
"loss": 4.178, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.5593556223230838, |
|
"grad_norm": 0.7537713050842285, |
|
"learning_rate": 0.0008172632382528788, |
|
"loss": 4.1865, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.5620192205246223, |
|
"grad_norm": 0.7707012891769409, |
|
"learning_rate": 0.0008163714130791513, |
|
"loss": 4.1847, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.5646828187261608, |
|
"grad_norm": 0.7433204054832458, |
|
"learning_rate": 0.0008154795879054238, |
|
"loss": 4.1778, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.5673464169276993, |
|
"grad_norm": 0.760553240776062, |
|
"learning_rate": 0.0008145877627316962, |
|
"loss": 4.1804, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.5700100151292378, |
|
"grad_norm": 0.744844913482666, |
|
"learning_rate": 0.0008136977212083161, |
|
"loss": 4.1809, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.5726736133307763, |
|
"grad_norm": 0.7252081036567688, |
|
"learning_rate": 0.0008128058960345885, |
|
"loss": 4.1731, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.5753372115323148, |
|
"grad_norm": 0.6822036504745483, |
|
"learning_rate": 0.000811914070860861, |
|
"loss": 4.1799, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.5780008097338533, |
|
"grad_norm": 0.7590454816818237, |
|
"learning_rate": 0.0008110222456871334, |
|
"loss": 4.1771, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.5806644079353918, |
|
"grad_norm": 0.7851970791816711, |
|
"learning_rate": 0.0008101322041637535, |
|
"loss": 4.1762, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.5833280061369303, |
|
"grad_norm": 0.7638763785362244, |
|
"learning_rate": 0.0008092403789900259, |
|
"loss": 4.1699, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.5859916043384688, |
|
"grad_norm": 0.7190741896629333, |
|
"learning_rate": 0.0008083485538162983, |
|
"loss": 4.181, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.5886552025400072, |
|
"grad_norm": 0.8082555532455444, |
|
"learning_rate": 0.0008074567286425708, |
|
"loss": 4.1711, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.5913188007415457, |
|
"grad_norm": 0.7326035499572754, |
|
"learning_rate": 0.0008065666871191906, |
|
"loss": 4.1743, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.5939823989430842, |
|
"grad_norm": 0.7412554621696472, |
|
"learning_rate": 0.0008056748619454632, |
|
"loss": 4.1761, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.5966459971446227, |
|
"grad_norm": 0.6986061930656433, |
|
"learning_rate": 0.0008047830367717356, |
|
"loss": 4.1788, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.5993095953461612, |
|
"grad_norm": 0.8155457973480225, |
|
"learning_rate": 0.000803891211598008, |
|
"loss": 4.1801, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.6019731935476997, |
|
"grad_norm": 0.7332949042320251, |
|
"learning_rate": 0.0008030011700746279, |
|
"loss": 4.1678, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.6046367917492382, |
|
"grad_norm": 0.8117866516113281, |
|
"learning_rate": 0.0008021093449009003, |
|
"loss": 4.1781, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.6073003899507767, |
|
"grad_norm": 0.7188646197319031, |
|
"learning_rate": 0.0008012175197271729, |
|
"loss": 4.1702, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.6099639881523152, |
|
"grad_norm": 0.7319905757904053, |
|
"learning_rate": 0.0008003256945534453, |
|
"loss": 4.1709, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.6126275863538537, |
|
"grad_norm": 0.7118169069290161, |
|
"learning_rate": 0.0007994356530300653, |
|
"loss": 4.1709, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.6152911845553922, |
|
"grad_norm": 0.7694860696792603, |
|
"learning_rate": 0.0007985438278563377, |
|
"loss": 4.1723, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.6179547827569307, |
|
"grad_norm": 0.7366968989372253, |
|
"learning_rate": 0.0007976520026826101, |
|
"loss": 4.1676, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.6206183809584692, |
|
"grad_norm": 0.7481387257575989, |
|
"learning_rate": 0.0007967601775088827, |
|
"loss": 4.1729, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.6232819791600077, |
|
"grad_norm": 0.7446570992469788, |
|
"learning_rate": 0.0007958701359855025, |
|
"loss": 4.1657, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.6259455773615462, |
|
"grad_norm": 0.7612956166267395, |
|
"learning_rate": 0.000794978310811775, |
|
"loss": 4.1685, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.6286091755630847, |
|
"grad_norm": 0.7427545189857483, |
|
"learning_rate": 0.0007940864856380474, |
|
"loss": 4.1685, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.6312727737646232, |
|
"grad_norm": 0.7789895534515381, |
|
"learning_rate": 0.0007931946604643198, |
|
"loss": 4.1726, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.6339363719661616, |
|
"grad_norm": 0.751118540763855, |
|
"learning_rate": 0.0007923046189409397, |
|
"loss": 4.1693, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.6365999701677001, |
|
"grad_norm": 0.8121469616889954, |
|
"learning_rate": 0.0007914127937672122, |
|
"loss": 4.1667, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.6392635683692386, |
|
"grad_norm": 0.7127716541290283, |
|
"learning_rate": 0.0007905209685934847, |
|
"loss": 4.1604, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.6419271665707771, |
|
"grad_norm": 0.7496224045753479, |
|
"learning_rate": 0.0007896291434197571, |
|
"loss": 4.1655, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.6445907647723156, |
|
"grad_norm": 0.7957298755645752, |
|
"learning_rate": 0.0007887391018963771, |
|
"loss": 4.1685, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.6472543629738541, |
|
"grad_norm": 0.708066463470459, |
|
"learning_rate": 0.0007878472767226495, |
|
"loss": 4.1684, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.6499179611753926, |
|
"grad_norm": 0.8204523324966431, |
|
"learning_rate": 0.000786955451548922, |
|
"loss": 4.1685, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.6525815593769311, |
|
"grad_norm": 0.7236646413803101, |
|
"learning_rate": 0.0007860636263751945, |
|
"loss": 4.1692, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.6552451575784696, |
|
"grad_norm": 0.7952857613563538, |
|
"learning_rate": 0.0007851735848518143, |
|
"loss": 4.1623, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.6579087557800081, |
|
"grad_norm": 0.7337407469749451, |
|
"learning_rate": 0.0007842817596780868, |
|
"loss": 4.1675, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.6605723539815466, |
|
"grad_norm": 0.740993082523346, |
|
"learning_rate": 0.0007833899345043592, |
|
"loss": 4.1643, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.6632359521830851, |
|
"grad_norm": 0.7212578654289246, |
|
"learning_rate": 0.0007824981093306317, |
|
"loss": 4.1656, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.6658995503846236, |
|
"grad_norm": 0.7532219886779785, |
|
"learning_rate": 0.0007816080678072516, |
|
"loss": 4.1682, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.6685631485861621, |
|
"grad_norm": 0.759222686290741, |
|
"learning_rate": 0.000780716242633524, |
|
"loss": 4.165, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.6712267467877006, |
|
"grad_norm": 0.7389349937438965, |
|
"learning_rate": 0.0007798244174597965, |
|
"loss": 4.1623, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.673890344989239, |
|
"grad_norm": 0.7558398246765137, |
|
"learning_rate": 0.0007789325922860689, |
|
"loss": 4.165, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.6765539431907776, |
|
"grad_norm": 0.778786838054657, |
|
"learning_rate": 0.0007780425507626889, |
|
"loss": 4.1636, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.679217541392316, |
|
"grad_norm": 0.7308077812194824, |
|
"learning_rate": 0.0007771507255889614, |
|
"loss": 4.1609, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.6818811395938545, |
|
"grad_norm": 0.7642717361450195, |
|
"learning_rate": 0.0007762589004152338, |
|
"loss": 4.1623, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.684544737795393, |
|
"grad_norm": 0.7278922200202942, |
|
"learning_rate": 0.0007753670752415063, |
|
"loss": 4.1636, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.6872083359969315, |
|
"grad_norm": 0.7422888278961182, |
|
"learning_rate": 0.0007744770337181261, |
|
"loss": 4.1542, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.68987193419847, |
|
"grad_norm": 0.7136949896812439, |
|
"learning_rate": 0.0007735852085443986, |
|
"loss": 4.1579, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.6925355324000085, |
|
"grad_norm": 0.7696181535720825, |
|
"learning_rate": 0.0007726933833706711, |
|
"loss": 4.1615, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.695199130601547, |
|
"grad_norm": 0.7375788688659668, |
|
"learning_rate": 0.0007718015581969435, |
|
"loss": 4.1625, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.6978627288030855, |
|
"grad_norm": 0.7175765037536621, |
|
"learning_rate": 0.0007709115166735635, |
|
"loss": 4.1562, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.700526327004624, |
|
"grad_norm": 0.7179591655731201, |
|
"learning_rate": 0.000770019691499836, |
|
"loss": 4.1604, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.7031899252061625, |
|
"grad_norm": 0.7693660259246826, |
|
"learning_rate": 0.0007691278663261084, |
|
"loss": 4.1623, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.705853523407701, |
|
"grad_norm": 0.7547662854194641, |
|
"learning_rate": 0.0007682360411523809, |
|
"loss": 4.1604, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.7085171216092395, |
|
"grad_norm": 0.7436234951019287, |
|
"learning_rate": 0.0007673459996290008, |
|
"loss": 4.159, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.711180719810778, |
|
"grad_norm": 0.7248745560646057, |
|
"learning_rate": 0.0007664541744552732, |
|
"loss": 4.155, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.7138443180123165, |
|
"grad_norm": 0.7338257431983948, |
|
"learning_rate": 0.0007655623492815456, |
|
"loss": 4.1573, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.716507916213855, |
|
"grad_norm": 0.7636457085609436, |
|
"learning_rate": 0.0007646705241078181, |
|
"loss": 4.1568, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.7191715144153935, |
|
"grad_norm": 0.7198740243911743, |
|
"learning_rate": 0.000763780482584438, |
|
"loss": 4.1597, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.721835112616932, |
|
"grad_norm": 0.7390605807304382, |
|
"learning_rate": 0.0007628886574107105, |
|
"loss": 4.1471, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.7244987108184705, |
|
"grad_norm": 0.7730891108512878, |
|
"learning_rate": 0.0007619968322369829, |
|
"loss": 4.1518, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.727162309020009, |
|
"grad_norm": 0.7512543797492981, |
|
"learning_rate": 0.0007611050070632553, |
|
"loss": 4.1602, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.7298259072215475, |
|
"grad_norm": 0.7366748452186584, |
|
"learning_rate": 0.0007602149655398753, |
|
"loss": 4.1583, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.7324895054230859, |
|
"grad_norm": 0.7468605041503906, |
|
"learning_rate": 0.0007593231403661477, |
|
"loss": 4.1535, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.7351531036246244, |
|
"grad_norm": 0.7176985144615173, |
|
"learning_rate": 0.0007584313151924203, |
|
"loss": 4.1525, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.7378167018261629, |
|
"grad_norm": 0.7422710657119751, |
|
"learning_rate": 0.0007575394900186927, |
|
"loss": 4.1507, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.7404803000277014, |
|
"grad_norm": 0.7459094524383545, |
|
"learning_rate": 0.0007566494484953126, |
|
"loss": 4.1541, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.7431438982292399, |
|
"grad_norm": 0.7306596636772156, |
|
"learning_rate": 0.000755757623321585, |
|
"loss": 4.1502, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.7458074964307784, |
|
"grad_norm": 0.7191296219825745, |
|
"learning_rate": 0.0007548657981478574, |
|
"loss": 4.1483, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.7484710946323169, |
|
"grad_norm": 0.7819980382919312, |
|
"learning_rate": 0.00075397397297413, |
|
"loss": 4.1589, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.7511346928338554, |
|
"grad_norm": 0.7624921202659607, |
|
"learning_rate": 0.0007530839314507498, |
|
"loss": 4.1531, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.7537982910353939, |
|
"grad_norm": 0.7341359257698059, |
|
"learning_rate": 0.0007521921062770223, |
|
"loss": 4.1514, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.7564618892369324, |
|
"grad_norm": 0.7539492249488831, |
|
"learning_rate": 0.0007513002811032947, |
|
"loss": 4.153, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.7591254874384709, |
|
"grad_norm": 0.7897160053253174, |
|
"learning_rate": 0.0007504084559295671, |
|
"loss": 4.1462, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.7617890856400094, |
|
"grad_norm": 0.7714428901672363, |
|
"learning_rate": 0.0007495184144061872, |
|
"loss": 4.1436, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.7644526838415479, |
|
"grad_norm": 0.8038801550865173, |
|
"learning_rate": 0.0007486265892324597, |
|
"loss": 4.1506, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.7671162820430864, |
|
"grad_norm": 0.7296925187110901, |
|
"learning_rate": 0.0007477347640587321, |
|
"loss": 4.1493, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.7697798802446248, |
|
"grad_norm": 0.7423230409622192, |
|
"learning_rate": 0.0007468429388850045, |
|
"loss": 4.1464, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.7724434784461633, |
|
"grad_norm": 0.7713762521743774, |
|
"learning_rate": 0.0007459528973616244, |
|
"loss": 4.151, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.7751070766477018, |
|
"grad_norm": 0.7986962199211121, |
|
"learning_rate": 0.0007450610721878969, |
|
"loss": 4.1448, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.7777706748492403, |
|
"grad_norm": 0.794867217540741, |
|
"learning_rate": 0.0007441692470141694, |
|
"loss": 4.1523, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.7804342730507788, |
|
"grad_norm": 0.7599649429321289, |
|
"learning_rate": 0.0007432774218404418, |
|
"loss": 4.1454, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.7830978712523173, |
|
"grad_norm": 0.7340590357780457, |
|
"learning_rate": 0.0007423873803170616, |
|
"loss": 4.144, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.7857614694538558, |
|
"grad_norm": 0.7674250602722168, |
|
"learning_rate": 0.0007414955551433341, |
|
"loss": 4.1502, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.7884250676553943, |
|
"grad_norm": 0.7552058696746826, |
|
"learning_rate": 0.0007406037299696065, |
|
"loss": 4.1453, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.7910886658569328, |
|
"grad_norm": 0.7295849323272705, |
|
"learning_rate": 0.0007397119047958791, |
|
"loss": 4.1506, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.7937522640584713, |
|
"grad_norm": 0.754206120967865, |
|
"learning_rate": 0.000738821863272499, |
|
"loss": 4.1452, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.7964158622600098, |
|
"grad_norm": 0.8196142911911011, |
|
"learning_rate": 0.0007379300380987715, |
|
"loss": 4.153, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.7990794604615483, |
|
"grad_norm": 0.7535151243209839, |
|
"learning_rate": 0.0007370382129250439, |
|
"loss": 4.1493, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.8017430586630868, |
|
"grad_norm": 0.8634600043296814, |
|
"learning_rate": 0.0007361463877513163, |
|
"loss": 4.1483, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.8044066568646253, |
|
"grad_norm": 0.7539383769035339, |
|
"learning_rate": 0.0007352563462279363, |
|
"loss": 4.1511, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.8070702550661638, |
|
"grad_norm": 0.7170119881629944, |
|
"learning_rate": 0.0007343645210542087, |
|
"loss": 4.1504, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.8097338532677023, |
|
"grad_norm": 0.7679442763328552, |
|
"learning_rate": 0.0007334726958804812, |
|
"loss": 4.1455, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.8123974514692408, |
|
"grad_norm": 0.7368362545967102, |
|
"learning_rate": 0.0007325808707067536, |
|
"loss": 4.1481, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.8150610496707793, |
|
"grad_norm": 0.7174336910247803, |
|
"learning_rate": 0.000731689045533026, |
|
"loss": 4.1451, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.8177246478723178, |
|
"grad_norm": 0.7762460708618164, |
|
"learning_rate": 0.0007307990040096461, |
|
"loss": 4.1437, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.8203882460738563, |
|
"grad_norm": 0.6886820197105408, |
|
"learning_rate": 0.0007299071788359185, |
|
"loss": 4.1429, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.8230518442753948, |
|
"grad_norm": 0.7819857597351074, |
|
"learning_rate": 0.000729015353662191, |
|
"loss": 4.1408, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.8257154424769333, |
|
"grad_norm": 0.78780198097229, |
|
"learning_rate": 0.0007281235284884634, |
|
"loss": 4.147, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.8283790406784717, |
|
"grad_norm": 0.7623980045318604, |
|
"learning_rate": 0.0007272334869650833, |
|
"loss": 4.1449, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.8310426388800102, |
|
"grad_norm": 0.7452903389930725, |
|
"learning_rate": 0.0007263416617913558, |
|
"loss": 4.1444, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.8337062370815487, |
|
"grad_norm": 0.7188674807548523, |
|
"learning_rate": 0.0007254498366176282, |
|
"loss": 4.1378, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.8363698352830872, |
|
"grad_norm": 0.7653003931045532, |
|
"learning_rate": 0.0007245580114439007, |
|
"loss": 4.1454, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.8390334334846257, |
|
"grad_norm": 0.7343904376029968, |
|
"learning_rate": 0.0007236679699205205, |
|
"loss": 4.1479, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.8416970316861642, |
|
"grad_norm": 0.7688188552856445, |
|
"learning_rate": 0.000722776144746793, |
|
"loss": 4.1353, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.8443606298877027, |
|
"grad_norm": 0.7669944167137146, |
|
"learning_rate": 0.0007218843195730654, |
|
"loss": 4.1369, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.8470242280892412, |
|
"grad_norm": 0.7605074048042297, |
|
"learning_rate": 0.0007209924943993379, |
|
"loss": 4.1446, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.8496878262907797, |
|
"grad_norm": 0.7343530058860779, |
|
"learning_rate": 0.0007201024528759579, |
|
"loss": 4.1409, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.8523514244923182, |
|
"grad_norm": 0.7942246198654175, |
|
"learning_rate": 0.0007192106277022303, |
|
"loss": 4.144, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.8550150226938567, |
|
"grad_norm": 0.7736623287200928, |
|
"learning_rate": 0.0007183188025285028, |
|
"loss": 4.141, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.8576786208953951, |
|
"grad_norm": 0.7663691639900208, |
|
"learning_rate": 0.0007174269773547752, |
|
"loss": 4.1434, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.8603422190969336, |
|
"grad_norm": 0.7635341286659241, |
|
"learning_rate": 0.0007165369358313952, |
|
"loss": 4.1439, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.8630058172984721, |
|
"grad_norm": 0.797211766242981, |
|
"learning_rate": 0.0007156451106576676, |
|
"loss": 4.1331, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.8656694155000106, |
|
"grad_norm": 0.7563562393188477, |
|
"learning_rate": 0.00071475328548394, |
|
"loss": 4.1429, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.8683330137015491, |
|
"grad_norm": 0.7162951827049255, |
|
"learning_rate": 0.0007138614603102125, |
|
"loss": 4.1389, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.8709966119030876, |
|
"grad_norm": 0.7123258709907532, |
|
"learning_rate": 0.0007129714187868323, |
|
"loss": 4.136, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.8736602101046261, |
|
"grad_norm": 0.728543221950531, |
|
"learning_rate": 0.0007120795936131049, |
|
"loss": 4.1325, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.8763238083061646, |
|
"grad_norm": 0.7728511691093445, |
|
"learning_rate": 0.0007111877684393773, |
|
"loss": 4.1348, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.8789874065077031, |
|
"grad_norm": 0.7468729019165039, |
|
"learning_rate": 0.0007102959432656497, |
|
"loss": 4.1361, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.8816510047092416, |
|
"grad_norm": 0.7346534132957458, |
|
"learning_rate": 0.0007094059017422697, |
|
"loss": 4.1396, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.8843146029107801, |
|
"grad_norm": 0.7773277759552002, |
|
"learning_rate": 0.0007085140765685421, |
|
"loss": 4.1401, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.8869782011123186, |
|
"grad_norm": 0.709701657295227, |
|
"learning_rate": 0.0007076222513948147, |
|
"loss": 4.1317, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.8896417993138571, |
|
"grad_norm": 0.7487180233001709, |
|
"learning_rate": 0.0007067304262210871, |
|
"loss": 4.13, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.8923053975153956, |
|
"grad_norm": 0.7227104306221008, |
|
"learning_rate": 0.000705840384697707, |
|
"loss": 4.1367, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.8949689957169341, |
|
"grad_norm": 0.7912375330924988, |
|
"learning_rate": 0.0007049485595239794, |
|
"loss": 4.1294, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.8976325939184726, |
|
"grad_norm": 0.8671672344207764, |
|
"learning_rate": 0.0007040567343502518, |
|
"loss": 4.129, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.9002961921200111, |
|
"grad_norm": 0.7554329633712769, |
|
"learning_rate": 0.0007031649091765244, |
|
"loss": 4.1381, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.9029597903215496, |
|
"grad_norm": 0.7798919081687927, |
|
"learning_rate": 0.0007022748676531442, |
|
"loss": 4.1297, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.9056233885230881, |
|
"grad_norm": 0.7176423668861389, |
|
"learning_rate": 0.0007013830424794167, |
|
"loss": 4.132, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.9082869867246266, |
|
"grad_norm": 0.7016908526420593, |
|
"learning_rate": 0.0007004912173056891, |
|
"loss": 4.132, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.9109505849261651, |
|
"grad_norm": 0.7394859790802002, |
|
"learning_rate": 0.0006995993921319615, |
|
"loss": 4.1337, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.9136141831277036, |
|
"grad_norm": 0.745543897151947, |
|
"learning_rate": 0.0006987093506085815, |
|
"loss": 4.1316, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.9162777813292421, |
|
"grad_norm": 0.7842167019844055, |
|
"learning_rate": 0.000697817525434854, |
|
"loss": 4.1314, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.9189413795307806, |
|
"grad_norm": 0.7487747073173523, |
|
"learning_rate": 0.0006969257002611265, |
|
"loss": 4.1281, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.9216049777323191, |
|
"grad_norm": 0.737399160861969, |
|
"learning_rate": 0.0006960338750873989, |
|
"loss": 4.1325, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.9242685759338576, |
|
"grad_norm": 0.7666307687759399, |
|
"learning_rate": 0.0006951438335640188, |
|
"loss": 4.1333, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.926932174135396, |
|
"grad_norm": 0.7485344409942627, |
|
"learning_rate": 0.0006942520083902912, |
|
"loss": 4.1317, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.9295957723369345, |
|
"grad_norm": 0.7282237410545349, |
|
"learning_rate": 0.0006933601832165637, |
|
"loss": 4.1326, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.932259370538473, |
|
"grad_norm": 0.7747819423675537, |
|
"learning_rate": 0.0006924701416931836, |
|
"loss": 4.1362, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.9349229687400115, |
|
"grad_norm": 0.7578604817390442, |
|
"learning_rate": 0.000691578316519456, |
|
"loss": 4.1383, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.93758656694155, |
|
"grad_norm": 0.7957220673561096, |
|
"learning_rate": 0.0006906864913457285, |
|
"loss": 4.128, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.9402501651430885, |
|
"grad_norm": 0.7936584949493408, |
|
"learning_rate": 0.000689794666172001, |
|
"loss": 4.122, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.942913763344627, |
|
"grad_norm": 0.8081178069114685, |
|
"learning_rate": 0.0006889028409982735, |
|
"loss": 4.1298, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.9455773615461655, |
|
"grad_norm": 0.7892795205116272, |
|
"learning_rate": 0.000688011015824546, |
|
"loss": 4.1267, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.948240959747704, |
|
"grad_norm": 0.7274259328842163, |
|
"learning_rate": 0.0006871191906508184, |
|
"loss": 4.1232, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.9509045579492424, |
|
"grad_norm": 0.7544950246810913, |
|
"learning_rate": 0.0006862291491274383, |
|
"loss": 4.1267, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.9535681561507809, |
|
"grad_norm": 0.798841655254364, |
|
"learning_rate": 0.0006853373239537107, |
|
"loss": 4.1328, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.9562317543523194, |
|
"grad_norm": 0.7239564657211304, |
|
"learning_rate": 0.0006844454987799832, |
|
"loss": 4.1336, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.9588953525538579, |
|
"grad_norm": 0.8423783779144287, |
|
"learning_rate": 0.0006835536736062557, |
|
"loss": 4.1286, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.9615589507553964, |
|
"grad_norm": 0.7887551784515381, |
|
"learning_rate": 0.0006826618484325281, |
|
"loss": 4.1199, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.9642225489569349, |
|
"grad_norm": 0.7365000247955322, |
|
"learning_rate": 0.0006817700232588005, |
|
"loss": 4.1321, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.9668861471584734, |
|
"grad_norm": 0.7989848256111145, |
|
"learning_rate": 0.0006808799817354204, |
|
"loss": 4.1327, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.9695497453600119, |
|
"grad_norm": 0.7484691143035889, |
|
"learning_rate": 0.0006799881565616928, |
|
"loss": 4.1239, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.9722133435615504, |
|
"grad_norm": 0.8183499574661255, |
|
"learning_rate": 0.0006790963313879654, |
|
"loss": 4.1253, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.9748769417630889, |
|
"grad_norm": 0.7121425271034241, |
|
"learning_rate": 0.0006782045062142378, |
|
"loss": 4.1342, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.9775405399646274, |
|
"grad_norm": 0.7777406573295593, |
|
"learning_rate": 0.0006773144646908578, |
|
"loss": 4.1286, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.9802041381661659, |
|
"grad_norm": 0.7477155327796936, |
|
"learning_rate": 0.0006764226395171302, |
|
"loss": 4.1278, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.9828677363677044, |
|
"grad_norm": 0.8153510093688965, |
|
"learning_rate": 0.0006755308143434026, |
|
"loss": 4.1232, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.9855313345692429, |
|
"grad_norm": 0.7904220819473267, |
|
"learning_rate": 0.0006746389891696752, |
|
"loss": 4.1283, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.9881949327707814, |
|
"grad_norm": 0.8383620977401733, |
|
"learning_rate": 0.0006737471639959476, |
|
"loss": 4.1334, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.9908585309723199, |
|
"grad_norm": 0.7521381378173828, |
|
"learning_rate": 0.0006728571224725675, |
|
"loss": 4.1339, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.9935221291738584, |
|
"grad_norm": 0.7851571440696716, |
|
"learning_rate": 0.0006719652972988399, |
|
"loss": 4.1289, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.9961857273753969, |
|
"grad_norm": 0.7758961319923401, |
|
"learning_rate": 0.0006710734721251123, |
|
"loss": 4.1294, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.9988493255769354, |
|
"grad_norm": 0.7806641459465027, |
|
"learning_rate": 0.0006701816469513849, |
|
"loss": 4.1285, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 1.001512923778474, |
|
"grad_norm": 0.7453823685646057, |
|
"learning_rate": 0.0006692916054280047, |
|
"loss": 4.1283, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 1.0041765219800123, |
|
"grad_norm": 0.7377151846885681, |
|
"learning_rate": 0.0006683997802542772, |
|
"loss": 4.1297, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 1.006840120181551, |
|
"grad_norm": 0.7941287755966187, |
|
"learning_rate": 0.0006675079550805496, |
|
"loss": 4.1212, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 1.0095037183830893, |
|
"grad_norm": 0.767425000667572, |
|
"learning_rate": 0.000666616129906822, |
|
"loss": 4.1229, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 1.0121673165846279, |
|
"grad_norm": 0.7483153343200684, |
|
"learning_rate": 0.0006657243047330946, |
|
"loss": 4.1242, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.0148309147861663, |
|
"grad_norm": 0.7890580892562866, |
|
"learning_rate": 0.0006648342632097145, |
|
"loss": 4.1306, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 1.0174945129877049, |
|
"grad_norm": 0.7415242791175842, |
|
"learning_rate": 0.000663942438035987, |
|
"loss": 4.1285, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 1.0201581111892433, |
|
"grad_norm": 0.7596645951271057, |
|
"learning_rate": 0.0006630506128622594, |
|
"loss": 4.1258, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 1.0228217093907819, |
|
"grad_norm": 0.8304431438446045, |
|
"learning_rate": 0.0006621587876885318, |
|
"loss": 4.1232, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.0254853075923203, |
|
"grad_norm": 0.77840656042099, |
|
"learning_rate": 0.0006612687461651517, |
|
"loss": 4.1195, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 1.0281489057938589, |
|
"grad_norm": 0.7862575650215149, |
|
"learning_rate": 0.0006603769209914242, |
|
"loss": 4.1258, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 1.0308125039953973, |
|
"grad_norm": 0.7667100429534912, |
|
"learning_rate": 0.0006594850958176967, |
|
"loss": 4.1185, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 1.0334761021969359, |
|
"grad_norm": 0.7835633754730225, |
|
"learning_rate": 0.0006585932706439691, |
|
"loss": 4.1224, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 1.0361397003984743, |
|
"grad_norm": 0.7486304640769958, |
|
"learning_rate": 0.000657703229120589, |
|
"loss": 4.124, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 1.0388032986000129, |
|
"grad_norm": 0.7897284030914307, |
|
"learning_rate": 0.0006568114039468614, |
|
"loss": 4.1203, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.0414668968015512, |
|
"grad_norm": 0.7997919321060181, |
|
"learning_rate": 0.0006559195787731339, |
|
"loss": 4.1202, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 1.0441304950030899, |
|
"grad_norm": 0.7987415194511414, |
|
"learning_rate": 0.0006550277535994064, |
|
"loss": 4.1231, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 1.0467940932046282, |
|
"grad_norm": 0.7434735894203186, |
|
"learning_rate": 0.0006541377120760263, |
|
"loss": 4.1196, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 1.0494576914061668, |
|
"grad_norm": 0.806969404220581, |
|
"learning_rate": 0.0006532458869022988, |
|
"loss": 4.1185, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 1.0521212896077052, |
|
"grad_norm": 0.8006301522254944, |
|
"learning_rate": 0.0006523540617285712, |
|
"loss": 4.1209, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 1.0547848878092438, |
|
"grad_norm": 0.759758472442627, |
|
"learning_rate": 0.0006514622365548438, |
|
"loss": 4.1194, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 1.0574484860107822, |
|
"grad_norm": 0.8778506517410278, |
|
"learning_rate": 0.0006505704113811162, |
|
"loss": 4.1293, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 1.0601120842123208, |
|
"grad_norm": 0.7795832753181458, |
|
"learning_rate": 0.000649680369857736, |
|
"loss": 4.1152, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 1.0627756824138592, |
|
"grad_norm": 0.7928754687309265, |
|
"learning_rate": 0.0006487885446840085, |
|
"loss": 4.1177, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 1.0654392806153978, |
|
"grad_norm": 0.8119847774505615, |
|
"learning_rate": 0.0006478967195102809, |
|
"loss": 4.1205, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.0681028788169362, |
|
"grad_norm": 0.739378035068512, |
|
"learning_rate": 0.0006470048943365535, |
|
"loss": 4.1111, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 1.0707664770184748, |
|
"grad_norm": 0.7906088829040527, |
|
"learning_rate": 0.0006461148528131734, |
|
"loss": 4.1186, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 1.0734300752200132, |
|
"grad_norm": 0.7810208797454834, |
|
"learning_rate": 0.0006452230276394459, |
|
"loss": 4.1204, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 1.0760936734215516, |
|
"grad_norm": 0.741383969783783, |
|
"learning_rate": 0.0006443312024657183, |
|
"loss": 4.1222, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 1.0787572716230902, |
|
"grad_norm": 0.7824720740318298, |
|
"learning_rate": 0.0006434393772919907, |
|
"loss": 4.1174, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 1.0814208698246286, |
|
"grad_norm": 0.7920011281967163, |
|
"learning_rate": 0.0006425493357686106, |
|
"loss": 4.1196, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 1.0840844680261672, |
|
"grad_norm": 0.792914628982544, |
|
"learning_rate": 0.0006416575105948831, |
|
"loss": 4.1153, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 1.0867480662277056, |
|
"grad_norm": 0.7724523544311523, |
|
"learning_rate": 0.0006407656854211556, |
|
"loss": 4.1105, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 1.0894116644292442, |
|
"grad_norm": 0.7834595441818237, |
|
"learning_rate": 0.000639873860247428, |
|
"loss": 4.1179, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 1.0920752626307826, |
|
"grad_norm": 0.8056479096412659, |
|
"learning_rate": 0.0006389838187240478, |
|
"loss": 4.1126, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 1.0947388608323212, |
|
"grad_norm": 0.7697902321815491, |
|
"learning_rate": 0.0006380919935503203, |
|
"loss": 4.1193, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 1.0974024590338596, |
|
"grad_norm": 0.7807758450508118, |
|
"learning_rate": 0.0006372001683765928, |
|
"loss": 4.1192, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 1.1000660572353982, |
|
"grad_norm": 0.7408417463302612, |
|
"learning_rate": 0.0006363083432028652, |
|
"loss": 4.1119, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 1.1027296554369366, |
|
"grad_norm": 0.9000714421272278, |
|
"learning_rate": 0.0006354165180291377, |
|
"loss": 4.1185, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 1.1053932536384752, |
|
"grad_norm": 0.8088692426681519, |
|
"learning_rate": 0.0006345264765057577, |
|
"loss": 4.1177, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 1.1080568518400136, |
|
"grad_norm": 0.778122067451477, |
|
"learning_rate": 0.0006336346513320301, |
|
"loss": 4.1143, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.1107204500415522, |
|
"grad_norm": 0.8222107291221619, |
|
"learning_rate": 0.0006327428261583026, |
|
"loss": 4.1136, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 1.1133840482430906, |
|
"grad_norm": 0.7356205582618713, |
|
"learning_rate": 0.0006318510009845751, |
|
"loss": 4.1187, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 1.1160476464446292, |
|
"grad_norm": 0.7457647919654846, |
|
"learning_rate": 0.0006309609594611949, |
|
"loss": 4.1123, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 1.1187112446461676, |
|
"grad_norm": 0.789622962474823, |
|
"learning_rate": 0.0006300691342874674, |
|
"loss": 4.1175, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 1.1213748428477062, |
|
"grad_norm": 0.8369338512420654, |
|
"learning_rate": 0.0006291773091137398, |
|
"loss": 4.1147, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 1.1240384410492446, |
|
"grad_norm": 0.8210717439651489, |
|
"learning_rate": 0.0006282854839400123, |
|
"loss": 4.1142, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 1.1267020392507832, |
|
"grad_norm": 0.7775838375091553, |
|
"learning_rate": 0.0006273954424166322, |
|
"loss": 4.1203, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 1.1293656374523215, |
|
"grad_norm": 0.7949962019920349, |
|
"learning_rate": 0.0006265036172429046, |
|
"loss": 4.1139, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 1.1320292356538602, |
|
"grad_norm": 0.7534223794937134, |
|
"learning_rate": 0.000625611792069177, |
|
"loss": 4.1177, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 1.1346928338553985, |
|
"grad_norm": 0.8075549602508545, |
|
"learning_rate": 0.0006247199668954495, |
|
"loss": 4.1147, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 1.1373564320569371, |
|
"grad_norm": 0.7999294400215149, |
|
"learning_rate": 0.0006238299253720696, |
|
"loss": 4.116, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 1.1400200302584755, |
|
"grad_norm": 0.7690563797950745, |
|
"learning_rate": 0.000622938100198342, |
|
"loss": 4.1108, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 1.1426836284600141, |
|
"grad_norm": 0.7599471211433411, |
|
"learning_rate": 0.0006220462750246144, |
|
"loss": 4.1155, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 1.1453472266615525, |
|
"grad_norm": 0.7433050274848938, |
|
"learning_rate": 0.0006211544498508869, |
|
"loss": 4.1172, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 1.1480108248630911, |
|
"grad_norm": 0.781114935874939, |
|
"learning_rate": 0.0006202644083275067, |
|
"loss": 4.1084, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 1.1506744230646295, |
|
"grad_norm": 0.7194410562515259, |
|
"learning_rate": 0.0006193725831537791, |
|
"loss": 4.1127, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 1.1533380212661681, |
|
"grad_norm": 0.8126916289329529, |
|
"learning_rate": 0.0006184807579800517, |
|
"loss": 4.1126, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 1.1560016194677065, |
|
"grad_norm": 0.8229861855506897, |
|
"learning_rate": 0.0006175889328063241, |
|
"loss": 4.1121, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 1.158665217669245, |
|
"grad_norm": 0.8246269226074219, |
|
"learning_rate": 0.000616698891282944, |
|
"loss": 4.1092, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 1.1613288158707835, |
|
"grad_norm": 0.8146107196807861, |
|
"learning_rate": 0.0006158070661092164, |
|
"loss": 4.1091, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 1.1639924140723221, |
|
"grad_norm": 0.7878261208534241, |
|
"learning_rate": 0.0006149152409354888, |
|
"loss": 4.1161, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 1.1666560122738605, |
|
"grad_norm": 0.7780360579490662, |
|
"learning_rate": 0.0006140234157617614, |
|
"loss": 4.1079, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 1.169319610475399, |
|
"grad_norm": 0.7969585657119751, |
|
"learning_rate": 0.0006131333742383814, |
|
"loss": 4.1134, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 1.1719832086769375, |
|
"grad_norm": 0.8402618765830994, |
|
"learning_rate": 0.0006122415490646538, |
|
"loss": 4.1143, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 1.1746468068784761, |
|
"grad_norm": 0.7946035861968994, |
|
"learning_rate": 0.0006113497238909262, |
|
"loss": 4.114, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 1.1773104050800145, |
|
"grad_norm": 0.7864482402801514, |
|
"learning_rate": 0.0006104578987171987, |
|
"loss": 4.1126, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 1.1799740032815529, |
|
"grad_norm": 0.8313577771186829, |
|
"learning_rate": 0.0006095678571938186, |
|
"loss": 4.106, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 1.1826376014830915, |
|
"grad_norm": 0.8574484586715698, |
|
"learning_rate": 0.0006086760320200911, |
|
"loss": 4.1085, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 1.1853011996846299, |
|
"grad_norm": 0.7599306702613831, |
|
"learning_rate": 0.0006077842068463635, |
|
"loss": 4.1071, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 1.1879647978861685, |
|
"grad_norm": 0.7732433676719666, |
|
"learning_rate": 0.0006068923816726359, |
|
"loss": 4.1185, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 1.1906283960877069, |
|
"grad_norm": 0.8210047483444214, |
|
"learning_rate": 0.0006060023401492559, |
|
"loss": 4.1099, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 1.1932919942892455, |
|
"grad_norm": 0.8054102063179016, |
|
"learning_rate": 0.0006051105149755284, |
|
"loss": 4.1181, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 1.1959555924907839, |
|
"grad_norm": 0.7870852947235107, |
|
"learning_rate": 0.0006042186898018009, |
|
"loss": 4.1016, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 1.1986191906923225, |
|
"grad_norm": 0.8508167266845703, |
|
"learning_rate": 0.0006033268646280733, |
|
"loss": 4.1202, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 1.2012827888938609, |
|
"grad_norm": 0.7744969129562378, |
|
"learning_rate": 0.0006024368231046932, |
|
"loss": 4.1094, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 1.2039463870953995, |
|
"grad_norm": 0.7836142778396606, |
|
"learning_rate": 0.0006015449979309656, |
|
"loss": 4.1079, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 1.2066099852969379, |
|
"grad_norm": 0.7741486430168152, |
|
"learning_rate": 0.000600653172757238, |
|
"loss": 4.1088, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 1.2092735834984765, |
|
"grad_norm": 0.77290940284729, |
|
"learning_rate": 0.0005997613475835106, |
|
"loss": 4.1025, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 1.2119371817000149, |
|
"grad_norm": 0.8240610361099243, |
|
"learning_rate": 0.0005988713060601304, |
|
"loss": 4.104, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 1.2146007799015535, |
|
"grad_norm": 0.7438703775405884, |
|
"learning_rate": 0.0005979794808864029, |
|
"loss": 4.1084, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 1.2172643781030918, |
|
"grad_norm": 0.837753415107727, |
|
"learning_rate": 0.0005970876557126753, |
|
"loss": 4.1017, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 1.2199279763046305, |
|
"grad_norm": 0.7918710112571716, |
|
"learning_rate": 0.0005961958305389477, |
|
"loss": 4.1094, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 1.2225915745061688, |
|
"grad_norm": 0.8078004121780396, |
|
"learning_rate": 0.0005953040053652203, |
|
"loss": 4.1043, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 1.2252551727077075, |
|
"grad_norm": 0.8458930253982544, |
|
"learning_rate": 0.0005944139638418402, |
|
"loss": 4.1069, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 1.2279187709092458, |
|
"grad_norm": 0.7811508178710938, |
|
"learning_rate": 0.0005935221386681127, |
|
"loss": 4.1071, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 1.2305823691107844, |
|
"grad_norm": 0.8446598649024963, |
|
"learning_rate": 0.0005926303134943851, |
|
"loss": 4.1063, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 1.2332459673123228, |
|
"grad_norm": 0.8074429035186768, |
|
"learning_rate": 0.0005917384883206575, |
|
"loss": 4.109, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 1.2359095655138614, |
|
"grad_norm": 0.8163787722587585, |
|
"learning_rate": 0.0005908484467972775, |
|
"loss": 4.1028, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.2385731637153998, |
|
"grad_norm": 0.7774120569229126, |
|
"learning_rate": 0.0005899566216235499, |
|
"loss": 4.1084, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 1.2412367619169384, |
|
"grad_norm": 0.7910379767417908, |
|
"learning_rate": 0.0005890647964498224, |
|
"loss": 4.1002, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 1.2439003601184768, |
|
"grad_norm": 0.8428027629852295, |
|
"learning_rate": 0.0005881729712760948, |
|
"loss": 4.1127, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 1.2465639583200154, |
|
"grad_norm": 0.7961114645004272, |
|
"learning_rate": 0.0005872829297527147, |
|
"loss": 4.1046, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 1.2492275565215538, |
|
"grad_norm": 0.8194419145584106, |
|
"learning_rate": 0.0005863911045789872, |
|
"loss": 4.1088, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 1.2518911547230922, |
|
"grad_norm": 0.783875584602356, |
|
"learning_rate": 0.0005854992794052596, |
|
"loss": 4.1086, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 1.2545547529246308, |
|
"grad_norm": 0.7610777020454407, |
|
"learning_rate": 0.0005846074542315321, |
|
"loss": 4.1024, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 1.2572183511261694, |
|
"grad_norm": 0.7696565389633179, |
|
"learning_rate": 0.000583717412708152, |
|
"loss": 4.1016, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 1.2598819493277078, |
|
"grad_norm": 0.82817542552948, |
|
"learning_rate": 0.0005828255875344245, |
|
"loss": 4.0958, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 1.2625455475292462, |
|
"grad_norm": 0.8974746465682983, |
|
"learning_rate": 0.0005819337623606969, |
|
"loss": 4.1077, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 1.2652091457307848, |
|
"grad_norm": 0.7882625460624695, |
|
"learning_rate": 0.0005810419371869694, |
|
"loss": 4.1027, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 1.2678727439323234, |
|
"grad_norm": 0.7710665464401245, |
|
"learning_rate": 0.0005801518956635893, |
|
"loss": 4.1071, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 1.2705363421338618, |
|
"grad_norm": 0.8462359309196472, |
|
"learning_rate": 0.0005792600704898617, |
|
"loss": 4.0993, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 1.2731999403354002, |
|
"grad_norm": 0.7785073518753052, |
|
"learning_rate": 0.0005783682453161342, |
|
"loss": 4.1051, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 1.2758635385369388, |
|
"grad_norm": 0.7724746465682983, |
|
"learning_rate": 0.0005774764201424066, |
|
"loss": 4.1082, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 1.2785271367384774, |
|
"grad_norm": 0.8276979923248291, |
|
"learning_rate": 0.0005765863786190266, |
|
"loss": 4.095, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.2811907349400158, |
|
"grad_norm": 0.7959253191947937, |
|
"learning_rate": 0.000575694553445299, |
|
"loss": 4.1026, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 1.2838543331415542, |
|
"grad_norm": 0.806239664554596, |
|
"learning_rate": 0.0005748027282715714, |
|
"loss": 4.1019, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 1.2865179313430928, |
|
"grad_norm": 0.9089943170547485, |
|
"learning_rate": 0.0005739109030978439, |
|
"loss": 4.0955, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 1.2891815295446314, |
|
"grad_norm": 0.8239426612854004, |
|
"learning_rate": 0.0005730208615744638, |
|
"loss": 4.1033, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 1.2918451277461698, |
|
"grad_norm": 0.8066053986549377, |
|
"learning_rate": 0.0005721290364007364, |
|
"loss": 4.1068, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 1.2945087259477082, |
|
"grad_norm": 0.7600257396697998, |
|
"learning_rate": 0.0005712372112270088, |
|
"loss": 4.1006, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 1.2971723241492468, |
|
"grad_norm": 0.7940685749053955, |
|
"learning_rate": 0.0005703471697036287, |
|
"loss": 4.1004, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 1.2998359223507852, |
|
"grad_norm": 0.7310413718223572, |
|
"learning_rate": 0.0005694553445299011, |
|
"loss": 4.1028, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 1.3024995205523238, |
|
"grad_norm": 0.8132951855659485, |
|
"learning_rate": 0.0005685635193561735, |
|
"loss": 4.1104, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 1.3051631187538622, |
|
"grad_norm": 0.8280708193778992, |
|
"learning_rate": 0.0005676716941824461, |
|
"loss": 4.1029, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 1.3078267169554008, |
|
"grad_norm": 0.7521162629127502, |
|
"learning_rate": 0.0005667798690087185, |
|
"loss": 4.0991, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 1.3104903151569391, |
|
"grad_norm": 0.8909037709236145, |
|
"learning_rate": 0.0005658880438349909, |
|
"loss": 4.1005, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 1.3131539133584778, |
|
"grad_norm": 0.8605440855026245, |
|
"learning_rate": 0.0005649962186612634, |
|
"loss": 4.0999, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 1.3158175115600161, |
|
"grad_norm": 0.9294172525405884, |
|
"learning_rate": 0.0005641043934875358, |
|
"loss": 4.0978, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 1.3184811097615547, |
|
"grad_norm": 0.8271783590316772, |
|
"learning_rate": 0.0005632143519641559, |
|
"loss": 4.1005, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 1.3211447079630931, |
|
"grad_norm": 0.7716344594955444, |
|
"learning_rate": 0.0005623225267904283, |
|
"loss": 4.0972, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.3238083061646317, |
|
"grad_norm": 0.7663143873214722, |
|
"learning_rate": 0.0005614307016167007, |
|
"loss": 4.1068, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 1.3264719043661701, |
|
"grad_norm": 0.8361650705337524, |
|
"learning_rate": 0.0005605388764429732, |
|
"loss": 4.0955, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 1.3291355025677087, |
|
"grad_norm": 0.8032039403915405, |
|
"learning_rate": 0.000559648834919593, |
|
"loss": 4.0981, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 1.3317991007692471, |
|
"grad_norm": 0.7755228281021118, |
|
"learning_rate": 0.0005587570097458655, |
|
"loss": 4.0985, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 1.3344626989707857, |
|
"grad_norm": 0.8239076733589172, |
|
"learning_rate": 0.000557865184572138, |
|
"loss": 4.102, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 1.3371262971723241, |
|
"grad_norm": 0.849665105342865, |
|
"learning_rate": 0.0005569733593984104, |
|
"loss": 4.1022, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 1.3397898953738627, |
|
"grad_norm": 0.7836341857910156, |
|
"learning_rate": 0.0005560833178750303, |
|
"loss": 4.0985, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 1.3424534935754011, |
|
"grad_norm": 0.7993196845054626, |
|
"learning_rate": 0.0005551914927013027, |
|
"loss": 4.0959, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 1.3451170917769395, |
|
"grad_norm": 0.8100605010986328, |
|
"learning_rate": 0.0005542996675275752, |
|
"loss": 4.0938, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 1.347780689978478, |
|
"grad_norm": 0.8267188668251038, |
|
"learning_rate": 0.0005534078423538477, |
|
"loss": 4.0975, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 1.3504442881800167, |
|
"grad_norm": 0.7876518964767456, |
|
"learning_rate": 0.0005525178008304677, |
|
"loss": 4.0966, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 1.353107886381555, |
|
"grad_norm": 0.8013073801994324, |
|
"learning_rate": 0.0005516259756567401, |
|
"loss": 4.0993, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 1.3557714845830935, |
|
"grad_norm": 0.7732263207435608, |
|
"learning_rate": 0.0005507341504830125, |
|
"loss": 4.0955, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 1.358435082784632, |
|
"grad_norm": 0.8235819935798645, |
|
"learning_rate": 0.000549842325309285, |
|
"loss": 4.0997, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 1.3610986809861707, |
|
"grad_norm": 0.7818782329559326, |
|
"learning_rate": 0.0005489505001355575, |
|
"loss": 4.1026, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 1.363762279187709, |
|
"grad_norm": 0.8184423446655273, |
|
"learning_rate": 0.0005480604586121774, |
|
"loss": 4.092, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.3664258773892475, |
|
"grad_norm": 0.7807801365852356, |
|
"learning_rate": 0.0005471686334384498, |
|
"loss": 4.0938, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 1.369089475590786, |
|
"grad_norm": 0.8043480515480042, |
|
"learning_rate": 0.0005462768082647222, |
|
"loss": 4.0964, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 1.3717530737923247, |
|
"grad_norm": 0.8113440871238708, |
|
"learning_rate": 0.0005453849830909947, |
|
"loss": 4.092, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 1.374416671993863, |
|
"grad_norm": 0.776531994342804, |
|
"learning_rate": 0.0005444949415676145, |
|
"loss": 4.1043, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 1.3770802701954015, |
|
"grad_norm": 0.9090542197227478, |
|
"learning_rate": 0.0005436031163938871, |
|
"loss": 4.1026, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 1.37974386839694, |
|
"grad_norm": 0.8724551796913147, |
|
"learning_rate": 0.0005427112912201595, |
|
"loss": 4.0983, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 1.3824074665984787, |
|
"grad_norm": 0.7889623045921326, |
|
"learning_rate": 0.0005418194660464319, |
|
"loss": 4.1027, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 1.385071064800017, |
|
"grad_norm": 0.7813825011253357, |
|
"learning_rate": 0.0005409294245230519, |
|
"loss": 4.092, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 1.3877346630015555, |
|
"grad_norm": 0.8187386989593506, |
|
"learning_rate": 0.0005400393829996718, |
|
"loss": 4.0955, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 1.390398261203094, |
|
"grad_norm": 0.8593798279762268, |
|
"learning_rate": 0.0005391475578259443, |
|
"loss": 4.094, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 1.3930618594046325, |
|
"grad_norm": 0.8074827194213867, |
|
"learning_rate": 0.0005382557326522167, |
|
"loss": 4.095, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 1.395725457606171, |
|
"grad_norm": 0.8229965567588806, |
|
"learning_rate": 0.0005373639074784892, |
|
"loss": 4.0909, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 1.3983890558077094, |
|
"grad_norm": 0.7867224216461182, |
|
"learning_rate": 0.0005364720823047616, |
|
"loss": 4.0934, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 1.401052654009248, |
|
"grad_norm": 0.9083333611488342, |
|
"learning_rate": 0.000535580257131034, |
|
"loss": 4.0982, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 1.4037162522107864, |
|
"grad_norm": 0.8077040314674377, |
|
"learning_rate": 0.0005346884319573066, |
|
"loss": 4.0949, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 1.406379850412325, |
|
"grad_norm": 0.871181070804596, |
|
"learning_rate": 0.000533796606783579, |
|
"loss": 4.096, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.4090434486138634, |
|
"grad_norm": 0.8004094958305359, |
|
"learning_rate": 0.0005329065652601989, |
|
"loss": 4.0969, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 1.411707046815402, |
|
"grad_norm": 0.8624884486198425, |
|
"learning_rate": 0.0005320147400864713, |
|
"loss": 4.0964, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 1.4143706450169404, |
|
"grad_norm": 0.7955045104026794, |
|
"learning_rate": 0.0005311229149127437, |
|
"loss": 4.0944, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 1.417034243218479, |
|
"grad_norm": 0.7732199430465698, |
|
"learning_rate": 0.0005302310897390163, |
|
"loss": 4.0906, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 1.4196978414200174, |
|
"grad_norm": 0.8164415955543518, |
|
"learning_rate": 0.0005293410482156362, |
|
"loss": 4.0887, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 1.422361439621556, |
|
"grad_norm": 0.8961130380630493, |
|
"learning_rate": 0.0005284492230419087, |
|
"loss": 4.1001, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 1.4250250378230944, |
|
"grad_norm": 0.8140637874603271, |
|
"learning_rate": 0.0005275573978681811, |
|
"loss": 4.0898, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 1.427688636024633, |
|
"grad_norm": 0.8230092525482178, |
|
"learning_rate": 0.0005266655726944535, |
|
"loss": 4.0994, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 1.4303522342261714, |
|
"grad_norm": 0.800144612789154, |
|
"learning_rate": 0.0005257755311710735, |
|
"loss": 4.0914, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 1.43301583242771, |
|
"grad_norm": 0.8252524733543396, |
|
"learning_rate": 0.000524883705997346, |
|
"loss": 4.0944, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 1.4356794306292484, |
|
"grad_norm": 0.7676013708114624, |
|
"learning_rate": 0.0005239918808236184, |
|
"loss": 4.092, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 1.4383430288307868, |
|
"grad_norm": 0.8423929810523987, |
|
"learning_rate": 0.0005231000556498908, |
|
"loss": 4.0871, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.4410066270323254, |
|
"grad_norm": 0.7545808553695679, |
|
"learning_rate": 0.0005222100141265108, |
|
"loss": 4.0923, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 1.443670225233864, |
|
"grad_norm": 0.820381224155426, |
|
"learning_rate": 0.0005213181889527832, |
|
"loss": 4.0827, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 1.4463338234354024, |
|
"grad_norm": 0.8105764985084534, |
|
"learning_rate": 0.0005204263637790558, |
|
"loss": 4.0943, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 1.4489974216369408, |
|
"grad_norm": 0.7974145412445068, |
|
"learning_rate": 0.0005195345386053282, |
|
"loss": 4.0852, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.4516610198384794, |
|
"grad_norm": 0.7740100026130676, |
|
"learning_rate": 0.000518644497081948, |
|
"loss": 4.0943, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 1.454324618040018, |
|
"grad_norm": 0.8262558579444885, |
|
"learning_rate": 0.0005177526719082205, |
|
"loss": 4.0889, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 1.4569882162415564, |
|
"grad_norm": 0.8640192747116089, |
|
"learning_rate": 0.0005168608467344929, |
|
"loss": 4.0844, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 1.4596518144430948, |
|
"grad_norm": 0.8319873809814453, |
|
"learning_rate": 0.0005159690215607655, |
|
"loss": 4.0936, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 1.4623154126446334, |
|
"grad_norm": 0.876741886138916, |
|
"learning_rate": 0.0005150789800373853, |
|
"loss": 4.0855, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 1.464979010846172, |
|
"grad_norm": 0.8290923833847046, |
|
"learning_rate": 0.0005141871548636577, |
|
"loss": 4.0949, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.4676426090477104, |
|
"grad_norm": 0.7827680110931396, |
|
"learning_rate": 0.0005132953296899302, |
|
"loss": 4.0821, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 1.4703062072492488, |
|
"grad_norm": 0.8360860347747803, |
|
"learning_rate": 0.0005124035045162026, |
|
"loss": 4.0921, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.4729698054507874, |
|
"grad_norm": 0.7869288325309753, |
|
"learning_rate": 0.0005115134629928227, |
|
"loss": 4.0795, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 1.475633403652326, |
|
"grad_norm": 0.8743867874145508, |
|
"learning_rate": 0.0005106216378190951, |
|
"loss": 4.0867, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.4782970018538644, |
|
"grad_norm": 0.8454434871673584, |
|
"learning_rate": 0.0005097298126453676, |
|
"loss": 4.083, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 1.4809606000554028, |
|
"grad_norm": 0.8108798265457153, |
|
"learning_rate": 0.00050883798747164, |
|
"loss": 4.086, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.4836241982569414, |
|
"grad_norm": 0.8548552989959717, |
|
"learning_rate": 0.0005079479459482598, |
|
"loss": 4.0853, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 1.4862877964584797, |
|
"grad_norm": 0.8752163052558899, |
|
"learning_rate": 0.0005070561207745324, |
|
"loss": 4.0891, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.4889513946600184, |
|
"grad_norm": 0.9157357811927795, |
|
"learning_rate": 0.0005061642956008048, |
|
"loss": 4.0872, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 1.4916149928615567, |
|
"grad_norm": 0.8573022484779358, |
|
"learning_rate": 0.0005052724704270773, |
|
"loss": 4.0854, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.4942785910630954, |
|
"grad_norm": 0.8331462740898132, |
|
"learning_rate": 0.0005043806452533497, |
|
"loss": 4.0887, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 1.4969421892646337, |
|
"grad_norm": 0.7753505110740662, |
|
"learning_rate": 0.0005034888200796221, |
|
"loss": 4.0901, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.4996057874661723, |
|
"grad_norm": 0.781449556350708, |
|
"learning_rate": 0.0005025969949058947, |
|
"loss": 4.0844, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 1.5022693856677107, |
|
"grad_norm": 0.9343318343162537, |
|
"learning_rate": 0.0005017051697321671, |
|
"loss": 4.0906, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.5049329838692493, |
|
"grad_norm": 0.8867080807685852, |
|
"learning_rate": 0.000500815128208787, |
|
"loss": 4.08, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 1.507596582070788, |
|
"grad_norm": 0.8553933501243591, |
|
"learning_rate": 0.0004999233030350595, |
|
"loss": 4.0898, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.5102601802723261, |
|
"grad_norm": 0.849162757396698, |
|
"learning_rate": 0.0004990314778613319, |
|
"loss": 4.0894, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 1.5129237784738647, |
|
"grad_norm": 0.787109375, |
|
"learning_rate": 0.0004981396526876044, |
|
"loss": 4.085, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.5155873766754033, |
|
"grad_norm": 0.8072954416275024, |
|
"learning_rate": 0.0004972496111642243, |
|
"loss": 4.0842, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 1.5182509748769417, |
|
"grad_norm": 0.8034284114837646, |
|
"learning_rate": 0.0004963595696408442, |
|
"loss": 4.0866, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.52091457307848, |
|
"grad_norm": 0.8554684519767761, |
|
"learning_rate": 0.0004954677444671166, |
|
"loss": 4.0851, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 1.5235781712800187, |
|
"grad_norm": 0.8422802686691284, |
|
"learning_rate": 0.000494575919293389, |
|
"loss": 4.0869, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.5262417694815573, |
|
"grad_norm": 0.7712003588676453, |
|
"learning_rate": 0.0004936840941196615, |
|
"loss": 4.0808, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 1.5289053676830957, |
|
"grad_norm": 0.8626993894577026, |
|
"learning_rate": 0.000492792268945934, |
|
"loss": 4.0805, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.531568965884634, |
|
"grad_norm": 0.8277269601821899, |
|
"learning_rate": 0.0004919022274225539, |
|
"loss": 4.0906, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 1.5342325640861727, |
|
"grad_norm": 0.8013060688972473, |
|
"learning_rate": 0.0004910104022488263, |
|
"loss": 4.0836, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.5368961622877113, |
|
"grad_norm": 0.7702099084854126, |
|
"learning_rate": 0.0004901185770750989, |
|
"loss": 4.0777, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 1.5395597604892497, |
|
"grad_norm": 0.8085469603538513, |
|
"learning_rate": 0.0004892267519013713, |
|
"loss": 4.0898, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.542223358690788, |
|
"grad_norm": 0.7977801561355591, |
|
"learning_rate": 0.0004883349267276437, |
|
"loss": 4.0955, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 1.5448869568923267, |
|
"grad_norm": 0.8373309969902039, |
|
"learning_rate": 0.0004874431015539162, |
|
"loss": 4.0783, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.5475505550938653, |
|
"grad_norm": 0.7764778733253479, |
|
"learning_rate": 0.0004865530600305361, |
|
"loss": 4.0861, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 1.5502141532954037, |
|
"grad_norm": 0.8451995849609375, |
|
"learning_rate": 0.00048566123485680856, |
|
"loss": 4.0817, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.552877751496942, |
|
"grad_norm": 0.8463019728660583, |
|
"learning_rate": 0.00048476940968308105, |
|
"loss": 4.0822, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 1.5555413496984807, |
|
"grad_norm": 0.8065968155860901, |
|
"learning_rate": 0.0004838775845093535, |
|
"loss": 4.089, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.5582049479000193, |
|
"grad_norm": 0.8490435481071472, |
|
"learning_rate": 0.00048298754298597334, |
|
"loss": 4.0765, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 1.5608685461015577, |
|
"grad_norm": 0.8057785630226135, |
|
"learning_rate": 0.0004820957178122458, |
|
"loss": 4.0809, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.563532144303096, |
|
"grad_norm": 0.9338017702102661, |
|
"learning_rate": 0.00048120389263851826, |
|
"loss": 4.0787, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 1.5661957425046347, |
|
"grad_norm": 0.9003413915634155, |
|
"learning_rate": 0.00048031206746479074, |
|
"loss": 4.0756, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.5688593407061733, |
|
"grad_norm": 0.779014527797699, |
|
"learning_rate": 0.00047942024229106323, |
|
"loss": 4.0832, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 1.5715229389077117, |
|
"grad_norm": 0.8321064114570618, |
|
"learning_rate": 0.0004785302007676831, |
|
"loss": 4.0885, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.57418653710925, |
|
"grad_norm": 0.8152427077293396, |
|
"learning_rate": 0.0004776383755939556, |
|
"loss": 4.0847, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 1.5768501353107887, |
|
"grad_norm": 0.8888664245605469, |
|
"learning_rate": 0.000476746550420228, |
|
"loss": 4.0777, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.5795137335123273, |
|
"grad_norm": 0.8546236157417297, |
|
"learning_rate": 0.0004758547252465005, |
|
"loss": 4.0898, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 1.5821773317138657, |
|
"grad_norm": 0.7983977794647217, |
|
"learning_rate": 0.00047496290007277293, |
|
"loss": 4.0869, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.584840929915404, |
|
"grad_norm": 0.9709325432777405, |
|
"learning_rate": 0.00047407107489904536, |
|
"loss": 4.0864, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 1.5875045281169426, |
|
"grad_norm": 0.8570044040679932, |
|
"learning_rate": 0.00047317924972531785, |
|
"loss": 4.0886, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.5901681263184813, |
|
"grad_norm": 0.8361437320709229, |
|
"learning_rate": 0.00047228920820193776, |
|
"loss": 4.0794, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 1.5928317245200196, |
|
"grad_norm": 0.8911067247390747, |
|
"learning_rate": 0.00047139738302821025, |
|
"loss": 4.0836, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.595495322721558, |
|
"grad_norm": 0.8150638341903687, |
|
"learning_rate": 0.0004705055578544827, |
|
"loss": 4.0806, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 1.5981589209230966, |
|
"grad_norm": 0.8484770059585571, |
|
"learning_rate": 0.0004696137326807551, |
|
"loss": 4.0796, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.6008225191246352, |
|
"grad_norm": 0.8199454545974731, |
|
"learning_rate": 0.0004687219075070276, |
|
"loss": 4.0789, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 1.6034861173261736, |
|
"grad_norm": 0.8845428824424744, |
|
"learning_rate": 0.0004678318659836475, |
|
"loss": 4.073, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.606149715527712, |
|
"grad_norm": 0.8244544267654419, |
|
"learning_rate": 0.00046694004080991995, |
|
"loss": 4.0753, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 1.6088133137292506, |
|
"grad_norm": 0.8862385153770447, |
|
"learning_rate": 0.00046604821563619244, |
|
"loss": 4.0784, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.611476911930789, |
|
"grad_norm": 0.8142257928848267, |
|
"learning_rate": 0.00046515639046246487, |
|
"loss": 4.0806, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 1.6141405101323274, |
|
"grad_norm": 0.850913941860199, |
|
"learning_rate": 0.00046426456528873735, |
|
"loss": 4.0821, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.616804108333866, |
|
"grad_norm": 0.7964518666267395, |
|
"learning_rate": 0.0004633727401150098, |
|
"loss": 4.0802, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 1.6194677065354046, |
|
"grad_norm": 0.8475667834281921, |
|
"learning_rate": 0.0004624809149412823, |
|
"loss": 4.0825, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.622131304736943, |
|
"grad_norm": 0.8427020311355591, |
|
"learning_rate": 0.0004615890897675547, |
|
"loss": 4.0746, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 1.6247949029384814, |
|
"grad_norm": 0.8353922367095947, |
|
"learning_rate": 0.0004606990482441746, |
|
"loss": 4.0785, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.62745850114002, |
|
"grad_norm": 0.8765130043029785, |
|
"learning_rate": 0.0004598072230704471, |
|
"loss": 4.0827, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 1.6301220993415586, |
|
"grad_norm": 0.7863726615905762, |
|
"learning_rate": 0.00045891718154706697, |
|
"loss": 4.0782, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.632785697543097, |
|
"grad_norm": 0.7965743541717529, |
|
"learning_rate": 0.0004580253563733394, |
|
"loss": 4.0751, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 1.6354492957446354, |
|
"grad_norm": 0.7712193131446838, |
|
"learning_rate": 0.0004571335311996119, |
|
"loss": 4.0775, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.638112893946174, |
|
"grad_norm": 0.8547102212905884, |
|
"learning_rate": 0.0004562417060258843, |
|
"loss": 4.0687, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 1.6407764921477126, |
|
"grad_norm": 0.794670581817627, |
|
"learning_rate": 0.00045535166450250423, |
|
"loss": 4.0809, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.643440090349251, |
|
"grad_norm": 0.8939191102981567, |
|
"learning_rate": 0.0004544598393287767, |
|
"loss": 4.0755, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 1.6461036885507894, |
|
"grad_norm": 0.830675482749939, |
|
"learning_rate": 0.00045356801415504915, |
|
"loss": 4.0849, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.648767286752328, |
|
"grad_norm": 0.8708091378211975, |
|
"learning_rate": 0.00045267618898132164, |
|
"loss": 4.0664, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 1.6514308849538666, |
|
"grad_norm": 0.7933617830276489, |
|
"learning_rate": 0.00045178436380759407, |
|
"loss": 4.0802, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.654094483155405, |
|
"grad_norm": 0.8032438158988953, |
|
"learning_rate": 0.000450894322284214, |
|
"loss": 4.0783, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 1.6567580813569434, |
|
"grad_norm": 0.8478823304176331, |
|
"learning_rate": 0.0004500024971104865, |
|
"loss": 4.0831, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.659421679558482, |
|
"grad_norm": 0.8288933634757996, |
|
"learning_rate": 0.0004491106719367589, |
|
"loss": 4.0801, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 1.6620852777600206, |
|
"grad_norm": 0.8561184406280518, |
|
"learning_rate": 0.0004482188467630314, |
|
"loss": 4.0788, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.664748875961559, |
|
"grad_norm": 0.9229483008384705, |
|
"learning_rate": 0.0004473270215893038, |
|
"loss": 4.0813, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 1.6674124741630973, |
|
"grad_norm": 0.8853760361671448, |
|
"learning_rate": 0.0004464369800659237, |
|
"loss": 4.0728, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.670076072364636, |
|
"grad_norm": 0.8472786545753479, |
|
"learning_rate": 0.0004455451548921962, |
|
"loss": 4.076, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 1.6727396705661746, |
|
"grad_norm": 0.834415853023529, |
|
"learning_rate": 0.0004446533297184686, |
|
"loss": 4.0776, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.675403268767713, |
|
"grad_norm": 0.8151890635490417, |
|
"learning_rate": 0.0004437615045447411, |
|
"loss": 4.0712, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 1.6780668669692513, |
|
"grad_norm": 0.8340436816215515, |
|
"learning_rate": 0.0004428696793710135, |
|
"loss": 4.0773, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.68073046517079, |
|
"grad_norm": 0.7873215079307556, |
|
"learning_rate": 0.00044197963784763344, |
|
"loss": 4.0796, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 1.6833940633723286, |
|
"grad_norm": 0.7956321835517883, |
|
"learning_rate": 0.0004410878126739059, |
|
"loss": 4.0738, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.686057661573867, |
|
"grad_norm": 0.8906182646751404, |
|
"learning_rate": 0.00044019598750017836, |
|
"loss": 4.0776, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 1.6887212597754053, |
|
"grad_norm": 0.8356565833091736, |
|
"learning_rate": 0.0004393041623264508, |
|
"loss": 4.0686, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.691384857976944, |
|
"grad_norm": 0.8309632539749146, |
|
"learning_rate": 0.0004384123371527233, |
|
"loss": 4.0786, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 1.6940484561784825, |
|
"grad_norm": 0.8648601770401001, |
|
"learning_rate": 0.0004375205119789957, |
|
"loss": 4.076, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.696712054380021, |
|
"grad_norm": 0.799662172794342, |
|
"learning_rate": 0.0004366304704556157, |
|
"loss": 4.0769, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 1.6993756525815593, |
|
"grad_norm": 0.884032130241394, |
|
"learning_rate": 0.0004357386452818881, |
|
"loss": 4.0742, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.702039250783098, |
|
"grad_norm": 0.8695617914199829, |
|
"learning_rate": 0.00043484682010816054, |
|
"loss": 4.0721, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 1.7047028489846365, |
|
"grad_norm": 0.801929235458374, |
|
"learning_rate": 0.00043395499493443303, |
|
"loss": 4.0722, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.7073664471861747, |
|
"grad_norm": 0.7920409440994263, |
|
"learning_rate": 0.00043306495341105295, |
|
"loss": 4.076, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 1.7100300453877133, |
|
"grad_norm": 0.821932852268219, |
|
"learning_rate": 0.00043217312823732543, |
|
"loss": 4.076, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.712693643589252, |
|
"grad_norm": 0.8553212881088257, |
|
"learning_rate": 0.00043128130306359786, |
|
"loss": 4.0748, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 1.7153572417907903, |
|
"grad_norm": 0.911418080329895, |
|
"learning_rate": 0.0004303894778898703, |
|
"loss": 4.0794, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.7180208399923287, |
|
"grad_norm": 0.8463834524154663, |
|
"learning_rate": 0.0004294976527161428, |
|
"loss": 4.0676, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 1.7206844381938673, |
|
"grad_norm": 0.8559086322784424, |
|
"learning_rate": 0.0004286058275424152, |
|
"loss": 4.0771, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.723348036395406, |
|
"grad_norm": 0.8981167674064636, |
|
"learning_rate": 0.0004277140023686877, |
|
"loss": 4.0688, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 1.7260116345969443, |
|
"grad_norm": 0.8651977181434631, |
|
"learning_rate": 0.00042682396084530756, |
|
"loss": 4.0728, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.7286752327984827, |
|
"grad_norm": 0.9066988229751587, |
|
"learning_rate": 0.00042593213567158, |
|
"loss": 4.072, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 1.7313388310000213, |
|
"grad_norm": 0.8543113470077515, |
|
"learning_rate": 0.0004250403104978525, |
|
"loss": 4.0727, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.73400242920156, |
|
"grad_norm": 0.8599368333816528, |
|
"learning_rate": 0.00042414848532412497, |
|
"loss": 4.0665, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 1.7366660274030983, |
|
"grad_norm": 0.8290531039237976, |
|
"learning_rate": 0.00042325666015039746, |
|
"loss": 4.0739, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.7393296256046367, |
|
"grad_norm": 0.8055272102355957, |
|
"learning_rate": 0.0004223666186270173, |
|
"loss": 4.0735, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 1.7419932238061753, |
|
"grad_norm": 0.8045780658721924, |
|
"learning_rate": 0.00042147479345328975, |
|
"loss": 4.071, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.7446568220077139, |
|
"grad_norm": 0.8758577108383179, |
|
"learning_rate": 0.00042058296827956224, |
|
"loss": 4.0735, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 1.7473204202092523, |
|
"grad_norm": 0.8138041496276855, |
|
"learning_rate": 0.00041969114310583467, |
|
"loss": 4.0686, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.7499840184107907, |
|
"grad_norm": 0.8927600979804993, |
|
"learning_rate": 0.0004188011015824546, |
|
"loss": 4.0749, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 1.7526476166123293, |
|
"grad_norm": 0.8370145559310913, |
|
"learning_rate": 0.00041790927640872707, |
|
"loss": 4.0723, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.7553112148138679, |
|
"grad_norm": 0.8793504238128662, |
|
"learning_rate": 0.0004170174512349995, |
|
"loss": 4.0674, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 1.7579748130154063, |
|
"grad_norm": 0.8913201689720154, |
|
"learning_rate": 0.000416125626061272, |
|
"loss": 4.0699, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.7606384112169446, |
|
"grad_norm": 0.8198757767677307, |
|
"learning_rate": 0.0004152338008875444, |
|
"loss": 4.0738, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 1.7633020094184833, |
|
"grad_norm": 0.8716715574264526, |
|
"learning_rate": 0.00041434375936416434, |
|
"loss": 4.0762, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.7659656076200219, |
|
"grad_norm": 0.8413424491882324, |
|
"learning_rate": 0.0004134519341904368, |
|
"loss": 4.0635, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 1.7686292058215602, |
|
"grad_norm": 0.838036060333252, |
|
"learning_rate": 0.00041256010901670926, |
|
"loss": 4.0731, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.7712928040230986, |
|
"grad_norm": 0.8625719547271729, |
|
"learning_rate": 0.00041166828384298174, |
|
"loss": 4.0765, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 1.7739564022246372, |
|
"grad_norm": 0.8333448171615601, |
|
"learning_rate": 0.0004107782423196016, |
|
"loss": 4.0691, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 1.7766200004261758, |
|
"grad_norm": 0.8514916300773621, |
|
"learning_rate": 0.00040988641714587403, |
|
"loss": 4.0682, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 1.7792835986277142, |
|
"grad_norm": 0.8220165371894836, |
|
"learning_rate": 0.0004089945919721465, |
|
"loss": 4.0796, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 1.7819471968292526, |
|
"grad_norm": 0.838065505027771, |
|
"learning_rate": 0.00040810276679841895, |
|
"loss": 4.0672, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 1.7846107950307912, |
|
"grad_norm": 0.8731646537780762, |
|
"learning_rate": 0.00040721272527503887, |
|
"loss": 4.0667, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 1.7872743932323298, |
|
"grad_norm": 0.8466665148735046, |
|
"learning_rate": 0.00040632090010131136, |
|
"loss": 4.0733, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 1.7899379914338682, |
|
"grad_norm": 0.9406811594963074, |
|
"learning_rate": 0.0004054290749275838, |
|
"loss": 4.0708, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.7926015896354066, |
|
"grad_norm": 0.8663309812545776, |
|
"learning_rate": 0.0004045372497538563, |
|
"loss": 4.0688, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 1.7952651878369452, |
|
"grad_norm": 0.8506413698196411, |
|
"learning_rate": 0.0004036454245801287, |
|
"loss": 4.0795, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 1.7979287860384838, |
|
"grad_norm": 0.8088420033454895, |
|
"learning_rate": 0.0004027553830567486, |
|
"loss": 4.0724, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 1.8005923842400222, |
|
"grad_norm": 0.8378006815910339, |
|
"learning_rate": 0.0004018635578830211, |
|
"loss": 4.0668, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 1.8032559824415606, |
|
"grad_norm": 0.8574025630950928, |
|
"learning_rate": 0.00040097173270929354, |
|
"loss": 4.0678, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 1.8059195806430992, |
|
"grad_norm": 0.8278779983520508, |
|
"learning_rate": 0.00040007990753556603, |
|
"loss": 4.0695, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 1.8085831788446376, |
|
"grad_norm": 0.9120043516159058, |
|
"learning_rate": 0.00039918986601218594, |
|
"loss": 4.0629, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 1.811246777046176, |
|
"grad_norm": 0.822943925857544, |
|
"learning_rate": 0.0003982980408384584, |
|
"loss": 4.0674, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.8139103752477146, |
|
"grad_norm": 0.8420679569244385, |
|
"learning_rate": 0.00039740621566473086, |
|
"loss": 4.0683, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 1.8165739734492532, |
|
"grad_norm": 0.8428717851638794, |
|
"learning_rate": 0.0003965143904910033, |
|
"loss": 4.0672, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 1.8192375716507916, |
|
"grad_norm": 0.8921811580657959, |
|
"learning_rate": 0.0003956225653172757, |
|
"loss": 4.0655, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 1.82190116985233, |
|
"grad_norm": 0.8687016367912292, |
|
"learning_rate": 0.0003947307401435482, |
|
"loss": 4.0712, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 1.8245647680538686, |
|
"grad_norm": 0.8464400172233582, |
|
"learning_rate": 0.0003938406986201681, |
|
"loss": 4.0687, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 1.8272283662554072, |
|
"grad_norm": 0.8673765063285828, |
|
"learning_rate": 0.00039294887344644056, |
|
"loss": 4.0628, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 1.8298919644569456, |
|
"grad_norm": 0.9040893316268921, |
|
"learning_rate": 0.000392057048272713, |
|
"loss": 4.0633, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 1.832555562658484, |
|
"grad_norm": 0.8810034394264221, |
|
"learning_rate": 0.0003911652230989854, |
|
"loss": 4.0637, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.8352191608600226, |
|
"grad_norm": 0.8870866894721985, |
|
"learning_rate": 0.0003902733979252579, |
|
"loss": 4.0712, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 1.8378827590615612, |
|
"grad_norm": 0.8724194169044495, |
|
"learning_rate": 0.0003893833564018778, |
|
"loss": 4.0761, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 1.8405463572630996, |
|
"grad_norm": 1.1327623128890991, |
|
"learning_rate": 0.00038849153122815026, |
|
"loss": 4.0656, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 1.843209955464638, |
|
"grad_norm": 0.8693875670433044, |
|
"learning_rate": 0.00038759970605442275, |
|
"loss": 4.0692, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 1.8458735536661766, |
|
"grad_norm": 0.9146456122398376, |
|
"learning_rate": 0.0003867078808806952, |
|
"loss": 4.0663, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 1.8485371518677152, |
|
"grad_norm": 0.8626604676246643, |
|
"learning_rate": 0.00038581605570696766, |
|
"loss": 4.0618, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 1.8512007500692536, |
|
"grad_norm": 1.0062013864517212, |
|
"learning_rate": 0.0003849242305332401, |
|
"loss": 4.0678, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 1.853864348270792, |
|
"grad_norm": 0.842510461807251, |
|
"learning_rate": 0.00038403418900986, |
|
"loss": 4.065, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 1.8565279464723305, |
|
"grad_norm": 0.8646286129951477, |
|
"learning_rate": 0.0003831423638361325, |
|
"loss": 4.0629, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 1.8591915446738692, |
|
"grad_norm": 0.8638767004013062, |
|
"learning_rate": 0.00038225053866240493, |
|
"loss": 4.0656, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 1.8618551428754075, |
|
"grad_norm": 0.8934078216552734, |
|
"learning_rate": 0.0003813587134886774, |
|
"loss": 4.0714, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 1.864518741076946, |
|
"grad_norm": 0.8266724944114685, |
|
"learning_rate": 0.00038046688831494985, |
|
"loss": 4.0645, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.8671823392784845, |
|
"grad_norm": 0.8602758646011353, |
|
"learning_rate": 0.00037957684679156977, |
|
"loss": 4.0642, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 1.8698459374800231, |
|
"grad_norm": 0.8677871823310852, |
|
"learning_rate": 0.00037868502161784225, |
|
"loss": 4.0685, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 1.8725095356815615, |
|
"grad_norm": 0.870879590511322, |
|
"learning_rate": 0.0003777931964441147, |
|
"loss": 4.0747, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 1.8751731338831, |
|
"grad_norm": 0.8714147806167603, |
|
"learning_rate": 0.00037690137127038717, |
|
"loss": 4.061, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.8778367320846385, |
|
"grad_norm": 0.8625131249427795, |
|
"learning_rate": 0.00037601132974700703, |
|
"loss": 4.06, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 1.8805003302861771, |
|
"grad_norm": 0.9685169458389282, |
|
"learning_rate": 0.00037511950457327946, |
|
"loss": 4.071, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 1.8831639284877155, |
|
"grad_norm": 0.9301902055740356, |
|
"learning_rate": 0.00037422767939955195, |
|
"loss": 4.0663, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 1.885827526689254, |
|
"grad_norm": 0.8485379219055176, |
|
"learning_rate": 0.0003733358542258244, |
|
"loss": 4.0709, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 1.8884911248907925, |
|
"grad_norm": 0.833081841468811, |
|
"learning_rate": 0.00037244402905209687, |
|
"loss": 4.0596, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 1.8911547230923311, |
|
"grad_norm": 0.8548697829246521, |
|
"learning_rate": 0.0003715539875287168, |
|
"loss": 4.0701, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 1.8938183212938695, |
|
"grad_norm": 0.8501580357551575, |
|
"learning_rate": 0.0003706621623549892, |
|
"loss": 4.0567, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 1.896481919495408, |
|
"grad_norm": 0.8642673492431641, |
|
"learning_rate": 0.0003697703371812617, |
|
"loss": 4.0621, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 1.8991455176969465, |
|
"grad_norm": 0.8171157240867615, |
|
"learning_rate": 0.00036887851200753414, |
|
"loss": 4.0542, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 1.901809115898485, |
|
"grad_norm": 0.873189389705658, |
|
"learning_rate": 0.00036798668683380657, |
|
"loss": 4.06, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.9044727141000233, |
|
"grad_norm": 0.8762955665588379, |
|
"learning_rate": 0.00036709664531042654, |
|
"loss": 4.063, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 1.9071363123015619, |
|
"grad_norm": 0.8550353050231934, |
|
"learning_rate": 0.00036620482013669897, |
|
"loss": 4.0597, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.9097999105031005, |
|
"grad_norm": 0.8709129691123962, |
|
"learning_rate": 0.00036531299496297146, |
|
"loss": 4.0578, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 1.9124635087046389, |
|
"grad_norm": 0.9054292440414429, |
|
"learning_rate": 0.0003644211697892439, |
|
"loss": 4.0589, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 1.9151271069061773, |
|
"grad_norm": 0.8816952705383301, |
|
"learning_rate": 0.0003635293446155163, |
|
"loss": 4.0563, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 1.9177907051077159, |
|
"grad_norm": 0.8601788282394409, |
|
"learning_rate": 0.0003626393030921363, |
|
"loss": 4.057, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.9204543033092545, |
|
"grad_norm": 0.933283269405365, |
|
"learning_rate": 0.0003617474779184087, |
|
"loss": 4.0688, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 1.9231179015107929, |
|
"grad_norm": 0.9095755815505981, |
|
"learning_rate": 0.0003608556527446812, |
|
"loss": 4.0531, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 1.9257814997123313, |
|
"grad_norm": 0.8889813423156738, |
|
"learning_rate": 0.00035996382757095364, |
|
"loss": 4.0638, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 1.9284450979138699, |
|
"grad_norm": 0.8663842678070068, |
|
"learning_rate": 0.0003590737860475735, |
|
"loss": 4.062, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 1.9311086961154085, |
|
"grad_norm": 0.8386211395263672, |
|
"learning_rate": 0.000358181960873846, |
|
"loss": 4.0561, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 1.9337722943169469, |
|
"grad_norm": 0.8373234868049622, |
|
"learning_rate": 0.0003572901357001184, |
|
"loss": 4.0666, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 1.9364358925184852, |
|
"grad_norm": 0.8931795954704285, |
|
"learning_rate": 0.00035639831052639085, |
|
"loss": 4.0554, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 1.9390994907200239, |
|
"grad_norm": 0.8433584570884705, |
|
"learning_rate": 0.0003555082690030108, |
|
"loss": 4.0583, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 1.9417630889215625, |
|
"grad_norm": 0.8926225900650024, |
|
"learning_rate": 0.00035461644382928326, |
|
"loss": 4.0585, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 1.9444266871231008, |
|
"grad_norm": 0.865616500377655, |
|
"learning_rate": 0.00035372461865555574, |
|
"loss": 4.0633, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 1.9470902853246392, |
|
"grad_norm": 0.8474301099777222, |
|
"learning_rate": 0.0003528327934818282, |
|
"loss": 4.0602, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 1.9497538835261778, |
|
"grad_norm": 0.8580695986747742, |
|
"learning_rate": 0.0003519427519584481, |
|
"loss": 4.0544, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 1.9524174817277165, |
|
"grad_norm": 0.8627407550811768, |
|
"learning_rate": 0.0003510509267847206, |
|
"loss": 4.0481, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 1.9550810799292548, |
|
"grad_norm": 0.8328742384910583, |
|
"learning_rate": 0.000350159101610993, |
|
"loss": 4.0581, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 1.9577446781307932, |
|
"grad_norm": 0.8515557050704956, |
|
"learning_rate": 0.0003492672764372655, |
|
"loss": 4.06, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 1.9604082763323318, |
|
"grad_norm": 0.9069979786872864, |
|
"learning_rate": 0.00034837545126353793, |
|
"loss": 4.0602, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.9630718745338704, |
|
"grad_norm": 0.8612348437309265, |
|
"learning_rate": 0.0003474854097401578, |
|
"loss": 4.0565, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 1.9657354727354088, |
|
"grad_norm": 0.9286240339279175, |
|
"learning_rate": 0.0003465935845664303, |
|
"loss": 4.0605, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 1.9683990709369472, |
|
"grad_norm": 0.8804614543914795, |
|
"learning_rate": 0.00034570175939270276, |
|
"loss": 4.0575, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 1.9710626691384858, |
|
"grad_norm": 0.8332533836364746, |
|
"learning_rate": 0.0003448099342189752, |
|
"loss": 4.0587, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.9737262673400244, |
|
"grad_norm": 0.8402279615402222, |
|
"learning_rate": 0.0003439198926955951, |
|
"loss": 4.0569, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 1.9763898655415628, |
|
"grad_norm": 0.8684757351875305, |
|
"learning_rate": 0.00034302806752186754, |
|
"loss": 4.0668, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 1.9790534637431012, |
|
"grad_norm": 0.880416750907898, |
|
"learning_rate": 0.00034213624234814003, |
|
"loss": 4.0612, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 1.9817170619446398, |
|
"grad_norm": 0.9281913042068481, |
|
"learning_rate": 0.00034124441717441246, |
|
"loss": 4.0583, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 1.9843806601461784, |
|
"grad_norm": 0.8712506294250488, |
|
"learning_rate": 0.0003403525920006849, |
|
"loss": 4.0539, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 1.9870442583477168, |
|
"grad_norm": 0.8760526180267334, |
|
"learning_rate": 0.00033946255047730486, |
|
"loss": 4.0502, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 1.9897078565492552, |
|
"grad_norm": 0.8705692291259766, |
|
"learning_rate": 0.0003385707253035773, |
|
"loss": 4.0592, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 1.9923714547507938, |
|
"grad_norm": 0.8519155383110046, |
|
"learning_rate": 0.00033767890012984973, |
|
"loss": 4.0607, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 1.9950350529523324, |
|
"grad_norm": 0.879636287689209, |
|
"learning_rate": 0.0003367870749561222, |
|
"loss": 4.0566, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 1.9976986511538706, |
|
"grad_norm": 0.8572770357131958, |
|
"learning_rate": 0.00033589703343274213, |
|
"loss": 4.0504, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 2.000362249355409, |
|
"grad_norm": 0.8497179746627808, |
|
"learning_rate": 0.0003350052082590146, |
|
"loss": 4.0603, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 2.003025847556948, |
|
"grad_norm": 0.8854038715362549, |
|
"learning_rate": 0.00033411338308528705, |
|
"loss": 4.055, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 2.0056894457584864, |
|
"grad_norm": 0.9853951334953308, |
|
"learning_rate": 0.0003332215579115595, |
|
"loss": 4.057, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 2.0083530439600246, |
|
"grad_norm": 0.9749231934547424, |
|
"learning_rate": 0.0003323315163881794, |
|
"loss": 4.0497, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 2.011016642161563, |
|
"grad_norm": 0.9801936745643616, |
|
"learning_rate": 0.00033143969121445183, |
|
"loss": 4.0609, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 2.013680240363102, |
|
"grad_norm": 0.9140198826789856, |
|
"learning_rate": 0.0003305478660407243, |
|
"loss": 4.0491, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 2.0163438385646404, |
|
"grad_norm": 0.9118580222129822, |
|
"learning_rate": 0.00032965604086699675, |
|
"loss": 4.0484, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 2.0190074367661786, |
|
"grad_norm": 1.0234750509262085, |
|
"learning_rate": 0.0003287642156932692, |
|
"loss": 4.0466, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 2.021671034967717, |
|
"grad_norm": 0.8892688751220703, |
|
"learning_rate": 0.00032787239051954167, |
|
"loss": 4.0569, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 2.0243346331692558, |
|
"grad_norm": 0.860365092754364, |
|
"learning_rate": 0.0003269823489961616, |
|
"loss": 4.0592, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 2.0269982313707944, |
|
"grad_norm": 0.8938810229301453, |
|
"learning_rate": 0.000326090523822434, |
|
"loss": 4.0523, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 2.0296618295723325, |
|
"grad_norm": 0.885435163974762, |
|
"learning_rate": 0.0003251986986487065, |
|
"loss": 4.0574, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 2.032325427773871, |
|
"grad_norm": 0.9123975038528442, |
|
"learning_rate": 0.00032430687347497893, |
|
"loss": 4.046, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 2.0349890259754098, |
|
"grad_norm": 0.9096443057060242, |
|
"learning_rate": 0.0003234168319515989, |
|
"loss": 4.0551, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 2.0376526241769484, |
|
"grad_norm": 0.8680484890937805, |
|
"learning_rate": 0.00032252500677787133, |
|
"loss": 4.0532, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 2.0403162223784865, |
|
"grad_norm": 0.8725469708442688, |
|
"learning_rate": 0.00032163318160414377, |
|
"loss": 4.0563, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 2.042979820580025, |
|
"grad_norm": 0.9647555947303772, |
|
"learning_rate": 0.00032074135643041625, |
|
"loss": 4.0536, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 2.0456434187815637, |
|
"grad_norm": 0.8826559782028198, |
|
"learning_rate": 0.0003198495312566887, |
|
"loss": 4.0527, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 2.0483070169831024, |
|
"grad_norm": 0.9342438578605652, |
|
"learning_rate": 0.0003189594897333086, |
|
"loss": 4.0607, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 2.0509706151846405, |
|
"grad_norm": 0.9360005855560303, |
|
"learning_rate": 0.0003180676645595811, |
|
"loss": 4.0472, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 2.053634213386179, |
|
"grad_norm": 0.9147686958312988, |
|
"learning_rate": 0.0003171758393858535, |
|
"loss": 4.0485, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 2.0562978115877177, |
|
"grad_norm": 0.8479260206222534, |
|
"learning_rate": 0.000316284014212126, |
|
"loss": 4.0504, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 2.058961409789256, |
|
"grad_norm": 0.8525492548942566, |
|
"learning_rate": 0.00031539218903839844, |
|
"loss": 4.0496, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 2.0616250079907945, |
|
"grad_norm": 0.8503657579421997, |
|
"learning_rate": 0.0003145021475150183, |
|
"loss": 4.0571, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 2.064288606192333, |
|
"grad_norm": 0.8873237371444702, |
|
"learning_rate": 0.0003136103223412908, |
|
"loss": 4.0511, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 2.0669522043938717, |
|
"grad_norm": 0.9111925959587097, |
|
"learning_rate": 0.0003127184971675632, |
|
"loss": 4.0477, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 2.06961580259541, |
|
"grad_norm": 0.864146888256073, |
|
"learning_rate": 0.0003118266719938357, |
|
"loss": 4.0526, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 2.0722794007969485, |
|
"grad_norm": 0.8477506637573242, |
|
"learning_rate": 0.00031093484682010814, |
|
"loss": 4.054, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 2.074942998998487, |
|
"grad_norm": 0.9023974537849426, |
|
"learning_rate": 0.00031004480529672805, |
|
"loss": 4.0579, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 2.0776065972000257, |
|
"grad_norm": 0.8909152150154114, |
|
"learning_rate": 0.00030915298012300054, |
|
"loss": 4.0521, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 2.080270195401564, |
|
"grad_norm": 0.9014437794685364, |
|
"learning_rate": 0.00030826115494927297, |
|
"loss": 4.0553, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 2.0829337936031025, |
|
"grad_norm": 0.8972243666648865, |
|
"learning_rate": 0.00030736932977554546, |
|
"loss": 4.0507, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 2.085597391804641, |
|
"grad_norm": 0.8825047016143799, |
|
"learning_rate": 0.0003064792882521654, |
|
"loss": 4.0526, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 2.0882609900061797, |
|
"grad_norm": 0.924751341342926, |
|
"learning_rate": 0.0003055874630784378, |
|
"loss": 4.0521, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 2.090924588207718, |
|
"grad_norm": 0.8999988436698914, |
|
"learning_rate": 0.0003046956379047103, |
|
"loss": 4.0524, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 2.0935881864092565, |
|
"grad_norm": 0.8595131635665894, |
|
"learning_rate": 0.0003038038127309827, |
|
"loss": 4.0519, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 2.096251784610795, |
|
"grad_norm": 0.9281662106513977, |
|
"learning_rate": 0.00030291377120760264, |
|
"loss": 4.0489, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 2.0989153828123337, |
|
"grad_norm": 0.8841512799263, |
|
"learning_rate": 0.0003020219460338751, |
|
"loss": 4.0504, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 2.101578981013872, |
|
"grad_norm": 0.8970746994018555, |
|
"learning_rate": 0.00030113012086014756, |
|
"loss": 4.0453, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 2.1042425792154105, |
|
"grad_norm": 0.946937084197998, |
|
"learning_rate": 0.00030023829568642005, |
|
"loss": 4.0443, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 2.106906177416949, |
|
"grad_norm": 1.066956877708435, |
|
"learning_rate": 0.0002993482541630399, |
|
"loss": 4.0591, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 2.1095697756184877, |
|
"grad_norm": 0.8527683615684509, |
|
"learning_rate": 0.00029845642898931234, |
|
"loss": 4.0498, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 2.112233373820026, |
|
"grad_norm": 0.9100342988967896, |
|
"learning_rate": 0.0002975646038155848, |
|
"loss": 4.0463, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 2.1148969720215645, |
|
"grad_norm": 0.9486255645751953, |
|
"learning_rate": 0.00029667277864185726, |
|
"loss": 4.0541, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 2.117560570223103, |
|
"grad_norm": 0.9460600018501282, |
|
"learning_rate": 0.00029578273711847717, |
|
"loss": 4.0481, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 2.1202241684246417, |
|
"grad_norm": 0.9710919857025146, |
|
"learning_rate": 0.00029489091194474966, |
|
"loss": 4.0486, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 2.12288776662618, |
|
"grad_norm": 0.9194395542144775, |
|
"learning_rate": 0.0002939990867710221, |
|
"loss": 4.0458, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 2.1255513648277184, |
|
"grad_norm": 0.8708109855651855, |
|
"learning_rate": 0.0002931072615972946, |
|
"loss": 4.0465, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 2.128214963029257, |
|
"grad_norm": 0.8814635276794434, |
|
"learning_rate": 0.0002922172200739145, |
|
"loss": 4.0441, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 2.1308785612307957, |
|
"grad_norm": 0.9306267499923706, |
|
"learning_rate": 0.0002913253949001869, |
|
"loss": 4.0417, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 2.133542159432334, |
|
"grad_norm": 0.9086319208145142, |
|
"learning_rate": 0.0002904335697264594, |
|
"loss": 4.0485, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 2.1362057576338724, |
|
"grad_norm": 0.9667945504188538, |
|
"learning_rate": 0.00028954174455273184, |
|
"loss": 4.0387, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 2.138869355835411, |
|
"grad_norm": 0.9225121736526489, |
|
"learning_rate": 0.00028864991937900433, |
|
"loss": 4.0424, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 2.1415329540369497, |
|
"grad_norm": 0.891379714012146, |
|
"learning_rate": 0.0002877598778556242, |
|
"loss": 4.046, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 2.144196552238488, |
|
"grad_norm": 0.9507352709770203, |
|
"learning_rate": 0.0002868680526818966, |
|
"loss": 4.0477, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 2.1468601504400264, |
|
"grad_norm": 0.9602506756782532, |
|
"learning_rate": 0.00028597622750816917, |
|
"loss": 4.0498, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 2.149523748641565, |
|
"grad_norm": 0.9250164031982422, |
|
"learning_rate": 0.0002850844023344416, |
|
"loss": 4.0404, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 2.152187346843103, |
|
"grad_norm": 0.917396605014801, |
|
"learning_rate": 0.00028419436081106146, |
|
"loss": 4.0488, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 2.154850945044642, |
|
"grad_norm": 0.8889843821525574, |
|
"learning_rate": 0.00028330253563733395, |
|
"loss": 4.0412, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 2.1575145432461804, |
|
"grad_norm": 0.9360488653182983, |
|
"learning_rate": 0.0002824107104636064, |
|
"loss": 4.0407, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 2.160178141447719, |
|
"grad_norm": 0.9107580184936523, |
|
"learning_rate": 0.00028151888528987886, |
|
"loss": 4.0439, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 2.162841739649257, |
|
"grad_norm": 0.9053534865379333, |
|
"learning_rate": 0.0002806270601161513, |
|
"loss": 4.042, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 2.165505337850796, |
|
"grad_norm": 0.8875529766082764, |
|
"learning_rate": 0.0002797370185927712, |
|
"loss": 4.0429, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 2.1681689360523344, |
|
"grad_norm": 0.9056974053382874, |
|
"learning_rate": 0.0002788451934190437, |
|
"loss": 4.0461, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 2.170832534253873, |
|
"grad_norm": 0.8870306015014648, |
|
"learning_rate": 0.00027795336824531613, |
|
"loss": 4.0473, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 2.173496132455411, |
|
"grad_norm": 0.9122534394264221, |
|
"learning_rate": 0.0002770615430715886, |
|
"loss": 4.0423, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 2.17615973065695, |
|
"grad_norm": 0.8884118795394897, |
|
"learning_rate": 0.00027617150154820853, |
|
"loss": 4.0455, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 2.1788233288584884, |
|
"grad_norm": 0.8788624405860901, |
|
"learning_rate": 0.00027527967637448096, |
|
"loss": 4.0396, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 2.181486927060027, |
|
"grad_norm": 0.9050582647323608, |
|
"learning_rate": 0.00027438785120075345, |
|
"loss": 4.0364, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 2.184150525261565, |
|
"grad_norm": 0.9116672277450562, |
|
"learning_rate": 0.0002734960260270259, |
|
"loss": 4.0479, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 2.1868141234631038, |
|
"grad_norm": 0.8476006984710693, |
|
"learning_rate": 0.00027260420085329837, |
|
"loss": 4.0407, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 2.1894777216646424, |
|
"grad_norm": 0.9175940752029419, |
|
"learning_rate": 0.00027171415932991823, |
|
"loss": 4.0469, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 2.192141319866181, |
|
"grad_norm": 0.9391987919807434, |
|
"learning_rate": 0.00027082233415619066, |
|
"loss": 4.0477, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 2.194804918067719, |
|
"grad_norm": 0.880539059638977, |
|
"learning_rate": 0.00026993050898246315, |
|
"loss": 4.0483, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 2.1974685162692578, |
|
"grad_norm": 0.9159991145133972, |
|
"learning_rate": 0.0002690386838087356, |
|
"loss": 4.0439, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 2.2001321144707964, |
|
"grad_norm": 0.846324622631073, |
|
"learning_rate": 0.0002681486422853555, |
|
"loss": 4.0491, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 2.202795712672335, |
|
"grad_norm": 0.9291318655014038, |
|
"learning_rate": 0.000267256817111628, |
|
"loss": 4.0433, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 2.205459310873873, |
|
"grad_norm": 0.9299983978271484, |
|
"learning_rate": 0.0002663649919379004, |
|
"loss": 4.039, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 2.2081229090754118, |
|
"grad_norm": 0.9034929275512695, |
|
"learning_rate": 0.0002654731667641729, |
|
"loss": 4.0426, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 2.2107865072769504, |
|
"grad_norm": 0.8487489223480225, |
|
"learning_rate": 0.0002645831252407928, |
|
"loss": 4.0382, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 2.213450105478489, |
|
"grad_norm": 0.9376189112663269, |
|
"learning_rate": 0.00026369130006706525, |
|
"loss": 4.0478, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 2.216113703680027, |
|
"grad_norm": 0.9032031297683716, |
|
"learning_rate": 0.00026279947489333774, |
|
"loss": 4.0446, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 2.2187773018815657, |
|
"grad_norm": 0.873349666595459, |
|
"learning_rate": 0.00026190764971961017, |
|
"loss": 4.0419, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 2.2214409000831044, |
|
"grad_norm": 0.9227972626686096, |
|
"learning_rate": 0.0002610176081962301, |
|
"loss": 4.0415, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 2.224104498284643, |
|
"grad_norm": 0.9360315203666687, |
|
"learning_rate": 0.00026012578302250257, |
|
"loss": 4.0391, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 2.226768096486181, |
|
"grad_norm": 1.0437467098236084, |
|
"learning_rate": 0.000259233957848775, |
|
"loss": 4.0425, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 2.2294316946877197, |
|
"grad_norm": 0.9248673319816589, |
|
"learning_rate": 0.0002583421326750475, |
|
"loss": 4.0413, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 2.2320952928892583, |
|
"grad_norm": 0.8973048329353333, |
|
"learning_rate": 0.00025745209115166735, |
|
"loss": 4.0411, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 2.234758891090797, |
|
"grad_norm": 0.9082027077674866, |
|
"learning_rate": 0.0002565602659779398, |
|
"loss": 4.0424, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 2.237422489292335, |
|
"grad_norm": 0.8980434536933899, |
|
"learning_rate": 0.00025566844080421227, |
|
"loss": 4.0389, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 2.2400860874938737, |
|
"grad_norm": 0.8749063014984131, |
|
"learning_rate": 0.0002547766156304847, |
|
"loss": 4.0283, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 2.2427496856954123, |
|
"grad_norm": 0.9931572675704956, |
|
"learning_rate": 0.0002538865741071046, |
|
"loss": 4.0411, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 2.2454132838969505, |
|
"grad_norm": 1.0000332593917847, |
|
"learning_rate": 0.0002529947489333771, |
|
"loss": 4.0426, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 2.248076882098489, |
|
"grad_norm": 0.8988611698150635, |
|
"learning_rate": 0.00025210292375964954, |
|
"loss": 4.0401, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 2.2507404803000277, |
|
"grad_norm": 0.9371945261955261, |
|
"learning_rate": 0.000251211098585922, |
|
"loss": 4.0367, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 2.2534040785015663, |
|
"grad_norm": 0.9270386099815369, |
|
"learning_rate": 0.00025031927341219446, |
|
"loss": 4.0481, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 2.256067676703105, |
|
"grad_norm": 0.964900553226471, |
|
"learning_rate": 0.00024942923188881437, |
|
"loss": 4.0381, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 2.258731274904643, |
|
"grad_norm": 0.8744553923606873, |
|
"learning_rate": 0.00024853740671508686, |
|
"loss": 4.0375, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 2.2613948731061817, |
|
"grad_norm": 0.9299191236495972, |
|
"learning_rate": 0.0002476455815413593, |
|
"loss": 4.036, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 2.2640584713077203, |
|
"grad_norm": 0.9264661073684692, |
|
"learning_rate": 0.0002467537563676318, |
|
"loss": 4.04, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 2.2667220695092585, |
|
"grad_norm": 0.9486096501350403, |
|
"learning_rate": 0.00024586371484425164, |
|
"loss": 4.0362, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 2.269385667710797, |
|
"grad_norm": 0.9084232449531555, |
|
"learning_rate": 0.0002449718896705241, |
|
"loss": 4.0442, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 2.2720492659123357, |
|
"grad_norm": 0.898169755935669, |
|
"learning_rate": 0.00024408006449679656, |
|
"loss": 4.04, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 2.2747128641138743, |
|
"grad_norm": 0.9344006180763245, |
|
"learning_rate": 0.00024318823932306902, |
|
"loss": 4.0393, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 2.2773764623154125, |
|
"grad_norm": 0.9698314666748047, |
|
"learning_rate": 0.00024229641414934147, |
|
"loss": 4.0293, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 2.280040060516951, |
|
"grad_norm": 0.9501084685325623, |
|
"learning_rate": 0.0002414063726259614, |
|
"loss": 4.038, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 2.2827036587184897, |
|
"grad_norm": 0.8912844061851501, |
|
"learning_rate": 0.00024051454745223385, |
|
"loss": 4.0374, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 2.2853672569200283, |
|
"grad_norm": 0.9317381978034973, |
|
"learning_rate": 0.0002396227222785063, |
|
"loss": 4.0353, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 2.2880308551215665, |
|
"grad_norm": 0.9316912889480591, |
|
"learning_rate": 0.00023873089710477877, |
|
"loss": 4.0383, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 2.290694453323105, |
|
"grad_norm": 0.9433039426803589, |
|
"learning_rate": 0.00023784085558139868, |
|
"loss": 4.0332, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 2.2933580515246437, |
|
"grad_norm": 0.9455925226211548, |
|
"learning_rate": 0.00023694903040767112, |
|
"loss": 4.0326, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 2.2960216497261823, |
|
"grad_norm": 0.9149669408798218, |
|
"learning_rate": 0.00023605720523394358, |
|
"loss": 4.0442, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 2.2986852479277204, |
|
"grad_norm": 0.9723134636878967, |
|
"learning_rate": 0.00023516538006021603, |
|
"loss": 4.0313, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 2.301348846129259, |
|
"grad_norm": 0.9359349012374878, |
|
"learning_rate": 0.00023427533853683595, |
|
"loss": 4.0369, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 2.3040124443307977, |
|
"grad_norm": 0.9478726983070374, |
|
"learning_rate": 0.0002333835133631084, |
|
"loss": 4.0386, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 2.3066760425323363, |
|
"grad_norm": 0.9433446526527405, |
|
"learning_rate": 0.00023249168818938084, |
|
"loss": 4.0334, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 2.3093396407338744, |
|
"grad_norm": 0.9548355340957642, |
|
"learning_rate": 0.00023159986301565333, |
|
"loss": 4.0404, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 2.312003238935413, |
|
"grad_norm": 1.014600157737732, |
|
"learning_rate": 0.0002307080378419258, |
|
"loss": 4.0337, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 2.3146668371369516, |
|
"grad_norm": 0.8967020511627197, |
|
"learning_rate": 0.0002298179963185457, |
|
"loss": 4.0343, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 2.31733043533849, |
|
"grad_norm": 1.0393925905227661, |
|
"learning_rate": 0.00022892617114481814, |
|
"loss": 4.0354, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 2.3199940335400284, |
|
"grad_norm": 0.9963262677192688, |
|
"learning_rate": 0.0002280343459710906, |
|
"loss": 4.0358, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 2.322657631741567, |
|
"grad_norm": 0.9155731797218323, |
|
"learning_rate": 0.00022714252079736305, |
|
"loss": 4.0372, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 2.3253212299431056, |
|
"grad_norm": 0.9272859692573547, |
|
"learning_rate": 0.00022625247927398297, |
|
"loss": 4.04, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 2.3279848281446442, |
|
"grad_norm": 0.9763675928115845, |
|
"learning_rate": 0.0002253606541002554, |
|
"loss": 4.0312, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 2.3306484263461824, |
|
"grad_norm": 0.9596668481826782, |
|
"learning_rate": 0.00022446882892652786, |
|
"loss": 4.0337, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 2.333312024547721, |
|
"grad_norm": 0.9284877777099609, |
|
"learning_rate": 0.00022357700375280032, |
|
"loss": 4.0386, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 2.3359756227492596, |
|
"grad_norm": 0.9726400971412659, |
|
"learning_rate": 0.00022268696222942026, |
|
"loss": 4.0354, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 2.338639220950798, |
|
"grad_norm": 0.9305101037025452, |
|
"learning_rate": 0.0002217951370556927, |
|
"loss": 4.0213, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 2.3413028191523364, |
|
"grad_norm": 0.9207624793052673, |
|
"learning_rate": 0.00022090331188196515, |
|
"loss": 4.0388, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 2.343966417353875, |
|
"grad_norm": 0.940703809261322, |
|
"learning_rate": 0.00022001148670823761, |
|
"loss": 4.0303, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 2.3466300155554136, |
|
"grad_norm": 1.0912624597549438, |
|
"learning_rate": 0.00021912144518485753, |
|
"loss": 4.0319, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 2.3492936137569522, |
|
"grad_norm": 0.9056357145309448, |
|
"learning_rate": 0.00021822962001113, |
|
"loss": 4.0326, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 2.3519572119584904, |
|
"grad_norm": 0.891265332698822, |
|
"learning_rate": 0.00021733779483740242, |
|
"loss": 4.0398, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 2.354620810160029, |
|
"grad_norm": 0.9790766835212708, |
|
"learning_rate": 0.00021644596966367488, |
|
"loss": 4.0352, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 2.3572844083615676, |
|
"grad_norm": 0.9584769010543823, |
|
"learning_rate": 0.00021555414448994734, |
|
"loss": 4.0393, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 2.3599480065631058, |
|
"grad_norm": 0.9171414971351624, |
|
"learning_rate": 0.00021466410296656728, |
|
"loss": 4.0384, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 2.3626116047646444, |
|
"grad_norm": 0.9353621006011963, |
|
"learning_rate": 0.00021377227779283972, |
|
"loss": 4.0247, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 2.365275202966183, |
|
"grad_norm": 1.1184170246124268, |
|
"learning_rate": 0.00021288045261911217, |
|
"loss": 4.0374, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 2.3679388011677216, |
|
"grad_norm": 0.9417023062705994, |
|
"learning_rate": 0.00021198862744538463, |
|
"loss": 4.0279, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 2.3706023993692598, |
|
"grad_norm": 1.0378462076187134, |
|
"learning_rate": 0.00021109858592200455, |
|
"loss": 4.0357, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 2.3732659975707984, |
|
"grad_norm": 0.9642356634140015, |
|
"learning_rate": 0.00021020676074827698, |
|
"loss": 4.0334, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 2.375929595772337, |
|
"grad_norm": 0.970891535282135, |
|
"learning_rate": 0.00020931493557454944, |
|
"loss": 4.025, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 2.3785931939738756, |
|
"grad_norm": 0.9346612691879272, |
|
"learning_rate": 0.0002084231104008219, |
|
"loss": 4.0255, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 2.3812567921754138, |
|
"grad_norm": 0.9348496794700623, |
|
"learning_rate": 0.00020753128522709436, |
|
"loss": 4.0305, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 2.3839203903769524, |
|
"grad_norm": 0.9465219974517822, |
|
"learning_rate": 0.00020664124370371428, |
|
"loss": 4.0279, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 2.386583988578491, |
|
"grad_norm": 0.9686950445175171, |
|
"learning_rate": 0.00020574941852998673, |
|
"loss": 4.038, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 2.3892475867800296, |
|
"grad_norm": 0.8983688354492188, |
|
"learning_rate": 0.0002048575933562592, |
|
"loss": 4.0302, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 2.3919111849815677, |
|
"grad_norm": 0.9491548538208008, |
|
"learning_rate": 0.00020396576818253165, |
|
"loss": 4.0302, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 2.3945747831831063, |
|
"grad_norm": 0.9248127341270447, |
|
"learning_rate": 0.00020307572665915154, |
|
"loss": 4.0338, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 2.397238381384645, |
|
"grad_norm": 0.9573125243186951, |
|
"learning_rate": 0.000202183901485424, |
|
"loss": 4.0337, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 2.3999019795861836, |
|
"grad_norm": 0.9655391573905945, |
|
"learning_rate": 0.00020129207631169646, |
|
"loss": 4.0338, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 2.4025655777877217, |
|
"grad_norm": 0.9134914875030518, |
|
"learning_rate": 0.00020040025113796892, |
|
"loss": 4.0241, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 2.4052291759892603, |
|
"grad_norm": 0.9635368585586548, |
|
"learning_rate": 0.00019951020961458886, |
|
"loss": 4.0357, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 2.407892774190799, |
|
"grad_norm": 0.9742798805236816, |
|
"learning_rate": 0.0001986183844408613, |
|
"loss": 4.0242, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 2.4105563723923376, |
|
"grad_norm": 0.9775349497795105, |
|
"learning_rate": 0.00019772655926713375, |
|
"loss": 4.0279, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 2.4132199705938757, |
|
"grad_norm": 0.9313619136810303, |
|
"learning_rate": 0.0001968347340934062, |
|
"loss": 4.03, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 2.4158835687954143, |
|
"grad_norm": 0.9796269536018372, |
|
"learning_rate": 0.00019594469257002613, |
|
"loss": 4.0254, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 2.418547166996953, |
|
"grad_norm": 0.9695695042610168, |
|
"learning_rate": 0.00019505286739629856, |
|
"loss": 4.0353, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 2.4212107651984915, |
|
"grad_norm": 0.9753876328468323, |
|
"learning_rate": 0.00019416104222257102, |
|
"loss": 4.0269, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 2.4238743634000297, |
|
"grad_norm": 0.9220411777496338, |
|
"learning_rate": 0.00019326921704884348, |
|
"loss": 4.0289, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 2.4265379616015683, |
|
"grad_norm": 0.9355341196060181, |
|
"learning_rate": 0.0001923791755254634, |
|
"loss": 4.0297, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 2.429201559803107, |
|
"grad_norm": 1.0068522691726685, |
|
"learning_rate": 0.00019148735035173583, |
|
"loss": 4.0332, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 2.431865158004645, |
|
"grad_norm": 0.9809306263923645, |
|
"learning_rate": 0.00019059552517800831, |
|
"loss": 4.025, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 2.4345287562061837, |
|
"grad_norm": 0.9140877723693848, |
|
"learning_rate": 0.00018970370000428077, |
|
"loss": 4.0237, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 2.4371923544077223, |
|
"grad_norm": 0.942362368106842, |
|
"learning_rate": 0.00018881187483055323, |
|
"loss": 4.0299, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 2.439855952609261, |
|
"grad_norm": 1.0030492544174194, |
|
"learning_rate": 0.00018792183330717312, |
|
"loss": 4.0241, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 2.4425195508107995, |
|
"grad_norm": 0.9555344581604004, |
|
"learning_rate": 0.00018703000813344558, |
|
"loss": 4.0269, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 2.4451831490123377, |
|
"grad_norm": 0.9068697690963745, |
|
"learning_rate": 0.00018613818295971804, |
|
"loss": 4.0273, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 2.4478467472138763, |
|
"grad_norm": 1.026928186416626, |
|
"learning_rate": 0.0001852463577859905, |
|
"loss": 4.0271, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 2.450510345415415, |
|
"grad_norm": 1.0138953924179077, |
|
"learning_rate": 0.00018435631626261041, |
|
"loss": 4.0273, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 2.453173943616953, |
|
"grad_norm": 0.9750286936759949, |
|
"learning_rate": 0.00018346449108888285, |
|
"loss": 4.0304, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 2.4558375418184917, |
|
"grad_norm": 0.9891506433486938, |
|
"learning_rate": 0.0001825726659151553, |
|
"loss": 4.028, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 2.4585011400200303, |
|
"grad_norm": 0.9331740140914917, |
|
"learning_rate": 0.00018168084074142777, |
|
"loss": 4.0259, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 2.461164738221569, |
|
"grad_norm": 0.9839907288551331, |
|
"learning_rate": 0.00018078901556770025, |
|
"loss": 4.0299, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 2.463828336423107, |
|
"grad_norm": 1.092699408531189, |
|
"learning_rate": 0.00017989897404432014, |
|
"loss": 4.0279, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 2.4664919346246457, |
|
"grad_norm": 0.9484713673591614, |
|
"learning_rate": 0.0001790071488705926, |
|
"loss": 4.0141, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 2.4691555328261843, |
|
"grad_norm": 0.9671944975852966, |
|
"learning_rate": 0.00017811532369686506, |
|
"loss": 4.0262, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 2.471819131027723, |
|
"grad_norm": 0.9488347172737122, |
|
"learning_rate": 0.00017722349852313752, |
|
"loss": 4.0197, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 2.474482729229261, |
|
"grad_norm": 0.9663012623786926, |
|
"learning_rate": 0.0001763334569997574, |
|
"loss": 4.0238, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 2.4771463274307997, |
|
"grad_norm": 0.9515085220336914, |
|
"learning_rate": 0.00017544163182602987, |
|
"loss": 4.0248, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 2.4798099256323383, |
|
"grad_norm": 0.969129204750061, |
|
"learning_rate": 0.00017454980665230233, |
|
"loss": 4.027, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 2.482473523833877, |
|
"grad_norm": 0.9723744988441467, |
|
"learning_rate": 0.00017365798147857479, |
|
"loss": 4.0223, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 2.485137122035415, |
|
"grad_norm": 0.9454832673072815, |
|
"learning_rate": 0.0001727679399551947, |
|
"loss": 4.0257, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 2.4878007202369536, |
|
"grad_norm": 0.9404035210609436, |
|
"learning_rate": 0.00017187611478146716, |
|
"loss": 4.0292, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 2.4904643184384923, |
|
"grad_norm": 0.9745790362358093, |
|
"learning_rate": 0.00017098428960773962, |
|
"loss": 4.027, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 2.493127916640031, |
|
"grad_norm": 0.952643871307373, |
|
"learning_rate": 0.00017009246443401208, |
|
"loss": 4.0259, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 2.495791514841569, |
|
"grad_norm": 1.0002975463867188, |
|
"learning_rate": 0.000169202422910632, |
|
"loss": 4.0286, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 2.4984551130431076, |
|
"grad_norm": 0.9904667139053345, |
|
"learning_rate": 0.00016831059773690443, |
|
"loss": 4.0233, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 2.5011187112446462, |
|
"grad_norm": 0.9523800015449524, |
|
"learning_rate": 0.00016741877256317689, |
|
"loss": 4.0205, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 2.5037823094461844, |
|
"grad_norm": 1.111253023147583, |
|
"learning_rate": 0.00016652694738944935, |
|
"loss": 4.0211, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 2.506445907647723, |
|
"grad_norm": 0.9411515593528748, |
|
"learning_rate": 0.0001656369058660693, |
|
"loss": 4.0276, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 2.5091095058492616, |
|
"grad_norm": 0.9541642665863037, |
|
"learning_rate": 0.00016474508069234172, |
|
"loss": 4.0248, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 2.5117731040508002, |
|
"grad_norm": 1.016478180885315, |
|
"learning_rate": 0.00016385325551861418, |
|
"loss": 4.0253, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 2.514436702252339, |
|
"grad_norm": 0.9605896472930908, |
|
"learning_rate": 0.00016296143034488664, |
|
"loss": 4.0201, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.517100300453877, |
|
"grad_norm": 0.9732680916786194, |
|
"learning_rate": 0.00016207138882150655, |
|
"loss": 4.02, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 2.5197638986554156, |
|
"grad_norm": 0.9240507483482361, |
|
"learning_rate": 0.000161179563647779, |
|
"loss": 4.0156, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 2.522427496856954, |
|
"grad_norm": 1.063936471939087, |
|
"learning_rate": 0.00016028773847405145, |
|
"loss": 4.0252, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 2.5250910950584924, |
|
"grad_norm": 0.9789932370185852, |
|
"learning_rate": 0.0001593959133003239, |
|
"loss": 4.0243, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 2.527754693260031, |
|
"grad_norm": 0.9427129030227661, |
|
"learning_rate": 0.00015850587177694385, |
|
"loss": 4.0193, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 2.5304182914615696, |
|
"grad_norm": 1.0714107751846313, |
|
"learning_rate": 0.00015761404660321628, |
|
"loss": 4.0165, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 2.533081889663108, |
|
"grad_norm": 0.9931527376174927, |
|
"learning_rate": 0.00015672222142948874, |
|
"loss": 4.0236, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 2.535745487864647, |
|
"grad_norm": 0.9835180640220642, |
|
"learning_rate": 0.0001558303962557612, |
|
"loss": 4.0227, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 2.538409086066185, |
|
"grad_norm": 1.021427869796753, |
|
"learning_rate": 0.00015493857108203366, |
|
"loss": 4.0233, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 2.5410726842677236, |
|
"grad_norm": 1.2135415077209473, |
|
"learning_rate": 0.00015404852955865357, |
|
"loss": 4.0206, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 2.543736282469262, |
|
"grad_norm": 1.0140650272369385, |
|
"learning_rate": 0.000153156704384926, |
|
"loss": 4.0232, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 2.5463998806708004, |
|
"grad_norm": 1.0078463554382324, |
|
"learning_rate": 0.00015226487921119847, |
|
"loss": 4.0182, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 2.549063478872339, |
|
"grad_norm": 1.0854226350784302, |
|
"learning_rate": 0.00015137305403747092, |
|
"loss": 4.019, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 2.5517270770738776, |
|
"grad_norm": 0.9886216521263123, |
|
"learning_rate": 0.00015048301251409084, |
|
"loss": 4.0224, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 2.554390675275416, |
|
"grad_norm": 1.0139665603637695, |
|
"learning_rate": 0.0001495911873403633, |
|
"loss": 4.0129, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 2.557054273476955, |
|
"grad_norm": 0.9683591723442078, |
|
"learning_rate": 0.00014869936216663576, |
|
"loss": 4.017, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.559717871678493, |
|
"grad_norm": 1.039494276046753, |
|
"learning_rate": 0.00014780753699290822, |
|
"loss": 4.0145, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 2.5623814698800316, |
|
"grad_norm": 1.0008569955825806, |
|
"learning_rate": 0.00014691749546952813, |
|
"loss": 4.0191, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 2.56504506808157, |
|
"grad_norm": 0.9593690037727356, |
|
"learning_rate": 0.00014602567029580057, |
|
"loss": 4.0247, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 2.5677086662831083, |
|
"grad_norm": 0.9470319747924805, |
|
"learning_rate": 0.00014513384512207303, |
|
"loss": 4.0227, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 2.570372264484647, |
|
"grad_norm": 1.0550135374069214, |
|
"learning_rate": 0.00014424201994834549, |
|
"loss": 4.0201, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 2.5730358626861856, |
|
"grad_norm": 1.0270289182662964, |
|
"learning_rate": 0.0001433519784249654, |
|
"loss": 4.0155, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 2.575699460887724, |
|
"grad_norm": 1.0669533014297485, |
|
"learning_rate": 0.00014246015325123783, |
|
"loss": 4.0256, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 2.5783630590892628, |
|
"grad_norm": 0.9935122132301331, |
|
"learning_rate": 0.0001415683280775103, |
|
"loss": 4.0131, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 2.581026657290801, |
|
"grad_norm": 1.0519307851791382, |
|
"learning_rate": 0.00014067650290378275, |
|
"loss": 4.0225, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 2.5836902554923395, |
|
"grad_norm": 0.9848348498344421, |
|
"learning_rate": 0.0001397864613804027, |
|
"loss": 4.0173, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 2.586353853693878, |
|
"grad_norm": 0.9730287194252014, |
|
"learning_rate": 0.00013889463620667515, |
|
"loss": 4.0184, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 2.5890174518954163, |
|
"grad_norm": 1.023484706878662, |
|
"learning_rate": 0.00013800281103294759, |
|
"loss": 4.0183, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 2.591681050096955, |
|
"grad_norm": 0.9631215929985046, |
|
"learning_rate": 0.00013711098585922005, |
|
"loss": 4.0186, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 2.5943446482984935, |
|
"grad_norm": 0.9774326682090759, |
|
"learning_rate": 0.00013622094433583996, |
|
"loss": 4.0212, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 2.5970082465000317, |
|
"grad_norm": 1.052068829536438, |
|
"learning_rate": 0.00013532911916211242, |
|
"loss": 4.0183, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 2.5996718447015703, |
|
"grad_norm": 0.9873191714286804, |
|
"learning_rate": 0.00013443729398838485, |
|
"loss": 4.0241, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.602335442903109, |
|
"grad_norm": 1.1005477905273438, |
|
"learning_rate": 0.0001335454688146573, |
|
"loss": 4.017, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 2.6049990411046475, |
|
"grad_norm": 0.9617475271224976, |
|
"learning_rate": 0.00013265542729127725, |
|
"loss": 4.0207, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 2.607662639306186, |
|
"grad_norm": 0.9862669706344604, |
|
"learning_rate": 0.0001317636021175497, |
|
"loss": 4.0168, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 2.6103262375077243, |
|
"grad_norm": 0.9720093011856079, |
|
"learning_rate": 0.00013087177694382215, |
|
"loss": 4.0058, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 2.612989835709263, |
|
"grad_norm": 0.9520342350006104, |
|
"learning_rate": 0.0001299799517700946, |
|
"loss": 4.0146, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 2.6156534339108015, |
|
"grad_norm": 1.054432988166809, |
|
"learning_rate": 0.00012908991024671452, |
|
"loss": 4.0105, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 2.6183170321123397, |
|
"grad_norm": 0.9796612858772278, |
|
"learning_rate": 0.00012819808507298698, |
|
"loss": 4.0114, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 2.6209806303138783, |
|
"grad_norm": 1.0970081090927124, |
|
"learning_rate": 0.0001273062598992594, |
|
"loss": 4.0232, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 2.623644228515417, |
|
"grad_norm": 0.9749308228492737, |
|
"learning_rate": 0.00012641443472553187, |
|
"loss": 4.009, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 2.6263078267169555, |
|
"grad_norm": 1.0011272430419922, |
|
"learning_rate": 0.00012552439320215181, |
|
"loss": 4.0182, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 2.628971424918494, |
|
"grad_norm": 0.9727855920791626, |
|
"learning_rate": 0.00012463256802842425, |
|
"loss": 4.0142, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 2.6316350231200323, |
|
"grad_norm": 1.054745078086853, |
|
"learning_rate": 0.0001237407428546967, |
|
"loss": 4.0153, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 2.634298621321571, |
|
"grad_norm": 0.9852134585380554, |
|
"learning_rate": 0.00012284891768096917, |
|
"loss": 4.0202, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 2.6369622195231095, |
|
"grad_norm": 1.0056986808776855, |
|
"learning_rate": 0.00012195887615758908, |
|
"loss": 4.0187, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 2.6396258177246477, |
|
"grad_norm": 0.9925665259361267, |
|
"learning_rate": 0.00012106705098386153, |
|
"loss": 4.0102, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 2.6422894159261863, |
|
"grad_norm": 0.9884349703788757, |
|
"learning_rate": 0.00012017522581013399, |
|
"loss": 4.0161, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.644953014127725, |
|
"grad_norm": 0.9753773808479309, |
|
"learning_rate": 0.00011928340063640645, |
|
"loss": 4.0122, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 2.6476166123292635, |
|
"grad_norm": 1.0602976083755493, |
|
"learning_rate": 0.00011839157546267889, |
|
"loss": 4.0148, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 2.650280210530802, |
|
"grad_norm": 1.024678349494934, |
|
"learning_rate": 0.00011750153393929882, |
|
"loss": 4.0148, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 2.6529438087323403, |
|
"grad_norm": 1.0422247648239136, |
|
"learning_rate": 0.00011660970876557127, |
|
"loss": 4.0139, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 2.655607406933879, |
|
"grad_norm": 0.9945011734962463, |
|
"learning_rate": 0.00011571788359184373, |
|
"loss": 4.0098, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 2.6582710051354175, |
|
"grad_norm": 0.9866018891334534, |
|
"learning_rate": 0.00011482605841811617, |
|
"loss": 4.0151, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 2.6609346033369556, |
|
"grad_norm": 1.071170449256897, |
|
"learning_rate": 0.0001139360168947361, |
|
"loss": 4.016, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 2.6635982015384942, |
|
"grad_norm": 1.120274543762207, |
|
"learning_rate": 0.00011304419172100855, |
|
"loss": 4.0115, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 2.666261799740033, |
|
"grad_norm": 1.0567705631256104, |
|
"learning_rate": 0.000112152366547281, |
|
"loss": 4.012, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 2.6689253979415715, |
|
"grad_norm": 0.9878965020179749, |
|
"learning_rate": 0.00011126054137355346, |
|
"loss": 4.0176, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 2.67158899614311, |
|
"grad_norm": 1.064886212348938, |
|
"learning_rate": 0.00011037049985017338, |
|
"loss": 4.0103, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 2.6742525943446482, |
|
"grad_norm": 1.0028510093688965, |
|
"learning_rate": 0.00010947867467644583, |
|
"loss": 4.0122, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 2.676916192546187, |
|
"grad_norm": 1.0561763048171997, |
|
"learning_rate": 0.00010858684950271829, |
|
"loss": 4.0078, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 2.6795797907477255, |
|
"grad_norm": 0.9861183166503906, |
|
"learning_rate": 0.00010769502432899074, |
|
"loss": 4.0162, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 2.6822433889492636, |
|
"grad_norm": 1.0413438081741333, |
|
"learning_rate": 0.00010680498280561066, |
|
"loss": 4.0205, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 2.6849069871508022, |
|
"grad_norm": 0.9923077821731567, |
|
"learning_rate": 0.0001059131576318831, |
|
"loss": 4.0078, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.687570585352341, |
|
"grad_norm": 0.9952608346939087, |
|
"learning_rate": 0.00010502133245815557, |
|
"loss": 4.0078, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 2.690234183553879, |
|
"grad_norm": 1.0345313549041748, |
|
"learning_rate": 0.00010412950728442802, |
|
"loss": 4.0118, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 2.6928977817554176, |
|
"grad_norm": 0.9837112426757812, |
|
"learning_rate": 0.00010323946576104794, |
|
"loss": 4.0108, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 2.695561379956956, |
|
"grad_norm": 1.0294288396835327, |
|
"learning_rate": 0.00010234764058732039, |
|
"loss": 4.0074, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 2.698224978158495, |
|
"grad_norm": 1.0430691242218018, |
|
"learning_rate": 0.00010145581541359285, |
|
"loss": 4.008, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 2.7008885763600334, |
|
"grad_norm": 1.006121039390564, |
|
"learning_rate": 0.0001005639902398653, |
|
"loss": 4.0022, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 2.7035521745615716, |
|
"grad_norm": 1.0028232336044312, |
|
"learning_rate": 9.967216506613775e-05, |
|
"loss": 4.0164, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 2.70621577276311, |
|
"grad_norm": 0.9883862733840942, |
|
"learning_rate": 9.878212354275768e-05, |
|
"loss": 4.0104, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 2.708879370964649, |
|
"grad_norm": 1.087190866470337, |
|
"learning_rate": 9.789029836903013e-05, |
|
"loss": 4.0132, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 2.711542969166187, |
|
"grad_norm": 1.0679038763046265, |
|
"learning_rate": 9.699847319530258e-05, |
|
"loss": 4.0105, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 2.7142065673677256, |
|
"grad_norm": 0.9755781888961792, |
|
"learning_rate": 9.610664802157504e-05, |
|
"loss": 4.0141, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 2.716870165569264, |
|
"grad_norm": 1.09120512008667, |
|
"learning_rate": 9.521660649819495e-05, |
|
"loss": 4.0138, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 2.719533763770803, |
|
"grad_norm": 1.0885505676269531, |
|
"learning_rate": 9.43247813244674e-05, |
|
"loss": 4.0065, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 2.7221973619723414, |
|
"grad_norm": 0.9858110547065735, |
|
"learning_rate": 9.343295615073986e-05, |
|
"loss": 4.0082, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 2.7248609601738796, |
|
"grad_norm": 1.0929360389709473, |
|
"learning_rate": 9.254113097701232e-05, |
|
"loss": 4.0107, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 2.727524558375418, |
|
"grad_norm": 1.139798641204834, |
|
"learning_rate": 9.165108945363223e-05, |
|
"loss": 4.0113, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.730188156576957, |
|
"grad_norm": 1.009216070175171, |
|
"learning_rate": 9.075926427990467e-05, |
|
"loss": 4.0065, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 2.732851754778495, |
|
"grad_norm": 1.047379732131958, |
|
"learning_rate": 8.986743910617714e-05, |
|
"loss": 4.0164, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 2.7355153529800336, |
|
"grad_norm": 0.9918530583381653, |
|
"learning_rate": 8.89756139324496e-05, |
|
"loss": 4.0016, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 2.738178951181572, |
|
"grad_norm": 1.0664864778518677, |
|
"learning_rate": 8.80855724090695e-05, |
|
"loss": 4.0112, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 2.740842549383111, |
|
"grad_norm": 1.0139024257659912, |
|
"learning_rate": 8.719374723534195e-05, |
|
"loss": 4.014, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 2.7435061475846494, |
|
"grad_norm": 1.0350786447525024, |
|
"learning_rate": 8.630192206161441e-05, |
|
"loss": 4.0062, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 2.7461697457861876, |
|
"grad_norm": 1.1327440738677979, |
|
"learning_rate": 8.541009688788688e-05, |
|
"loss": 4.0072, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 2.748833343987726, |
|
"grad_norm": 1.0807819366455078, |
|
"learning_rate": 8.452005536450679e-05, |
|
"loss": 4.0037, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 2.7514969421892648, |
|
"grad_norm": 0.9618473649024963, |
|
"learning_rate": 8.362823019077925e-05, |
|
"loss": 4.0069, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 2.754160540390803, |
|
"grad_norm": 1.0459738969802856, |
|
"learning_rate": 8.273640501705169e-05, |
|
"loss": 4.0066, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 2.7568241385923415, |
|
"grad_norm": 0.9917722940444946, |
|
"learning_rate": 8.184457984332415e-05, |
|
"loss": 3.9992, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 2.75948773679388, |
|
"grad_norm": 1.0388100147247314, |
|
"learning_rate": 8.095453831994407e-05, |
|
"loss": 4.0052, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 2.7621513349954188, |
|
"grad_norm": 1.041391372680664, |
|
"learning_rate": 8.006271314621653e-05, |
|
"loss": 4.0032, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 2.7648149331969574, |
|
"grad_norm": 1.06915283203125, |
|
"learning_rate": 7.917088797248897e-05, |
|
"loss": 4.0031, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 2.7674785313984955, |
|
"grad_norm": 1.0097078084945679, |
|
"learning_rate": 7.827906279876143e-05, |
|
"loss": 4.0074, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 2.770142129600034, |
|
"grad_norm": 1.0231430530548096, |
|
"learning_rate": 7.738902127538135e-05, |
|
"loss": 4.0133, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.7728057278015728, |
|
"grad_norm": 1.1709152460098267, |
|
"learning_rate": 7.64971961016538e-05, |
|
"loss": 4.0105, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 2.775469326003111, |
|
"grad_norm": 1.0553919076919556, |
|
"learning_rate": 7.560537092792625e-05, |
|
"loss": 4.0005, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 2.7781329242046495, |
|
"grad_norm": 1.0332099199295044, |
|
"learning_rate": 7.471354575419871e-05, |
|
"loss": 4.0137, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 2.780796522406188, |
|
"grad_norm": 1.0436155796051025, |
|
"learning_rate": 7.382350423081863e-05, |
|
"loss": 4.0046, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 2.7834601206077263, |
|
"grad_norm": 1.0391409397125244, |
|
"learning_rate": 7.293167905709109e-05, |
|
"loss": 4.0041, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 2.786123718809265, |
|
"grad_norm": 1.1365002393722534, |
|
"learning_rate": 7.203985388336353e-05, |
|
"loss": 4.0052, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 2.7887873170108035, |
|
"grad_norm": 1.0857511758804321, |
|
"learning_rate": 7.114802870963599e-05, |
|
"loss": 4.0059, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 2.791450915212342, |
|
"grad_norm": 0.9912382364273071, |
|
"learning_rate": 7.02579871862559e-05, |
|
"loss": 3.9987, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 2.7941145134138807, |
|
"grad_norm": 1.032727599143982, |
|
"learning_rate": 6.936616201252837e-05, |
|
"loss": 4.0058, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 2.796778111615419, |
|
"grad_norm": 1.0187702178955078, |
|
"learning_rate": 6.847433683880082e-05, |
|
"loss": 4.0103, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 2.7994417098169575, |
|
"grad_norm": 0.981054425239563, |
|
"learning_rate": 6.758251166507327e-05, |
|
"loss": 4.0111, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 2.802105308018496, |
|
"grad_norm": 1.1054233312606812, |
|
"learning_rate": 6.669068649134573e-05, |
|
"loss": 4.0051, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 2.8047689062200343, |
|
"grad_norm": 1.060707449913025, |
|
"learning_rate": 6.580064496796565e-05, |
|
"loss": 4.0112, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 2.807432504421573, |
|
"grad_norm": 0.9906247854232788, |
|
"learning_rate": 6.49088197942381e-05, |
|
"loss": 4.0067, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 2.8100961026231115, |
|
"grad_norm": 1.0259308815002441, |
|
"learning_rate": 6.401699462051055e-05, |
|
"loss": 3.9976, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 2.81275970082465, |
|
"grad_norm": 1.0347638130187988, |
|
"learning_rate": 6.312516944678301e-05, |
|
"loss": 4.0036, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.8154232990261887, |
|
"grad_norm": 1.0310813188552856, |
|
"learning_rate": 6.223512792340293e-05, |
|
"loss": 3.9994, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 2.818086897227727, |
|
"grad_norm": 1.085179090499878, |
|
"learning_rate": 6.134330274967537e-05, |
|
"loss": 4.0085, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 2.8207504954292655, |
|
"grad_norm": 1.0044561624526978, |
|
"learning_rate": 6.045147757594784e-05, |
|
"loss": 4.0058, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 2.823414093630804, |
|
"grad_norm": 1.0580705404281616, |
|
"learning_rate": 5.955965240222029e-05, |
|
"loss": 3.9968, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 2.8260776918323423, |
|
"grad_norm": 1.1205203533172607, |
|
"learning_rate": 5.86696108788402e-05, |
|
"loss": 3.9991, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 2.828741290033881, |
|
"grad_norm": 1.0346322059631348, |
|
"learning_rate": 5.777778570511266e-05, |
|
"loss": 4.0044, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 2.8314048882354195, |
|
"grad_norm": 1.078075647354126, |
|
"learning_rate": 5.688596053138511e-05, |
|
"loss": 3.9978, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 2.834068486436958, |
|
"grad_norm": 1.0365418195724487, |
|
"learning_rate": 5.599413535765757e-05, |
|
"loss": 4.0039, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 2.8367320846384967, |
|
"grad_norm": 1.0657716989517212, |
|
"learning_rate": 5.510409383427748e-05, |
|
"loss": 4.004, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 2.839395682840035, |
|
"grad_norm": 1.1193735599517822, |
|
"learning_rate": 5.421226866054994e-05, |
|
"loss": 3.9981, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 2.8420592810415735, |
|
"grad_norm": 1.0354912281036377, |
|
"learning_rate": 5.332044348682239e-05, |
|
"loss": 4.004, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 2.844722879243112, |
|
"grad_norm": 1.0501588582992554, |
|
"learning_rate": 5.2428618313094844e-05, |
|
"loss": 4.0008, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 2.8473864774446502, |
|
"grad_norm": 1.0080904960632324, |
|
"learning_rate": 5.1538576789714766e-05, |
|
"loss": 4.002, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 2.850050075646189, |
|
"grad_norm": 1.0569877624511719, |
|
"learning_rate": 5.064675161598722e-05, |
|
"loss": 4.0042, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 2.8527136738477274, |
|
"grad_norm": 1.0170665979385376, |
|
"learning_rate": 4.975492644225967e-05, |
|
"loss": 4.0016, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 2.855377272049266, |
|
"grad_norm": 1.0019437074661255, |
|
"learning_rate": 4.886310126853213e-05, |
|
"loss": 3.9992, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.8580408702508047, |
|
"grad_norm": 1.059810757637024, |
|
"learning_rate": 4.797305974515204e-05, |
|
"loss": 4.0066, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 2.860704468452343, |
|
"grad_norm": 1.0938292741775513, |
|
"learning_rate": 4.70812345714245e-05, |
|
"loss": 4.0008, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 2.8633680666538814, |
|
"grad_norm": 1.0392727851867676, |
|
"learning_rate": 4.618940939769695e-05, |
|
"loss": 4.0009, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 2.86603166485542, |
|
"grad_norm": 1.041225790977478, |
|
"learning_rate": 4.529758422396941e-05, |
|
"loss": 4.0025, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 2.868695263056958, |
|
"grad_norm": 1.0904215574264526, |
|
"learning_rate": 4.440754270058932e-05, |
|
"loss": 3.9982, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 2.871358861258497, |
|
"grad_norm": 1.0225439071655273, |
|
"learning_rate": 4.351571752686177e-05, |
|
"loss": 3.9986, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 2.8740224594600354, |
|
"grad_norm": 1.0368945598602295, |
|
"learning_rate": 4.262389235313424e-05, |
|
"loss": 3.9998, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 2.8766860576615736, |
|
"grad_norm": 1.0657331943511963, |
|
"learning_rate": 4.173206717940669e-05, |
|
"loss": 3.996, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 2.879349655863112, |
|
"grad_norm": 1.0275654792785645, |
|
"learning_rate": 4.084024200567914e-05, |
|
"loss": 3.9983, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 2.882013254064651, |
|
"grad_norm": 1.107050895690918, |
|
"learning_rate": 3.995020048229905e-05, |
|
"loss": 4.0028, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 2.8846768522661894, |
|
"grad_norm": 1.001038908958435, |
|
"learning_rate": 3.905837530857151e-05, |
|
"loss": 3.9941, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 2.887340450467728, |
|
"grad_norm": 1.0545873641967773, |
|
"learning_rate": 3.8166550134843964e-05, |
|
"loss": 3.9987, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 2.890004048669266, |
|
"grad_norm": 1.0375920534133911, |
|
"learning_rate": 3.727472496111642e-05, |
|
"loss": 3.995, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 2.892667646870805, |
|
"grad_norm": 1.0322425365447998, |
|
"learning_rate": 3.638468343773634e-05, |
|
"loss": 3.994, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 2.8953312450723434, |
|
"grad_norm": 1.0789730548858643, |
|
"learning_rate": 3.549285826400879e-05, |
|
"loss": 3.9958, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 2.8979948432738816, |
|
"grad_norm": 1.1932363510131836, |
|
"learning_rate": 3.4601033090281244e-05, |
|
"loss": 4.005, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.90065844147542, |
|
"grad_norm": 1.1194884777069092, |
|
"learning_rate": 3.3709207916553696e-05, |
|
"loss": 3.9965, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 2.903322039676959, |
|
"grad_norm": 1.03001868724823, |
|
"learning_rate": 3.281916639317362e-05, |
|
"loss": 4.0013, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 2.9059856378784974, |
|
"grad_norm": 0.986453115940094, |
|
"learning_rate": 3.192734121944607e-05, |
|
"loss": 3.9935, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 2.908649236080036, |
|
"grad_norm": 1.0338671207427979, |
|
"learning_rate": 3.1035516045718524e-05, |
|
"loss": 4.0017, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 2.911312834281574, |
|
"grad_norm": 1.0669965744018555, |
|
"learning_rate": 3.014369087199098e-05, |
|
"loss": 3.9954, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 2.9139764324831128, |
|
"grad_norm": 1.024873971939087, |
|
"learning_rate": 2.9253649348610895e-05, |
|
"loss": 3.9967, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 2.9166400306846514, |
|
"grad_norm": 1.0891566276550293, |
|
"learning_rate": 2.8361824174883348e-05, |
|
"loss": 4.0024, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 2.9193036288861895, |
|
"grad_norm": 0.9691978096961975, |
|
"learning_rate": 2.7469999001155807e-05, |
|
"loss": 3.9982, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 2.921967227087728, |
|
"grad_norm": 1.0564926862716675, |
|
"learning_rate": 2.6578173827428263e-05, |
|
"loss": 4.0025, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 2.9246308252892668, |
|
"grad_norm": 0.997660756111145, |
|
"learning_rate": 2.5688132304048175e-05, |
|
"loss": 3.9959, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 2.9272944234908054, |
|
"grad_norm": 1.0368565320968628, |
|
"learning_rate": 2.479630713032063e-05, |
|
"loss": 3.9977, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 2.929958021692344, |
|
"grad_norm": 1.069231629371643, |
|
"learning_rate": 2.3904481956593084e-05, |
|
"loss": 3.9915, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 2.932621619893882, |
|
"grad_norm": 1.0751917362213135, |
|
"learning_rate": 2.3012656782865543e-05, |
|
"loss": 3.997, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 2.9352852180954208, |
|
"grad_norm": 1.0397218465805054, |
|
"learning_rate": 2.212261525948545e-05, |
|
"loss": 3.9997, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 2.9379488162969594, |
|
"grad_norm": 1.086714506149292, |
|
"learning_rate": 2.1230790085757908e-05, |
|
"loss": 3.9943, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 2.9406124144984975, |
|
"grad_norm": 1.141553521156311, |
|
"learning_rate": 2.0338964912030367e-05, |
|
"loss": 3.9987, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.943276012700036, |
|
"grad_norm": 1.005601406097412, |
|
"learning_rate": 1.944713973830282e-05, |
|
"loss": 3.9904, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 2.9459396109015747, |
|
"grad_norm": 1.010642647743225, |
|
"learning_rate": 1.8557098214922735e-05, |
|
"loss": 3.9881, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 2.9486032091031134, |
|
"grad_norm": 1.104560375213623, |
|
"learning_rate": 1.7665273041195188e-05, |
|
"loss": 3.9918, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 2.951266807304652, |
|
"grad_norm": 1.0412003993988037, |
|
"learning_rate": 1.6773447867467644e-05, |
|
"loss": 3.9997, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 2.95393040550619, |
|
"grad_norm": 1.0635658502578735, |
|
"learning_rate": 1.5881622693740103e-05, |
|
"loss": 3.994, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 2.9565940037077287, |
|
"grad_norm": 1.0909868478775024, |
|
"learning_rate": 1.4991581170360012e-05, |
|
"loss": 3.9942, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 2.9592576019092673, |
|
"grad_norm": 1.052293062210083, |
|
"learning_rate": 1.4099755996632468e-05, |
|
"loss": 3.9975, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 2.9619212001108055, |
|
"grad_norm": 1.068088412284851, |
|
"learning_rate": 1.3207930822904926e-05, |
|
"loss": 3.9942, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 2.964584798312344, |
|
"grad_norm": 1.1510958671569824, |
|
"learning_rate": 1.2316105649177382e-05, |
|
"loss": 3.9951, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 2.9672483965138827, |
|
"grad_norm": 1.048006534576416, |
|
"learning_rate": 1.1426064125797293e-05, |
|
"loss": 3.9971, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 2.9699119947154213, |
|
"grad_norm": 1.0319584608078003, |
|
"learning_rate": 1.0534238952069748e-05, |
|
"loss": 3.9934, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 2.9725755929169595, |
|
"grad_norm": 1.0391571521759033, |
|
"learning_rate": 9.642413778342204e-06, |
|
"loss": 3.9943, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 2.975239191118498, |
|
"grad_norm": 1.0609184503555298, |
|
"learning_rate": 8.75058860461466e-06, |
|
"loss": 3.9923, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 2.9779027893200367, |
|
"grad_norm": 1.0420206785202026, |
|
"learning_rate": 7.860547081234572e-06, |
|
"loss": 3.9939, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 2.9805663875215753, |
|
"grad_norm": 1.0162791013717651, |
|
"learning_rate": 6.968721907507028e-06, |
|
"loss": 3.9993, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 2.9832299857231135, |
|
"grad_norm": 1.1188008785247803, |
|
"learning_rate": 6.076896733779484e-06, |
|
"loss": 3.9952, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.985893583924652, |
|
"grad_norm": 1.1251684427261353, |
|
"learning_rate": 5.18507156005194e-06, |
|
"loss": 3.9936, |
|
"step": 560500 |
|
}, |
|
{ |
|
"epoch": 2.9885571821261907, |
|
"grad_norm": 1.072590947151184, |
|
"learning_rate": 4.295030036671852e-06, |
|
"loss": 3.9891, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 2.991220780327729, |
|
"grad_norm": 1.0949697494506836, |
|
"learning_rate": 3.403204862944307e-06, |
|
"loss": 3.9909, |
|
"step": 561500 |
|
}, |
|
{ |
|
"epoch": 2.9938843785292675, |
|
"grad_norm": 1.0467427968978882, |
|
"learning_rate": 2.5113796892167635e-06, |
|
"loss": 4.0004, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 2.996547976730806, |
|
"grad_norm": 1.0436049699783325, |
|
"learning_rate": 1.6195545154892197e-06, |
|
"loss": 3.9896, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 2.9992115749323447, |
|
"grad_norm": 1.1010395288467407, |
|
"learning_rate": 7.295129921091309e-07, |
|
"loss": 3.9912, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 563148, |
|
"total_flos": 4.819699538212516e+17, |
|
"train_loss": 4.150129232981245, |
|
"train_runtime": 39834.0737, |
|
"train_samples_per_second": 904.789, |
|
"train_steps_per_second": 14.137 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 563148, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.819699538212516e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|