{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 563148,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0026635982015384943,
      "grad_norm": 0.44449880719184875,
      "learning_rate": 0.0001996,
      "loss": 9.0366,
      "step": 500
    },
    {
      "epoch": 0.005327196403076989,
      "grad_norm": 0.18657611310482025,
      "learning_rate": 0.0003996,
      "loss": 7.4314,
      "step": 1000
    },
    {
      "epoch": 0.007990794604615483,
      "grad_norm": 0.4415804147720337,
      "learning_rate": 0.0005996,
      "loss": 7.3209,
      "step": 1500
    },
    {
      "epoch": 0.010654392806153977,
      "grad_norm": 0.5994516611099243,
      "learning_rate": 0.0007996,
      "loss": 7.0489,
      "step": 2000
    },
    {
      "epoch": 0.013317991007692471,
      "grad_norm": 0.4792259931564331,
      "learning_rate": 0.0009996,
      "loss": 6.8814,
      "step": 2500
    },
    {
      "epoch": 0.015981589209230967,
      "grad_norm": 0.517628014087677,
      "learning_rate": 0.0009991099584766199,
      "loss": 6.7583,
      "step": 3000
    },
    {
      "epoch": 0.01864518741076946,
      "grad_norm": 0.6621844172477722,
      "learning_rate": 0.0009982181333028923,
      "loss": 6.6864,
      "step": 3500
    },
    {
      "epoch": 0.021308785612307955,
      "grad_norm": 0.9153323173522949,
      "learning_rate": 0.0009973263081291647,
      "loss": 6.6285,
      "step": 4000
    },
    {
      "epoch": 0.02397238381384645,
      "grad_norm": 0.6160380840301514,
      "learning_rate": 0.0009964362666057848,
      "loss": 6.5925,
      "step": 4500
    },
    {
      "epoch": 0.026635982015384942,
      "grad_norm": 0.952617883682251,
      "learning_rate": 0.0009955444414320573,
      "loss": 6.5313,
      "step": 5000
    },
    {
      "epoch": 0.029299580216923436,
      "grad_norm": 0.552077054977417,
      "learning_rate": 0.0009946526162583297,
      "loss": 6.4612,
      "step": 5500
    },
    {
      "epoch": 0.031963178418461934,
      "grad_norm": 0.7289395332336426,
      "learning_rate": 0.0009937607910846021,
      "loss": 6.4108,
      "step": 6000
    },
    {
      "epoch": 0.034626776620000424,
      "grad_norm": 0.6313225626945496,
      "learning_rate": 0.0009928689659108746,
      "loss": 6.3711,
      "step": 6500
    },
    {
      "epoch": 0.03729037482153892,
      "grad_norm": 0.6914771795272827,
      "learning_rate": 0.000991977140737147,
      "loss": 6.3561,
      "step": 7000
    },
    {
      "epoch": 0.03995397302307741,
      "grad_norm": 0.5898305773735046,
      "learning_rate": 0.0009910870992137668,
      "loss": 6.3347,
      "step": 7500
    },
    {
      "epoch": 0.04261757122461591,
      "grad_norm": 0.7071697115898132,
      "learning_rate": 0.0009901952740400395,
      "loss": 6.3035,
      "step": 8000
    },
    {
      "epoch": 0.045281169426154406,
      "grad_norm": 0.6536933183670044,
      "learning_rate": 0.000989303448866312,
      "loss": 6.2981,
      "step": 8500
    },
    {
      "epoch": 0.0479447676276929,
      "grad_norm": 0.5664694309234619,
      "learning_rate": 0.0009884116236925844,
      "loss": 6.2829,
      "step": 9000
    },
    {
      "epoch": 0.050608365829231394,
      "grad_norm": 0.6218783259391785,
      "learning_rate": 0.0009875197985188568,
      "loss": 6.2588,
      "step": 9500
    },
    {
      "epoch": 0.053271964030769885,
      "grad_norm": 0.753494918346405,
      "learning_rate": 0.0009866279733451292,
      "loss": 6.2158,
      "step": 10000
    },
    {
      "epoch": 0.05593556223230838,
      "grad_norm": 0.7004017233848572,
      "learning_rate": 0.000985737931821749,
      "loss": 6.1671,
      "step": 10500
    },
    {
      "epoch": 0.05859916043384687,
      "grad_norm": 0.823192298412323,
      "learning_rate": 0.0009848461066480215,
      "loss": 6.1248,
      "step": 11000
    },
    {
      "epoch": 0.06126275863538537,
      "grad_norm": 0.9359510540962219,
      "learning_rate": 0.000983954281474294,
      "loss": 6.0659,
      "step": 11500
    },
    {
      "epoch": 0.06392635683692387,
      "grad_norm": 1.012602686882019,
      "learning_rate": 0.0009830624563005664,
      "loss": 6.0177,
      "step": 12000
    },
    {
      "epoch": 0.06658995503846236,
      "grad_norm": 1.175893783569336,
      "learning_rate": 0.0009821724147771865,
      "loss": 5.9664,
      "step": 12500
    },
    {
      "epoch": 0.06925355324000085,
      "grad_norm": 1.2990820407867432,
      "learning_rate": 0.000981280589603459,
      "loss": 5.9002,
      "step": 13000
    },
    {
      "epoch": 0.07191715144153935,
      "grad_norm": 1.5142649412155151,
      "learning_rate": 0.0009803887644297313,
      "loss": 5.8395,
      "step": 13500
    },
    {
      "epoch": 0.07458074964307784,
      "grad_norm": 1.1063514947891235,
      "learning_rate": 0.0009794969392560038,
      "loss": 5.7873,
      "step": 14000
    },
    {
      "epoch": 0.07724434784461634,
      "grad_norm": 1.1882684230804443,
      "learning_rate": 0.0009786068977326236,
      "loss": 5.7462,
      "step": 14500
    },
    {
      "epoch": 0.07990794604615482,
      "grad_norm": 1.1717172861099243,
      "learning_rate": 0.000977715072558896,
      "loss": 5.7139,
      "step": 15000
    },
    {
      "epoch": 0.08257154424769332,
      "grad_norm": 1.0602678060531616,
      "learning_rate": 0.0009768232473851685,
      "loss": 5.6954,
      "step": 15500
    },
    {
      "epoch": 0.08523514244923182,
      "grad_norm": 1.3342951536178589,
      "learning_rate": 0.000975931422211441,
      "loss": 5.6708,
      "step": 16000
    },
    {
      "epoch": 0.08789874065077032,
      "grad_norm": 1.1896706819534302,
      "learning_rate": 0.0009750395970377135,
      "loss": 5.6496,
      "step": 16500
    },
    {
      "epoch": 0.09056233885230881,
      "grad_norm": 1.2416741847991943,
      "learning_rate": 0.0009741495555143335,
      "loss": 5.6257,
      "step": 17000
    },
    {
      "epoch": 0.0932259370538473,
      "grad_norm": 1.1610051393508911,
      "learning_rate": 0.000973257730340606,
      "loss": 5.6045,
      "step": 17500
    },
    {
      "epoch": 0.0958895352553858,
      "grad_norm": 1.377124309539795,
      "learning_rate": 0.0009723659051668784,
      "loss": 5.5915,
      "step": 18000
    },
    {
      "epoch": 0.09855313345692429,
      "grad_norm": 1.2425626516342163,
      "learning_rate": 0.0009714740799931508,
      "loss": 5.5665,
      "step": 18500
    },
    {
      "epoch": 0.10121673165846279,
      "grad_norm": 1.3102902173995972,
      "learning_rate": 0.0009705840384697707,
      "loss": 5.5016,
      "step": 19000
    },
    {
      "epoch": 0.10388032986000127,
      "grad_norm": 1.1269280910491943,
      "learning_rate": 0.0009696922132960431,
      "loss": 5.3915,
      "step": 19500
    },
    {
      "epoch": 0.10654392806153977,
      "grad_norm": 1.0961062908172607,
      "learning_rate": 0.0009688003881223157,
      "loss": 5.3182,
      "step": 20000
    },
    {
      "epoch": 0.10920752626307827,
      "grad_norm": 1.030776023864746,
      "learning_rate": 0.0009679085629485881,
      "loss": 5.2717,
      "step": 20500
    },
    {
      "epoch": 0.11187112446461676,
      "grad_norm": 1.1483319997787476,
      "learning_rate": 0.000967018521425208,
      "loss": 5.2376,
      "step": 21000
    },
    {
      "epoch": 0.11453472266615526,
      "grad_norm": 1.011655330657959,
      "learning_rate": 0.0009661266962514804,
      "loss": 5.2126,
      "step": 21500
    },
    {
      "epoch": 0.11719832086769374,
      "grad_norm": 1.0027350187301636,
      "learning_rate": 0.0009652348710777528,
      "loss": 5.1876,
      "step": 22000
    },
    {
      "epoch": 0.11986191906923224,
      "grad_norm": 0.9846087694168091,
      "learning_rate": 0.0009643430459040254,
      "loss": 5.1627,
      "step": 22500
    },
    {
      "epoch": 0.12252551727077074,
      "grad_norm": 1.0151575803756714,
      "learning_rate": 0.0009634512207302978,
      "loss": 5.1478,
      "step": 23000
    },
    {
      "epoch": 0.12518911547230924,
      "grad_norm": 0.9792256355285645,
      "learning_rate": 0.0009625611792069178,
      "loss": 5.124,
      "step": 23500
    },
    {
      "epoch": 0.12785271367384773,
      "grad_norm": 0.9928046464920044,
      "learning_rate": 0.0009616693540331902,
      "loss": 5.1147,
      "step": 24000
    },
    {
      "epoch": 0.13051631187538623,
      "grad_norm": 1.0437482595443726,
      "learning_rate": 0.0009607775288594626,
      "loss": 5.0957,
      "step": 24500
    },
    {
      "epoch": 0.13317991007692473,
      "grad_norm": 0.9204614162445068,
      "learning_rate": 0.0009598857036857352,
      "loss": 5.0938,
      "step": 25000
    },
    {
      "epoch": 0.1358435082784632,
      "grad_norm": 0.9496144652366638,
      "learning_rate": 0.0009589938785120076,
      "loss": 5.0705,
      "step": 25500
    },
    {
      "epoch": 0.1385071064800017,
      "grad_norm": 0.8606376647949219,
      "learning_rate": 0.00095810205333828,
      "loss": 5.0659,
      "step": 26000
    },
    {
      "epoch": 0.1411707046815402,
      "grad_norm": 0.8681563138961792,
      "learning_rate": 0.0009572120118148999,
      "loss": 5.0512,
      "step": 26500
    },
    {
      "epoch": 0.1438343028830787,
      "grad_norm": 0.9975363612174988,
      "learning_rate": 0.0009563201866411723,
      "loss": 5.0447,
      "step": 27000
    },
    {
      "epoch": 0.1464979010846172,
      "grad_norm": 1.0180999040603638,
      "learning_rate": 0.0009554283614674449,
      "loss": 5.025,
      "step": 27500
    },
    {
      "epoch": 0.14916149928615569,
      "grad_norm": 0.8448518514633179,
      "learning_rate": 0.0009545365362937173,
      "loss": 5.0264,
      "step": 28000
    },
    {
      "epoch": 0.15182509748769418,
      "grad_norm": 0.8591077327728271,
      "learning_rate": 0.0009536447111199897,
      "loss": 5.0093,
      "step": 28500
    },
    {
      "epoch": 0.15448869568923268,
      "grad_norm": 0.8553301095962524,
      "learning_rate": 0.0009527528859462622,
      "loss": 5.0058,
      "step": 29000
    },
    {
      "epoch": 0.15715229389077118,
      "grad_norm": 0.8588173389434814,
      "learning_rate": 0.0009518610607725346,
      "loss": 4.9948,
      "step": 29500
    },
    {
      "epoch": 0.15981589209230965,
      "grad_norm": 0.9031510949134827,
      "learning_rate": 0.0009509692355988071,
      "loss": 4.9913,
      "step": 30000
    },
    {
      "epoch": 0.16247949029384814,
      "grad_norm": 0.8640721440315247,
      "learning_rate": 0.000950079194075427,
      "loss": 4.9867,
      "step": 30500
    },
    {
      "epoch": 0.16514308849538664,
      "grad_norm": 0.8439059853553772,
      "learning_rate": 0.0009491873689016994,
      "loss": 4.9821,
      "step": 31000
    },
    {
      "epoch": 0.16780668669692514,
      "grad_norm": 0.8463523983955383,
      "learning_rate": 0.0009482955437279719,
      "loss": 4.9663,
      "step": 31500
    },
    {
      "epoch": 0.17047028489846364,
      "grad_norm": 0.8896917104721069,
      "learning_rate": 0.0009474037185542443,
      "loss": 4.9684,
      "step": 32000
    },
    {
      "epoch": 0.17313388310000213,
      "grad_norm": 0.8256401419639587,
      "learning_rate": 0.0009465136770308644,
      "loss": 4.9629,
      "step": 32500
    },
    {
      "epoch": 0.17579748130154063,
      "grad_norm": 0.9592456221580505,
      "learning_rate": 0.0009456236355074842,
      "loss": 4.9561,
      "step": 33000
    },
    {
      "epoch": 0.17846107950307913,
      "grad_norm": 0.9278562068939209,
      "learning_rate": 0.0009447318103337567,
      "loss": 4.9461,
      "step": 33500
    },
    {
      "epoch": 0.18112467770461763,
      "grad_norm": 0.8999398946762085,
      "learning_rate": 0.0009438399851600291,
      "loss": 4.9543,
      "step": 34000
    },
    {
      "epoch": 0.1837882759061561,
      "grad_norm": 0.9635962843894958,
      "learning_rate": 0.0009429481599863015,
      "loss": 4.9381,
      "step": 34500
    },
    {
      "epoch": 0.1864518741076946,
      "grad_norm": 0.9490432143211365,
      "learning_rate": 0.0009420563348125741,
      "loss": 4.9307,
      "step": 35000
    },
    {
      "epoch": 0.1891154723092331,
      "grad_norm": 0.9016521573066711,
      "learning_rate": 0.0009411645096388465,
      "loss": 4.9327,
      "step": 35500
    },
    {
      "epoch": 0.1917790705107716,
      "grad_norm": 0.8551514744758606,
      "learning_rate": 0.0009402726844651189,
      "loss": 4.9249,
      "step": 36000
    },
    {
      "epoch": 0.19444266871231008,
      "grad_norm": 0.8358152508735657,
      "learning_rate": 0.0009393808592913914,
      "loss": 4.9229,
      "step": 36500
    },
    {
      "epoch": 0.19710626691384858,
      "grad_norm": 0.8498304486274719,
      "learning_rate": 0.0009384908177680113,
      "loss": 4.9215,
      "step": 37000
    },
    {
      "epoch": 0.19976986511538708,
      "grad_norm": 0.8565486073493958,
      "learning_rate": 0.0009375989925942838,
      "loss": 4.9184,
      "step": 37500
    },
    {
      "epoch": 0.20243346331692558,
      "grad_norm": 0.8608818650245667,
      "learning_rate": 0.0009367071674205563,
      "loss": 4.9151,
      "step": 38000
    },
    {
      "epoch": 0.20509706151846407,
      "grad_norm": 0.9130340218544006,
      "learning_rate": 0.0009358153422468287,
      "loss": 4.917,
      "step": 38500
    },
    {
      "epoch": 0.20776065972000254,
      "grad_norm": 0.9141052961349487,
      "learning_rate": 0.0009349253007234486,
      "loss": 4.9087,
      "step": 39000
    },
    {
      "epoch": 0.21042425792154104,
      "grad_norm": 0.828787624835968,
      "learning_rate": 0.000934033475549721,
      "loss": 4.9072,
      "step": 39500
    },
    {
      "epoch": 0.21308785612307954,
      "grad_norm": 0.8560599088668823,
      "learning_rate": 0.0009331416503759935,
      "loss": 4.8994,
      "step": 40000
    },
    {
      "epoch": 0.21575145432461804,
      "grad_norm": 0.8474921584129333,
      "learning_rate": 0.000932249825202266,
      "loss": 4.9011,
      "step": 40500
    },
    {
      "epoch": 0.21841505252615653,
      "grad_norm": 0.7966994047164917,
      "learning_rate": 0.0009313597836788859,
      "loss": 4.8958,
      "step": 41000
    },
    {
      "epoch": 0.22107865072769503,
      "grad_norm": 0.8658061623573303,
      "learning_rate": 0.0009304679585051583,
      "loss": 4.8971,
      "step": 41500
    },
    {
      "epoch": 0.22374224892923353,
      "grad_norm": 0.8644976019859314,
      "learning_rate": 0.0009295761333314307,
      "loss": 4.8881,
      "step": 42000
    },
    {
      "epoch": 0.22640584713077203,
      "grad_norm": 0.8099656105041504,
      "learning_rate": 0.0009286843081577032,
      "loss": 4.8828,
      "step": 42500
    },
    {
      "epoch": 0.22906944533231052,
      "grad_norm": 0.8898533582687378,
      "learning_rate": 0.0009277942666343233,
      "loss": 4.8901,
      "step": 43000
    },
    {
      "epoch": 0.231733043533849,
      "grad_norm": 0.8771415948867798,
      "learning_rate": 0.0009269024414605957,
      "loss": 4.8766,
      "step": 43500
    },
    {
      "epoch": 0.2343966417353875,
      "grad_norm": 0.8513174653053284,
      "learning_rate": 0.0009260106162868681,
      "loss": 4.8767,
      "step": 44000
    },
    {
      "epoch": 0.237060239936926,
      "grad_norm": 0.8937121629714966,
      "learning_rate": 0.0009251187911131405,
      "loss": 4.8754,
      "step": 44500
    },
    {
      "epoch": 0.23972383813846448,
      "grad_norm": 0.981132447719574,
      "learning_rate": 0.0009242287495897604,
      "loss": 4.8788,
      "step": 45000
    },
    {
      "epoch": 0.24238743634000298,
      "grad_norm": 0.784052312374115,
      "learning_rate": 0.000923336924416033,
      "loss": 4.8671,
      "step": 45500
    },
    {
      "epoch": 0.24505103454154148,
      "grad_norm": 0.8733552694320679,
      "learning_rate": 0.0009224450992423054,
      "loss": 4.8681,
      "step": 46000
    },
    {
      "epoch": 0.24771463274307998,
      "grad_norm": 0.8183045983314514,
      "learning_rate": 0.0009215532740685778,
      "loss": 4.8696,
      "step": 46500
    },
    {
      "epoch": 0.2503782309446185,
      "grad_norm": 0.8594405651092529,
      "learning_rate": 0.0009206632325451977,
      "loss": 4.8637,
      "step": 47000
    },
    {
      "epoch": 0.25304182914615697,
      "grad_norm": 0.8543962240219116,
      "learning_rate": 0.0009197714073714701,
      "loss": 4.864,
      "step": 47500
    },
    {
      "epoch": 0.25570542734769547,
      "grad_norm": 0.7812336683273315,
      "learning_rate": 0.0009188795821977425,
      "loss": 4.8583,
      "step": 48000
    },
    {
      "epoch": 0.25836902554923397,
      "grad_norm": 0.8478542566299438,
      "learning_rate": 0.0009179877570240151,
      "loss": 4.8507,
      "step": 48500
    },
    {
      "epoch": 0.26103262375077246,
      "grad_norm": 0.8426432013511658,
      "learning_rate": 0.0009170959318502875,
      "loss": 4.8512,
      "step": 49000
    },
    {
      "epoch": 0.26369622195231096,
      "grad_norm": 0.8853486180305481,
      "learning_rate": 0.0009162058903269075,
      "loss": 4.8563,
      "step": 49500
    },
    {
      "epoch": 0.26635982015384946,
      "grad_norm": 0.883021891117096,
      "learning_rate": 0.0009153140651531799,
      "loss": 4.8507,
      "step": 50000
    },
    {
      "epoch": 0.2690234183553879,
      "grad_norm": 0.8407544493675232,
      "learning_rate": 0.0009144222399794523,
      "loss": 4.8554,
      "step": 50500
    },
    {
      "epoch": 0.2716870165569264,
      "grad_norm": 0.8120921850204468,
      "learning_rate": 0.0009135304148057249,
      "loss": 4.8485,
      "step": 51000
    },
    {
      "epoch": 0.2743506147584649,
      "grad_norm": 0.8241139054298401,
      "learning_rate": 0.0009126403732823447,
      "loss": 4.8508,
      "step": 51500
    },
    {
      "epoch": 0.2770142129600034,
      "grad_norm": 0.7940220236778259,
      "learning_rate": 0.0009117485481086172,
      "loss": 4.8465,
      "step": 52000
    },
    {
      "epoch": 0.2796778111615419,
      "grad_norm": 0.7913591265678406,
      "learning_rate": 0.0009108567229348896,
      "loss": 4.8467,
      "step": 52500
    },
    {
      "epoch": 0.2823414093630804,
      "grad_norm": 0.7899219393730164,
      "learning_rate": 0.000909964897761162,
      "loss": 4.8379,
      "step": 53000
    },
    {
      "epoch": 0.2850050075646189,
      "grad_norm": 0.7952625751495361,
      "learning_rate": 0.000909074856237782,
      "loss": 4.847,
      "step": 53500
    },
    {
      "epoch": 0.2876686057661574,
      "grad_norm": 0.8424190878868103,
      "learning_rate": 0.0009081830310640544,
      "loss": 4.8368,
      "step": 54000
    },
    {
      "epoch": 0.2903322039676959,
      "grad_norm": 0.8853405714035034,
      "learning_rate": 0.0009072912058903269,
      "loss": 4.8442,
      "step": 54500
    },
    {
      "epoch": 0.2929958021692344,
      "grad_norm": 0.8321651220321655,
      "learning_rate": 0.0009063993807165993,
      "loss": 4.8369,
      "step": 55000
    },
    {
      "epoch": 0.2956594003707729,
      "grad_norm": 0.7945202589035034,
      "learning_rate": 0.0009055093391932193,
      "loss": 4.8276,
      "step": 55500
    },
    {
      "epoch": 0.29832299857231137,
      "grad_norm": 0.8524190187454224,
      "learning_rate": 0.0009046175140194918,
      "loss": 4.8284,
      "step": 56000
    },
    {
      "epoch": 0.30098659677384987,
      "grad_norm": 0.7767360210418701,
      "learning_rate": 0.0009037256888457643,
      "loss": 4.8282,
      "step": 56500
    },
    {
      "epoch": 0.30365019497538837,
      "grad_norm": 0.7614521980285645,
      "learning_rate": 0.0009028338636720367,
      "loss": 4.8309,
      "step": 57000
    },
    {
      "epoch": 0.30631379317692686,
      "grad_norm": 0.8267444372177124,
      "learning_rate": 0.0009019438221486565,
      "loss": 4.8297,
      "step": 57500
    },
    {
      "epoch": 0.30897739137846536,
      "grad_norm": 0.8024168014526367,
      "learning_rate": 0.000901051996974929,
      "loss": 4.828,
      "step": 58000
    },
    {
      "epoch": 0.31164098958000386,
      "grad_norm": 0.8234706521034241,
      "learning_rate": 0.0009001601718012014,
      "loss": 4.8239,
      "step": 58500
    },
    {
      "epoch": 0.31430458778154235,
      "grad_norm": 0.7709868550300598,
      "learning_rate": 0.000899268346627474,
      "loss": 4.8287,
      "step": 59000
    },
    {
      "epoch": 0.3169681859830808,
      "grad_norm": 0.8301928043365479,
      "learning_rate": 0.0008983783051040939,
      "loss": 4.8283,
      "step": 59500
    },
    {
      "epoch": 0.3196317841846193,
      "grad_norm": 0.8093799948692322,
      "learning_rate": 0.0008974864799303664,
      "loss": 4.82,
      "step": 60000
    },
    {
      "epoch": 0.3222953823861578,
      "grad_norm": 0.866875410079956,
      "learning_rate": 0.0008965946547566388,
      "loss": 4.8259,
      "step": 60500
    },
    {
      "epoch": 0.3249589805876963,
      "grad_norm": 0.9615957140922546,
      "learning_rate": 0.0008957028295829112,
      "loss": 4.8148,
      "step": 61000
    },
    {
      "epoch": 0.3276225787892348,
      "grad_norm": 0.8391242623329163,
      "learning_rate": 0.0008948127880595312,
      "loss": 4.8227,
      "step": 61500
    },
    {
      "epoch": 0.3302861769907733,
      "grad_norm": 0.8788413405418396,
      "learning_rate": 0.0008939209628858036,
      "loss": 4.8087,
      "step": 62000
    },
    {
      "epoch": 0.3329497751923118,
      "grad_norm": 0.8100627660751343,
      "learning_rate": 0.000893029137712076,
      "loss": 4.8097,
      "step": 62500
    },
    {
      "epoch": 0.3356133733938503,
      "grad_norm": 0.8943531513214111,
      "learning_rate": 0.0008921373125383485,
      "loss": 4.819,
      "step": 63000
    },
    {
      "epoch": 0.3382769715953888,
      "grad_norm": 0.8895285725593567,
      "learning_rate": 0.0008912472710149683,
      "loss": 4.8069,
      "step": 63500
    },
    {
      "epoch": 0.3409405697969273,
      "grad_norm": 0.777802586555481,
      "learning_rate": 0.0008903554458412409,
      "loss": 4.8116,
      "step": 64000
    },
    {
      "epoch": 0.34360416799846577,
      "grad_norm": 1.0155904293060303,
      "learning_rate": 0.0008894636206675133,
      "loss": 4.8112,
      "step": 64500
    },
    {
      "epoch": 0.34626776620000427,
      "grad_norm": 0.7913381457328796,
      "learning_rate": 0.0008885717954937858,
      "loss": 4.8138,
      "step": 65000
    },
    {
      "epoch": 0.34893136440154277,
      "grad_norm": 0.8168381452560425,
      "learning_rate": 0.0008876799703200582,
      "loss": 4.8065,
      "step": 65500
    },
    {
      "epoch": 0.35159496260308126,
      "grad_norm": 0.8038260340690613,
      "learning_rate": 0.0008867899287966782,
      "loss": 4.8006,
      "step": 66000
    },
    {
      "epoch": 0.35425856080461976,
      "grad_norm": 0.781873881816864,
      "learning_rate": 0.0008858981036229507,
      "loss": 4.809,
      "step": 66500
    },
    {
      "epoch": 0.35692215900615826,
      "grad_norm": 0.9184179306030273,
      "learning_rate": 0.0008850062784492231,
      "loss": 4.8035,
      "step": 67000
    },
    {
      "epoch": 0.35958575720769675,
      "grad_norm": 0.7746654748916626,
      "learning_rate": 0.0008841144532754956,
      "loss": 4.8099,
      "step": 67500
    },
    {
      "epoch": 0.36224935540923525,
      "grad_norm": 0.8979808688163757,
      "learning_rate": 0.0008832244117521154,
      "loss": 4.7976,
      "step": 68000
    },
    {
      "epoch": 0.36491295361077375,
      "grad_norm": 0.8198953866958618,
      "learning_rate": 0.0008823325865783879,
      "loss": 4.8001,
      "step": 68500
    },
    {
      "epoch": 0.3675765518123122,
      "grad_norm": 0.8266115784645081,
      "learning_rate": 0.0008814407614046604,
      "loss": 4.8043,
      "step": 69000
    },
    {
      "epoch": 0.3702401500138507,
      "grad_norm": 0.8325560688972473,
      "learning_rate": 0.0008805489362309328,
      "loss": 4.7989,
      "step": 69500
    },
    {
      "epoch": 0.3729037482153892,
      "grad_norm": 0.7824032306671143,
      "learning_rate": 0.0008796588947075527,
      "loss": 4.7945,
      "step": 70000
    },
    {
      "epoch": 0.3755673464169277,
      "grad_norm": 0.7960947155952454,
      "learning_rate": 0.0008787670695338251,
      "loss": 4.7975,
      "step": 70500
    },
    {
      "epoch": 0.3782309446184662,
      "grad_norm": 0.8350421190261841,
      "learning_rate": 0.0008778752443600976,
      "loss": 4.7977,
      "step": 71000
    },
    {
      "epoch": 0.3808945428200047,
      "grad_norm": 0.7750975489616394,
      "learning_rate": 0.00087698341918637,
      "loss": 4.7944,
      "step": 71500
    },
    {
      "epoch": 0.3835581410215432,
      "grad_norm": 0.8240845799446106,
      "learning_rate": 0.0008760933776629901,
      "loss": 4.7908,
      "step": 72000
    },
    {
      "epoch": 0.3862217392230817,
      "grad_norm": 0.8309052586555481,
      "learning_rate": 0.0008752015524892625,
      "loss": 4.8001,
      "step": 72500
    },
    {
      "epoch": 0.38888533742462017,
      "grad_norm": 0.8170336484909058,
      "learning_rate": 0.0008743097273155349,
      "loss": 4.7964,
      "step": 73000
    },
    {
      "epoch": 0.39154893562615867,
      "grad_norm": 0.8648092746734619,
      "learning_rate": 0.0008734179021418074,
      "loss": 4.7826,
      "step": 73500
    },
    {
      "epoch": 0.39421253382769716,
      "grad_norm": 0.8146944046020508,
      "learning_rate": 0.0008725278606184272,
      "loss": 4.7906,
      "step": 74000
    },
    {
      "epoch": 0.39687613202923566,
      "grad_norm": 0.792269229888916,
      "learning_rate": 0.0008716360354446998,
      "loss": 4.7852,
      "step": 74500
    },
    {
      "epoch": 0.39953973023077416,
      "grad_norm": 0.7599817514419556,
      "learning_rate": 0.0008707442102709722,
      "loss": 4.7917,
      "step": 75000
    },
    {
      "epoch": 0.40220332843231266,
      "grad_norm": 0.799649178981781,
      "learning_rate": 0.0008698523850972446,
      "loss": 4.7846,
      "step": 75500
    },
    {
      "epoch": 0.40486692663385115,
      "grad_norm": 0.7801626324653625,
      "learning_rate": 0.0008689623435738645,
      "loss": 4.7879,
      "step": 76000
    },
    {
      "epoch": 0.40753052483538965,
      "grad_norm": 0.8832575082778931,
      "learning_rate": 0.0008680705184001369,
      "loss": 4.7881,
      "step": 76500
    },
    {
      "epoch": 0.41019412303692815,
      "grad_norm": 0.848629355430603,
      "learning_rate": 0.0008671786932264095,
      "loss": 4.7933,
      "step": 77000
    },
    {
      "epoch": 0.41285772123846665,
      "grad_norm": 0.8427609205245972,
      "learning_rate": 0.0008662868680526819,
      "loss": 4.7912,
      "step": 77500
    },
    {
      "epoch": 0.4155213194400051,
      "grad_norm": 0.767152726650238,
      "learning_rate": 0.0008653968265293019,
      "loss": 4.7904,
      "step": 78000
    },
    {
      "epoch": 0.4181849176415436,
      "grad_norm": 0.8218587636947632,
      "learning_rate": 0.0008645050013555743,
      "loss": 4.7793,
      "step": 78500
    },
    {
      "epoch": 0.4208485158430821,
      "grad_norm": 0.8102436065673828,
      "learning_rate": 0.0008636131761818467,
      "loss": 4.7775,
      "step": 79000
    },
    {
      "epoch": 0.4235121140446206,
      "grad_norm": 0.7857397198677063,
      "learning_rate": 0.0008627213510081193,
      "loss": 4.7742,
      "step": 79500
    },
    {
      "epoch": 0.4261757122461591,
      "grad_norm": 0.8044630885124207,
      "learning_rate": 0.0008618313094847391,
      "loss": 4.7851,
      "step": 80000
    },
    {
      "epoch": 0.4288393104476976,
      "grad_norm": 0.7105129957199097,
      "learning_rate": 0.0008609394843110116,
      "loss": 4.7796,
      "step": 80500
    },
    {
      "epoch": 0.43150290864923607,
      "grad_norm": 0.7851101160049438,
      "learning_rate": 0.000860047659137284,
      "loss": 4.7825,
      "step": 81000
    },
    {
      "epoch": 0.43416650685077457,
      "grad_norm": 0.7503988742828369,
      "learning_rate": 0.0008591558339635564,
      "loss": 4.7825,
      "step": 81500
    },
    {
      "epoch": 0.43683010505231307,
      "grad_norm": 0.7521843314170837,
      "learning_rate": 0.0008582657924401764,
      "loss": 4.7818,
      "step": 82000
    },
    {
      "epoch": 0.43949370325385156,
      "grad_norm": 0.8569875955581665,
      "learning_rate": 0.0008573739672664489,
      "loss": 4.7768,
      "step": 82500
    },
    {
      "epoch": 0.44215730145539006,
      "grad_norm": 0.7394946813583374,
      "learning_rate": 0.0008564821420927214,
      "loss": 4.7793,
      "step": 83000
    },
    {
      "epoch": 0.44482089965692856,
      "grad_norm": 0.8162407279014587,
      "learning_rate": 0.0008555903169189938,
      "loss": 4.7695,
      "step": 83500
    },
    {
      "epoch": 0.44748449785846706,
      "grad_norm": 0.829507052898407,
      "learning_rate": 0.0008547002753956137,
      "loss": 4.7726,
      "step": 84000
    },
    {
      "epoch": 0.45014809606000555,
      "grad_norm": 0.8483043313026428,
      "learning_rate": 0.0008538084502218861,
      "loss": 4.7801,
      "step": 84500
    },
    {
      "epoch": 0.45281169426154405,
      "grad_norm": 0.8971179127693176,
      "learning_rate": 0.0008529166250481586,
      "loss": 4.7693,
      "step": 85000
    },
    {
      "epoch": 0.45547529246308255,
      "grad_norm": 0.8432018160820007,
      "learning_rate": 0.0008520247998744311,
      "loss": 4.7769,
      "step": 85500
    },
    {
      "epoch": 0.45813889066462105,
      "grad_norm": 0.8253493905067444,
      "learning_rate": 0.0008511347583510509,
      "loss": 4.7711,
      "step": 86000
    },
    {
      "epoch": 0.46080248886615954,
      "grad_norm": 0.7823784351348877,
      "learning_rate": 0.0008502429331773234,
      "loss": 4.7755,
      "step": 86500
    },
    {
      "epoch": 0.463466087067698,
      "grad_norm": 0.9113832712173462,
      "learning_rate": 0.0008493511080035958,
      "loss": 4.7733,
      "step": 87000
    },
    {
      "epoch": 0.4661296852692365,
      "grad_norm": 0.7511106729507446,
      "learning_rate": 0.0008484592828298683,
      "loss": 4.7628,
      "step": 87500
    },
    {
      "epoch": 0.468793283470775,
      "grad_norm": 0.821972668170929,
      "learning_rate": 0.0008475674576561408,
      "loss": 4.7639,
      "step": 88000
    },
    {
      "epoch": 0.4714568816723135,
      "grad_norm": 0.8578181862831116,
      "learning_rate": 0.0008466774161327607,
      "loss": 4.7632,
      "step": 88500
    },
    {
      "epoch": 0.474120479873852,
      "grad_norm": 0.7680496573448181,
      "learning_rate": 0.0008457855909590332,
      "loss": 4.7715,
      "step": 89000
    },
    {
      "epoch": 0.47678407807539047,
      "grad_norm": 0.7780221104621887,
      "learning_rate": 0.0008448937657853056,
      "loss": 4.7644,
      "step": 89500
    },
    {
      "epoch": 0.47944767627692897,
      "grad_norm": 0.7615424394607544,
      "learning_rate": 0.0008440019406115781,
      "loss": 4.7647,
      "step": 90000
    },
    {
      "epoch": 0.48211127447846747,
      "grad_norm": 0.8719656467437744,
      "learning_rate": 0.000843111899088198,
      "loss": 4.7745,
      "step": 90500
    },
    {
      "epoch": 0.48477487268000596,
      "grad_norm": 0.7793582677841187,
      "learning_rate": 0.0008422200739144704,
      "loss": 4.7668,
      "step": 91000
    },
    {
      "epoch": 0.48743847088154446,
      "grad_norm": 0.7653023600578308,
      "learning_rate": 0.0008413282487407429,
      "loss": 4.7646,
      "step": 91500
    },
    {
      "epoch": 0.49010206908308296,
      "grad_norm": 0.8937133550643921,
      "learning_rate": 0.0008404364235670153,
      "loss": 4.7646,
      "step": 92000
    },
    {
      "epoch": 0.49276566728462146,
      "grad_norm": 0.8305505514144897,
      "learning_rate": 0.0008395463820436352,
      "loss": 4.7647,
      "step": 92500
    },
    {
      "epoch": 0.49542926548615995,
      "grad_norm": 0.8143522143363953,
      "learning_rate": 0.0008386545568699077,
      "loss": 4.7679,
      "step": 93000
    },
    {
      "epoch": 0.49809286368769845,
      "grad_norm": 0.7998281121253967,
      "learning_rate": 0.0008377627316961801,
      "loss": 4.7655,
      "step": 93500
    },
    {
      "epoch": 0.500756461889237,
      "grad_norm": 0.823118269443512,
      "learning_rate": 0.0008368709065224526,
      "loss": 4.7635,
      "step": 94000
    },
    {
      "epoch": 0.5034200600907754,
      "grad_norm": 0.8964449167251587,
      "learning_rate": 0.0008359808649990725,
      "loss": 4.7575,
      "step": 94500
    },
    {
      "epoch": 0.5060836582923139,
      "grad_norm": 0.7982577681541443,
      "learning_rate": 0.000835089039825345,
      "loss": 4.7669,
      "step": 95000
    },
    {
      "epoch": 0.5087472564938524,
      "grad_norm": 0.8269961476325989,
      "learning_rate": 0.0008341972146516175,
      "loss": 4.7613,
      "step": 95500
    },
    {
      "epoch": 0.5114108546953909,
      "grad_norm": 0.7937721610069275,
      "learning_rate": 0.0008333053894778899,
      "loss": 4.7591,
      "step": 96000
    },
    {
      "epoch": 0.5140744528969294,
      "grad_norm": 0.867740273475647,
      "learning_rate": 0.0008324153479545098,
      "loss": 4.7596,
      "step": 96500
    },
    {
      "epoch": 0.5167380510984679,
      "grad_norm": 0.8314835429191589,
      "learning_rate": 0.0008315235227807822,
      "loss": 4.7633,
      "step": 97000
    },
    {
      "epoch": 0.5194016493000064,
      "grad_norm": 0.8014164566993713,
      "learning_rate": 0.0008306316976070547,
      "loss": 4.762,
      "step": 97500
    },
    {
      "epoch": 0.5220652475015449,
      "grad_norm": 0.7812915444374084,
      "learning_rate": 0.0008297398724333272,
      "loss": 4.7634,
      "step": 98000
    },
    {
      "epoch": 0.5247288457030834,
      "grad_norm": 0.7968524098396301,
      "learning_rate": 0.0008288498309099471,
      "loss": 4.7625,
      "step": 98500
    },
    {
      "epoch": 0.5273924439046219,
      "grad_norm": 0.821968674659729,
      "learning_rate": 0.0008279580057362195,
      "loss": 4.752,
      "step": 99000
    },
    {
      "epoch": 0.5300560421061604,
      "grad_norm": 0.7830886244773865,
      "learning_rate": 0.0008270661805624919,
      "loss": 4.7603,
      "step": 99500
    },
    {
      "epoch": 0.5327196403076989,
      "grad_norm": 0.7848255634307861,
      "learning_rate": 0.0008261743553887644,
      "loss": 4.7571,
      "step": 100000
    },
    {
      "epoch": 0.5353832385092374,
      "grad_norm": 0.7928926944732666,
      "learning_rate": 0.0008252843138653843,
      "loss": 4.7529,
      "step": 100500
    },
    {
      "epoch": 0.5380468367107758,
      "grad_norm": 0.8001675605773926,
      "learning_rate": 0.0008243924886916569,
      "loss": 4.7616,
      "step": 101000
    },
    {
      "epoch": 0.5407104349123143,
      "grad_norm": 0.8647136688232422,
      "learning_rate": 0.0008235006635179293,
      "loss": 4.7452,
      "step": 101500
    },
    {
      "epoch": 0.5433740331138528,
      "grad_norm": 0.8823105692863464,
      "learning_rate": 0.0008226088383442017,
      "loss": 4.7549,
      "step": 102000
    },
    {
      "epoch": 0.5460376313153913,
      "grad_norm": 0.8441142439842224,
      "learning_rate": 0.0008217187968208216,
      "loss": 4.7544,
      "step": 102500
    },
    {
      "epoch": 0.5487012295169298,
      "grad_norm": 0.8819558620452881,
      "learning_rate": 0.000820826971647094,
      "loss": 4.7491,
      "step": 103000
    },
    {
      "epoch": 0.5513648277184683,
      "grad_norm": 0.7855533361434937,
      "learning_rate": 0.0008199351464733666,
      "loss": 4.7591,
      "step": 103500
    },
    {
      "epoch": 0.5540284259200068,
      "grad_norm": 0.8068869709968567,
      "learning_rate": 0.000819043321299639,
      "loss": 4.7534,
      "step": 104000
    },
    {
      "epoch": 0.5566920241215453,
      "grad_norm": 0.8351749181747437,
      "learning_rate": 0.0008181532797762589,
      "loss": 4.7481,
      "step": 104500
    },
    {
      "epoch": 0.5593556223230838,
      "grad_norm": 0.8479593992233276,
      "learning_rate": 0.0008172614546025314,
      "loss": 4.7575,
      "step": 105000
    },
    {
      "epoch": 0.5620192205246223,
      "grad_norm": 0.8183143138885498,
      "learning_rate": 0.0008163696294288038,
      "loss": 4.7565,
      "step": 105500
    },
    {
      "epoch": 0.5646828187261608,
      "grad_norm": 0.8138937950134277,
      "learning_rate": 0.0008154778042550764,
      "loss": 4.7482,
      "step": 106000
    },
    {
      "epoch": 0.5673464169276993,
      "grad_norm": 0.8708425164222717,
      "learning_rate": 0.0008145877627316962,
      "loss": 4.7516,
      "step": 106500
    },
    {
      "epoch": 0.5700100151292378,
      "grad_norm": 0.8439280986785889,
      "learning_rate": 0.0008136959375579687,
      "loss": 4.7523,
      "step": 107000
    },
    {
      "epoch": 0.5726736133307763,
      "grad_norm": 0.8017052412033081,
      "learning_rate": 0.0008128041123842411,
      "loss": 4.745,
      "step": 107500
    },
    {
      "epoch": 0.5753372115323148,
      "grad_norm": 0.846176266670227,
      "learning_rate": 0.0008119122872105135,
      "loss": 4.7539,
      "step": 108000
    },
    {
      "epoch": 0.5780008097338533,
      "grad_norm": 0.8138134479522705,
      "learning_rate": 0.0008110222456871334,
      "loss": 4.7468,
      "step": 108500
    },
    {
      "epoch": 0.5806644079353918,
      "grad_norm": 0.7649713754653931,
      "learning_rate": 0.0008101304205134059,
      "loss": 4.7467,
      "step": 109000
    },
    {
      "epoch": 0.5833280061369303,
      "grad_norm": 0.8558058142662048,
      "learning_rate": 0.0008092385953396784,
      "loss": 4.7408,
      "step": 109500
    },
    {
      "epoch": 0.5859916043384688,
      "grad_norm": 0.8179123401641846,
      "learning_rate": 0.0008083467701659508,
      "loss": 4.7526,
      "step": 110000
    },
    {
      "epoch": 0.5886552025400072,
      "grad_norm": 0.8050591349601746,
      "learning_rate": 0.0008074567286425708,
      "loss": 4.7423,
      "step": 110500
    },
    {
      "epoch": 0.5913188007415457,
      "grad_norm": 0.7940638661384583,
      "learning_rate": 0.0008065649034688432,
      "loss": 4.7467,
      "step": 111000
    },
    {
      "epoch": 0.5939823989430842,
      "grad_norm": 0.7882602214813232,
      "learning_rate": 0.0008056730782951157,
      "loss": 4.7475,
      "step": 111500
    },
    {
      "epoch": 0.5966459971446227,
      "grad_norm": 0.8111135959625244,
      "learning_rate": 0.0008047812531213882,
      "loss": 4.749,
      "step": 112000
    },
    {
      "epoch": 0.5993095953461612,
      "grad_norm": 0.792116641998291,
      "learning_rate": 0.000803891211598008,
      "loss": 4.7507,
      "step": 112500
    },
    {
      "epoch": 0.6019731935476997,
      "grad_norm": 0.8503523468971252,
      "learning_rate": 0.0008029993864242805,
      "loss": 4.7389,
      "step": 113000
    },
    {
      "epoch": 0.6046367917492382,
      "grad_norm": 0.8201097846031189,
      "learning_rate": 0.0008021075612505529,
      "loss": 4.752,
      "step": 113500
    },
    {
      "epoch": 0.6073003899507767,
      "grad_norm": 0.8428370952606201,
      "learning_rate": 0.0008012157360768254,
      "loss": 4.7435,
      "step": 114000
    },
    {
      "epoch": 0.6099639881523152,
      "grad_norm": 0.7499297261238098,
      "learning_rate": 0.0008003239109030979,
      "loss": 4.7416,
      "step": 114500
    },
    {
      "epoch": 0.6126275863538537,
      "grad_norm": 0.8179661631584167,
      "learning_rate": 0.0007994338693797177,
      "loss": 4.744,
      "step": 115000
    },
    {
      "epoch": 0.6152911845553922,
      "grad_norm": 0.8121057152748108,
      "learning_rate": 0.0007985420442059902,
      "loss": 4.7434,
      "step": 115500
    },
    {
      "epoch": 0.6179547827569307,
      "grad_norm": 0.7849302887916565,
      "learning_rate": 0.0007976502190322626,
      "loss": 4.7396,
      "step": 116000
    },
    {
      "epoch": 0.6206183809584692,
      "grad_norm": 0.7559896111488342,
      "learning_rate": 0.0007967583938585351,
      "loss": 4.7444,
      "step": 116500
    },
    {
      "epoch": 0.6232819791600077,
      "grad_norm": 0.801511824131012,
      "learning_rate": 0.0007958683523351551,
      "loss": 4.7391,
      "step": 117000
    },
    {
      "epoch": 0.6259455773615462,
      "grad_norm": 0.752527117729187,
      "learning_rate": 0.0007949765271614275,
      "loss": 4.7398,
      "step": 117500
    },
    {
      "epoch": 0.6286091755630847,
      "grad_norm": 0.8372392654418945,
      "learning_rate": 0.0007940847019877,
      "loss": 4.7401,
      "step": 118000
    },
    {
      "epoch": 0.6312727737646232,
      "grad_norm": 0.842634379863739,
      "learning_rate": 0.0007931928768139724,
      "loss": 4.7437,
      "step": 118500
    },
    {
      "epoch": 0.6339363719661616,
      "grad_norm": 0.7479714751243591,
      "learning_rate": 0.0007923028352905924,
      "loss": 4.7428,
      "step": 119000
    },
    {
      "epoch": 0.6365999701677001,
      "grad_norm": 0.800308108329773,
      "learning_rate": 0.0007914110101168648,
      "loss": 4.7378,
      "step": 119500
    },
    {
      "epoch": 0.6392635683692386,
      "grad_norm": 0.8084207773208618,
      "learning_rate": 0.0007905191849431372,
      "loss": 4.7334,
      "step": 120000
    },
    {
      "epoch": 0.6419271665707771,
      "grad_norm": 0.8754898309707642,
      "learning_rate": 0.0007896273597694097,
      "loss": 4.7391,
      "step": 120500
    },
    {
      "epoch": 0.6445907647723156,
      "grad_norm": 0.8357532620429993,
      "learning_rate": 0.0007887373182460295,
      "loss": 4.7394,
      "step": 121000
    },
    {
      "epoch": 0.6472543629738541,
      "grad_norm": 0.7808672189712524,
      "learning_rate": 0.000787845493072302,
      "loss": 4.7422,
      "step": 121500
    },
    {
      "epoch": 0.6499179611753926,
      "grad_norm": 0.8768132328987122,
      "learning_rate": 0.0007869536678985745,
      "loss": 4.7412,
      "step": 122000
    },
    {
      "epoch": 0.6525815593769311,
      "grad_norm": 0.795536994934082,
      "learning_rate": 0.0007860618427248469,
      "loss": 4.7424,
      "step": 122500
    },
    {
      "epoch": 0.6552451575784696,
      "grad_norm": 0.8333203792572021,
      "learning_rate": 0.0007851718012014669,
      "loss": 4.7363,
      "step": 123000
    },
    {
      "epoch": 0.6579087557800081,
      "grad_norm": 0.8043723106384277,
      "learning_rate": 0.0007842799760277393,
      "loss": 4.7395,
      "step": 123500
    },
    {
      "epoch": 0.6605723539815466,
      "grad_norm": 0.7881098985671997,
      "learning_rate": 0.0007833881508540118,
      "loss": 4.7373,
      "step": 124000
    },
    {
      "epoch": 0.6632359521830851,
      "grad_norm": 0.8250852823257446,
      "learning_rate": 0.0007824981093306317,
      "loss": 4.7361,
      "step": 124500
    },
    {
      "epoch": 0.6658995503846236,
      "grad_norm": 0.791354775428772,
      "learning_rate": 0.0007816062841569042,
      "loss": 4.7401,
      "step": 125000
    },
    {
      "epoch": 0.6685631485861621,
      "grad_norm": 0.833494246006012,
      "learning_rate": 0.0007807144589831766,
      "loss": 4.7368,
      "step": 125500
    },
    {
      "epoch": 0.6712267467877006,
      "grad_norm": 0.8371044993400574,
      "learning_rate": 0.000779822633809449,
      "loss": 4.7363,
      "step": 126000
    },
    {
      "epoch": 0.673890344989239,
      "grad_norm": 0.9218412041664124,
      "learning_rate": 0.0007789308086357215,
      "loss": 4.7373,
      "step": 126500
    },
    {
      "epoch": 0.6765539431907776,
      "grad_norm": 0.8172479867935181,
      "learning_rate": 0.000778038983461994,
      "loss": 4.7347,
      "step": 127000
    },
    {
      "epoch": 0.679217541392316,
      "grad_norm": 0.8264776468276978,
      "learning_rate": 0.0007771471582882665,
      "loss": 4.7332,
      "step": 127500
    },
    {
      "epoch": 0.6818811395938545,
      "grad_norm": 0.780692458152771,
      "learning_rate": 0.0007762553331145389,
      "loss": 4.7348,
      "step": 128000
    },
    {
      "epoch": 0.684544737795393,
      "grad_norm": 0.794199526309967,
      "learning_rate": 0.0007753652915911589,
      "loss": 4.737,
      "step": 128500
    },
    {
      "epoch": 0.6872083359969315,
      "grad_norm": 0.8050469756126404,
      "learning_rate": 0.0007744734664174313,
      "loss": 4.7261,
      "step": 129000
    },
    {
      "epoch": 0.68987193419847,
      "grad_norm": 0.8591654300689697,
      "learning_rate": 0.0007735816412437038,
      "loss": 4.7311,
      "step": 129500
    },
    {
      "epoch": 0.6925355324000085,
      "grad_norm": 0.8073732256889343,
      "learning_rate": 0.0007726898160699763,
      "loss": 4.7328,
      "step": 130000
    },
    {
      "epoch": 0.695199130601547,
      "grad_norm": 0.8377549052238464,
      "learning_rate": 0.0007717997745465961,
      "loss": 4.7324,
      "step": 130500
    },
    {
      "epoch": 0.6978627288030855,
      "grad_norm": 0.7627879977226257,
      "learning_rate": 0.0007709079493728686,
      "loss": 4.7304,
      "step": 131000
    },
    {
      "epoch": 0.700526327004624,
      "grad_norm": 0.7747420072555542,
      "learning_rate": 0.000770016124199141,
      "loss": 4.7337,
      "step": 131500
    },
    {
      "epoch": 0.7031899252061625,
      "grad_norm": 0.833777129650116,
      "learning_rate": 0.0007691242990254135,
      "loss": 4.7362,
      "step": 132000
    },
    {
      "epoch": 0.705853523407701,
      "grad_norm": 0.8297452330589294,
      "learning_rate": 0.0007682342575020334,
      "loss": 4.7331,
      "step": 132500
    },
    {
      "epoch": 0.7085171216092395,
      "grad_norm": 0.8135260343551636,
      "learning_rate": 0.0007673424323283058,
      "loss": 4.7317,
      "step": 133000
    },
    {
      "epoch": 0.711180719810778,
      "grad_norm": 0.82469242811203,
      "learning_rate": 0.0007664506071545783,
      "loss": 4.7289,
      "step": 133500
    },
    {
      "epoch": 0.7138443180123165,
      "grad_norm": 0.7857999801635742,
      "learning_rate": 0.0007655587819808507,
      "loss": 4.7316,
      "step": 134000
    },
    {
      "epoch": 0.716507916213855,
      "grad_norm": 0.8272935748100281,
      "learning_rate": 0.0007646687404574707,
      "loss": 4.7298,
      "step": 134500
    },
    {
      "epoch": 0.7191715144153935,
      "grad_norm": 0.8309085965156555,
      "learning_rate": 0.0007637769152837432,
      "loss": 4.7333,
      "step": 135000
    },
    {
      "epoch": 0.721835112616932,
      "grad_norm": 0.9184426665306091,
      "learning_rate": 0.0007628850901100156,
      "loss": 4.7223,
      "step": 135500
    },
    {
      "epoch": 0.7244987108184705,
      "grad_norm": 0.8144403100013733,
      "learning_rate": 0.0007619932649362881,
      "loss": 4.725,
      "step": 136000
    },
    {
      "epoch": 0.727162309020009,
      "grad_norm": 0.8021435737609863,
      "learning_rate": 0.0007611032234129079,
      "loss": 4.7328,
      "step": 136500
    },
    {
      "epoch": 0.7298259072215475,
      "grad_norm": 0.8207322955131531,
      "learning_rate": 0.0007602113982391804,
      "loss": 4.73,
      "step": 137000
    },
    {
      "epoch": 0.7324895054230859,
      "grad_norm": 0.904644787311554,
      "learning_rate": 0.0007593195730654529,
      "loss": 4.7276,
      "step": 137500
    },
    {
      "epoch": 0.7351531036246244,
      "grad_norm": 0.7794029712677002,
      "learning_rate": 0.0007584277478917253,
      "loss": 4.7274,
      "step": 138000
    },
    {
      "epoch": 0.7378167018261629,
      "grad_norm": 0.7878913879394531,
      "learning_rate": 0.0007575377063683452,
      "loss": 4.7238,
      "step": 138500
    },
    {
      "epoch": 0.7404803000277014,
      "grad_norm": 0.8159613013267517,
      "learning_rate": 0.0007566458811946176,
      "loss": 4.7259,
      "step": 139000
    },
    {
      "epoch": 0.7431438982292399,
      "grad_norm": 0.7896559834480286,
      "learning_rate": 0.00075575405602089,
      "loss": 4.7223,
      "step": 139500
    },
    {
      "epoch": 0.7458074964307784,
      "grad_norm": 0.8425673246383667,
      "learning_rate": 0.0007548622308471626,
      "loss": 4.7229,
      "step": 140000
    },
    {
      "epoch": 0.7484710946323169,
      "grad_norm": 0.8656537532806396,
      "learning_rate": 0.0007539721893237826,
      "loss": 4.7319,
      "step": 140500
    },
    {
      "epoch": 0.7511346928338554,
      "grad_norm": 0.792007327079773,
      "learning_rate": 0.000753080364150055,
      "loss": 4.7241,
      "step": 141000
    },
    {
      "epoch": 0.7537982910353939,
      "grad_norm": 0.8079518675804138,
      "learning_rate": 0.0007521885389763274,
      "loss": 4.726,
      "step": 141500
    },
    {
      "epoch": 0.7564618892369324,
      "grad_norm": 0.8287070393562317,
      "learning_rate": 0.0007512967138025999,
      "loss": 4.7248,
      "step": 142000
    },
    {
      "epoch": 0.7591254874384709,
      "grad_norm": Infinity,
      "learning_rate": 0.0007504048886288724,
      "loss": 4.7208,
      "step": 142500
    },
    {
      "epoch": 0.7617890856400094,
      "grad_norm": 0.8842335343360901,
      "learning_rate": 0.0007495148471054923,
      "loss": 4.7186,
      "step": 143000
    },
    {
      "epoch": 0.7644526838415479,
      "grad_norm": 0.8471961617469788,
      "learning_rate": 0.0007486230219317647,
      "loss": 4.7238,
      "step": 143500
    },
    {
      "epoch": 0.7671162820430864,
      "grad_norm": 0.8359102010726929,
      "learning_rate": 0.0007477311967580371,
      "loss": 4.7236,
      "step": 144000
    },
    {
      "epoch": 0.7697798802446248,
      "grad_norm": 0.8359571099281311,
      "learning_rate": 0.0007468393715843096,
      "loss": 4.7208,
      "step": 144500
    },
    {
      "epoch": 0.7724434784461633,
      "grad_norm": 0.9100736379623413,
      "learning_rate": 0.0007459493300609294,
      "loss": 4.7247,
      "step": 145000
    },
    {
      "epoch": 0.7751070766477018,
      "grad_norm": 0.8252699375152588,
      "learning_rate": 0.000745057504887202,
      "loss": 4.7192,
      "step": 145500
    },
    {
      "epoch": 0.7777706748492403,
      "grad_norm": 0.7999640107154846,
      "learning_rate": 0.0007441656797134744,
      "loss": 4.7266,
      "step": 146000
    },
    {
      "epoch": 0.7804342730507788,
      "grad_norm": 0.7765536308288574,
      "learning_rate": 0.0007432738545397468,
      "loss": 4.7193,
      "step": 146500
    },
    {
      "epoch": 0.7830978712523173,
      "grad_norm": 0.8114664554595947,
      "learning_rate": 0.0007423838130163668,
      "loss": 4.7184,
      "step": 147000
    },
    {
      "epoch": 0.7857614694538558,
      "grad_norm": 0.8485323786735535,
      "learning_rate": 0.0007414919878426392,
      "loss": 4.7246,
      "step": 147500
    },
    {
      "epoch": 0.7884250676553943,
      "grad_norm": 0.828681468963623,
      "learning_rate": 0.0007406001626689118,
      "loss": 4.7197,
      "step": 148000
    },
    {
      "epoch": 0.7910886658569328,
      "grad_norm": 0.8879855871200562,
      "learning_rate": 0.0007397083374951842,
      "loss": 4.7242,
      "step": 148500
    },
    {
      "epoch": 0.7937522640584713,
      "grad_norm": 1.0155161619186401,
      "learning_rate": 0.0007388182959718041,
      "loss": 4.7196,
      "step": 149000
    },
    {
      "epoch": 0.7964158622600098,
      "grad_norm": 0.7817535996437073,
      "learning_rate": 0.0007379264707980765,
      "loss": 4.7276,
      "step": 149500
    },
    {
      "epoch": 0.7990794604615483,
      "grad_norm": 0.8676290512084961,
      "learning_rate": 0.0007370346456243489,
      "loss": 4.7235,
      "step": 150000
    },
    {
      "epoch": 0.8017430586630868,
      "grad_norm": 0.8710722923278809,
      "learning_rate": 0.0007361428204506215,
      "loss": 4.7238,
      "step": 150500
    },
    {
      "epoch": 0.8044066568646253,
      "grad_norm": 0.9807900786399841,
      "learning_rate": 0.0007352527789272414,
      "loss": 4.7265,
      "step": 151000
    },
    {
      "epoch": 0.8070702550661638,
      "grad_norm": 0.8639500737190247,
      "learning_rate": 0.0007343609537535139,
      "loss": 4.7256,
      "step": 151500
    },
    {
      "epoch": 0.8097338532677023,
      "grad_norm": 0.8448176383972168,
      "learning_rate": 0.0007334691285797863,
      "loss": 4.719,
      "step": 152000
    },
    {
      "epoch": 0.8123974514692408,
      "grad_norm": 0.8320333361625671,
      "learning_rate": 0.0007325773034060587,
      "loss": 4.7209,
      "step": 152500
    },
    {
      "epoch": 0.8150610496707793,
      "grad_norm": 0.7863089442253113,
      "learning_rate": 0.0007316872618826787,
      "loss": 4.7198,
      "step": 153000
    },
    {
      "epoch": 0.8177246478723178,
      "grad_norm": 0.9616714715957642,
      "learning_rate": 0.0007307954367089511,
      "loss": 4.7184,
      "step": 153500
    },
    {
      "epoch": 0.8203882460738563,
      "grad_norm": 0.8382904529571533,
      "learning_rate": 0.0007299036115352236,
      "loss": 4.7171,
      "step": 154000
    },
    {
      "epoch": 0.8230518442753948,
      "grad_norm": 0.8196877837181091,
      "learning_rate": 0.000729011786361496,
      "loss": 4.7157,
      "step": 154500
    },
    {
      "epoch": 0.8257154424769333,
      "grad_norm": 0.8712915182113647,
      "learning_rate": 0.0007281217448381159,
      "loss": 4.7224,
      "step": 155000
    },
    {
      "epoch": 0.8283790406784717,
      "grad_norm": 0.776938259601593,
      "learning_rate": 0.0007272299196643883,
      "loss": 4.7196,
      "step": 155500
    },
    {
      "epoch": 0.8310426388800102,
      "grad_norm": 0.8299930095672607,
      "learning_rate": 0.0007263380944906608,
      "loss": 4.7199,
      "step": 156000
    },
    {
      "epoch": 0.8337062370815487,
      "grad_norm": 0.8253493905067444,
      "learning_rate": 0.0007254462693169333,
      "loss": 4.7143,
      "step": 156500
    },
    {
      "epoch": 0.8363698352830872,
      "grad_norm": 0.8310771584510803,
      "learning_rate": 0.0007245562277935532,
      "loss": 4.7205,
      "step": 157000
    },
    {
      "epoch": 0.8390334334846257,
      "grad_norm": 0.7761854529380798,
      "learning_rate": 0.0007236644026198257,
      "loss": 4.7225,
      "step": 157500
    },
    {
      "epoch": 0.8416970316861642,
      "grad_norm": 0.8773240447044373,
      "learning_rate": 0.0007227725774460981,
      "loss": 4.7101,
      "step": 158000
    },
    {
      "epoch": 0.8443606298877027,
      "grad_norm": 0.8560092449188232,
      "learning_rate": 0.0007218807522723706,
      "loss": 4.7119,
      "step": 158500
    },
    {
      "epoch": 0.8470242280892412,
      "grad_norm": 0.8768864870071411,
      "learning_rate": 0.0007209907107489905,
      "loss": 4.7211,
      "step": 159000
    },
    {
      "epoch": 0.8496878262907797,
      "grad_norm": 0.7614040970802307,
      "learning_rate": 0.0007200988855752629,
      "loss": 4.716,
      "step": 159500
    },
    {
      "epoch": 0.8523514244923182,
      "grad_norm": 0.8368701934814453,
      "learning_rate": 0.0007192070604015354,
      "loss": 4.719,
      "step": 160000
    },
    {
      "epoch": 0.8550150226938567,
      "grad_norm": 0.8005092144012451,
      "learning_rate": 0.0007183152352278078,
      "loss": 4.7179,
      "step": 160500
    },
    {
      "epoch": 0.8576786208953951,
      "grad_norm": 0.7758657932281494,
      "learning_rate": 0.0007174251937044278,
      "loss": 4.7194,
      "step": 161000
    },
    {
      "epoch": 0.8603422190969336,
      "grad_norm": 0.7939693927764893,
      "learning_rate": 0.0007165333685307002,
      "loss": 4.7197,
      "step": 161500
    },
    {
      "epoch": 0.8630058172984721,
      "grad_norm": 0.8776530623435974,
      "learning_rate": 0.0007156415433569726,
      "loss": 4.7114,
      "step": 162000
    },
    {
      "epoch": 0.8656694155000106,
      "grad_norm": 0.8468111753463745,
      "learning_rate": 0.0007147497181832451,
      "loss": 4.719,
      "step": 162500
    },
    {
      "epoch": 0.8683330137015491,
      "grad_norm": 0.8999060988426208,
      "learning_rate": 0.000713859676659865,
      "loss": 4.7151,
      "step": 163000
    },
    {
      "epoch": 0.8709966119030876,
      "grad_norm": 0.8831384181976318,
      "learning_rate": 0.0007129678514861376,
      "loss": 4.7127,
      "step": 163500
    },
    {
      "epoch": 0.8736602101046261,
      "grad_norm": 0.9266347885131836,
      "learning_rate": 0.00071207602631241,
      "loss": 4.7082,
      "step": 164000
    },
    {
      "epoch": 0.8763238083061646,
      "grad_norm": 0.8402259945869446,
      "learning_rate": 0.0007111842011386824,
      "loss": 4.7121,
      "step": 164500
    },
    {
      "epoch": 0.8789874065077031,
      "grad_norm": 0.8024085760116577,
      "learning_rate": 0.0007102923759649549,
      "loss": 4.7128,
      "step": 165000
    },
    {
      "epoch": 0.8816510047092416,
      "grad_norm": 0.80262690782547,
      "learning_rate": 0.0007094023344415747,
      "loss": 4.7165,
      "step": 165500
    },
    {
      "epoch": 0.8843146029107801,
      "grad_norm": 0.8166842460632324,
      "learning_rate": 0.0007085105092678472,
      "loss": 4.7177,
      "step": 166000
    },
    {
      "epoch": 0.8869782011123186,
      "grad_norm": 0.8241666555404663,
      "learning_rate": 0.0007076186840941197,
      "loss": 4.7107,
      "step": 166500
    },
    {
      "epoch": 0.8896417993138571,
      "grad_norm": 0.792934775352478,
      "learning_rate": 0.0007067268589203921,
      "loss": 4.7065,
      "step": 167000
    },
    {
      "epoch": 0.8923053975153956,
      "grad_norm": 0.8425348401069641,
      "learning_rate": 0.000705836817397012,
      "loss": 4.7119,
      "step": 167500
    },
    {
      "epoch": 0.8949689957169341,
      "grad_norm": 0.8911672830581665,
      "learning_rate": 0.0007049449922232844,
| "loss": 4.7044, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.8976325939184726, | |
| "grad_norm": 1.0209442377090454, | |
| "learning_rate": 0.0007040531670495569, | |
| "loss": 4.7031, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 0.9002961921200111, | |
| "grad_norm": 0.8289418816566467, | |
| "learning_rate": 0.0007031613418758294, | |
| "loss": 4.7156, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.9029597903215496, | |
| "grad_norm": 0.8719263672828674, | |
| "learning_rate": 0.0007022713003524494, | |
| "loss": 4.7063, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 0.9056233885230881, | |
| "grad_norm": 0.797635555267334, | |
| "learning_rate": 0.0007013794751787218, | |
| "loss": 4.7081, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.9082869867246266, | |
| "grad_norm": 0.7837144732475281, | |
| "learning_rate": 0.0007004876500049942, | |
| "loss": 4.7079, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 0.9109505849261651, | |
| "grad_norm": 0.8080796003341675, | |
| "learning_rate": 0.0006995958248312667, | |
| "loss": 4.7126, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.9136141831277036, | |
| "grad_norm": 0.8928093910217285, | |
| "learning_rate": 0.0006987057833078866, | |
| "loss": 4.7084, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 0.9162777813292421, | |
| "grad_norm": 0.8997321724891663, | |
| "learning_rate": 0.0006978139581341591, | |
| "loss": 4.7081, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.9189413795307806, | |
| "grad_norm": 0.8603807687759399, | |
| "learning_rate": 0.0006969221329604315, | |
| "loss": 4.7066, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 0.9216049777323191, | |
| "grad_norm": 0.7852337956428528, | |
| "learning_rate": 0.0006960303077867039, | |
| "loss": 4.7099, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.9242685759338576, | |
| "grad_norm": 0.8550631999969482, | |
| "learning_rate": 0.0006951402662633238, | |
| "loss": 4.711, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 0.926932174135396, | |
| "grad_norm": 0.809356689453125, | |
| "learning_rate": 0.0006942484410895963, | |
| "loss": 4.7085, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.9295957723369345, | |
| "grad_norm": 0.9777870774269104, | |
| "learning_rate": 0.0006933566159158689, | |
| "loss": 4.7119, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 0.932259370538473, | |
| "grad_norm": 0.8507824540138245, | |
| "learning_rate": 0.0006924647907421413, | |
| "loss": 4.7129, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.9349229687400115, | |
| "grad_norm": 0.831298291683197, | |
| "learning_rate": 0.0006915747492187612, | |
| "loss": 4.7159, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 0.93758656694155, | |
| "grad_norm": 0.8560031056404114, | |
| "learning_rate": 0.0006906829240450336, | |
| "loss": 4.706, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.9402501651430885, | |
| "grad_norm": 0.8773949146270752, | |
| "learning_rate": 0.000689791098871306, | |
| "loss": 4.698, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 0.942913763344627, | |
| "grad_norm": 0.845332145690918, | |
| "learning_rate": 0.0006888992736975786, | |
| "loss": 4.7075, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.9455773615461655, | |
| "grad_norm": 0.8635679483413696, | |
| "learning_rate": 0.0006880092321741984, | |
| "loss": 4.7049, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 0.948240959747704, | |
| "grad_norm": 0.8583770990371704, | |
| "learning_rate": 0.0006871174070004709, | |
| "loss": 4.7019, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.9509045579492424, | |
| "grad_norm": 0.8701212406158447, | |
| "learning_rate": 0.0006862255818267433, | |
| "loss": 4.7036, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 0.9535681561507809, | |
| "grad_norm": 0.872428297996521, | |
| "learning_rate": 0.0006853337566530157, | |
| "loss": 4.7085, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.9562317543523194, | |
| "grad_norm": 0.9032150506973267, | |
| "learning_rate": 0.0006844437151296358, | |
| "loss": 4.7116, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 0.9588953525538579, | |
| "grad_norm": 0.886792004108429, | |
| "learning_rate": 0.0006835518899559082, | |
| "loss": 4.7074, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.9615589507553964, | |
| "grad_norm": 0.8857409358024597, | |
| "learning_rate": 0.0006826600647821807, | |
| "loss": 4.6975, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 0.9642225489569349, | |
| "grad_norm": 0.8472080826759338, | |
| "learning_rate": 0.0006817682396084531, | |
| "loss": 4.7102, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.9668861471584734, | |
| "grad_norm": 0.8281969428062439, | |
| "learning_rate": 0.000680878198085073, | |
| "loss": 4.71, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 0.9695497453600119, | |
| "grad_norm": 0.9546143412590027, | |
| "learning_rate": 0.0006799863729113455, | |
| "loss": 4.7032, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.9722133435615504, | |
| "grad_norm": 0.8378576040267944, | |
| "learning_rate": 0.0006790945477376179, | |
| "loss": 4.7027, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 0.9748769417630889, | |
| "grad_norm": 0.8105673789978027, | |
| "learning_rate": 0.0006782027225638904, | |
| "loss": 4.7096, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.9775405399646274, | |
| "grad_norm": 0.8699236512184143, | |
| "learning_rate": 0.0006773126810405102, | |
| "loss": 4.7058, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 0.9802041381661659, | |
| "grad_norm": 0.8328757286071777, | |
| "learning_rate": 0.0006764208558667827, | |
| "loss": 4.7049, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.9828677363677044, | |
| "grad_norm": 0.8056396245956421, | |
| "learning_rate": 0.0006755290306930552, | |
| "loss": 4.7022, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 0.9855313345692429, | |
| "grad_norm": 0.7749541997909546, | |
| "learning_rate": 0.0006746372055193276, | |
| "loss": 4.7069, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.9881949327707814, | |
| "grad_norm": 0.9022479057312012, | |
| "learning_rate": 0.0006737453803456001, | |
| "loss": 4.713, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 0.9908585309723199, | |
| "grad_norm": 0.8944171667098999, | |
| "learning_rate": 0.00067285533882222, | |
| "loss": 4.7115, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.9935221291738584, | |
| "grad_norm": 0.9268381595611572, | |
| "learning_rate": 0.0006719635136484925, | |
| "loss": 4.7068, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 0.9961857273753969, | |
| "grad_norm": 0.8826886415481567, | |
| "learning_rate": 0.0006710716884747649, | |
| "loss": 4.7071, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.9988493255769354, | |
| "grad_norm": 0.8730109333992004, | |
| "learning_rate": 0.0006701798633010374, | |
| "loss": 4.7062, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 1.001512923778474, | |
| "grad_norm": 0.8082478046417236, | |
| "learning_rate": 0.0006692898217776573, | |
| "loss": 4.7079, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.0041765219800123, | |
| "grad_norm": 0.840177059173584, | |
| "learning_rate": 0.0006683979966039297, | |
| "loss": 4.7087, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 1.006840120181551, | |
| "grad_norm": 0.9156913161277771, | |
| "learning_rate": 0.0006675061714302022, | |
| "loss": 4.6991, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.0095037183830893, | |
| "grad_norm": 0.909474790096283, | |
| "learning_rate": 0.0006666143462564746, | |
| "loss": 4.7024, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 1.0121673165846279, | |
| "grad_norm": 0.8102747201919556, | |
| "learning_rate": 0.0006657243047330946, | |
| "loss": 4.7031, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.0148309147861663, | |
| "grad_norm": 0.8523674011230469, | |
| "learning_rate": 0.000664832479559367, | |
| "loss": 4.7095, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 1.0174945129877049, | |
| "grad_norm": 0.7822126746177673, | |
| "learning_rate": 0.0006639406543856394, | |
| "loss": 4.7074, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.0201581111892433, | |
| "grad_norm": 0.8011199831962585, | |
| "learning_rate": 0.0006630488292119119, | |
| "loss": 4.706, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 1.0228217093907819, | |
| "grad_norm": 1.0168174505233765, | |
| "learning_rate": 0.0006621605713388793, | |
| "loss": 4.7017, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.0254853075923203, | |
| "grad_norm": 1.1239140033721924, | |
| "learning_rate": 0.0006612687461651517, | |
| "loss": 4.6948, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 1.0281489057938589, | |
| "grad_norm": 0.8248723745346069, | |
| "learning_rate": 0.0006603769209914242, | |
| "loss": 4.7031, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 1.0308125039953973, | |
| "grad_norm": 0.8193596601486206, | |
| "learning_rate": 0.0006594850958176967, | |
| "loss": 4.697, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 1.0334761021969359, | |
| "grad_norm": 0.8454943895339966, | |
| "learning_rate": 0.0006585932706439691, | |
| "loss": 4.7019, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 1.0361397003984743, | |
| "grad_norm": 0.9663782715797424, | |
| "learning_rate": 0.0006577014454702415, | |
| "loss": 4.7044, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 1.0388032986000129, | |
| "grad_norm": 0.8691510558128357, | |
| "learning_rate": 0.0006568096202965141, | |
| "loss": 4.6986, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.0414668968015512, | |
| "grad_norm": 0.9316896200180054, | |
| "learning_rate": 0.0006559177951227865, | |
| "loss": 4.6983, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 1.0441304950030899, | |
| "grad_norm": 0.8153261542320251, | |
| "learning_rate": 0.000655025969949059, | |
| "loss": 4.7022, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 1.0467940932046282, | |
| "grad_norm": 0.8377756476402283, | |
| "learning_rate": 0.0006541359284256788, | |
| "loss": 4.6976, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 1.0494576914061668, | |
| "grad_norm": 0.87883460521698, | |
| "learning_rate": 0.0006532441032519512, | |
| "loss": 4.6979, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 1.0521212896077052, | |
| "grad_norm": 0.8691816926002502, | |
| "learning_rate": 0.0006523522780782239, | |
| "loss": 4.7022, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 1.0547848878092438, | |
| "grad_norm": 0.8359382748603821, | |
| "learning_rate": 0.0006514604529044963, | |
| "loss": 4.6971, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 1.0574484860107822, | |
| "grad_norm": 1.1580724716186523, | |
| "learning_rate": 0.0006505704113811162, | |
| "loss": 4.7096, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 1.0601120842123208, | |
| "grad_norm": 0.9052878618240356, | |
| "learning_rate": 0.0006496785862073886, | |
| "loss": 4.6942, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 1.0627756824138592, | |
| "grad_norm": 0.8252458572387695, | |
| "learning_rate": 0.000648786761033661, | |
| "loss": 4.6964, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 1.0654392806153978, | |
| "grad_norm": 0.8529119491577148, | |
| "learning_rate": 0.0006478949358599335, | |
| "loss": 4.6997, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.0681028788169362, | |
| "grad_norm": 0.7900977730751038, | |
| "learning_rate": 0.0006470048943365535, | |
| "loss": 4.6909, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 1.0707664770184748, | |
| "grad_norm": 0.8846203088760376, | |
| "learning_rate": 0.0006461130691628259, | |
| "loss": 4.6958, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.0734300752200132, | |
| "grad_norm": 0.9266300797462463, | |
| "learning_rate": 0.0006452212439890983, | |
| "loss": 4.699, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 1.0760936734215516, | |
| "grad_norm": 0.8480575680732727, | |
| "learning_rate": 0.0006443294188153707, | |
| "loss": 4.702, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.0787572716230902, | |
| "grad_norm": 0.8029345870018005, | |
| "learning_rate": 0.0006434393772919907, | |
| "loss": 4.698, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 1.0814208698246286, | |
| "grad_norm": 0.9869500398635864, | |
| "learning_rate": 0.0006425475521182633, | |
| "loss": 4.6991, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.0840844680261672, | |
| "grad_norm": 0.8242001533508301, | |
| "learning_rate": 0.0006416557269445357, | |
| "loss": 4.6968, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 1.0867480662277056, | |
| "grad_norm": 0.9248818755149841, | |
| "learning_rate": 0.0006407639017708081, | |
| "loss": 4.6906, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 1.0894116644292442, | |
| "grad_norm": 0.8327652812004089, | |
| "learning_rate": 0.000639873860247428, | |
| "loss": 4.6968, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 1.0920752626307826, | |
| "grad_norm": 0.898684024810791, | |
| "learning_rate": 0.0006389820350737004, | |
| "loss": 4.693, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.0947388608323212, | |
| "grad_norm": 0.826521635055542, | |
| "learning_rate": 0.000638090209899973, | |
| "loss": 4.7004, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 1.0974024590338596, | |
| "grad_norm": 0.8696659803390503, | |
| "learning_rate": 0.0006371983847262454, | |
| "loss": 4.6999, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 1.1000660572353982, | |
| "grad_norm": 0.8574073910713196, | |
| "learning_rate": 0.0006363083432028652, | |
| "loss": 4.6929, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 1.1027296554369366, | |
| "grad_norm": 0.857872486114502, | |
| "learning_rate": 0.0006354165180291377, | |
| "loss": 4.6982, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 1.1053932536384752, | |
| "grad_norm": 0.9049299359321594, | |
| "learning_rate": 0.0006345246928554101, | |
| "loss": 4.6986, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 1.1080568518400136, | |
| "grad_norm": 0.8885313868522644, | |
| "learning_rate": 0.0006336328676816825, | |
| "loss": 4.6961, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 1.1107204500415522, | |
| "grad_norm": 0.885249674320221, | |
| "learning_rate": 0.0006327428261583026, | |
| "loss": 4.6914, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 1.1133840482430906, | |
| "grad_norm": 0.8557220101356506, | |
| "learning_rate": 0.0006318510009845751, | |
| "loss": 4.6979, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 1.1160476464446292, | |
| "grad_norm": 0.850692868232727, | |
| "learning_rate": 0.0006309591758108475, | |
| "loss": 4.6923, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 1.1187112446461676, | |
| "grad_norm": 0.8561812043190002, | |
| "learning_rate": 0.0006300673506371199, | |
| "loss": 4.696, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.1213748428477062, | |
| "grad_norm": 0.8279117941856384, | |
| "learning_rate": 0.0006291773091137398, | |
| "loss": 4.6948, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 1.1240384410492446, | |
| "grad_norm": 1.035873293876648, | |
| "learning_rate": 0.0006282854839400123, | |
| "loss": 4.6941, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 1.1267020392507832, | |
| "grad_norm": 0.9458531141281128, | |
| "learning_rate": 0.0006273936587662848, | |
| "loss": 4.7018, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 1.1293656374523215, | |
| "grad_norm": 0.9144226908683777, | |
| "learning_rate": 0.0006265018335925572, | |
| "loss": 4.694, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 1.1320292356538602, | |
| "grad_norm": 0.8634624481201172, | |
| "learning_rate": 0.000625611792069177, | |
| "loss": 4.6982, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 1.1346928338553985, | |
| "grad_norm": 0.8695416450500488, | |
| "learning_rate": 0.0006247199668954495, | |
| "loss": 4.6966, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 1.1373564320569371, | |
| "grad_norm": 0.8389537930488586, | |
| "learning_rate": 0.000623828141721722, | |
| "loss": 4.6981, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 1.1400200302584755, | |
| "grad_norm": 0.8467423915863037, | |
| "learning_rate": 0.0006229363165479945, | |
| "loss": 4.6901, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 1.1426836284600141, | |
| "grad_norm": 0.8449163436889648, | |
| "learning_rate": 0.0006220462750246144, | |
| "loss": 4.6951, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 1.1453472266615525, | |
| "grad_norm": 0.866750180721283, | |
| "learning_rate": 0.0006211544498508869, | |
| "loss": 4.6976, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.1480108248630911, | |
| "grad_norm": 0.8245420455932617, | |
| "learning_rate": 0.0006202626246771593, | |
| "loss": 4.6912, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 1.1506744230646295, | |
| "grad_norm": 0.8339635133743286, | |
| "learning_rate": 0.0006193707995034318, | |
| "loss": 4.6915, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 1.1533380212661681, | |
| "grad_norm": 0.8900044560432434, | |
| "learning_rate": 0.0006184807579800517, | |
| "loss": 4.6919, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 1.1560016194677065, | |
| "grad_norm": 0.9079304337501526, | |
| "learning_rate": 0.0006175889328063241, | |
| "loss": 4.6921, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.158665217669245, | |
| "grad_norm": 0.8826993107795715, | |
| "learning_rate": 0.0006166971076325966, | |
| "loss": 4.6901, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 1.1613288158707835, | |
| "grad_norm": 0.8574143052101135, | |
| "learning_rate": 0.000615805282458869, | |
| "loss": 4.6906, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.1639924140723221, | |
| "grad_norm": 0.8516850471496582, | |
| "learning_rate": 0.0006149152409354888, | |
| "loss": 4.6973, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 1.1666560122738605, | |
| "grad_norm": 1.0509278774261475, | |
| "learning_rate": 0.0006140234157617614, | |
| "loss": 4.6885, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.169319610475399, | |
| "grad_norm": 0.8757687211036682, | |
| "learning_rate": 0.0006131315905880338, | |
| "loss": 4.6948, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 1.1719832086769375, | |
| "grad_norm": 0.8677180409431458, | |
| "learning_rate": 0.0006122397654143063, | |
| "loss": 4.6943, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.1746468068784761, | |
| "grad_norm": 0.8716105222702026, | |
| "learning_rate": 0.0006113497238909262, | |
| "loss": 4.695, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 1.1773104050800145, | |
| "grad_norm": 0.8486727476119995, | |
| "learning_rate": 0.0006104578987171987, | |
| "loss": 4.6947, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.1799740032815529, | |
| "grad_norm": 0.9231880307197571, | |
| "learning_rate": 0.0006095660735434712, | |
| "loss": 4.6864, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 1.1826376014830915, | |
| "grad_norm": 0.9859126210212708, | |
| "learning_rate": 0.0006086742483697436, | |
| "loss": 4.6876, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.1853011996846299, | |
| "grad_norm": 0.846367359161377, | |
| "learning_rate": 0.0006077824231960161, | |
| "loss": 4.6871, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 1.1879647978861685, | |
| "grad_norm": 0.8780665397644043, | |
| "learning_rate": 0.0006068923816726359, | |
| "loss": 4.7, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.1906283960877069, | |
| "grad_norm": 0.8220515847206116, | |
| "learning_rate": 0.0006060005564989084, | |
| "loss": 4.6904, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 1.1932919942892455, | |
| "grad_norm": 0.8694311380386353, | |
| "learning_rate": 0.0006051087313251809, | |
| "loss": 4.6971, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.1959555924907839, | |
| "grad_norm": 0.858805775642395, | |
| "learning_rate": 0.0006042169061514533, | |
| "loss": 4.6811, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 1.1986191906923225, | |
| "grad_norm": 0.976883053779602, | |
| "learning_rate": 0.0006033268646280733, | |
| "loss": 4.7028, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.2012827888938609, | |
| "grad_norm": 0.8692370653152466, | |
| "learning_rate": 0.0006024350394543457, | |
| "loss": 4.6903, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 1.2039463870953995, | |
| "grad_norm": 0.929794192314148, | |
| "learning_rate": 0.0006015432142806182, | |
| "loss": 4.6896, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.2066099852969379, | |
| "grad_norm": 0.8333790898323059, | |
| "learning_rate": 0.0006006513891068907, | |
| "loss": 4.691, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 1.2092735834984765, | |
| "grad_norm": 0.8712317943572998, | |
| "learning_rate": 0.0005997613475835106, | |
| "loss": 4.6832, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.2119371817000149, | |
| "grad_norm": 0.9365465641021729, | |
| "learning_rate": 0.000598869522409783, | |
| "loss": 4.6856, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 1.2146007799015535, | |
| "grad_norm": 0.8496169447898865, | |
| "learning_rate": 0.0005979776972360554, | |
| "loss": 4.6923, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.2172643781030918, | |
| "grad_norm": 0.9000328779220581, | |
| "learning_rate": 0.0005970858720623279, | |
| "loss": 4.6847, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 1.2199279763046305, | |
| "grad_norm": 0.8945797681808472, | |
| "learning_rate": 0.0005961958305389477, | |
| "loss": 4.6902, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.2225915745061688, | |
| "grad_norm": 0.8715533018112183, | |
| "learning_rate": 0.0005953040053652203, | |
| "loss": 4.6879, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 1.2252551727077075, | |
| "grad_norm": 0.9229386448860168, | |
| "learning_rate": 0.0005944121801914927, | |
| "loss": 4.6897, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.2279187709092458, | |
| "grad_norm": 0.8467351198196411, | |
| "learning_rate": 0.0005935203550177651, | |
| "loss": 4.6879, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 1.2305823691107844, | |
| "grad_norm": 0.823901891708374, | |
| "learning_rate": 0.0005926303134943851, | |
| "loss": 4.6885, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.2332459673123228, | |
| "grad_norm": 0.8735817074775696, | |
| "learning_rate": 0.0005917384883206575, | |
| "loss": 4.691, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 1.2359095655138614, | |
| "grad_norm": 0.8728153109550476, | |
| "learning_rate": 0.0005908466631469301, | |
| "loss": 4.6857, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.2385731637153998, | |
| "grad_norm": 0.8330144882202148, | |
| "learning_rate": 0.0005899548379732025, | |
| "loss": 4.6904, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 1.2412367619169384, | |
| "grad_norm": 0.973419725894928, | |
| "learning_rate": 0.0005890647964498224, | |
| "loss": 4.6829, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.2439003601184768, | |
| "grad_norm": 0.8705942034721375, | |
| "learning_rate": 0.0005881729712760948, | |
| "loss": 4.6946, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 1.2465639583200154, | |
| "grad_norm": 0.8772411942481995, | |
| "learning_rate": 0.0005872829297527147, | |
| "loss": 4.6866, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.2492275565215538, | |
| "grad_norm": 0.8856297135353088, | |
| "learning_rate": 0.0005863911045789872, | |
| "loss": 4.6899, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 1.2518911547230922, | |
| "grad_norm": 0.9031875133514404, | |
| "learning_rate": 0.0005854992794052596, | |
| "loss": 4.6917, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.2545547529246308, | |
| "grad_norm": 1.0048778057098389, | |
| "learning_rate": 0.0005846074542315321, | |
| "loss": 4.6837, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 1.2572183511261694, | |
| "grad_norm": 0.8882681727409363, | |
| "learning_rate": 0.0005837156290578045, | |
| "loss": 4.6813, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.2598819493277078, | |
| "grad_norm": 0.8688454627990723, | |
| "learning_rate": 0.0005828238038840769, | |
| "loss": 4.6774, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 1.2625455475292462, | |
| "grad_norm": 0.9453760385513306, | |
| "learning_rate": 0.0005819319787103495, | |
| "loss": 4.6889, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.2652091457307848, | |
| "grad_norm": 0.8512344360351562, | |
| "learning_rate": 0.0005810401535366219, | |
| "loss": 4.6835, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 1.2678727439323234, | |
| "grad_norm": 0.858352541923523, | |
| "learning_rate": 0.0005801501120132419, | |
| "loss": 4.6872, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.2705363421338618, | |
| "grad_norm": 0.8966683149337769, | |
| "learning_rate": 0.0005792582868395143, | |
| "loss": 4.6812, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 1.2731999403354002, | |
| "grad_norm": 1.3160330057144165, | |
| "learning_rate": 0.0005783664616657867, | |
| "loss": 4.6867, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.2758635385369388, | |
| "grad_norm": 0.8476753830909729, | |
| "learning_rate": 0.0005774746364920593, | |
| "loss": 4.6908, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 1.2785271367384774, | |
| "grad_norm": 0.9309910535812378, | |
| "learning_rate": 0.0005765845949686791, | |
| "loss": 4.6763, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.2811907349400158, | |
| "grad_norm": 0.8932083249092102, | |
| "learning_rate": 0.0005756927697949516, | |
| "loss": 4.6868, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 1.2838543331415542, | |
| "grad_norm": 0.8718700408935547, | |
| "learning_rate": 0.000574800944621224, | |
| "loss": 4.6848, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.2865179313430928, | |
| "grad_norm": 0.8954005837440491, | |
| "learning_rate": 0.0005739091194474964, | |
| "loss": 4.6781, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 1.2891815295446314, | |
| "grad_norm": 0.8750497698783875, | |
| "learning_rate": 0.0005730190779241163, | |
| "loss": 4.6871, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.2918451277461698, | |
| "grad_norm": 0.8953655362129211, | |
| "learning_rate": 0.0005721272527503888, | |
| "loss": 4.688, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 1.2945087259477082, | |
| "grad_norm": 0.8756445050239563, | |
| "learning_rate": 0.0005712354275766613, | |
| "loss": 4.6832, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.2971723241492468, | |
| "grad_norm": 0.842310905456543, | |
| "learning_rate": 0.0005703436024029338, | |
| "loss": 4.683, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 1.2998359223507852, | |
| "grad_norm": 0.8197309970855713, | |
| "learning_rate": 0.0005694535608795537, | |
| "loss": 4.6839, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.3024995205523238, | |
| "grad_norm": 0.8899139165878296, | |
| "learning_rate": 0.0005685617357058261, | |
| "loss": 4.6937, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 1.3051631187538622, | |
| "grad_norm": 0.9787269830703735, | |
| "learning_rate": 0.0005676699105320986, | |
| "loss": 4.686, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.3078267169554008, | |
| "grad_norm": 0.936326265335083, | |
| "learning_rate": 0.0005667780853583711, | |
| "loss": 4.6823, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 1.3104903151569391, | |
| "grad_norm": 0.8806182742118835, | |
| "learning_rate": 0.0005658880438349909, | |
| "loss": 4.6825, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.3131539133584778, | |
| "grad_norm": 0.9858034253120422, | |
| "learning_rate": 0.0005649962186612634, | |
| "loss": 4.6833, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 1.3158175115600161, | |
| "grad_norm": 1.0451375246047974, | |
| "learning_rate": 0.0005641043934875358, | |
| "loss": 4.6828, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.3184811097615547, | |
| "grad_norm": 0.9298591613769531, | |
| "learning_rate": 0.0005632125683138083, | |
| "loss": 4.6864, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 1.3211447079630931, | |
| "grad_norm": 0.8836386799812317, | |
| "learning_rate": 0.0005623207431400808, | |
| "loss": 4.6793, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.3238083061646317, | |
| "grad_norm": 0.820397138595581, | |
| "learning_rate": 0.0005614307016167007, | |
| "loss": 4.6895, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 1.3264719043661701, | |
| "grad_norm": 0.9033796787261963, | |
| "learning_rate": 0.0005605388764429732, | |
| "loss": 4.6778, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.3291355025677087, | |
| "grad_norm": 0.9165984988212585, | |
| "learning_rate": 0.0005596470512692456, | |
| "loss": 4.6823, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 1.3317991007692471, | |
| "grad_norm": 0.8427574038505554, | |
| "learning_rate": 0.0005587552260955181, | |
| "loss": 4.6832, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.3344626989707857, | |
| "grad_norm": 0.8803266286849976, | |
| "learning_rate": 0.0005578634009217906, | |
| "loss": 4.685, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 1.3371262971723241, | |
| "grad_norm": 0.8542140126228333, | |
| "learning_rate": 0.0005569733593984104, | |
| "loss": 4.6851, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.3397898953738627, | |
| "grad_norm": 0.9896337389945984, | |
| "learning_rate": 0.0005560815342246829, | |
| "loss": 4.6839, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 1.3424534935754011, | |
| "grad_norm": 0.8505421876907349, | |
| "learning_rate": 0.0005551897090509553, | |
| "loss": 4.6808, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.3451170917769395, | |
| "grad_norm": 0.9560419917106628, | |
| "learning_rate": 0.0005542978838772278, | |
| "loss": 4.6787, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 1.347780689978478, | |
| "grad_norm": 0.9107364416122437, | |
| "learning_rate": 0.0005534078423538477, | |
| "loss": 4.6808, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.3504442881800167, | |
| "grad_norm": 0.8997001647949219, | |
| "learning_rate": 0.0005525160171801201, | |
| "loss": 4.6799, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 1.353107886381555, | |
| "grad_norm": 0.9192373752593994, | |
| "learning_rate": 0.0005516241920063926, | |
| "loss": 4.6837, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.3557714845830935, | |
| "grad_norm": 1.058236837387085, | |
| "learning_rate": 0.000550732366832665, | |
| "loss": 4.6773, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 1.358435082784632, | |
| "grad_norm": 0.949219286441803, | |
| "learning_rate": 0.000549842325309285, | |
| "loss": 4.6826, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.3610986809861707, | |
| "grad_norm": 0.8578962087631226, | |
| "learning_rate": 0.0005489505001355575, | |
| "loss": 4.6865, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 1.363762279187709, | |
| "grad_norm": 0.9393055438995361, | |
| "learning_rate": 0.00054805867496183, | |
| "loss": 4.6774, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.3664258773892475, | |
| "grad_norm": 1.0173401832580566, | |
| "learning_rate": 0.0005471668497881024, | |
| "loss": 4.6783, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 1.369089475590786, | |
| "grad_norm": 0.8577682971954346, | |
| "learning_rate": 0.0005462768082647222, | |
| "loss": 4.6789, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.3717530737923247, | |
| "grad_norm": 0.9181286692619324, | |
| "learning_rate": 0.0005453849830909947, | |
| "loss": 4.6754, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 1.374416671993863, | |
| "grad_norm": 0.8773962259292603, | |
| "learning_rate": 0.0005444931579172672, | |
| "loss": 4.6882, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.3770802701954015, | |
| "grad_norm": 0.9522156715393066, | |
| "learning_rate": 0.0005436013327435396, | |
| "loss": 4.684, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 1.37974386839694, | |
| "grad_norm": 0.8997749090194702, | |
| "learning_rate": 0.0005427112912201595, | |
| "loss": 4.6814, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.3824074665984787, | |
| "grad_norm": 0.8679563403129578, | |
| "learning_rate": 0.0005418194660464319, | |
| "loss": 4.6849, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 1.385071064800017, | |
| "grad_norm": 0.8472937345504761, | |
| "learning_rate": 0.0005409276408727044, | |
| "loss": 4.6765, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.3877346630015555, | |
| "grad_norm": 0.9184697270393372, | |
| "learning_rate": 0.0005400358156989769, | |
| "loss": 4.6795, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 1.390398261203094, | |
| "grad_norm": 0.8961514234542847, | |
| "learning_rate": 0.0005391457741755969, | |
| "loss": 4.6798, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.3930618594046325, | |
| "grad_norm": 0.9035218954086304, | |
| "learning_rate": 0.0005382539490018693, | |
| "loss": 4.6804, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 1.395725457606171, | |
| "grad_norm": 0.9542981386184692, | |
| "learning_rate": 0.0005373621238281417, | |
| "loss": 4.6763, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.3983890558077094, | |
| "grad_norm": 0.8902364373207092, | |
| "learning_rate": 0.0005364702986544142, | |
| "loss": 4.679, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 1.401052654009248, | |
| "grad_norm": 0.8759614825248718, | |
| "learning_rate": 0.000535580257131034, | |
| "loss": 4.6819, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.4037162522107864, | |
| "grad_norm": 0.9290657043457031, | |
| "learning_rate": 0.0005346884319573066, | |
| "loss": 4.6787, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 1.406379850412325, | |
| "grad_norm": 1.0657765865325928, | |
| "learning_rate": 0.000533796606783579, | |
| "loss": 4.6805, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.4090434486138634, | |
| "grad_norm": 0.9341272711753845, | |
| "learning_rate": 0.0005329047816098514, | |
| "loss": 4.6824, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 1.411707046815402, | |
| "grad_norm": 0.8521129488945007, | |
| "learning_rate": 0.0005320147400864713, | |
| "loss": 4.6773, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.4143706450169404, | |
| "grad_norm": 0.9178290367126465, | |
| "learning_rate": 0.0005311229149127437, | |
| "loss": 4.6795, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 1.417034243218479, | |
| "grad_norm": 0.9215536713600159, | |
| "learning_rate": 0.0005302310897390163, | |
| "loss": 4.6743, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.4196978414200174, | |
| "grad_norm": 0.9139541387557983, | |
| "learning_rate": 0.0005293392645652888, | |
| "loss": 4.6732, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 1.422361439621556, | |
| "grad_norm": 0.9697835445404053, | |
| "learning_rate": 0.0005284492230419087, | |
| "loss": 4.6846, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.4250250378230944, | |
| "grad_norm": 0.9869498610496521, | |
| "learning_rate": 0.0005275573978681811, | |
| "loss": 4.6732, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 1.427688636024633, | |
| "grad_norm": 0.8871563673019409, | |
| "learning_rate": 0.0005266655726944535, | |
| "loss": 4.6838, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.4303522342261714, | |
| "grad_norm": 0.9272149205207825, | |
| "learning_rate": 0.0005257737475207261, | |
| "loss": 4.6749, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 1.43301583242771, | |
| "grad_norm": 0.8581441640853882, | |
| "learning_rate": 0.000524883705997346, | |
| "loss": 4.677, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.4356794306292484, | |
| "grad_norm": 0.9157629609107971, | |
| "learning_rate": 0.0005239918808236184, | |
| "loss": 4.6756, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 1.4383430288307868, | |
| "grad_norm": 0.9694315195083618, | |
| "learning_rate": 0.0005231000556498908, | |
| "loss": 4.6721, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.4410066270323254, | |
| "grad_norm": 0.905512809753418, | |
| "learning_rate": 0.0005222082304761632, | |
| "loss": 4.6782, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 1.443670225233864, | |
| "grad_norm": 0.8765866160392761, | |
| "learning_rate": 0.0005213181889527832, | |
| "loss": 4.6692, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.4463338234354024, | |
| "grad_norm": 0.9838495850563049, | |
| "learning_rate": 0.0005204263637790558, | |
| "loss": 4.6801, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 1.4489974216369408, | |
| "grad_norm": 0.9424646496772766, | |
| "learning_rate": 0.0005195345386053282, | |
| "loss": 4.6701, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.4516610198384794, | |
| "grad_norm": 0.8656395077705383, | |
| "learning_rate": 0.0005186427134316006, | |
| "loss": 4.6775, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 1.454324618040018, | |
| "grad_norm": 0.9255796074867249, | |
| "learning_rate": 0.0005177526719082205, | |
| "loss": 4.6753, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.4569882162415564, | |
| "grad_norm": 0.9551495313644409, | |
| "learning_rate": 0.0005168608467344929, | |
| "loss": 4.6697, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 1.4596518144430948, | |
| "grad_norm": 0.9294918179512024, | |
| "learning_rate": 0.0005159690215607655, | |
| "loss": 4.678, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.4623154126446334, | |
| "grad_norm": 0.8683546185493469, | |
| "learning_rate": 0.0005150771963870379, | |
| "loss": 4.6716, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 1.464979010846172, | |
| "grad_norm": 0.9196661710739136, | |
| "learning_rate": 0.0005141871548636577, | |
| "loss": 4.6811, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.4676426090477104, | |
| "grad_norm": 0.8748793005943298, | |
| "learning_rate": 0.0005132953296899302, | |
| "loss": 4.6676, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 1.4703062072492488, | |
| "grad_norm": 0.941302478313446, | |
| "learning_rate": 0.0005124035045162026, | |
| "loss": 4.677, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.4729698054507874, | |
| "grad_norm": 0.8474987149238586, | |
| "learning_rate": 0.0005115116793424752, | |
| "loss": 4.6656, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 1.475633403652326, | |
| "grad_norm": 0.9448440074920654, | |
| "learning_rate": 0.0005106216378190951, | |
| "loss": 4.6719, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.4782970018538644, | |
| "grad_norm": 0.8382176160812378, | |
| "learning_rate": 0.0005097298126453676, | |
| "loss": 4.6685, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 1.4809606000554028, | |
| "grad_norm": 0.8633021116256714, | |
| "learning_rate": 0.00050883798747164, | |
| "loss": 4.6732, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.4836241982569414, | |
| "grad_norm": 0.9060601592063904, | |
| "learning_rate": 0.0005079461622979124, | |
| "loss": 4.6709, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 1.4862877964584797, | |
| "grad_norm": 0.8984940648078918, | |
| "learning_rate": 0.0005070561207745324, | |
| "loss": 4.675, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.4889513946600184, | |
| "grad_norm": 0.8761520385742188, | |
| "learning_rate": 0.0005061642956008048, | |
| "loss": 4.6719, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 1.4916149928615567, | |
| "grad_norm": 0.934901773929596, | |
| "learning_rate": 0.0005052724704270773, | |
| "loss": 4.6705, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.4942785910630954, | |
| "grad_norm": 0.927005410194397, | |
| "learning_rate": 0.0005043806452533497, | |
| "loss": 4.6736, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 1.4969421892646337, | |
| "grad_norm": 0.9266989827156067, | |
| "learning_rate": 0.0005034888200796221, | |
| "loss": 4.6773, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.4996057874661723, | |
| "grad_norm": 0.8785182237625122, | |
| "learning_rate": 0.000502598778556242, | |
| "loss": 4.6678, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 1.5022693856677107, | |
| "grad_norm": 1.0172791481018066, | |
| "learning_rate": 0.0005017069533825145, | |
| "loss": 4.6752, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.5049329838692493, | |
| "grad_norm": 0.9704791307449341, | |
| "learning_rate": 0.000500815128208787, | |
| "loss": 4.6635, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 1.507596582070788, | |
| "grad_norm": 0.9124333262443542, | |
| "learning_rate": 0.0004999233030350595, | |
| "loss": 4.6748, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.5102601802723261, | |
| "grad_norm": 0.9736974835395813, | |
| "learning_rate": 0.0004990332615116794, | |
| "loss": 4.6747, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 1.5129237784738647, | |
| "grad_norm": 0.9330904483795166, | |
| "learning_rate": 0.0004981414363379518, | |
| "loss": 4.6695, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.5155873766754033, | |
| "grad_norm": 1.0524863004684448, | |
| "learning_rate": 0.0004972496111642243, | |
| "loss": 4.6699, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 1.5182509748769417, | |
| "grad_norm": 0.8803556561470032, | |
| "learning_rate": 0.0004963577859904968, | |
| "loss": 4.6711, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.52091457307848, | |
| "grad_norm": 0.9705889225006104, | |
| "learning_rate": 0.0004954677444671166, | |
| "loss": 4.6705, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 1.5235781712800187, | |
| "grad_norm": 0.928056001663208, | |
| "learning_rate": 0.000494575919293389, | |
| "loss": 4.6728, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.5262417694815573, | |
| "grad_norm": 0.9061446785926819, | |
| "learning_rate": 0.0004936840941196615, | |
| "loss": 4.6669, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 1.5289053676830957, | |
| "grad_norm": 0.9161142706871033, | |
| "learning_rate": 0.000492792268945934, | |
| "loss": 4.6661, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.531568965884634, | |
| "grad_norm": 0.89336097240448, | |
| "learning_rate": 0.0004919022274225539, | |
| "loss": 4.678, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 1.5342325640861727, | |
| "grad_norm": 0.886858344078064, | |
| "learning_rate": 0.0004910104022488263, | |
| "loss": 4.6693, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.5368961622877113, | |
| "grad_norm": 0.8612877130508423, | |
| "learning_rate": 0.0004901185770750989, | |
| "loss": 4.6631, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 1.5395597604892497, | |
| "grad_norm": 1.027990460395813, | |
| "learning_rate": 0.0004892267519013713, | |
| "loss": 4.6738, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.542223358690788, | |
| "grad_norm": 0.8808753490447998, | |
| "learning_rate": 0.0004883367103779913, | |
| "loss": 4.6794, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 1.5448869568923267, | |
| "grad_norm": 0.9345124363899231, | |
| "learning_rate": 0.00048744488520426364, | |
| "loss": 4.6642, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.5475505550938653, | |
| "grad_norm": 0.8728553652763367, | |
| "learning_rate": 0.0004865530600305361, | |
| "loss": 4.6744, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 1.5502141532954037, | |
| "grad_norm": 1.0889195203781128, | |
| "learning_rate": 0.00048566123485680856, | |
| "loss": 4.669, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.552877751496942, | |
| "grad_norm": 0.9284191727638245, | |
| "learning_rate": 0.0004847711933334285, | |
| "loss": 4.669, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 1.5555413496984807, | |
| "grad_norm": 0.8793983459472656, | |
| "learning_rate": 0.0004838793681597009, | |
| "loss": 4.675, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.5582049479000193, | |
| "grad_norm": 0.8682650327682495, | |
| "learning_rate": 0.00048298754298597334, | |
| "loss": 4.6614, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 1.5608685461015577, | |
| "grad_norm": 0.9232677817344666, | |
| "learning_rate": 0.0004820957178122458, | |
| "loss": 4.668, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.563532144303096, | |
| "grad_norm": 1.0062049627304077, | |
| "learning_rate": 0.00048120567628886574, | |
| "loss": 4.6651, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 1.5661957425046347, | |
| "grad_norm": 0.9670103192329407, | |
| "learning_rate": 0.0004803138511151382, | |
| "loss": 4.6597, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.5688593407061733, | |
| "grad_norm": 0.9307424426078796, | |
| "learning_rate": 0.00047942202594141066, | |
| "loss": 4.6697, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 1.5715229389077117, | |
| "grad_norm": 0.8999619483947754, | |
| "learning_rate": 0.0004785302007676831, | |
| "loss": 4.6739, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.57418653710925, | |
| "grad_norm": 0.927873432636261, | |
| "learning_rate": 0.00047764015924430306, | |
| "loss": 4.6701, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 1.5768501353107887, | |
| "grad_norm": 0.966769814491272, | |
| "learning_rate": 0.0004767483340705755, | |
| "loss": 4.6635, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.5795137335123273, | |
| "grad_norm": 0.9777745604515076, | |
| "learning_rate": 0.0004758565088968479, | |
| "loss": 4.6755, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 1.5821773317138657, | |
| "grad_norm": 0.8396321535110474, | |
| "learning_rate": 0.0004749646837231204, | |
| "loss": 4.6731, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.584840929915404, | |
| "grad_norm": 0.9812055826187134, | |
| "learning_rate": 0.00047407464219974033, | |
| "loss": 4.6723, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 1.5875045281169426, | |
| "grad_norm": 0.8983718156814575, | |
| "learning_rate": 0.0004731828170260128, | |
| "loss": 4.6742, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.5901681263184813, | |
| "grad_norm": 0.89915931224823, | |
| "learning_rate": 0.00047229099185228525, | |
| "loss": 4.6661, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 1.5928317245200196, | |
| "grad_norm": 0.9202754497528076, | |
| "learning_rate": 0.0004713991666785577, | |
| "loss": 4.6679, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.595495322721558, | |
| "grad_norm": 0.9377342462539673, | |
| "learning_rate": 0.0004705091251551776, | |
| "loss": 4.6664, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 1.5981589209230966, | |
| "grad_norm": 0.8714098930358887, | |
| "learning_rate": 0.00046961729998145, | |
| "loss": 4.6668, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.6008225191246352, | |
| "grad_norm": 0.884672224521637, | |
| "learning_rate": 0.00046872547480772246, | |
| "loss": 4.6648, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 1.6034861173261736, | |
| "grad_norm": 0.9066005945205688, | |
| "learning_rate": 0.00046783364963399495, | |
| "loss": 4.6589, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.606149715527712, | |
| "grad_norm": 1.024951457977295, | |
| "learning_rate": 0.00046694360811061486, | |
| "loss": 4.6645, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 1.6088133137292506, | |
| "grad_norm": 0.9072735905647278, | |
| "learning_rate": 0.00046605178293688735, | |
| "loss": 4.665, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.611476911930789, | |
| "grad_norm": 0.8979782462120056, | |
| "learning_rate": 0.0004651599577631598, | |
| "loss": 4.6667, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 1.6141405101323274, | |
| "grad_norm": 0.8960680961608887, | |
| "learning_rate": 0.0004642681325894322, | |
| "loss": 4.6698, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.616804108333866, | |
| "grad_norm": 0.9768756031990051, | |
| "learning_rate": 0.0004633780910660522, | |
| "loss": 4.6678, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 1.6194677065354046, | |
| "grad_norm": 0.941615879535675, | |
| "learning_rate": 0.0004624862658923246, | |
| "loss": 4.6689, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.622131304736943, | |
| "grad_norm": 0.9563820362091064, | |
| "learning_rate": 0.0004615944407185971, | |
| "loss": 4.6621, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 1.6247949029384814, | |
| "grad_norm": 0.9180545806884766, | |
| "learning_rate": 0.00046070261554486953, | |
| "loss": 4.6667, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.62745850114002, | |
| "grad_norm": 0.8739668726921082, | |
| "learning_rate": 0.0004598125740214894, | |
| "loss": 4.6711, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 1.6301220993415586, | |
| "grad_norm": 1.0049022436141968, | |
| "learning_rate": 0.0004589207488477619, | |
| "loss": 4.6662, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.632785697543097, | |
| "grad_norm": 0.8607634902000427, | |
| "learning_rate": 0.0004580289236740343, | |
| "loss": 4.6635, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 1.6354492957446354, | |
| "grad_norm": 0.9192615747451782, | |
| "learning_rate": 0.0004571370985003068, | |
| "loss": 4.6634, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.638112893946174, | |
| "grad_norm": 0.8758520483970642, | |
| "learning_rate": 0.0004562470569769267, | |
| "loss": 4.6532, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 1.6407764921477126, | |
| "grad_norm": 0.8956847190856934, | |
| "learning_rate": 0.00045535523180319915, | |
| "loss": 4.6694, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.643440090349251, | |
| "grad_norm": 0.8848024010658264, | |
| "learning_rate": 0.00045446340662947163, | |
| "loss": 4.6637, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 1.6461036885507894, | |
| "grad_norm": 0.9178889989852905, | |
| "learning_rate": 0.00045357158145574407, | |
| "loss": 4.6706, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.648767286752328, | |
| "grad_norm": 1.0721620321273804, | |
| "learning_rate": 0.0004526797562820165, | |
| "loss": 4.6548, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 1.6514308849538666, | |
| "grad_norm": 0.8807909488677979, | |
| "learning_rate": 0.000451787931108289, | |
| "loss": 4.6666, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.654094483155405, | |
| "grad_norm": 0.839378297328949, | |
| "learning_rate": 0.0004508961059345614, | |
| "loss": 4.6671, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 1.6567580813569434, | |
| "grad_norm": 0.9249696135520935, | |
| "learning_rate": 0.0004500042807608339, | |
| "loss": 4.6696, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.659421679558482, | |
| "grad_norm": 0.891743540763855, | |
| "learning_rate": 0.0004491142392374538, | |
| "loss": 4.6658, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 1.6620852777600206, | |
| "grad_norm": 0.9119758009910583, | |
| "learning_rate": 0.00044822241406372625, | |
| "loss": 4.6668, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 1.664748875961559, | |
| "grad_norm": 0.9169191122055054, | |
| "learning_rate": 0.00044733058888999874, | |
| "loss": 4.668, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 1.6674124741630973, | |
| "grad_norm": 0.8988668918609619, | |
| "learning_rate": 0.00044643876371627117, | |
| "loss": 4.6602, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 1.670076072364636, | |
| "grad_norm": 0.8959922194480896, | |
| "learning_rate": 0.0004455487221928911, | |
| "loss": 4.6628, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 1.6727396705661746, | |
| "grad_norm": 0.8865877389907837, | |
| "learning_rate": 0.00044465689701916357, | |
| "loss": 4.6645, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 1.675403268767713, | |
| "grad_norm": 0.9459559321403503, | |
| "learning_rate": 0.000443765071845436, | |
| "loss": 4.658, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 1.6780668669692513, | |
| "grad_norm": 0.9914552569389343, | |
| "learning_rate": 0.0004428732466717085, | |
| "loss": 4.6663, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 1.68073046517079, | |
| "grad_norm": 0.9912951588630676, | |
| "learning_rate": 0.00044198320514832835, | |
| "loss": 4.6658, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 1.6833940633723286, | |
| "grad_norm": 0.9673342108726501, | |
| "learning_rate": 0.0004410913799746008, | |
| "loss": 4.6612, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 1.686057661573867, | |
| "grad_norm": 0.9501271843910217, | |
| "learning_rate": 0.00044019955480087327, | |
| "loss": 4.6641, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 1.6887212597754053, | |
| "grad_norm": 0.9438074827194214, | |
| "learning_rate": 0.0004393077296271457, | |
| "loss": 4.6557, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 1.691384857976944, | |
| "grad_norm": 0.9112457633018494, | |
| "learning_rate": 0.0004384176881037656, | |
| "loss": 4.6665, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 1.6940484561784825, | |
| "grad_norm": 0.9219810962677002, | |
| "learning_rate": 0.0004375258629300381, | |
| "loss": 4.6625, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 1.696712054380021, | |
| "grad_norm": 0.8877586126327515, | |
| "learning_rate": 0.00043663403775631054, | |
| "loss": 4.6655, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 1.6993756525815593, | |
| "grad_norm": 1.021628499031067, | |
| "learning_rate": 0.000435742212582583, | |
| "loss": 4.6615, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 1.702039250783098, | |
| "grad_norm": 0.9514620900154114, | |
| "learning_rate": 0.00043485217105920294, | |
| "loss": 4.659, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 1.7047028489846365, | |
| "grad_norm": 0.8997855186462402, | |
| "learning_rate": 0.00043396034588547537, | |
| "loss": 4.6608, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.7073664471861747, | |
| "grad_norm": 0.897196352481842, | |
| "learning_rate": 0.00043306852071174786, | |
| "loss": 4.6633, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 1.7100300453877133, | |
| "grad_norm": 0.8859612941741943, | |
| "learning_rate": 0.0004321766955380203, | |
| "loss": 4.663, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 1.712693643589252, | |
| "grad_norm": 0.9287886023521423, | |
| "learning_rate": 0.00043128665401464026, | |
| "loss": 4.6616, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 1.7153572417907903, | |
| "grad_norm": 0.9006426334381104, | |
| "learning_rate": 0.0004303948288409127, | |
| "loss": 4.6677, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 1.7180208399923287, | |
| "grad_norm": 0.9155673384666443, | |
| "learning_rate": 0.0004295030036671851, | |
| "loss": 4.6557, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 1.7206844381938673, | |
| "grad_norm": 0.909574568271637, | |
| "learning_rate": 0.0004286111784934576, | |
| "loss": 4.6645, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 1.723348036395406, | |
| "grad_norm": 0.9735229015350342, | |
| "learning_rate": 0.00042772113697007747, | |
| "loss": 4.6578, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 1.7260116345969443, | |
| "grad_norm": 0.9536921381950378, | |
| "learning_rate": 0.0004268293117963499, | |
| "loss": 4.6607, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 1.7286752327984827, | |
| "grad_norm": 1.100502610206604, | |
| "learning_rate": 0.0004259374866226224, | |
| "loss": 4.6598, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 1.7313388310000213, | |
| "grad_norm": 0.9191217422485352, | |
| "learning_rate": 0.0004250456614488948, | |
| "loss": 4.6607, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 1.73400242920156, | |
| "grad_norm": 0.921844482421875, | |
| "learning_rate": 0.0004241556199255148, | |
| "loss": 4.6567, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 1.7366660274030983, | |
| "grad_norm": 0.9752650856971741, | |
| "learning_rate": 0.0004232637947517872, | |
| "loss": 4.6625, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 1.7393296256046367, | |
| "grad_norm": 0.9209687113761902, | |
| "learning_rate": 0.00042237196957805966, | |
| "loss": 4.6632, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 1.7419932238061753, | |
| "grad_norm": 0.9033056497573853, | |
| "learning_rate": 0.00042148014440433214, | |
| "loss": 4.658, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 1.7446568220077139, | |
| "grad_norm": 0.9369528889656067, | |
| "learning_rate": 0.00042059010288095206, | |
| "loss": 4.6624, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 1.7473204202092523, | |
| "grad_norm": 0.9487442374229431, | |
| "learning_rate": 0.00041969827770722455, | |
| "loss": 4.6571, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 1.7499840184107907, | |
| "grad_norm": 0.9886392951011658, | |
| "learning_rate": 0.000418806452533497, | |
| "loss": 4.6644, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 1.7526476166123293, | |
| "grad_norm": 0.9492540955543518, | |
| "learning_rate": 0.00041791641101011684, | |
| "loss": 4.6602, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 1.7553112148138679, | |
| "grad_norm": 1.0011500120162964, | |
| "learning_rate": 0.0004170245858363893, | |
| "loss": 4.654, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 1.7579748130154063, | |
| "grad_norm": 0.8877175450325012, | |
| "learning_rate": 0.0004161327606626618, | |
| "loss": 4.661, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.7606384112169446, | |
| "grad_norm": 0.9424140453338623, | |
| "learning_rate": 0.00041524093548893424, | |
| "loss": 4.6626, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 1.7633020094184833, | |
| "grad_norm": 0.9958423376083374, | |
| "learning_rate": 0.00041434911031520673, | |
| "loss": 4.664, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 1.7659656076200219, | |
| "grad_norm": 0.9889068007469177, | |
| "learning_rate": 0.00041345728514147916, | |
| "loss": 4.6539, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 1.7686292058215602, | |
| "grad_norm": 0.9919883608818054, | |
| "learning_rate": 0.00041256545996775165, | |
| "loss": 4.6619, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 1.7712928040230986, | |
| "grad_norm": 0.9275678992271423, | |
| "learning_rate": 0.0004116736347940241, | |
| "loss": 4.6645, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 1.7739564022246372, | |
| "grad_norm": 0.918587863445282, | |
| "learning_rate": 0.00041078359327064394, | |
| "loss": 4.6579, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 1.7766200004261758, | |
| "grad_norm": 0.9589911699295044, | |
| "learning_rate": 0.00040989176809691643, | |
| "loss": 4.6569, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 1.7792835986277142, | |
| "grad_norm": 0.9149937629699707, | |
| "learning_rate": 0.00040899994292318886, | |
| "loss": 4.669, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 1.7819471968292526, | |
| "grad_norm": 1.0267397165298462, | |
| "learning_rate": 0.00040810811774946135, | |
| "loss": 4.6564, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 1.7846107950307912, | |
| "grad_norm": 0.9392015933990479, | |
| "learning_rate": 0.00040721807622608126, | |
| "loss": 4.6553, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 1.7872743932323298, | |
| "grad_norm": 1.0006318092346191, | |
| "learning_rate": 0.0004063262510523537, | |
| "loss": 4.6639, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 1.7899379914338682, | |
| "grad_norm": 1.0681666135787964, | |
| "learning_rate": 0.0004054344258786262, | |
| "loss": 4.6599, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 1.7926015896354066, | |
| "grad_norm": 0.9203771352767944, | |
| "learning_rate": 0.0004045426007048986, | |
| "loss": 4.6579, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 1.7952651878369452, | |
| "grad_norm": 0.9925394058227539, | |
| "learning_rate": 0.00040365255918151853, | |
| "loss": 4.6666, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 1.7979287860384838, | |
| "grad_norm": 1.0396158695220947, | |
| "learning_rate": 0.000402760734007791, | |
| "loss": 4.6609, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 1.8005923842400222, | |
| "grad_norm": 1.0538824796676636, | |
| "learning_rate": 0.00040186890883406345, | |
| "loss": 4.6548, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 1.8032559824415606, | |
| "grad_norm": 1.0223603248596191, | |
| "learning_rate": 0.00040097708366033594, | |
| "loss": 4.6578, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 1.8059195806430992, | |
| "grad_norm": 0.904390811920166, | |
| "learning_rate": 0.0004000870421369558, | |
| "loss": 4.6594, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 1.8085831788446376, | |
| "grad_norm": 0.950413167476654, | |
| "learning_rate": 0.00039919521696322823, | |
| "loss": 4.6518, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 1.811246777046176, | |
| "grad_norm": 0.8616942167282104, | |
| "learning_rate": 0.0003983033917895007, | |
| "loss": 4.6549, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.8139103752477146, | |
| "grad_norm": 0.9749570488929749, | |
| "learning_rate": 0.0003974115666157732, | |
| "loss": 4.6585, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 1.8165739734492532, | |
| "grad_norm": 0.9949798583984375, | |
| "learning_rate": 0.00039652152509239306, | |
| "loss": 4.6555, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 1.8192375716507916, | |
| "grad_norm": 1.0118317604064941, | |
| "learning_rate": 0.00039562969991866555, | |
| "loss": 4.6539, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 1.82190116985233, | |
| "grad_norm": 0.9762909412384033, | |
| "learning_rate": 0.000394737874744938, | |
| "loss": 4.6609, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 1.8245647680538686, | |
| "grad_norm": 0.9497443437576294, | |
| "learning_rate": 0.00039384604957121047, | |
| "loss": 4.6575, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 1.8272283662554072, | |
| "grad_norm": 0.9685680270195007, | |
| "learning_rate": 0.0003929560080478304, | |
| "loss": 4.6511, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 1.8298919644569456, | |
| "grad_norm": 1.0874184370040894, | |
| "learning_rate": 0.0003920641828741028, | |
| "loss": 4.6512, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 1.832555562658484, | |
| "grad_norm": 0.9718310832977295, | |
| "learning_rate": 0.0003911723577003753, | |
| "loss": 4.6546, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 1.8352191608600226, | |
| "grad_norm": 0.9649378657341003, | |
| "learning_rate": 0.00039028053252664773, | |
| "loss": 4.6615, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 1.8378827590615612, | |
| "grad_norm": 1.0718717575073242, | |
| "learning_rate": 0.0003893904910032677, | |
| "loss": 4.6664, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 1.8405463572630996, | |
| "grad_norm": 1.1101832389831543, | |
| "learning_rate": 0.00038849866582954014, | |
| "loss": 4.6559, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 1.843209955464638, | |
| "grad_norm": 0.966593325138092, | |
| "learning_rate": 0.00038760684065581257, | |
| "loss": 4.6577, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 1.8458735536661766, | |
| "grad_norm": 1.01513671875, | |
| "learning_rate": 0.00038671501548208506, | |
| "loss": 4.6569, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 1.8485371518677152, | |
| "grad_norm": 0.9345992207527161, | |
| "learning_rate": 0.0003858249739587049, | |
| "loss": 4.652, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 1.8512007500692536, | |
| "grad_norm": 0.9582251310348511, | |
| "learning_rate": 0.00038493314878497735, | |
| "loss": 4.6579, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 1.853864348270792, | |
| "grad_norm": 0.9576370716094971, | |
| "learning_rate": 0.00038404132361124984, | |
| "loss": 4.6542, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 1.8565279464723305, | |
| "grad_norm": 0.9874210953712463, | |
| "learning_rate": 0.00038314949843752227, | |
| "loss": 4.6531, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 1.8591915446738692, | |
| "grad_norm": 1.075488805770874, | |
| "learning_rate": 0.00038225945691414224, | |
| "loss": 4.656, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 1.8618551428754075, | |
| "grad_norm": 0.9993325471878052, | |
| "learning_rate": 0.00038136763174041467, | |
| "loss": 4.6615, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 1.864518741076946, | |
| "grad_norm": 0.9594368934631348, | |
| "learning_rate": 0.0003804758065666871, | |
| "loss": 4.6533, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.8671823392784845, | |
| "grad_norm": 0.9755575060844421, | |
| "learning_rate": 0.0003795839813929596, | |
| "loss": 4.6558, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 1.8698459374800231, | |
| "grad_norm": 0.9865338802337646, | |
| "learning_rate": 0.0003786939398695795, | |
| "loss": 4.6559, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 1.8725095356815615, | |
| "grad_norm": 1.071847677230835, | |
| "learning_rate": 0.00037780211469585194, | |
| "loss": 4.6648, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 1.8751731338831, | |
| "grad_norm": 0.9860469102859497, | |
| "learning_rate": 0.0003769102895221244, | |
| "loss": 4.65, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 1.8778367320846385, | |
| "grad_norm": 0.9507666826248169, | |
| "learning_rate": 0.00037601846434839685, | |
| "loss": 4.6511, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 1.8805003302861771, | |
| "grad_norm": 1.0288827419281006, | |
| "learning_rate": 0.0003751284228250168, | |
| "loss": 4.6602, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 1.8831639284877155, | |
| "grad_norm": 1.0583363771438599, | |
| "learning_rate": 0.00037423659765128926, | |
| "loss": 4.6553, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 1.885827526689254, | |
| "grad_norm": 0.9062938094139099, | |
| "learning_rate": 0.0003733447724775617, | |
| "loss": 4.6633, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 1.8884911248907925, | |
| "grad_norm": 0.9386794567108154, | |
| "learning_rate": 0.0003724529473038342, | |
| "loss": 4.6483, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 1.8911547230923311, | |
| "grad_norm": 0.9764008522033691, | |
| "learning_rate": 0.00037156290578045404, | |
| "loss": 4.6594, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 1.8938183212938695, | |
| "grad_norm": 0.9707098007202148, | |
| "learning_rate": 0.0003706710806067265, | |
| "loss": 4.645, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 1.896481919495408, | |
| "grad_norm": 1.046889066696167, | |
| "learning_rate": 0.00036977925543299896, | |
| "loss": 4.6516, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 1.8991455176969465, | |
| "grad_norm": 0.9305681586265564, | |
| "learning_rate": 0.0003688874302592714, | |
| "loss": 4.6461, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 1.901809115898485, | |
| "grad_norm": 0.963812530040741, | |
| "learning_rate": 0.00036799738873589136, | |
| "loss": 4.651, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 1.9044727141000233, | |
| "grad_norm": 1.0378142595291138, | |
| "learning_rate": 0.0003671055635621638, | |
| "loss": 4.6523, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 1.9071363123015619, | |
| "grad_norm": 1.0353012084960938, | |
| "learning_rate": 0.0003662137383884362, | |
| "loss": 4.6497, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 1.9097999105031005, | |
| "grad_norm": 0.93437659740448, | |
| "learning_rate": 0.0003653219132147087, | |
| "loss": 4.6494, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 1.9124635087046389, | |
| "grad_norm": 0.9659603238105774, | |
| "learning_rate": 0.0003644318716913286, | |
| "loss": 4.6482, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 1.9151271069061773, | |
| "grad_norm": 0.982214629650116, | |
| "learning_rate": 0.0003635400465176011, | |
| "loss": 4.6464, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 1.9177907051077159, | |
| "grad_norm": 0.9894130229949951, | |
| "learning_rate": 0.00036264822134387354, | |
| "loss": 4.6483, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.9204543033092545, | |
| "grad_norm": 1.0288091897964478, | |
| "learning_rate": 0.000361756396170146, | |
| "loss": 4.6571, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 1.9231179015107929, | |
| "grad_norm": 1.0425199270248413, | |
| "learning_rate": 0.0003608663546467659, | |
| "loss": 4.6438, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 1.9257814997123313, | |
| "grad_norm": 0.9725956320762634, | |
| "learning_rate": 0.0003599745294730383, | |
| "loss": 4.6524, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 1.9284450979138699, | |
| "grad_norm": 0.9731396436691284, | |
| "learning_rate": 0.00035908270429931075, | |
| "loss": 4.6521, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 1.9311086961154085, | |
| "grad_norm": 1.031201720237732, | |
| "learning_rate": 0.00035819087912558324, | |
| "loss": 4.6466, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 1.9337722943169469, | |
| "grad_norm": 0.9079106450080872, | |
| "learning_rate": 0.0003572990539518557, | |
| "loss": 4.6546, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 1.9364358925184852, | |
| "grad_norm": 1.0807876586914062, | |
| "learning_rate": 0.00035640901242847564, | |
| "loss": 4.6448, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 1.9390994907200239, | |
| "grad_norm": 0.9206376075744629, | |
| "learning_rate": 0.0003555171872547481, | |
| "loss": 4.6485, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 1.9417630889215625, | |
| "grad_norm": 1.031483769416809, | |
| "learning_rate": 0.0003546253620810205, | |
| "loss": 4.6495, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 1.9444266871231008, | |
| "grad_norm": 0.9696449041366577, | |
| "learning_rate": 0.000353733536907293, | |
| "loss": 4.6533, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 1.9470902853246392, | |
| "grad_norm": 0.9895356893539429, | |
| "learning_rate": 0.0003528434953839129, | |
| "loss": 4.6508, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 1.9497538835261778, | |
| "grad_norm": 0.9535285234451294, | |
| "learning_rate": 0.0003519516702101854, | |
| "loss": 4.6442, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 1.9524174817277165, | |
| "grad_norm": 1.0165653228759766, | |
| "learning_rate": 0.00035105984503645783, | |
| "loss": 4.6375, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 1.9550810799292548, | |
| "grad_norm": 0.9320639967918396, | |
| "learning_rate": 0.00035016801986273026, | |
| "loss": 4.6473, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 1.9577446781307932, | |
| "grad_norm": 1.1587982177734375, | |
| "learning_rate": 0.00034927797833935023, | |
| "loss": 4.649, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 1.9604082763323318, | |
| "grad_norm": 0.9320794343948364, | |
| "learning_rate": 0.00034838615316562266, | |
| "loss": 4.6514, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.9630718745338704, | |
| "grad_norm": 0.97315913438797, | |
| "learning_rate": 0.0003474943279918951, | |
| "loss": 4.6486, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 1.9657354727354088, | |
| "grad_norm": 1.126283049583435, | |
| "learning_rate": 0.0003466025028181676, | |
| "loss": 4.6525, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.9683990709369472, | |
| "grad_norm": 1.041257381439209, | |
| "learning_rate": 0.00034571246129478744, | |
| "loss": 4.6456, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 1.9710626691384858, | |
| "grad_norm": 1.0350292921066284, | |
| "learning_rate": 0.00034482063612105993, | |
| "loss": 4.6488, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.9737262673400244, | |
| "grad_norm": 0.9576050639152527, | |
| "learning_rate": 0.00034392881094733236, | |
| "loss": 4.648, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 1.9763898655415628, | |
| "grad_norm": 0.9608176350593567, | |
| "learning_rate": 0.0003430369857736048, | |
| "loss": 4.658, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.9790534637431012, | |
| "grad_norm": 1.023898959159851, | |
| "learning_rate": 0.00034214694425022476, | |
| "loss": 4.6533, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 1.9817170619446398, | |
| "grad_norm": 1.0364673137664795, | |
| "learning_rate": 0.0003412551190764972, | |
| "loss": 4.6508, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.9843806601461784, | |
| "grad_norm": 0.9874972105026245, | |
| "learning_rate": 0.00034036329390276963, | |
| "loss": 4.6441, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 1.9870442583477168, | |
| "grad_norm": 0.9010471701622009, | |
| "learning_rate": 0.0003394714687290421, | |
| "loss": 4.6428, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.9897078565492552, | |
| "grad_norm": 0.9260776042938232, | |
| "learning_rate": 0.00033858142720566203, | |
| "loss": 4.6504, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 1.9923714547507938, | |
| "grad_norm": 1.042839527130127, | |
| "learning_rate": 0.0003376896020319345, | |
| "loss": 4.6514, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.9950350529523324, | |
| "grad_norm": 0.9163122177124023, | |
| "learning_rate": 0.00033679777685820695, | |
| "loss": 4.6473, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 1.9976986511538706, | |
| "grad_norm": 0.9647536277770996, | |
| "learning_rate": 0.0003359059516844794, | |
| "loss": 4.6428, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 2.000362249355409, | |
| "grad_norm": 0.9727627038955688, | |
| "learning_rate": 0.0003350159101610993, | |
| "loss": 4.6521, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 2.003025847556948, | |
| "grad_norm": 0.9527126550674438, | |
| "learning_rate": 0.00033412408498737173, | |
| "loss": 4.6466, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 2.0056894457584864, | |
| "grad_norm": 1.060686707496643, | |
| "learning_rate": 0.00033323225981364427, | |
| "loss": 4.6473, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 2.0083530439600246, | |
| "grad_norm": 1.0455362796783447, | |
| "learning_rate": 0.0003323404346399167, | |
| "loss": 4.6405, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 2.011016642161563, | |
| "grad_norm": 0.984203577041626, | |
| "learning_rate": 0.00033145039311653656, | |
| "loss": 4.6529, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 2.013680240363102, | |
| "grad_norm": 0.9882683753967285, | |
| "learning_rate": 0.00033055856794280905, | |
| "loss": 4.6388, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 2.0163438385646404, | |
| "grad_norm": 1.0161495208740234, | |
| "learning_rate": 0.0003296667427690815, | |
| "loss": 4.6407, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 2.0190074367661786, | |
| "grad_norm": 0.9816075563430786, | |
| "learning_rate": 0.0003287749175953539, | |
| "loss": 4.6384, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 2.021671034967717, | |
| "grad_norm": 0.9842175841331482, | |
| "learning_rate": 0.0003278848760719739, | |
| "loss": 4.6458, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 2.0243346331692558, | |
| "grad_norm": 0.965932309627533, | |
| "learning_rate": 0.0003269930508982463, | |
| "loss": 4.6496, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 2.0269982313707944, | |
| "grad_norm": 1.0019505023956299, | |
| "learning_rate": 0.0003261012257245188, | |
| "loss": 4.6423, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 2.0296618295723325, | |
| "grad_norm": 0.9756182432174683, | |
| "learning_rate": 0.0003252111842011387, | |
| "loss": 4.6491, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 2.032325427773871, | |
| "grad_norm": 1.0072699785232544, | |
| "learning_rate": 0.00032431935902741115, | |
| "loss": 4.6371, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 2.0349890259754098, | |
| "grad_norm": 0.9327691793441772, | |
| "learning_rate": 0.00032342753385368364, | |
| "loss": 4.6456, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 2.0376526241769484, | |
| "grad_norm": 1.0072060823440552, | |
| "learning_rate": 0.00032253570867995607, | |
| "loss": 4.6451, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 2.0403162223784865, | |
| "grad_norm": 0.9492465257644653, | |
| "learning_rate": 0.00032164388350622856, | |
| "loss": 4.6469, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 2.042979820580025, | |
| "grad_norm": 1.00717294216156, | |
| "learning_rate": 0.000320752058332501, | |
| "loss": 4.6463, | |
| "step": 383500 | |
| }, | |
| { | |
| "epoch": 2.0456434187815637, | |
| "grad_norm": 0.9812472462654114, | |
| "learning_rate": 0.0003198602331587734, | |
| "loss": 4.6427, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 2.0483070169831024, | |
| "grad_norm": 0.9539963603019714, | |
| "learning_rate": 0.0003189684079850459, | |
| "loss": 4.652, | |
| "step": 384500 | |
| }, | |
| { | |
| "epoch": 2.0509706151846405, | |
| "grad_norm": 0.9309804439544678, | |
| "learning_rate": 0.00031807836646166577, | |
| "loss": 4.6385, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 2.053634213386179, | |
| "grad_norm": 0.9806848764419556, | |
| "learning_rate": 0.0003171865412879382, | |
| "loss": 4.6406, | |
| "step": 385500 | |
| }, | |
| { | |
| "epoch": 2.0562978115877177, | |
| "grad_norm": 0.9556758999824524, | |
| "learning_rate": 0.0003162947161142107, | |
| "loss": 4.6438, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 2.058961409789256, | |
| "grad_norm": 1.0577518939971924, | |
| "learning_rate": 0.0003154028909404832, | |
| "loss": 4.6422, | |
| "step": 386500 | |
| }, | |
| { | |
| "epoch": 2.0616250079907945, | |
| "grad_norm": 0.9437615871429443, | |
| "learning_rate": 0.0003145128494171031, | |
| "loss": 4.6503, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 2.064288606192333, | |
| "grad_norm": 1.0224053859710693, | |
| "learning_rate": 0.0003136210242433755, | |
| "loss": 4.6422, | |
| "step": 387500 | |
| }, | |
| { | |
| "epoch": 2.0669522043938717, | |
| "grad_norm": 0.9545285105705261, | |
| "learning_rate": 0.00031272919906964795, | |
| "loss": 4.6414, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 2.06961580259541, | |
| "grad_norm": 1.057246208190918, | |
| "learning_rate": 0.00031183737389592044, | |
| "loss": 4.6467, | |
| "step": 388500 | |
| }, | |
| { | |
| "epoch": 2.0722794007969485, | |
| "grad_norm": 1.0381290912628174, | |
| "learning_rate": 0.00031094733237254035, | |
| "loss": 4.6454, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 2.074942998998487, | |
| "grad_norm": 0.9364863634109497, | |
| "learning_rate": 0.0003100555071988128, | |
| "loss": 4.6505, | |
| "step": 389500 | |
| }, | |
| { | |
| "epoch": 2.0776065972000257, | |
| "grad_norm": 1.014183759689331, | |
| "learning_rate": 0.0003091636820250853, | |
| "loss": 4.6442, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 2.080270195401564, | |
| "grad_norm": 1.0127809047698975, | |
| "learning_rate": 0.0003082718568513577, | |
| "loss": 4.6461, | |
| "step": 390500 | |
| }, | |
| { | |
| "epoch": 2.0829337936031025, | |
| "grad_norm": 1.0870954990386963, | |
| "learning_rate": 0.0003073818153279777, | |
| "loss": 4.6425, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 2.085597391804641, | |
| "grad_norm": 0.9986569285392761, | |
| "learning_rate": 0.0003064899901542501, | |
| "loss": 4.6464, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 2.0882609900061797, | |
| "grad_norm": 1.044019103050232, | |
| "learning_rate": 0.00030559994863086997, | |
| "loss": 4.6428, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 2.090924588207718, | |
| "grad_norm": 0.9670615196228027, | |
| "learning_rate": 0.00030470812345714246, | |
| "loss": 4.6463, | |
| "step": 392500 | |
| }, | |
| { | |
| "epoch": 2.0935881864092565, | |
| "grad_norm": 0.968877911567688, | |
| "learning_rate": 0.0003038162982834149, | |
| "loss": 4.6445, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 2.096251784610795, | |
| "grad_norm": 1.0653293132781982, | |
| "learning_rate": 0.0003029244731096874, | |
| "loss": 4.642, | |
| "step": 393500 | |
| }, | |
| { | |
| "epoch": 2.0989153828123337, | |
| "grad_norm": 0.9970125555992126, | |
| "learning_rate": 0.0003020344315863073, | |
| "loss": 4.6422, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 2.101578981013872, | |
| "grad_norm": 1.2096583843231201, | |
| "learning_rate": 0.0003011426064125797, | |
| "loss": 4.6387, | |
| "step": 394500 | |
| }, | |
| { | |
| "epoch": 2.1042425792154105, | |
| "grad_norm": 1.0580756664276123, | |
| "learning_rate": 0.0003002507812388522, | |
| "loss": 4.636, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 2.106906177416949, | |
| "grad_norm": 0.9974854588508606, | |
| "learning_rate": 0.00029935895606512464, | |
| "loss": 4.6538, | |
| "step": 395500 | |
| }, | |
| { | |
| "epoch": 2.1095697756184877, | |
| "grad_norm": 0.9898145198822021, | |
| "learning_rate": 0.0002984671308913971, | |
| "loss": 4.6408, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 2.112233373820026, | |
| "grad_norm": 1.0114551782608032, | |
| "learning_rate": 0.00029757530571766956, | |
| "loss": 4.6403, | |
| "step": 396500 | |
| }, | |
| { | |
| "epoch": 2.1148969720215645, | |
| "grad_norm": 1.015718936920166, | |
| "learning_rate": 0.000296683480543942, | |
| "loss": 4.6477, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 2.117560570223103, | |
| "grad_norm": 1.0037897825241089, | |
| "learning_rate": 0.0002957916553702145, | |
| "loss": 4.6417, | |
| "step": 397500 | |
| }, | |
| { | |
| "epoch": 2.1202241684246417, | |
| "grad_norm": 0.9558025002479553, | |
| "learning_rate": 0.0002949016138468344, | |
| "loss": 4.642, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 2.12288776662618, | |
| "grad_norm": 0.9956161975860596, | |
| "learning_rate": 0.0002940097886731068, | |
| "loss": 4.6391, | |
| "step": 398500 | |
| }, | |
| { | |
| "epoch": 2.1255513648277184, | |
| "grad_norm": 1.0069483518600464, | |
| "learning_rate": 0.0002931179634993793, | |
| "loss": 4.641, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 2.128214963029257, | |
| "grad_norm": 0.9737485647201538, | |
| "learning_rate": 0.00029222613832565175, | |
| "loss": 4.6376, | |
| "step": 399500 | |
| }, | |
| { | |
| "epoch": 2.1308785612307957, | |
| "grad_norm": 0.996033251285553, | |
| "learning_rate": 0.0002913360968022717, | |
| "loss": 4.6349, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 2.133542159432334, | |
| "grad_norm": 1.1211779117584229, | |
| "learning_rate": 0.00029044427162854415, | |
| "loss": 4.6415, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 2.1362057576338724, | |
| "grad_norm": 1.0139347314834595, | |
| "learning_rate": 0.0002895524464548166, | |
| "loss": 4.63, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 2.138869355835411, | |
| "grad_norm": 1.051085352897644, | |
| "learning_rate": 0.00028866062128108907, | |
| "loss": 4.6351, | |
| "step": 401500 | |
| }, | |
| { | |
| "epoch": 2.1415329540369497, | |
| "grad_norm": 1.0234901905059814, | |
| "learning_rate": 0.0002877705797577089, | |
| "loss": 4.6384, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 2.144196552238488, | |
| "grad_norm": 1.0635606050491333, | |
| "learning_rate": 0.00028687875458398136, | |
| "loss": 4.6387, | |
| "step": 402500 | |
| }, | |
| { | |
| "epoch": 2.1468601504400264, | |
| "grad_norm": 1.0161880254745483, | |
| "learning_rate": 0.00028598692941025385, | |
| "loss": 4.6428, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 2.149523748641565, | |
| "grad_norm": 1.1507047414779663, | |
| "learning_rate": 0.0002850951042365263, | |
| "loss": 4.6336, | |
| "step": 403500 | |
| }, | |
| { | |
| "epoch": 2.152187346843103, | |
| "grad_norm": 0.9682100415229797, | |
| "learning_rate": 0.00028420506271314625, | |
| "loss": 4.6411, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 2.154850945044642, | |
| "grad_norm": 1.001862645149231, | |
| "learning_rate": 0.0002833132375394187, | |
| "loss": 4.6354, | |
| "step": 404500 | |
| }, | |
| { | |
| "epoch": 2.1575145432461804, | |
| "grad_norm": 1.032013177871704, | |
| "learning_rate": 0.0002824214123656911, | |
| "loss": 4.6297, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 2.160178141447719, | |
| "grad_norm": 1.1009008884429932, | |
| "learning_rate": 0.0002815295871919636, | |
| "loss": 4.6367, | |
| "step": 405500 | |
| }, | |
| { | |
| "epoch": 2.162841739649257, | |
| "grad_norm": 1.0074682235717773, | |
| "learning_rate": 0.00028063776201823603, | |
| "loss": 4.6367, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 2.165505337850796, | |
| "grad_norm": 0.9696961641311646, | |
| "learning_rate": 0.00027974772049485595, | |
| "loss": 4.6361, | |
| "step": 406500 | |
| }, | |
| { | |
| "epoch": 2.1681689360523344, | |
| "grad_norm": 0.9666945934295654, | |
| "learning_rate": 0.00027885589532112843, | |
| "loss": 4.6389, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 2.170832534253873, | |
| "grad_norm": 0.9683700799942017, | |
| "learning_rate": 0.00027796407014740087, | |
| "loss": 4.6401, | |
| "step": 407500 | |
| }, | |
| { | |
| "epoch": 2.173496132455411, | |
| "grad_norm": 0.9791209101676941, | |
| "learning_rate": 0.00027707224497367335, | |
| "loss": 4.6361, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 2.17615973065695, | |
| "grad_norm": 1.0101122856140137, | |
| "learning_rate": 0.0002761822034502932, | |
| "loss": 4.6393, | |
| "step": 408500 | |
| }, | |
| { | |
| "epoch": 2.1788233288584884, | |
| "grad_norm": 0.9944539070129395, | |
| "learning_rate": 0.00027529037827656565, | |
| "loss": 4.6346, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 2.181486927060027, | |
| "grad_norm": 0.9819368124008179, | |
| "learning_rate": 0.0002743985531028382, | |
| "loss": 4.6298, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 2.184150525261565, | |
| "grad_norm": 1.0016804933547974, | |
| "learning_rate": 0.00027350851157945805, | |
| "loss": 4.6415, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.1868141234631038, | |
| "grad_norm": 0.9300929307937622, | |
| "learning_rate": 0.00027261668640573053, | |
| "loss": 4.6324, | |
| "step": 410500 | |
| }, | |
| { | |
| "epoch": 2.1894777216646424, | |
| "grad_norm": 0.9710443615913391, | |
| "learning_rate": 0.00027172486123200297, | |
| "loss": 4.6398, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 2.192141319866181, | |
| "grad_norm": 1.0339746475219727, | |
| "learning_rate": 0.0002708330360582754, | |
| "loss": 4.6383, | |
| "step": 411500 | |
| }, | |
| { | |
| "epoch": 2.194804918067719, | |
| "grad_norm": 1.178084373474121, | |
| "learning_rate": 0.0002699412108845479, | |
| "loss": 4.6408, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 2.1974685162692578, | |
| "grad_norm": 1.00334632396698, | |
| "learning_rate": 0.0002690493857108203, | |
| "loss": 4.637, | |
| "step": 412500 | |
| }, | |
| { | |
| "epoch": 2.2001321144707964, | |
| "grad_norm": 0.962380588054657, | |
| "learning_rate": 0.0002681575605370928, | |
| "loss": 4.643, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 2.202795712672335, | |
| "grad_norm": 1.0694209337234497, | |
| "learning_rate": 0.00026726573536336524, | |
| "loss": 4.6382, | |
| "step": 413500 | |
| }, | |
| { | |
| "epoch": 2.205459310873873, | |
| "grad_norm": 1.0394047498703003, | |
| "learning_rate": 0.00026637569383998515, | |
| "loss": 4.6324, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 2.2081229090754118, | |
| "grad_norm": 1.0649442672729492, | |
| "learning_rate": 0.00026548386866625764, | |
| "loss": 4.6362, | |
| "step": 414500 | |
| }, | |
| { | |
| "epoch": 2.2107865072769504, | |
| "grad_norm": 1.0115076303482056, | |
| "learning_rate": 0.00026459382714287755, | |
| "loss": 4.6314, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 2.213450105478489, | |
| "grad_norm": 1.089772343635559, | |
| "learning_rate": 0.00026370200196915, | |
| "loss": 4.6422, | |
| "step": 415500 | |
| }, | |
| { | |
| "epoch": 2.216113703680027, | |
| "grad_norm": 1.0219160318374634, | |
| "learning_rate": 0.00026281017679542247, | |
| "loss": 4.6368, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 2.2187773018815657, | |
| "grad_norm": 1.2249672412872314, | |
| "learning_rate": 0.0002619183516216949, | |
| "loss": 4.6363, | |
| "step": 416500 | |
| }, | |
| { | |
| "epoch": 2.2214409000831044, | |
| "grad_norm": 1.054093599319458, | |
| "learning_rate": 0.0002610265264479674, | |
| "loss": 4.6344, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 2.224104498284643, | |
| "grad_norm": 1.0143494606018066, | |
| "learning_rate": 0.0002601347012742398, | |
| "loss": 4.6343, | |
| "step": 417500 | |
| }, | |
| { | |
| "epoch": 2.226768096486181, | |
| "grad_norm": 1.083958625793457, | |
| "learning_rate": 0.00025924287610051226, | |
| "loss": 4.6375, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 2.2294316946877197, | |
| "grad_norm": 1.0664527416229248, | |
| "learning_rate": 0.00025835105092678474, | |
| "loss": 4.6374, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 2.2320952928892583, | |
| "grad_norm": 0.9486995935440063, | |
| "learning_rate": 0.0002574610094034046, | |
| "loss": 4.6361, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 2.234758891090797, | |
| "grad_norm": 1.0301847457885742, | |
| "learning_rate": 0.0002565691842296771, | |
| "loss": 4.6363, | |
| "step": 419500 | |
| }, | |
| { | |
| "epoch": 2.237422489292335, | |
| "grad_norm": 1.0453981161117554, | |
| "learning_rate": 0.0002556773590559496, | |
| "loss": 4.6315, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.2400860874938737, | |
| "grad_norm": 1.0513032674789429, | |
| "learning_rate": 0.000254785533882222, | |
| "loss": 4.6225, | |
| "step": 420500 | |
| }, | |
| { | |
| "epoch": 2.2427496856954123, | |
| "grad_norm": 1.0518192052841187, | |
| "learning_rate": 0.0002538972760091894, | |
| "loss": 4.6359, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 2.2454132838969505, | |
| "grad_norm": 1.0620026588439941, | |
| "learning_rate": 0.00025300545083546184, | |
| "loss": 4.6363, | |
| "step": 421500 | |
| }, | |
| { | |
| "epoch": 2.248076882098489, | |
| "grad_norm": 1.0133403539657593, | |
| "learning_rate": 0.00025211362566173427, | |
| "loss": 4.6356, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 2.2507404803000277, | |
| "grad_norm": 1.0840908288955688, | |
| "learning_rate": 0.00025122180048800676, | |
| "loss": 4.632, | |
| "step": 422500 | |
| }, | |
| { | |
| "epoch": 2.2534040785015663, | |
| "grad_norm": 1.020334005355835, | |
| "learning_rate": 0.0002503299753142792, | |
| "loss": 4.6422, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 2.256067676703105, | |
| "grad_norm": 1.0460251569747925, | |
| "learning_rate": 0.0002494381501405517, | |
| "loss": 4.6341, | |
| "step": 423500 | |
| }, | |
| { | |
| "epoch": 2.258731274904643, | |
| "grad_norm": 0.9714872241020203, | |
| "learning_rate": 0.0002485463249668241, | |
| "loss": 4.6323, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 2.2613948731061817, | |
| "grad_norm": 1.111423373222351, | |
| "learning_rate": 0.00024765449979309654, | |
| "loss": 4.6308, | |
| "step": 424500 | |
| }, | |
| { | |
| "epoch": 2.2640584713077203, | |
| "grad_norm": 1.063982367515564, | |
| "learning_rate": 0.00024676445826971646, | |
| "loss": 4.6341, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 2.2667220695092585, | |
| "grad_norm": 1.131791114807129, | |
| "learning_rate": 0.0002458744167463364, | |
| "loss": 4.6318, | |
| "step": 425500 | |
| }, | |
| { | |
| "epoch": 2.269385667710797, | |
| "grad_norm": 0.969653308391571, | |
| "learning_rate": 0.00024498259157260886, | |
| "loss": 4.6393, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 2.2720492659123357, | |
| "grad_norm": 1.02666175365448, | |
| "learning_rate": 0.00024409076639888132, | |
| "loss": 4.6359, | |
| "step": 426500 | |
| }, | |
| { | |
| "epoch": 2.2747128641138743, | |
| "grad_norm": 1.1176308393478394, | |
| "learning_rate": 0.00024319894122515375, | |
| "loss": 4.6339, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 2.2773764623154125, | |
| "grad_norm": 1.00742506980896, | |
| "learning_rate": 0.0002423071160514262, | |
| "loss": 4.6231, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 2.280040060516951, | |
| "grad_norm": 1.0069453716278076, | |
| "learning_rate": 0.00024141529087769867, | |
| "loss": 4.6345, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 2.2827036587184897, | |
| "grad_norm": 1.0893571376800537, | |
| "learning_rate": 0.00024052346570397113, | |
| "loss": 4.6319, | |
| "step": 428500 | |
| }, | |
| { | |
| "epoch": 2.2853672569200283, | |
| "grad_norm": 0.9942576885223389, | |
| "learning_rate": 0.00023963342418059104, | |
| "loss": 4.6305, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 2.2880308551215665, | |
| "grad_norm": 1.0035908222198486, | |
| "learning_rate": 0.00023874159900686348, | |
| "loss": 4.633, | |
| "step": 429500 | |
| }, | |
| { | |
| "epoch": 2.290694453323105, | |
| "grad_norm": 0.9980865716934204, | |
| "learning_rate": 0.00023784977383313594, | |
| "loss": 4.6276, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.2933580515246437, | |
| "grad_norm": 1.0136911869049072, | |
| "learning_rate": 0.0002369579486594084, | |
| "loss": 4.6286, | |
| "step": 430500 | |
| }, | |
| { | |
| "epoch": 2.2960216497261823, | |
| "grad_norm": 1.04083251953125, | |
| "learning_rate": 0.00023606612348568085, | |
| "loss": 4.6401, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 2.2986852479277204, | |
| "grad_norm": 1.3262946605682373, | |
| "learning_rate": 0.00023517429831195331, | |
| "loss": 4.626, | |
| "step": 431500 | |
| }, | |
| { | |
| "epoch": 2.301348846129259, | |
| "grad_norm": 1.0201387405395508, | |
| "learning_rate": 0.00023428247313822577, | |
| "loss": 4.6319, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 2.3040124443307977, | |
| "grad_norm": 1.1112711429595947, | |
| "learning_rate": 0.0002333906479644982, | |
| "loss": 4.6356, | |
| "step": 432500 | |
| }, | |
| { | |
| "epoch": 2.3066760425323363, | |
| "grad_norm": 1.037654995918274, | |
| "learning_rate": 0.00023250060644111815, | |
| "loss": 4.6275, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 2.3093396407338744, | |
| "grad_norm": 1.1203975677490234, | |
| "learning_rate": 0.0002316087812673906, | |
| "loss": 4.6348, | |
| "step": 433500 | |
| }, | |
| { | |
| "epoch": 2.312003238935413, | |
| "grad_norm": 1.1220102310180664, | |
| "learning_rate": 0.00023071695609366307, | |
| "loss": 4.6275, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 2.3146668371369516, | |
| "grad_norm": 1.0318022966384888, | |
| "learning_rate": 0.0002298251309199355, | |
| "loss": 4.6279, | |
| "step": 434500 | |
| }, | |
| { | |
| "epoch": 2.31733043533849, | |
| "grad_norm": 1.0012495517730713, | |
| "learning_rate": 0.00022893508939655541, | |
| "loss": 4.6305, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 2.3199940335400284, | |
| "grad_norm": 1.0262128114700317, | |
| "learning_rate": 0.00022804504787317533, | |
| "loss": 4.631, | |
| "step": 435500 | |
| }, | |
| { | |
| "epoch": 2.322657631741567, | |
| "grad_norm": 1.0249779224395752, | |
| "learning_rate": 0.0002271532226994478, | |
| "loss": 4.6313, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 2.3253212299431056, | |
| "grad_norm": 1.0550204515457153, | |
| "learning_rate": 0.00022626139752572025, | |
| "loss": 4.6357, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 2.3279848281446442, | |
| "grad_norm": 1.1302458047866821, | |
| "learning_rate": 0.0002253695723519927, | |
| "loss": 4.6256, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 2.3306484263461824, | |
| "grad_norm": 1.0832403898239136, | |
| "learning_rate": 0.00022447774717826517, | |
| "loss": 4.6286, | |
| "step": 437500 | |
| }, | |
| { | |
| "epoch": 2.333312024547721, | |
| "grad_norm": 1.0546700954437256, | |
| "learning_rate": 0.00022358770565488506, | |
| "loss": 4.6341, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 2.3359756227492596, | |
| "grad_norm": 1.0069321393966675, | |
| "learning_rate": 0.00022269588048115752, | |
| "loss": 4.6311, | |
| "step": 438500 | |
| }, | |
| { | |
| "epoch": 2.338639220950798, | |
| "grad_norm": 1.0736314058303833, | |
| "learning_rate": 0.00022180405530742997, | |
| "loss": 4.6162, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 2.3413028191523364, | |
| "grad_norm": 1.0590038299560547, | |
| "learning_rate": 0.00022091223013370243, | |
| "loss": 4.6332, | |
| "step": 439500 | |
| }, | |
| { | |
| "epoch": 2.343966417353875, | |
| "grad_norm": 1.022923231124878, | |
| "learning_rate": 0.0002200204049599749, | |
| "loss": 4.6248, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.3466300155554136, | |
| "grad_norm": 1.0593072175979614, | |
| "learning_rate": 0.00021912857978624735, | |
| "loss": 4.6273, | |
| "step": 440500 | |
| }, | |
| { | |
| "epoch": 2.3492936137569522, | |
| "grad_norm": 1.0082392692565918, | |
| "learning_rate": 0.00021823675461251979, | |
| "loss": 4.6275, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 2.3519572119584904, | |
| "grad_norm": 0.9842462539672852, | |
| "learning_rate": 0.00021734492943879224, | |
| "loss": 4.6346, | |
| "step": 441500 | |
| }, | |
| { | |
| "epoch": 2.354620810160029, | |
| "grad_norm": 1.0930989980697632, | |
| "learning_rate": 0.0002164548879154122, | |
| "loss": 4.6299, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 2.3572844083615676, | |
| "grad_norm": 1.1248174905776978, | |
| "learning_rate": 0.00021556306274168465, | |
| "loss": 4.6353, | |
| "step": 442500 | |
| }, | |
| { | |
| "epoch": 2.3599480065631058, | |
| "grad_norm": 1.0502623319625854, | |
| "learning_rate": 0.00021467123756795708, | |
| "loss": 4.6322, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 2.3626116047646444, | |
| "grad_norm": 1.045857548713684, | |
| "learning_rate": 0.00021377941239422954, | |
| "loss": 4.6232, | |
| "step": 443500 | |
| }, | |
| { | |
| "epoch": 2.365275202966183, | |
| "grad_norm": 1.151315450668335, | |
| "learning_rate": 0.00021288937087084945, | |
| "loss": 4.6333, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 2.3679388011677216, | |
| "grad_norm": 1.0542734861373901, | |
| "learning_rate": 0.0002119975456971219, | |
| "loss": 4.6246, | |
| "step": 444500 | |
| }, | |
| { | |
| "epoch": 2.3706023993692598, | |
| "grad_norm": 1.1092387437820435, | |
| "learning_rate": 0.00021110572052339435, | |
| "loss": 4.6313, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 2.3732659975707984, | |
| "grad_norm": 1.2137620449066162, | |
| "learning_rate": 0.0002102138953496668, | |
| "loss": 4.631, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 2.375929595772337, | |
| "grad_norm": 1.072719931602478, | |
| "learning_rate": 0.00020932207017593926, | |
| "loss": 4.6199, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 2.3785931939738756, | |
| "grad_norm": 1.1971569061279297, | |
| "learning_rate": 0.00020843024500221172, | |
| "loss": 4.622, | |
| "step": 446500 | |
| }, | |
| { | |
| "epoch": 2.3812567921754138, | |
| "grad_norm": 1.0519288778305054, | |
| "learning_rate": 0.00020753841982848418, | |
| "loss": 4.6261, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 2.3839203903769524, | |
| "grad_norm": 1.0470134019851685, | |
| "learning_rate": 0.0002066483783051041, | |
| "loss": 4.6231, | |
| "step": 447500 | |
| }, | |
| { | |
| "epoch": 2.386583988578491, | |
| "grad_norm": 1.2513642311096191, | |
| "learning_rate": 0.00020575655313137656, | |
| "loss": 4.6348, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 2.3892475867800296, | |
| "grad_norm": 1.031900405883789, | |
| "learning_rate": 0.00020486472795764902, | |
| "loss": 4.6281, | |
| "step": 448500 | |
| }, | |
| { | |
| "epoch": 2.3919111849815677, | |
| "grad_norm": 1.0538623332977295, | |
| "learning_rate": 0.00020397290278392148, | |
| "loss": 4.6284, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 2.3945747831831063, | |
| "grad_norm": 1.071651816368103, | |
| "learning_rate": 0.00020308107761019394, | |
| "loss": 4.6278, | |
| "step": 449500 | |
| }, | |
| { | |
| "epoch": 2.397238381384645, | |
| "grad_norm": 1.1340712308883667, | |
| "learning_rate": 0.00020219103608681382, | |
| "loss": 4.6314, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.3999019795861836, | |
| "grad_norm": 1.0195579528808594, | |
| "learning_rate": 0.00020129921091308628, | |
| "loss": 4.6304, | |
| "step": 450500 | |
| }, | |
| { | |
| "epoch": 2.4025655777877217, | |
| "grad_norm": 1.0579105615615845, | |
| "learning_rate": 0.00020040738573935874, | |
| "loss": 4.6223, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 2.4052291759892603, | |
| "grad_norm": 1.0337562561035156, | |
| "learning_rate": 0.0001995155605656312, | |
| "loss": 4.6316, | |
| "step": 451500 | |
| }, | |
| { | |
| "epoch": 2.407892774190799, | |
| "grad_norm": 1.085295557975769, | |
| "learning_rate": 0.00019862373539190363, | |
| "loss": 4.6199, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 2.4105563723923376, | |
| "grad_norm": 1.1386431455612183, | |
| "learning_rate": 0.00019773369386852358, | |
| "loss": 4.6247, | |
| "step": 452500 | |
| }, | |
| { | |
| "epoch": 2.4132199705938757, | |
| "grad_norm": 1.0375934839248657, | |
| "learning_rate": 0.00019684186869479604, | |
| "loss": 4.6259, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 2.4158835687954143, | |
| "grad_norm": 1.110255479812622, | |
| "learning_rate": 0.0001959500435210685, | |
| "loss": 4.6211, | |
| "step": 453500 | |
| }, | |
| { | |
| "epoch": 2.418547166996953, | |
| "grad_norm": 1.0886731147766113, | |
| "learning_rate": 0.00019505821834734096, | |
| "loss": 4.6308, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 2.4212107651984915, | |
| "grad_norm": 1.1896620988845825, | |
| "learning_rate": 0.0001941663931736134, | |
| "loss": 4.6244, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 2.4238743634000297, | |
| "grad_norm": 1.076377034187317, | |
| "learning_rate": 0.00019327456799988585, | |
| "loss": 4.6261, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 2.4265379616015683, | |
| "grad_norm": 1.1211566925048828, | |
| "learning_rate": 0.0001923827428261583, | |
| "loss": 4.627, | |
| "step": 455500 | |
| }, | |
| { | |
| "epoch": 2.429201559803107, | |
| "grad_norm": 1.1093415021896362, | |
| "learning_rate": 0.00019149091765243077, | |
| "loss": 4.6292, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 2.431865158004645, | |
| "grad_norm": 1.2548290491104126, | |
| "learning_rate": 0.00019060087612905065, | |
| "loss": 4.6207, | |
| "step": 456500 | |
| }, | |
| { | |
| "epoch": 2.4345287562061837, | |
| "grad_norm": 1.0689791440963745, | |
| "learning_rate": 0.0001897108346056706, | |
| "loss": 4.622, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 2.4371923544077223, | |
| "grad_norm": 1.0006210803985596, | |
| "learning_rate": 0.00018881900943194306, | |
| "loss": 4.6269, | |
| "step": 457500 | |
| }, | |
| { | |
| "epoch": 2.439855952609261, | |
| "grad_norm": 1.0268884897232056, | |
| "learning_rate": 0.00018792718425821552, | |
| "loss": 4.6215, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 2.4425195508107995, | |
| "grad_norm": 1.223487377166748, | |
| "learning_rate": 0.00018703535908448795, | |
| "loss": 4.6205, | |
| "step": 458500 | |
| }, | |
| { | |
| "epoch": 2.4451831490123377, | |
| "grad_norm": 1.104552984237671, | |
| "learning_rate": 0.0001861435339107604, | |
| "loss": 4.6241, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 2.4478467472138763, | |
| "grad_norm": 1.0752313137054443, | |
| "learning_rate": 0.00018525349238738032, | |
| "loss": 4.6253, | |
| "step": 459500 | |
| }, | |
| { | |
| "epoch": 2.450510345415415, | |
| "grad_norm": 1.0842454433441162, | |
| "learning_rate": 0.00018436166721365278, | |
| "loss": 4.6234, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.453173943616953, | |
| "grad_norm": 1.0523731708526611, | |
| "learning_rate": 0.00018346984203992521, | |
| "loss": 4.63, | |
| "step": 460500 | |
| }, | |
| { | |
| "epoch": 2.4558375418184917, | |
| "grad_norm": 1.1694416999816895, | |
| "learning_rate": 0.00018257801686619767, | |
| "loss": 4.6239, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 2.4585011400200303, | |
| "grad_norm": 1.0971251726150513, | |
| "learning_rate": 0.00018168619169247013, | |
| "loss": 4.6215, | |
| "step": 461500 | |
| }, | |
| { | |
| "epoch": 2.461164738221569, | |
| "grad_norm": 1.0404231548309326, | |
| "learning_rate": 0.00018079615016909008, | |
| "loss": 4.6263, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 2.463828336423107, | |
| "grad_norm": 1.0926011800765991, | |
| "learning_rate": 0.00017990432499536254, | |
| "loss": 4.6273, | |
| "step": 462500 | |
| }, | |
| { | |
| "epoch": 2.4664919346246457, | |
| "grad_norm": 1.079408884048462, | |
| "learning_rate": 0.00017901249982163497, | |
| "loss": 4.6124, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 2.4691555328261843, | |
| "grad_norm": 1.0728904008865356, | |
| "learning_rate": 0.00017812067464790743, | |
| "loss": 4.6215, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 2.471819131027723, | |
| "grad_norm": 1.2427496910095215, | |
| "learning_rate": 0.00017722884947417989, | |
| "loss": 4.6177, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 2.474482729229261, | |
| "grad_norm": 1.0962218046188354, | |
| "learning_rate": 0.00017633702430045235, | |
| "loss": 4.6202, | |
| "step": 464500 | |
| }, | |
| { | |
| "epoch": 2.4771463274307997, | |
| "grad_norm": 1.0535134077072144, | |
| "learning_rate": 0.00017544698277707223, | |
| "loss": 4.6229, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 2.4798099256323383, | |
| "grad_norm": 1.1047760248184204, | |
| "learning_rate": 0.0001745551576033447, | |
| "loss": 4.6232, | |
| "step": 465500 | |
| }, | |
| { | |
| "epoch": 2.482473523833877, | |
| "grad_norm": 1.0571211576461792, | |
| "learning_rate": 0.00017366333242961715, | |
| "loss": 4.6182, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 2.485137122035415, | |
| "grad_norm": 1.045280933380127, | |
| "learning_rate": 0.0001727715072558896, | |
| "loss": 4.6214, | |
| "step": 466500 | |
| }, | |
| { | |
| "epoch": 2.4878007202369536, | |
| "grad_norm": 1.0921036005020142, | |
| "learning_rate": 0.00017187968208216207, | |
| "loss": 4.6283, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 2.4904643184384923, | |
| "grad_norm": 1.0829055309295654, | |
| "learning_rate": 0.00017098785690843453, | |
| "loss": 4.6228, | |
| "step": 467500 | |
| }, | |
| { | |
| "epoch": 2.493127916640031, | |
| "grad_norm": 1.0832949876785278, | |
| "learning_rate": 0.00017009603173470696, | |
| "loss": 4.6234, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 2.495791514841569, | |
| "grad_norm": 1.1113747358322144, | |
| "learning_rate": 0.0001692059902113269, | |
| "loss": 4.6245, | |
| "step": 468500 | |
| }, | |
| { | |
| "epoch": 2.4984551130431076, | |
| "grad_norm": 1.0775564908981323, | |
| "learning_rate": 0.00016831416503759937, | |
| "loss": 4.6211, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 2.5011187112446462, | |
| "grad_norm": 1.0286856889724731, | |
| "learning_rate": 0.00016742233986387182, | |
| "loss": 4.6168, | |
| "step": 469500 | |
| }, | |
| { | |
| "epoch": 2.5037823094461844, | |
| "grad_norm": 1.1658544540405273, | |
| "learning_rate": 0.00016653051469014426, | |
| "loss": 4.6178, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 2.506445907647723, | |
| "grad_norm": 1.1998695135116577, | |
| "learning_rate": 0.00016563868951641672, | |
| "loss": 4.6239, | |
| "step": 470500 | |
| }, | |
| { | |
| "epoch": 2.5091095058492616, | |
| "grad_norm": 1.065800666809082, | |
| "learning_rate": 0.00016474686434268918, | |
| "loss": 4.6215, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 2.5117731040508002, | |
| "grad_norm": 1.1773850917816162, | |
| "learning_rate": 0.0001638568228193091, | |
| "loss": 4.6245, | |
| "step": 471500 | |
| }, | |
| { | |
| "epoch": 2.514436702252339, | |
| "grad_norm": 1.1137776374816895, | |
| "learning_rate": 0.00016296499764558152, | |
| "loss": 4.6168, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 2.517100300453877, | |
| "grad_norm": 1.0657340288162231, | |
| "learning_rate": 0.00016207317247185398, | |
| "loss": 4.6185, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 2.5197638986554156, | |
| "grad_norm": 1.0470982789993286, | |
| "learning_rate": 0.00016118134729812644, | |
| "loss": 4.6135, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 2.522427496856954, | |
| "grad_norm": 1.116703748703003, | |
| "learning_rate": 0.0001602895221243989, | |
| "loss": 4.623, | |
| "step": 473500 | |
| }, | |
| { | |
| "epoch": 2.5250910950584924, | |
| "grad_norm": 1.0753133296966553, | |
| "learning_rate": 0.00015939948060101884, | |
| "loss": 4.6198, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 2.527754693260031, | |
| "grad_norm": 1.034504771232605, | |
| "learning_rate": 0.00015850765542729128, | |
| "loss": 4.6167, | |
| "step": 474500 | |
| }, | |
| { | |
| "epoch": 2.5304182914615696, | |
| "grad_norm": 1.1084864139556885, | |
| "learning_rate": 0.00015761583025356374, | |
| "loss": 4.6158, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 2.533081889663108, | |
| "grad_norm": 1.1004912853240967, | |
| "learning_rate": 0.0001567240050798362, | |
| "loss": 4.62, | |
| "step": 475500 | |
| }, | |
| { | |
| "epoch": 2.535745487864647, | |
| "grad_norm": 1.0630244016647339, | |
| "learning_rate": 0.00015583217990610865, | |
| "loss": 4.6224, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 2.538409086066185, | |
| "grad_norm": 1.2044382095336914, | |
| "learning_rate": 0.00015494213838272854, | |
| "loss": 4.621, | |
| "step": 476500 | |
| }, | |
| { | |
| "epoch": 2.5410726842677236, | |
| "grad_norm": 1.135984182357788, | |
| "learning_rate": 0.000154050313209001, | |
| "loss": 4.6186, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 2.543736282469262, | |
| "grad_norm": 1.026955008506775, | |
| "learning_rate": 0.00015315848803527346, | |
| "loss": 4.6232, | |
| "step": 477500 | |
| }, | |
| { | |
| "epoch": 2.5463998806708004, | |
| "grad_norm": 1.180627465248108, | |
| "learning_rate": 0.00015226666286154592, | |
| "loss": 4.616, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 2.549063478872339, | |
| "grad_norm": 1.1590373516082764, | |
| "learning_rate": 0.00015137483768781838, | |
| "loss": 4.6181, | |
| "step": 478500 | |
| }, | |
| { | |
| "epoch": 2.5517270770738776, | |
| "grad_norm": 1.1868000030517578, | |
| "learning_rate": 0.0001504847961644383, | |
| "loss": 4.6204, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 2.554390675275416, | |
| "grad_norm": 1.1171778440475464, | |
| "learning_rate": 0.00014959297099071076, | |
| "loss": 4.612, | |
| "step": 479500 | |
| }, | |
| { | |
| "epoch": 2.557054273476955, | |
| "grad_norm": 1.1593362092971802, | |
| "learning_rate": 0.00014870114581698321, | |
| "loss": 4.6143, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.559717871678493, | |
| "grad_norm": 1.047542691230774, | |
| "learning_rate": 0.00014780932064325567, | |
| "loss": 4.6133, | |
| "step": 480500 | |
| }, | |
| { | |
| "epoch": 2.5623814698800316, | |
| "grad_norm": 1.1630990505218506, | |
| "learning_rate": 0.00014691749546952813, | |
| "loss": 4.6167, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 2.56504506808157, | |
| "grad_norm": 1.067874789237976, | |
| "learning_rate": 0.00014602567029580057, | |
| "loss": 4.6257, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 2.5677086662831083, | |
| "grad_norm": 1.2333664894104004, | |
| "learning_rate": 0.00014513384512207303, | |
| "loss": 4.621, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 2.570372264484647, | |
| "grad_norm": 1.1577945947647095, | |
| "learning_rate": 0.00014424380359869294, | |
| "loss": 4.619, | |
| "step": 482500 | |
| }, | |
| { | |
| "epoch": 2.5730358626861856, | |
| "grad_norm": 1.1029491424560547, | |
| "learning_rate": 0.0001433519784249654, | |
| "loss": 4.6151, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 2.575699460887724, | |
| "grad_norm": 1.076328158378601, | |
| "learning_rate": 0.00014246015325123783, | |
| "loss": 4.6231, | |
| "step": 483500 | |
| }, | |
| { | |
| "epoch": 2.5783630590892628, | |
| "grad_norm": 1.164756178855896, | |
| "learning_rate": 0.0001415683280775103, | |
| "loss": 4.6106, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 2.581026657290801, | |
| "grad_norm": 1.0658756494522095, | |
| "learning_rate": 0.00014067650290378275, | |
| "loss": 4.622, | |
| "step": 484500 | |
| }, | |
| { | |
| "epoch": 2.5836902554923395, | |
| "grad_norm": 1.08512282371521, | |
| "learning_rate": 0.00013978467773005524, | |
| "loss": 4.6156, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 2.586353853693878, | |
| "grad_norm": 1.2632811069488525, | |
| "learning_rate": 0.0001388928525563277, | |
| "loss": 4.6186, | |
| "step": 485500 | |
| }, | |
| { | |
| "epoch": 2.5890174518954163, | |
| "grad_norm": 1.0426981449127197, | |
| "learning_rate": 0.00013800102738260016, | |
| "loss": 4.6172, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 2.591681050096955, | |
| "grad_norm": 1.0602271556854248, | |
| "learning_rate": 0.00013711098585922005, | |
| "loss": 4.617, | |
| "step": 486500 | |
| }, | |
| { | |
| "epoch": 2.5943446482984935, | |
| "grad_norm": 1.0918567180633545, | |
| "learning_rate": 0.0001362191606854925, | |
| "loss": 4.6205, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 2.5970082465000317, | |
| "grad_norm": 1.1476528644561768, | |
| "learning_rate": 0.00013532911916211242, | |
| "loss": 4.6164, | |
| "step": 487500 | |
| }, | |
| { | |
| "epoch": 2.5996718447015703, | |
| "grad_norm": 1.0901427268981934, | |
| "learning_rate": 0.00013443729398838485, | |
| "loss": 4.6212, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 2.602335442903109, | |
| "grad_norm": 1.1208913326263428, | |
| "learning_rate": 0.0001335454688146573, | |
| "loss": 4.6151, | |
| "step": 488500 | |
| }, | |
| { | |
| "epoch": 2.6049990411046475, | |
| "grad_norm": 1.1271238327026367, | |
| "learning_rate": 0.00013265364364092977, | |
| "loss": 4.6199, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 2.607662639306186, | |
| "grad_norm": 1.0943602323532104, | |
| "learning_rate": 0.0001317636021175497, | |
| "loss": 4.6166, | |
| "step": 489500 | |
| }, | |
| { | |
| "epoch": 2.6103262375077243, | |
| "grad_norm": 1.1179605722427368, | |
| "learning_rate": 0.00013087177694382215, | |
| "loss": 4.6055, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.612989835709263, | |
| "grad_norm": 1.107720971107483, | |
| "learning_rate": 0.0001299799517700946, | |
| "loss": 4.6132, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 2.6156534339108015, | |
| "grad_norm": 1.0601928234100342, | |
| "learning_rate": 0.00012908812659636706, | |
| "loss": 4.6083, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 2.6183170321123397, | |
| "grad_norm": 1.1014827489852905, | |
| "learning_rate": 0.00012819630142263952, | |
| "loss": 4.6125, | |
| "step": 491500 | |
| }, | |
| { | |
| "epoch": 2.6209806303138783, | |
| "grad_norm": 1.2044124603271484, | |
| "learning_rate": 0.0001273062598992594, | |
| "loss": 4.6232, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 2.623644228515417, | |
| "grad_norm": 1.0993869304656982, | |
| "learning_rate": 0.00012641443472553187, | |
| "loss": 4.6082, | |
| "step": 492500 | |
| }, | |
| { | |
| "epoch": 2.6263078267169555, | |
| "grad_norm": 1.161431074142456, | |
| "learning_rate": 0.00012552260955180433, | |
| "loss": 4.6177, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 2.628971424918494, | |
| "grad_norm": 1.0688318014144897, | |
| "learning_rate": 0.0001246307843780768, | |
| "loss": 4.6124, | |
| "step": 493500 | |
| }, | |
| { | |
| "epoch": 2.6316350231200323, | |
| "grad_norm": 1.0411505699157715, | |
| "learning_rate": 0.00012373895920434925, | |
| "loss": 4.6142, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 2.634298621321571, | |
| "grad_norm": 1.101181983947754, | |
| "learning_rate": 0.0001228471340306217, | |
| "loss": 4.6185, | |
| "step": 494500 | |
| }, | |
| { | |
| "epoch": 2.6369622195231095, | |
| "grad_norm": 1.0938246250152588, | |
| "learning_rate": 0.00012195530885689417, | |
| "loss": 4.6202, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 2.6396258177246477, | |
| "grad_norm": 1.137458086013794, | |
| "learning_rate": 0.00012106348368316663, | |
| "loss": 4.6102, | |
| "step": 495500 | |
| }, | |
| { | |
| "epoch": 2.6422894159261863, | |
| "grad_norm": 1.1052279472351074, | |
| "learning_rate": 0.00012017522581013399, | |
| "loss": 4.6173, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 2.644953014127725, | |
| "grad_norm": 1.1576839685440063, | |
| "learning_rate": 0.00011928340063640645, | |
| "loss": 4.6115, | |
| "step": 496500 | |
| }, | |
| { | |
| "epoch": 2.6476166123292635, | |
| "grad_norm": 1.149245023727417, | |
| "learning_rate": 0.00011839157546267889, | |
| "loss": 4.6142, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 2.650280210530802, | |
| "grad_norm": 1.1047520637512207, | |
| "learning_rate": 0.00011749975028895136, | |
| "loss": 4.6143, | |
| "step": 497500 | |
| }, | |
| { | |
| "epoch": 2.6529438087323403, | |
| "grad_norm": 1.2275629043579102, | |
| "learning_rate": 0.00011660792511522382, | |
| "loss": 4.6131, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 2.655607406933879, | |
| "grad_norm": 1.0445078611373901, | |
| "learning_rate": 0.00011571788359184373, | |
| "loss": 4.6088, | |
| "step": 498500 | |
| }, | |
| { | |
| "epoch": 2.6582710051354175, | |
| "grad_norm": 1.119834065437317, | |
| "learning_rate": 0.00011482605841811617, | |
| "loss": 4.6125, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 2.6609346033369556, | |
| "grad_norm": 1.1206032037734985, | |
| "learning_rate": 0.00011393423324438863, | |
| "loss": 4.6185, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 2.6635982015384942, | |
| "grad_norm": 1.3817057609558105, | |
| "learning_rate": 0.0001130424080706611, | |
| "loss": 4.6119, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.666261799740033, | |
| "grad_norm": 1.1292685270309448, | |
| "learning_rate": 0.00011215058289693356, | |
| "loss": 4.6121, | |
| "step": 500500 | |
| }, | |
| { | |
| "epoch": 2.6689253979415715, | |
| "grad_norm": 1.1789071559906006, | |
| "learning_rate": 0.00011125875772320601, | |
| "loss": 4.6178, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 2.67158899614311, | |
| "grad_norm": 1.1726536750793457, | |
| "learning_rate": 0.00011036871619982591, | |
| "loss": 4.6096, | |
| "step": 501500 | |
| }, | |
| { | |
| "epoch": 2.6742525943446482, | |
| "grad_norm": 1.1307861804962158, | |
| "learning_rate": 0.00010947689102609837, | |
| "loss": 4.6142, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 2.676916192546187, | |
| "grad_norm": 1.2103127241134644, | |
| "learning_rate": 0.00010858506585237083, | |
| "loss": 4.6082, | |
| "step": 502500 | |
| }, | |
| { | |
| "epoch": 2.6795797907477255, | |
| "grad_norm": 1.0934276580810547, | |
| "learning_rate": 0.00010769324067864329, | |
| "loss": 4.6164, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 2.6822433889492636, | |
| "grad_norm": 1.232783555984497, | |
| "learning_rate": 0.00010680141550491575, | |
| "loss": 4.6195, | |
| "step": 503500 | |
| }, | |
| { | |
| "epoch": 2.6849069871508022, | |
| "grad_norm": 1.0889538526535034, | |
| "learning_rate": 0.00010591137398153565, | |
| "loss": 4.6099, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 2.687570585352341, | |
| "grad_norm": 1.0930888652801514, | |
| "learning_rate": 0.00010501954880780811, | |
| "loss": 4.6079, | |
| "step": 504500 | |
| }, | |
| { | |
| "epoch": 2.690234183553879, | |
| "grad_norm": 1.108748197555542, | |
| "learning_rate": 0.00010412772363408056, | |
| "loss": 4.6141, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 2.6928977817554176, | |
| "grad_norm": 1.1860270500183105, | |
| "learning_rate": 0.00010323589846035303, | |
| "loss": 4.6107, | |
| "step": 505500 | |
| }, | |
| { | |
| "epoch": 2.695561379956956, | |
| "grad_norm": 1.1693322658538818, | |
| "learning_rate": 0.00010234407328662549, | |
| "loss": 4.6087, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 2.698224978158495, | |
| "grad_norm": 1.169573187828064, | |
| "learning_rate": 0.00010145224811289793, | |
| "loss": 4.6091, | |
| "step": 506500 | |
| }, | |
| { | |
| "epoch": 2.7008885763600334, | |
| "grad_norm": 1.126935601234436, | |
| "learning_rate": 0.00010056220658951784, | |
| "loss": 4.6041, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 2.7035521745615716, | |
| "grad_norm": 1.132071614265442, | |
| "learning_rate": 9.96703814157903e-05, | |
| "loss": 4.6176, | |
| "step": 507500 | |
| }, | |
| { | |
| "epoch": 2.70621577276311, | |
| "grad_norm": 1.1209650039672852, | |
| "learning_rate": 9.877855624206277e-05, | |
| "loss": 4.6115, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 2.708879370964649, | |
| "grad_norm": 1.1064993143081665, | |
| "learning_rate": 9.788673106833521e-05, | |
| "loss": 4.614, | |
| "step": 508500 | |
| }, | |
| { | |
| "epoch": 2.711542969166187, | |
| "grad_norm": 1.2343615293502808, | |
| "learning_rate": 9.699490589460767e-05, | |
| "loss": 4.6112, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 2.7142065673677256, | |
| "grad_norm": 1.1082515716552734, | |
| "learning_rate": 9.610486437122757e-05, | |
| "loss": 4.6153, | |
| "step": 509500 | |
| }, | |
| { | |
| "epoch": 2.716870165569264, | |
| "grad_norm": 1.058441162109375, | |
| "learning_rate": 9.521303919750003e-05, | |
| "loss": 4.6144, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.719533763770803, | |
| "grad_norm": 1.2399941682815552, | |
| "learning_rate": 9.43212140237725e-05, | |
| "loss": 4.6078, | |
| "step": 510500 | |
| }, | |
| { | |
| "epoch": 2.7221973619723414, | |
| "grad_norm": 1.1185581684112549, | |
| "learning_rate": 9.342938885004495e-05, | |
| "loss": 4.6106, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 2.7248609601738796, | |
| "grad_norm": 1.1241427659988403, | |
| "learning_rate": 9.253934732666485e-05, | |
| "loss": 4.6117, | |
| "step": 511500 | |
| }, | |
| { | |
| "epoch": 2.727524558375418, | |
| "grad_norm": 1.118444800376892, | |
| "learning_rate": 9.164752215293731e-05, | |
| "loss": 4.6118, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 2.730188156576957, | |
| "grad_norm": 1.1134285926818848, | |
| "learning_rate": 9.075569697920977e-05, | |
| "loss": 4.6044, | |
| "step": 512500 | |
| }, | |
| { | |
| "epoch": 2.732851754778495, | |
| "grad_norm": 1.1537599563598633, | |
| "learning_rate": 8.986387180548223e-05, | |
| "loss": 4.6185, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 2.7355153529800336, | |
| "grad_norm": 1.1125168800354004, | |
| "learning_rate": 8.897204663175469e-05, | |
| "loss": 4.6028, | |
| "step": 513500 | |
| }, | |
| { | |
| "epoch": 2.738178951181572, | |
| "grad_norm": 1.1752519607543945, | |
| "learning_rate": 8.808022145802715e-05, | |
| "loss": 4.6123, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 2.740842549383111, | |
| "grad_norm": 1.105495572090149, | |
| "learning_rate": 8.719017993464705e-05, | |
| "loss": 4.6159, | |
| "step": 514500 | |
| }, | |
| { | |
| "epoch": 2.7435061475846494, | |
| "grad_norm": 1.0856335163116455, | |
| "learning_rate": 8.62983547609195e-05, | |
| "loss": 4.609, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 2.7461697457861876, | |
| "grad_norm": 1.145843505859375, | |
| "learning_rate": 8.540652958719197e-05, | |
| "loss": 4.6083, | |
| "step": 515500 | |
| }, | |
| { | |
| "epoch": 2.748833343987726, | |
| "grad_norm": 1.1720407009124756, | |
| "learning_rate": 8.451470441346443e-05, | |
| "loss": 4.6036, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 2.7514969421892648, | |
| "grad_norm": 1.2031077146530151, | |
| "learning_rate": 8.362287923973688e-05, | |
| "loss": 4.6093, | |
| "step": 516500 | |
| }, | |
| { | |
| "epoch": 2.754160540390803, | |
| "grad_norm": 1.2993487119674683, | |
| "learning_rate": 8.273283771635678e-05, | |
| "loss": 4.6076, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 2.7568241385923415, | |
| "grad_norm": 1.0932821035385132, | |
| "learning_rate": 8.184101254262924e-05, | |
| "loss": 4.5978, | |
| "step": 517500 | |
| }, | |
| { | |
| "epoch": 2.75948773679388, | |
| "grad_norm": 1.068040370941162, | |
| "learning_rate": 8.09491873689017e-05, | |
| "loss": 4.6077, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 2.7621513349954188, | |
| "grad_norm": 1.0666356086730957, | |
| "learning_rate": 8.005736219517416e-05, | |
| "loss": 4.6068, | |
| "step": 518500 | |
| }, | |
| { | |
| "epoch": 2.7648149331969574, | |
| "grad_norm": 1.1699191331863403, | |
| "learning_rate": 7.916732067179406e-05, | |
| "loss": 4.604, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.7674785313984955, | |
| "grad_norm": 1.1018375158309937, | |
| "learning_rate": 7.827549549806652e-05, | |
| "loss": 4.6085, | |
| "step": 519500 | |
| }, | |
| { | |
| "epoch": 2.770142129600034, | |
| "grad_norm": 1.2034190893173218, | |
| "learning_rate": 7.738367032433898e-05, | |
| "loss": 4.6146, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.7728057278015728, | |
| "grad_norm": 1.1737667322158813, | |
| "learning_rate": 7.649184515061142e-05, | |
| "loss": 4.6117, | |
| "step": 520500 | |
| }, | |
| { | |
| "epoch": 2.775469326003111, | |
| "grad_norm": 1.1514512300491333, | |
| "learning_rate": 7.56000199768839e-05, | |
| "loss": 4.6019, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.7781329242046495, | |
| "grad_norm": 1.0964044332504272, | |
| "learning_rate": 7.470819480315636e-05, | |
| "loss": 4.616, | |
| "step": 521500 | |
| }, | |
| { | |
| "epoch": 2.780796522406188, | |
| "grad_norm": 1.3086357116699219, | |
| "learning_rate": 7.381815327977626e-05, | |
| "loss": 4.6071, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.7834601206077263, | |
| "grad_norm": 1.073895812034607, | |
| "learning_rate": 7.292632810604872e-05, | |
| "loss": 4.6065, | |
| "step": 522500 | |
| }, | |
| { | |
| "epoch": 2.786123718809265, | |
| "grad_norm": 1.1826096773147583, | |
| "learning_rate": 7.203450293232116e-05, | |
| "loss": 4.6063, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.7887873170108035, | |
| "grad_norm": 1.230764627456665, | |
| "learning_rate": 7.114267775859364e-05, | |
| "loss": 4.6069, | |
| "step": 523500 | |
| }, | |
| { | |
| "epoch": 2.791450915212342, | |
| "grad_norm": 1.2007604837417603, | |
| "learning_rate": 7.025263623521354e-05, | |
| "loss": 4.5989, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.7941145134138807, | |
| "grad_norm": 1.0956413745880127, | |
| "learning_rate": 6.9360811061486e-05, | |
| "loss": 4.6065, | |
| "step": 524500 | |
| }, | |
| { | |
| "epoch": 2.796778111615419, | |
| "grad_norm": 1.1486014127731323, | |
| "learning_rate": 6.846898588775844e-05, | |
| "loss": 4.6125, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.7994417098169575, | |
| "grad_norm": 1.0698477029800415, | |
| "learning_rate": 6.75771607140309e-05, | |
| "loss": 4.6129, | |
| "step": 525500 | |
| }, | |
| { | |
| "epoch": 2.802105308018496, | |
| "grad_norm": 1.1725722551345825, | |
| "learning_rate": 6.668711919065082e-05, | |
| "loss": 4.6064, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.8047689062200343, | |
| "grad_norm": 1.1817371845245361, | |
| "learning_rate": 6.579529401692328e-05, | |
| "loss": 4.6115, | |
| "step": 526500 | |
| }, | |
| { | |
| "epoch": 2.807432504421573, | |
| "grad_norm": 1.0840002298355103, | |
| "learning_rate": 6.490346884319572e-05, | |
| "loss": 4.6077, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.8100961026231115, | |
| "grad_norm": 1.2627172470092773, | |
| "learning_rate": 6.401164366946818e-05, | |
| "loss": 4.5979, | |
| "step": 527500 | |
| }, | |
| { | |
| "epoch": 2.81275970082465, | |
| "grad_norm": 1.1478033065795898, | |
| "learning_rate": 6.311981849574064e-05, | |
| "loss": 4.6051, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.8154232990261887, | |
| "grad_norm": 1.1611443758010864, | |
| "learning_rate": 6.222977697236056e-05, | |
| "loss": 4.6019, | |
| "step": 528500 | |
| }, | |
| { | |
| "epoch": 2.818086897227727, | |
| "grad_norm": 1.2540146112442017, | |
| "learning_rate": 6.1337951798633e-05, | |
| "loss": 4.6126, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.8207504954292655, | |
| "grad_norm": 1.1421033143997192, | |
| "learning_rate": 6.0446126624905464e-05, | |
| "loss": 4.6093, | |
| "step": 529500 | |
| }, | |
| { | |
| "epoch": 2.823414093630804, | |
| "grad_norm": 1.157571792602539, | |
| "learning_rate": 5.955430145117793e-05, | |
| "loss": 4.5974, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.8260776918323423, | |
| "grad_norm": 1.2044634819030762, | |
| "learning_rate": 5.866247627745038e-05, | |
| "loss": 4.6016, | |
| "step": 530500 | |
| }, | |
| { | |
| "epoch": 2.828741290033881, | |
| "grad_norm": 1.1470133066177368, | |
| "learning_rate": 5.777243475407029e-05, | |
| "loss": 4.6065, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.8314048882354195, | |
| "grad_norm": 1.1482868194580078, | |
| "learning_rate": 5.6880609580342744e-05, | |
| "loss": 4.5994, | |
| "step": 531500 | |
| }, | |
| { | |
| "epoch": 2.834068486436958, | |
| "grad_norm": 1.1420148611068726, | |
| "learning_rate": 5.59887844066152e-05, | |
| "loss": 4.6044, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.8367320846384967, | |
| "grad_norm": 1.1463284492492676, | |
| "learning_rate": 5.509695923288766e-05, | |
| "loss": 4.6069, | |
| "step": 532500 | |
| }, | |
| { | |
| "epoch": 2.839395682840035, | |
| "grad_norm": 1.1625584363937378, | |
| "learning_rate": 5.4205134059160115e-05, | |
| "loss": 4.6004, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.8420592810415735, | |
| "grad_norm": 1.1769341230392456, | |
| "learning_rate": 5.3313308885432575e-05, | |
| "loss": 4.6074, | |
| "step": 533500 | |
| }, | |
| { | |
| "epoch": 2.844722879243112, | |
| "grad_norm": 1.1729334592819214, | |
| "learning_rate": 5.242326736205248e-05, | |
| "loss": 4.6021, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.8473864774446502, | |
| "grad_norm": 1.0966566801071167, | |
| "learning_rate": 5.1531442188324936e-05, | |
| "loss": 4.6069, | |
| "step": 534500 | |
| }, | |
| { | |
| "epoch": 2.850050075646189, | |
| "grad_norm": 1.1562509536743164, | |
| "learning_rate": 5.06396170145974e-05, | |
| "loss": 4.6079, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.8527136738477274, | |
| "grad_norm": 1.0936706066131592, | |
| "learning_rate": 4.9747791840869855e-05, | |
| "loss": 4.6052, | |
| "step": 535500 | |
| }, | |
| { | |
| "epoch": 2.855377272049266, | |
| "grad_norm": 1.1146814823150635, | |
| "learning_rate": 4.885596666714231e-05, | |
| "loss": 4.6007, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.8580408702508047, | |
| "grad_norm": 1.1146438121795654, | |
| "learning_rate": 4.7965925143762216e-05, | |
| "loss": 4.6097, | |
| "step": 536500 | |
| }, | |
| { | |
| "epoch": 2.860704468452343, | |
| "grad_norm": 1.1392590999603271, | |
| "learning_rate": 4.7074099970034675e-05, | |
| "loss": 4.6014, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.8633680666538814, | |
| "grad_norm": 1.1630158424377441, | |
| "learning_rate": 4.6182274796307135e-05, | |
| "loss": 4.6029, | |
| "step": 537500 | |
| }, | |
| { | |
| "epoch": 2.86603166485542, | |
| "grad_norm": 1.1878501176834106, | |
| "learning_rate": 4.529044962257959e-05, | |
| "loss": 4.6065, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.868695263056958, | |
| "grad_norm": 1.2973501682281494, | |
| "learning_rate": 4.4400408099199496e-05, | |
| "loss": 4.6, | |
| "step": 538500 | |
| }, | |
| { | |
| "epoch": 2.871358861258497, | |
| "grad_norm": 1.136915683746338, | |
| "learning_rate": 4.3508582925471955e-05, | |
| "loss": 4.6006, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.8740224594600354, | |
| "grad_norm": 1.2329761981964111, | |
| "learning_rate": 4.261675775174441e-05, | |
| "loss": 4.6019, | |
| "step": 539500 | |
| }, | |
| { | |
| "epoch": 2.8766860576615736, | |
| "grad_norm": 1.1819766759872437, | |
| "learning_rate": 4.172493257801686e-05, | |
| "loss": 4.5992, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.879349655863112, | |
| "grad_norm": 1.116248369216919, | |
| "learning_rate": 4.083489105463678e-05, | |
| "loss": 4.5994, | |
| "step": 540500 | |
| }, | |
| { | |
| "epoch": 2.882013254064651, | |
| "grad_norm": 1.3588722944259644, | |
| "learning_rate": 3.9943065880909235e-05, | |
| "loss": 4.605, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.8846768522661894, | |
| "grad_norm": 1.2594339847564697, | |
| "learning_rate": 3.905124070718169e-05, | |
| "loss": 4.5973, | |
| "step": 541500 | |
| }, | |
| { | |
| "epoch": 2.887340450467728, | |
| "grad_norm": 1.1628178358078003, | |
| "learning_rate": 3.815941553345415e-05, | |
| "loss": 4.6019, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.890004048669266, | |
| "grad_norm": 1.2354239225387573, | |
| "learning_rate": 3.72675903597266e-05, | |
| "loss": 4.5984, | |
| "step": 542500 | |
| }, | |
| { | |
| "epoch": 2.892667646870805, | |
| "grad_norm": 1.2508246898651123, | |
| "learning_rate": 3.6377548836346515e-05, | |
| "loss": 4.5962, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.8953312450723434, | |
| "grad_norm": 1.1606773138046265, | |
| "learning_rate": 3.548572366261897e-05, | |
| "loss": 4.6024, | |
| "step": 543500 | |
| }, | |
| { | |
| "epoch": 2.8979948432738816, | |
| "grad_norm": 1.2162941694259644, | |
| "learning_rate": 3.459389848889143e-05, | |
| "loss": 4.6087, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.90065844147542, | |
| "grad_norm": 1.373126745223999, | |
| "learning_rate": 3.370207331516388e-05, | |
| "loss": 4.5972, | |
| "step": 544500 | |
| }, | |
| { | |
| "epoch": 2.903322039676959, | |
| "grad_norm": 1.1077393293380737, | |
| "learning_rate": 3.281203179178379e-05, | |
| "loss": 4.6033, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.9059856378784974, | |
| "grad_norm": 1.1094976663589478, | |
| "learning_rate": 3.1920206618056255e-05, | |
| "loss": 4.5966, | |
| "step": 545500 | |
| }, | |
| { | |
| "epoch": 2.908649236080036, | |
| "grad_norm": 1.182131052017212, | |
| "learning_rate": 3.102838144432871e-05, | |
| "loss": 4.6056, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.911312834281574, | |
| "grad_norm": 1.2314406633377075, | |
| "learning_rate": 3.0136556270601163e-05, | |
| "loss": 4.5982, | |
| "step": 546500 | |
| }, | |
| { | |
| "epoch": 2.9139764324831128, | |
| "grad_norm": 1.1318516731262207, | |
| "learning_rate": 2.9244731096873616e-05, | |
| "loss": 4.5985, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.9166400306846514, | |
| "grad_norm": 1.1479239463806152, | |
| "learning_rate": 2.835468957349353e-05, | |
| "loss": 4.6081, | |
| "step": 547500 | |
| }, | |
| { | |
| "epoch": 2.9193036288861895, | |
| "grad_norm": 1.1278290748596191, | |
| "learning_rate": 2.7462864399765984e-05, | |
| "loss": 4.6014, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.921967227087728, | |
| "grad_norm": 1.300802230834961, | |
| "learning_rate": 2.6571039226038443e-05, | |
| "loss": 4.6044, | |
| "step": 548500 | |
| }, | |
| { | |
| "epoch": 2.9246308252892668, | |
| "grad_norm": 1.1365079879760742, | |
| "learning_rate": 2.56792140523109e-05, | |
| "loss": 4.5968, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.9272944234908054, | |
| "grad_norm": 1.1759607791900635, | |
| "learning_rate": 2.4787388878583352e-05, | |
| "loss": 4.6017, | |
| "step": 549500 | |
| }, | |
| { | |
| "epoch": 2.929958021692344, | |
| "grad_norm": 1.2129359245300293, | |
| "learning_rate": 2.389556370485581e-05, | |
| "loss": 4.5963, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.932621619893882, | |
| "grad_norm": 1.1694817543029785, | |
| "learning_rate": 2.300552218147572e-05, | |
| "loss": 4.6016, | |
| "step": 550500 | |
| }, | |
| { | |
| "epoch": 2.9352852180954208, | |
| "grad_norm": 1.108017086982727, | |
| "learning_rate": 2.211369700774818e-05, | |
| "loss": 4.6028, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.9379488162969594, | |
| "grad_norm": 1.1087723970413208, | |
| "learning_rate": 2.1221871834020636e-05, | |
| "loss": 4.5982, | |
| "step": 551500 | |
| }, | |
| { | |
| "epoch": 2.9406124144984975, | |
| "grad_norm": 1.055584192276001, | |
| "learning_rate": 2.0330046660293088e-05, | |
| "loss": 4.6014, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.943276012700036, | |
| "grad_norm": 1.1524064540863037, | |
| "learning_rate": 1.9440005136913003e-05, | |
| "loss": 4.5961, | |
| "step": 552500 | |
| }, | |
| { | |
| "epoch": 2.9459396109015747, | |
| "grad_norm": 1.16587233543396, | |
| "learning_rate": 1.8548179963185456e-05, | |
| "loss": 4.5915, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.9486032091031134, | |
| "grad_norm": 1.1600918769836426, | |
| "learning_rate": 1.7656354789457912e-05, | |
| "loss": 4.596, | |
| "step": 553500 | |
| }, | |
| { | |
| "epoch": 2.951266807304652, | |
| "grad_norm": 1.1187764406204224, | |
| "learning_rate": 1.676452961573037e-05, | |
| "loss": 4.6055, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.95393040550619, | |
| "grad_norm": 1.2266861200332642, | |
| "learning_rate": 1.5872704442002824e-05, | |
| "loss": 4.6004, | |
| "step": 554500 | |
| }, | |
| { | |
| "epoch": 2.9565940037077287, | |
| "grad_norm": 1.130671739578247, | |
| "learning_rate": 1.4980879268275282e-05, | |
| "loss": 4.5992, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.9592576019092673, | |
| "grad_norm": 1.1526157855987549, | |
| "learning_rate": 1.4089054094547738e-05, | |
| "loss": 4.6047, | |
| "step": 555500 | |
| }, | |
| { | |
| "epoch": 2.9619212001108055, | |
| "grad_norm": 1.2285641431808472, | |
| "learning_rate": 1.3197228920820194e-05, | |
| "loss": 4.5974, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.964584798312344, | |
| "grad_norm": 1.1854966878890991, | |
| "learning_rate": 1.2307187397440106e-05, | |
| "loss": 4.5995, | |
| "step": 556500 | |
| }, | |
| { | |
| "epoch": 2.9672483965138827, | |
| "grad_norm": 1.1808573007583618, | |
| "learning_rate": 1.141536222371256e-05, | |
| "loss": 4.6021, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.9699119947154213, | |
| "grad_norm": 1.1743810176849365, | |
| "learning_rate": 1.0523537049985018e-05, | |
| "loss": 4.5979, | |
| "step": 557500 | |
| }, | |
| { | |
| "epoch": 2.9725755929169595, | |
| "grad_norm": 1.172972321510315, | |
| "learning_rate": 9.631711876257474e-06, | |
| "loss": 4.5972, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.975239191118498, | |
| "grad_norm": 1.1044169664382935, | |
| "learning_rate": 8.741670352877386e-06, | |
| "loss": 4.5968, | |
| "step": 558500 | |
| }, | |
| { | |
| "epoch": 2.9779027893200367, | |
| "grad_norm": 1.1353402137756348, | |
| "learning_rate": 7.84984517914984e-06, | |
| "loss": 4.597, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.9805663875215753, | |
| "grad_norm": 1.1849350929260254, | |
| "learning_rate": 6.958020005422297e-06, | |
| "loss": 4.6015, | |
| "step": 559500 | |
| }, | |
| { | |
| "epoch": 2.9832299857231135, | |
| "grad_norm": 1.2167035341262817, | |
| "learning_rate": 6.066194831694753e-06, | |
| "loss": 4.5984, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.985893583924652, | |
| "grad_norm": 1.1984131336212158, | |
| "learning_rate": 5.176153308314665e-06, | |
| "loss": 4.5977, | |
| "step": 560500 | |
| }, | |
| { | |
| "epoch": 2.9885571821261907, | |
| "grad_norm": 1.148808240890503, | |
| "learning_rate": 4.2843281345871205e-06, | |
| "loss": 4.592, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.991220780327729, | |
| "grad_norm": 1.1721874475479126, | |
| "learning_rate": 3.392502960859577e-06, | |
| "loss": 4.5946, | |
| "step": 561500 | |
| }, | |
| { | |
| "epoch": 2.9938843785292675, | |
| "grad_norm": 1.171322226524353, | |
| "learning_rate": 2.500677787132033e-06, | |
| "loss": 4.6057, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.996547976730806, | |
| "grad_norm": 1.1349517107009888, | |
| "learning_rate": 1.608852613404489e-06, | |
| "loss": 4.5944, | |
| "step": 562500 | |
| }, | |
| { | |
| "epoch": 2.9992115749323447, | |
| "grad_norm": 1.145351529121399, | |
| "learning_rate": 7.188110900244004e-07, | |
| "loss": 4.5939, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 563148, | |
| "total_flos": 2.917985780733604e+17, | |
| "train_loss": 4.746018495113768, | |
| "train_runtime": 39559.2785, | |
| "train_samples_per_second": 911.074, | |
| "train_steps_per_second": 14.236 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 563148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.917985780733604e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
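
The JSON above matches the schema of the `trainer_state.json` file that the Hugging Face `transformers` Trainer writes at each checkpoint: `log_history` holds one entry per `logging_steps` (here, every 500 steps) with `step`, `loss`, `learning_rate`, and `grad_norm`, and its final entry is the end-of-training summary (`train_loss`, `train_runtime`, throughput). A minimal sketch of how this structure can be consumed follows, assuming the file is saved under its default name `trainer_state.json`; the filename and the printed fields are illustrative, not part of the log itself.

```python
# Minimal sketch: load a Trainer state file, separate the per-step log
# entries from the final summary entry, and report a few headline numbers.
# Assumes the JSON above was saved as "trainer_state.json" (the Trainer's
# default checkpoint filename).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Intermediate entries carry step/loss/learning_rate/grad_norm; the last
# entry has no "loss" key and is the end-of-training summary instead.
logs = [e for e in state["log_history"] if "loss" in e]
summary = state["log_history"][-1]

steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

print(f"logged points:      {len(logs)} (every {state['logging_steps']} steps)")
print(f"final logged loss:  {losses[-1]}")
print(f"mean train loss:    {summary['train_loss']}")
print(f"steps / epochs:     {summary['step']} / {state['num_train_epochs']}")
```

The `steps`/`losses`/`lrs` lists are ready to plot directly (e.g. with matplotlib), which makes the loss plateau around ~4.6 and the linear learning-rate decay toward zero in the entries above easy to see at a glance.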