{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 11838, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.8175293207168579, "learning_rate": 0.0001999999119654754, "loss": 1.7625, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.461701363325119, "learning_rate": 0.00019999964786205653, "loss": 1.5481, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.5782307386398315, "learning_rate": 0.00019999920769020845, "loss": 1.466, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.6779885292053223, "learning_rate": 0.00019999859145070615, "loss": 1.2965, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.5275429487228394, "learning_rate": 0.00019999779914463462, "loss": 1.1209, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.4957067668437958, "learning_rate": 0.0001999968307733889, "loss": 1.0367, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.41212332248687744, "learning_rate": 0.000199995686338674, "loss": 1.1318, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.6483787894248962, "learning_rate": 0.00019999436584250483, "loss": 1.1682, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.5552895665168762, "learning_rate": 0.00019999286928720647, "loss": 1.1751, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.5129882097244263, "learning_rate": 0.00019999119667541386, "loss": 1.1218, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.504658043384552, "learning_rate": 0.00019998934801007193, "loss": 1.0083, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.5507149696350098, "learning_rate": 0.00019998732329443562, "loss": 1.0734, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.4575830101966858, "learning_rate": 0.00019998512253206982, "loss": 1.1557, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.5136922001838684, "learning_rate": 0.0001999827457268494, "loss": 1.0223, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.4587676227092743, "learning_rate": 0.00019998019288295922, "loss": 1.1057, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.5710563063621521, "learning_rate": 0.00019997746400489397, "loss": 1.0945, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.4434487223625183, "learning_rate": 0.00019997455909745844, "loss": 1.1251, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.5528855323791504, "learning_rate": 0.00019997147816576717, "loss": 1.1266, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.5259051322937012, "learning_rate": 0.00019996822121524485, "loss": 1.1532, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.532082736492157, "learning_rate": 0.00019996478825162585, "loss": 1.3226, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.5315178632736206, "learning_rate": 0.00019996117928095463, "loss": 1.0409, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.5802783370018005, "learning_rate": 0.00019995739430958545, "loss": 1.1499, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.5138378739356995, "learning_rate": 0.00019995343334418245, "loss": 1.0474, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.4681589603424072, "learning_rate": 0.0001999492963917197, "loss": 1.1214, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.5345844626426697, "learning_rate": 0.00019994498345948108, "loss": 1.0918, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.5528503060340881, "learning_rate": 0.00019994049455506033, "loss": 1.178, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.5040786266326904, "learning_rate": 0.00019993582968636097, "loss": 0.9658, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.4512442350387573, "learning_rate": 0.0001999309888615965, "loss": 1.1075, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.464190810918808, "learning_rate": 0.00019992597208929, "loss": 1.08, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.5537208914756775, "learning_rate": 0.00019992077937827456, "loss": 1.1566, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.42293214797973633, "learning_rate": 0.00019991541073769283, "loss": 1.0393, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.4625384211540222, "learning_rate": 0.0001999098661769974, "loss": 1.0042, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.4168233871459961, "learning_rate": 0.0001999041457059505, "loss": 1.0598, "step": 165 }, { "epoch": 0.01, "grad_norm": 0.46117836236953735, "learning_rate": 0.0001998982493346241, "loss": 1.0951, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.5079285502433777, "learning_rate": 0.0001998921770733999, "loss": 1.043, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.5561312437057495, "learning_rate": 0.00019988592893296927, "loss": 0.9438, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.5467772483825684, "learning_rate": 0.00019987950492433325, "loss": 0.9862, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.41703835129737854, "learning_rate": 0.0001998729050588025, "loss": 1.0838, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.5075678825378418, "learning_rate": 0.0001998661293479974, "loss": 0.9902, "step": 195 }, { "epoch": 0.02, "grad_norm": 0.50690096616745, "learning_rate": 0.00019985917780384786, "loss": 1.0627, "step": 200 }, { "epoch": 0.02, "grad_norm": 0.5631605386734009, "learning_rate": 0.00019985205043859336, "loss": 1.0876, "step": 205 }, { "epoch": 0.02, "grad_norm": 0.6694210171699524, "learning_rate": 0.00019984474726478303, "loss": 1.1046, "step": 210 }, { "epoch": 0.02, "grad_norm": 0.5434871912002563, "learning_rate": 0.00019983726829527547, "loss": 1.0263, "step": 215 }, { "epoch": 0.02, "grad_norm": 0.6015032529830933, "learning_rate": 0.00019982961354323887, "loss": 1.0756, "step": 220 }, { "epoch": 0.02, "grad_norm": 0.4603692591190338, "learning_rate": 0.00019982178302215082, "loss": 1.0387, "step": 225 }, { "epoch": 0.02, "grad_norm": 0.5583211183547974, "learning_rate": 0.00019981377674579845, "loss": 0.9821, "step": 230 }, { "epoch": 0.02, "grad_norm": 0.7076869606971741, "learning_rate": 0.00019980559472827843, "loss": 1.1412, "step": 235 }, { "epoch": 0.02, "grad_norm": 0.5172699689865112, "learning_rate": 0.00019979723698399665, "loss": 1.0096, "step": 240 }, { "epoch": 0.02, "grad_norm": 0.5216347575187683, "learning_rate": 0.00019978870352766853, "loss": 0.946, "step": 245 }, { "epoch": 0.02, "grad_norm": 0.490485817193985, "learning_rate": 0.0001997799943743189, "loss": 1.0325, "step": 250 }, { "epoch": 0.02, "grad_norm": 0.5536062717437744, "learning_rate": 0.00019977110953928182, "loss": 0.9317, "step": 255 }, { "epoch": 0.02, "grad_norm": 0.5582374930381775, "learning_rate": 0.0001997620490382008, "loss": 0.954, "step": 260 }, { "epoch": 0.02, "grad_norm": 0.47851961851119995, "learning_rate": 0.0001997528128870285, "loss": 1.0212, "step": 265 }, { "epoch": 0.02, "grad_norm": 0.5606931447982788, "learning_rate": 0.00019974340110202697, "loss": 1.0405, "step": 270 }, { "epoch": 0.02, "grad_norm": 0.611594021320343, "learning_rate": 0.00019973381369976746, "loss": 1.0257, "step": 275 }, { "epoch": 0.02, "grad_norm": 0.46861839294433594, "learning_rate": 0.00019972405069713041, "loss": 0.9963, "step": 280 }, { "epoch": 0.02, "grad_norm": 0.6010798811912537, "learning_rate": 0.00019971411211130543, "loss": 1.1186, "step": 285 }, { "epoch": 0.02, "grad_norm": 0.427389919757843, "learning_rate": 0.00019970399795979132, "loss": 1.0869, "step": 290 }, { "epoch": 0.02, "grad_norm": 0.46258246898651123, "learning_rate": 0.00019969370826039592, "loss": 0.9047, "step": 295 }, { "epoch": 0.03, "grad_norm": 0.5251803994178772, "learning_rate": 0.00019968324303123625, "loss": 0.9507, "step": 300 }, { "epoch": 0.03, "grad_norm": 0.5841838121414185, "learning_rate": 0.00019967260229073836, "loss": 0.9455, "step": 305 }, { "epoch": 0.03, "grad_norm": 0.6464457511901855, "learning_rate": 0.00019966178605763726, "loss": 1.1312, "step": 310 }, { "epoch": 0.03, "grad_norm": 0.5451318621635437, "learning_rate": 0.00019965079435097698, "loss": 1.0751, "step": 315 }, { "epoch": 0.03, "grad_norm": 0.6237244009971619, "learning_rate": 0.00019963962719011055, "loss": 1.0577, "step": 320 }, { "epoch": 0.03, "grad_norm": 0.4929693043231964, "learning_rate": 0.00019962828459469984, "loss": 0.995, "step": 325 }, { "epoch": 0.03, "grad_norm": 0.529183566570282, "learning_rate": 0.0001996167665847157, "loss": 1.0397, "step": 330 }, { "epoch": 0.03, "grad_norm": 0.5752055644989014, "learning_rate": 0.00019960507318043775, "loss": 1.0677, "step": 335 }, { "epoch": 0.03, "grad_norm": 0.5159496665000916, "learning_rate": 0.00019959320440245443, "loss": 1.0188, "step": 340 }, { "epoch": 0.03, "grad_norm": 0.5186464786529541, "learning_rate": 0.00019958116027166307, "loss": 1.0477, "step": 345 }, { "epoch": 0.03, "grad_norm": 0.5070178508758545, "learning_rate": 0.00019956894080926958, "loss": 1.0221, "step": 350 }, { "epoch": 0.03, "grad_norm": 0.6111320853233337, "learning_rate": 0.00019955654603678866, "loss": 1.0038, "step": 355 }, { "epoch": 0.03, "grad_norm": 0.5824604630470276, "learning_rate": 0.0001995439759760437, "loss": 1.1021, "step": 360 }, { "epoch": 0.03, "grad_norm": 0.5628572702407837, "learning_rate": 0.00019953123064916665, "loss": 1.0429, "step": 365 }, { "epoch": 0.03, "grad_norm": 0.5787538290023804, "learning_rate": 0.00019951831007859814, "loss": 1.0226, "step": 370 }, { "epoch": 0.03, "grad_norm": 0.46730467677116394, "learning_rate": 0.00019950521428708723, "loss": 0.9446, "step": 375 }, { "epoch": 0.03, "grad_norm": 0.5635990500450134, "learning_rate": 0.0001994919432976916, "loss": 1.081, "step": 380 }, { "epoch": 0.03, "grad_norm": 0.47697439789772034, "learning_rate": 0.00019947849713377734, "loss": 1.0084, "step": 385 }, { "epoch": 0.03, "grad_norm": 0.566819965839386, "learning_rate": 0.00019946487581901895, "loss": 0.9866, "step": 390 }, { "epoch": 0.03, "grad_norm": 0.585972785949707, "learning_rate": 0.00019945107937739944, "loss": 1.0689, "step": 395 }, { "epoch": 0.03, "grad_norm": 0.5571792125701904, "learning_rate": 0.00019943710783320998, "loss": 0.9717, "step": 400 }, { "epoch": 0.03, "grad_norm": 0.506241500377655, "learning_rate": 0.00019942296121105017, "loss": 0.9637, "step": 405 }, { "epoch": 0.03, "grad_norm": 0.5207057595252991, "learning_rate": 0.00019940863953582787, "loss": 0.9639, "step": 410 }, { "epoch": 0.04, "grad_norm": 0.4859829246997833, "learning_rate": 0.00019939414283275906, "loss": 0.9717, "step": 415 }, { "epoch": 0.04, "grad_norm": 0.599038302898407, "learning_rate": 0.00019937947112736796, "loss": 1.1271, "step": 420 }, { "epoch": 0.04, "grad_norm": 0.6036107540130615, "learning_rate": 0.00019936462444548693, "loss": 0.9301, "step": 425 }, { "epoch": 0.04, "grad_norm": 0.6932292580604553, "learning_rate": 0.00019934960281325635, "loss": 1.0803, "step": 430 }, { "epoch": 0.04, "grad_norm": 0.6190482974052429, "learning_rate": 0.0001993344062571247, "loss": 0.9878, "step": 435 }, { "epoch": 0.04, "grad_norm": 0.4994017481803894, "learning_rate": 0.00019931903480384838, "loss": 0.9301, "step": 440 }, { "epoch": 0.04, "grad_norm": 0.4884631633758545, "learning_rate": 0.00019930348848049177, "loss": 0.9366, "step": 445 }, { "epoch": 0.04, "grad_norm": 0.5066394209861755, "learning_rate": 0.00019928776731442712, "loss": 0.9241, "step": 450 }, { "epoch": 0.04, "grad_norm": 0.5030893683433533, "learning_rate": 0.00019927187133333456, "loss": 0.9532, "step": 455 }, { "epoch": 0.04, "grad_norm": 0.5532776713371277, "learning_rate": 0.00019925580056520198, "loss": 0.8736, "step": 460 }, { "epoch": 0.04, "grad_norm": 0.4686765968799591, "learning_rate": 0.00019923955503832504, "loss": 0.9661, "step": 465 }, { "epoch": 0.04, "grad_norm": 0.5450586676597595, "learning_rate": 0.00019922313478130713, "loss": 0.9508, "step": 470 }, { "epoch": 0.04, "grad_norm": 0.536483645439148, "learning_rate": 0.00019920653982305911, "loss": 1.128, "step": 475 }, { "epoch": 0.04, "grad_norm": 0.6655276417732239, "learning_rate": 0.0001991897701927997, "loss": 1.0245, "step": 480 }, { "epoch": 0.04, "grad_norm": 0.6011082530021667, "learning_rate": 0.00019917282592005496, "loss": 1.1497, "step": 485 }, { "epoch": 0.04, "grad_norm": 0.9368462562561035, "learning_rate": 0.0001991557070346585, "loss": 0.8886, "step": 490 }, { "epoch": 0.04, "grad_norm": 0.6440569162368774, "learning_rate": 0.00019913841356675142, "loss": 1.1014, "step": 495 }, { "epoch": 0.04, "grad_norm": 0.5625852942466736, "learning_rate": 0.00019912094554678215, "loss": 0.8747, "step": 500 }, { "epoch": 0.04, "grad_norm": 0.5758554339408875, "learning_rate": 0.00019910330300550646, "loss": 1.0431, "step": 505 }, { "epoch": 0.04, "grad_norm": 0.6677930355072021, "learning_rate": 0.00019908548597398742, "loss": 0.7627, "step": 510 }, { "epoch": 0.04, "grad_norm": 0.4941328763961792, "learning_rate": 0.0001990674944835953, "loss": 0.9874, "step": 515 }, { "epoch": 0.04, "grad_norm": 0.5404765009880066, "learning_rate": 0.00019904932856600752, "loss": 1.087, "step": 520 }, { "epoch": 0.04, "grad_norm": 0.5204087495803833, "learning_rate": 0.00019903098825320867, "loss": 1.0357, "step": 525 }, { "epoch": 0.04, "grad_norm": 0.45817357301712036, "learning_rate": 0.00019901247357749036, "loss": 1.017, "step": 530 }, { "epoch": 0.05, "grad_norm": 0.7364314794540405, "learning_rate": 0.0001989937845714512, "loss": 1.0032, "step": 535 }, { "epoch": 0.05, "grad_norm": 0.4314422905445099, "learning_rate": 0.00019897492126799674, "loss": 0.8679, "step": 540 }, { "epoch": 0.05, "grad_norm": 0.5871337652206421, "learning_rate": 0.00019895588370033942, "loss": 0.9531, "step": 545 }, { "epoch": 0.05, "grad_norm": 0.5464487671852112, "learning_rate": 0.00019893667190199848, "loss": 1.074, "step": 550 }, { "epoch": 0.05, "grad_norm": 0.5099916458129883, "learning_rate": 0.00019891728590680003, "loss": 1.026, "step": 555 }, { "epoch": 0.05, "grad_norm": 0.5275132656097412, "learning_rate": 0.00019889772574887673, "loss": 1.0716, "step": 560 }, { "epoch": 0.05, "grad_norm": 0.6441323161125183, "learning_rate": 0.000198877991462668, "loss": 1.0636, "step": 565 }, { "epoch": 0.05, "grad_norm": 0.6186425089836121, "learning_rate": 0.00019885808308291977, "loss": 0.9923, "step": 570 }, { "epoch": 0.05, "grad_norm": 0.6667066216468811, "learning_rate": 0.00019883800064468462, "loss": 1.0927, "step": 575 }, { "epoch": 0.05, "grad_norm": 0.5096490383148193, "learning_rate": 0.0001988177441833214, "loss": 0.9456, "step": 580 }, { "epoch": 0.05, "grad_norm": 0.581665575504303, "learning_rate": 0.00019879731373449554, "loss": 1.1691, "step": 585 }, { "epoch": 0.05, "grad_norm": 0.6705063581466675, "learning_rate": 0.00019877670933417872, "loss": 1.0357, "step": 590 }, { "epoch": 0.05, "grad_norm": 0.5527098774909973, "learning_rate": 0.0001987559310186489, "loss": 0.9919, "step": 595 }, { "epoch": 0.05, "grad_norm": 0.5956196188926697, "learning_rate": 0.0001987349788244903, "loss": 1.1586, "step": 600 }, { "epoch": 0.05, "grad_norm": 0.5246900916099548, "learning_rate": 0.0001987138527885932, "loss": 1.0395, "step": 605 }, { "epoch": 0.05, "grad_norm": 0.6794516444206238, "learning_rate": 0.00019869255294815402, "loss": 1.0114, "step": 610 }, { "epoch": 0.05, "grad_norm": 0.6502415537834167, "learning_rate": 0.00019867107934067523, "loss": 1.0978, "step": 615 }, { "epoch": 0.05, "grad_norm": 0.4909512996673584, "learning_rate": 0.00019864943200396517, "loss": 0.9087, "step": 620 }, { "epoch": 0.05, "grad_norm": 0.6279435753822327, "learning_rate": 0.0001986276109761381, "loss": 1.0851, "step": 625 }, { "epoch": 0.05, "grad_norm": 0.5172310471534729, "learning_rate": 0.0001986056162956141, "loss": 1.0416, "step": 630 }, { "epoch": 0.05, "grad_norm": 0.49973011016845703, "learning_rate": 0.00019858344800111898, "loss": 1.1246, "step": 635 }, { "epoch": 0.05, "grad_norm": 0.5211731195449829, "learning_rate": 0.0001985611061316843, "loss": 0.9146, "step": 640 }, { "epoch": 0.05, "grad_norm": 0.6123340725898743, "learning_rate": 0.0001985385907266471, "loss": 1.1233, "step": 645 }, { "epoch": 0.05, "grad_norm": 0.5667193531990051, "learning_rate": 0.00019851590182565012, "loss": 1.0207, "step": 650 }, { "epoch": 0.06, "grad_norm": 0.5176669359207153, "learning_rate": 0.0001984930394686414, "loss": 0.8969, "step": 655 }, { "epoch": 0.06, "grad_norm": 0.5587745308876038, "learning_rate": 0.00019847000369587457, "loss": 0.9042, "step": 660 }, { "epoch": 0.06, "grad_norm": 0.6178166270256042, "learning_rate": 0.00019844679454790844, "loss": 1.1374, "step": 665 }, { "epoch": 0.06, "grad_norm": 0.7185156941413879, "learning_rate": 0.00019842341206560712, "loss": 1.0347, "step": 670 }, { "epoch": 0.06, "grad_norm": 0.6146171689033508, "learning_rate": 0.00019839985629013999, "loss": 0.9015, "step": 675 }, { "epoch": 0.06, "grad_norm": 0.550493061542511, "learning_rate": 0.00019837612726298143, "loss": 1.0746, "step": 680 }, { "epoch": 0.06, "grad_norm": 0.5300818681716919, "learning_rate": 0.0001983522250259109, "loss": 1.0232, "step": 685 }, { "epoch": 0.06, "grad_norm": 0.7765306234359741, "learning_rate": 0.0001983281496210129, "loss": 1.0948, "step": 690 }, { "epoch": 0.06, "grad_norm": 0.6479122638702393, "learning_rate": 0.00019830390109067673, "loss": 1.0204, "step": 695 }, { "epoch": 0.06, "grad_norm": 0.6021077036857605, "learning_rate": 0.00019827947947759653, "loss": 0.9591, "step": 700 }, { "epoch": 0.06, "grad_norm": 0.7223843336105347, "learning_rate": 0.0001982548848247712, "loss": 1.0684, "step": 705 }, { "epoch": 0.06, "grad_norm": 0.6151075959205627, "learning_rate": 0.00019823011717550438, "loss": 1.0078, "step": 710 }, { "epoch": 0.06, "grad_norm": 0.5454387068748474, "learning_rate": 0.0001982051765734042, "loss": 0.9134, "step": 715 }, { "epoch": 0.06, "grad_norm": 0.5072502493858337, "learning_rate": 0.00019818006306238328, "loss": 0.9802, "step": 720 }, { "epoch": 0.06, "grad_norm": 0.5242615938186646, "learning_rate": 0.0001981547766866588, "loss": 1.0727, "step": 725 }, { "epoch": 0.06, "grad_norm": 0.573273241519928, "learning_rate": 0.00019812931749075223, "loss": 0.941, "step": 730 }, { "epoch": 0.06, "grad_norm": 0.6620715856552124, "learning_rate": 0.00019810368551948936, "loss": 0.7937, "step": 735 }, { "epoch": 0.06, "grad_norm": 0.5468537211418152, "learning_rate": 0.00019807788081800012, "loss": 0.9955, "step": 740 }, { "epoch": 0.06, "grad_norm": 0.4930030405521393, "learning_rate": 0.00019805190343171857, "loss": 1.0195, "step": 745 }, { "epoch": 0.06, "grad_norm": 0.6010162234306335, "learning_rate": 0.00019802575340638296, "loss": 0.8513, "step": 750 }, { "epoch": 0.06, "grad_norm": 0.6166960000991821, "learning_rate": 0.0001979994307880353, "loss": 1.0954, "step": 755 }, { "epoch": 0.06, "grad_norm": 0.6688129901885986, "learning_rate": 0.00019797293562302158, "loss": 0.9794, "step": 760 }, { "epoch": 0.06, "grad_norm": 0.6365656852722168, "learning_rate": 0.00019794626795799158, "loss": 1.0153, "step": 765 }, { "epoch": 0.07, "grad_norm": 0.5212395787239075, "learning_rate": 0.00019791942783989889, "loss": 0.9277, "step": 770 }, { "epoch": 0.07, "grad_norm": 0.7180500626564026, "learning_rate": 0.00019789241531600053, "loss": 1.001, "step": 775 }, { "epoch": 0.07, "grad_norm": 0.5612985491752625, "learning_rate": 0.00019786523043385727, "loss": 1.0079, "step": 780 }, { "epoch": 0.07, "grad_norm": 0.5919773578643799, "learning_rate": 0.00019783787324133324, "loss": 0.8491, "step": 785 }, { "epoch": 0.07, "grad_norm": 0.5292619466781616, "learning_rate": 0.00019781034378659604, "loss": 0.8185, "step": 790 }, { "epoch": 0.07, "grad_norm": 0.45849528908729553, "learning_rate": 0.00019778264211811646, "loss": 0.8061, "step": 795 }, { "epoch": 0.07, "grad_norm": 0.6099018454551697, "learning_rate": 0.0001977547682846686, "loss": 1.0933, "step": 800 }, { "epoch": 0.07, "grad_norm": 0.4419599175453186, "learning_rate": 0.00019772672233532964, "loss": 1.0046, "step": 805 }, { "epoch": 0.07, "grad_norm": 0.5074923634529114, "learning_rate": 0.0001976985043194798, "loss": 0.9954, "step": 810 }, { "epoch": 0.07, "grad_norm": 0.5581572651863098, "learning_rate": 0.00019767011428680227, "loss": 0.8782, "step": 815 }, { "epoch": 0.07, "grad_norm": 0.603863537311554, "learning_rate": 0.00019764155228728315, "loss": 0.8936, "step": 820 }, { "epoch": 0.07, "grad_norm": 0.6839099526405334, "learning_rate": 0.0001976128183712113, "loss": 0.98, "step": 825 }, { "epoch": 0.07, "grad_norm": 0.6504345536231995, "learning_rate": 0.00019758391258917814, "loss": 0.9985, "step": 830 }, { "epoch": 0.07, "grad_norm": 0.5674350261688232, "learning_rate": 0.0001975548349920779, "loss": 1.0153, "step": 835 }, { "epoch": 0.07, "grad_norm": 0.5308940410614014, "learning_rate": 0.00019752558563110724, "loss": 1.0934, "step": 840 }, { "epoch": 0.07, "grad_norm": 0.6243263483047485, "learning_rate": 0.0001974961645577652, "loss": 1.059, "step": 845 }, { "epoch": 0.07, "grad_norm": 0.5877118706703186, "learning_rate": 0.00019746657182385314, "loss": 0.9244, "step": 850 }, { "epoch": 0.07, "grad_norm": 0.6568809151649475, "learning_rate": 0.00019743680748147478, "loss": 0.8841, "step": 855 }, { "epoch": 0.07, "grad_norm": 0.5473036170005798, "learning_rate": 0.00019740687158303585, "loss": 0.8777, "step": 860 }, { "epoch": 0.07, "grad_norm": 0.6813970804214478, "learning_rate": 0.0001973767641812443, "loss": 1.0363, "step": 865 }, { "epoch": 0.07, "grad_norm": 0.6258594393730164, "learning_rate": 0.00019734648532910982, "loss": 0.9106, "step": 870 }, { "epoch": 0.07, "grad_norm": 0.5775337815284729, "learning_rate": 0.00019731603507994416, "loss": 1.0125, "step": 875 }, { "epoch": 0.07, "grad_norm": 0.5688369870185852, "learning_rate": 0.00019728541348736084, "loss": 0.9422, "step": 880 }, { "epoch": 0.07, "grad_norm": 0.6406494975090027, "learning_rate": 0.00019725462060527489, "loss": 1.0349, "step": 885 }, { "epoch": 0.08, "grad_norm": 0.5844593048095703, "learning_rate": 0.00019722365648790313, "loss": 0.8833, "step": 890 }, { "epoch": 0.08, "grad_norm": 0.5722345113754272, "learning_rate": 0.00019719252118976374, "loss": 0.9489, "step": 895 }, { "epoch": 0.08, "grad_norm": 0.49358856678009033, "learning_rate": 0.00019716121476567639, "loss": 0.9482, "step": 900 }, { "epoch": 0.08, "grad_norm": 0.777001142501831, "learning_rate": 0.00019712973727076195, "loss": 1.0312, "step": 905 }, { "epoch": 0.08, "grad_norm": 0.5142468810081482, "learning_rate": 0.0001970980887604426, "loss": 0.9216, "step": 910 }, { "epoch": 0.08, "grad_norm": 0.5355789065361023, "learning_rate": 0.0001970662692904415, "loss": 0.9777, "step": 915 }, { "epoch": 0.08, "grad_norm": 0.6558182239532471, "learning_rate": 0.000197034278916783, "loss": 1.035, "step": 920 }, { "epoch": 0.08, "grad_norm": 0.5900774598121643, "learning_rate": 0.00019700211769579213, "loss": 0.9305, "step": 925 }, { "epoch": 0.08, "grad_norm": 0.6186517477035522, "learning_rate": 0.00019696978568409495, "loss": 1.0441, "step": 930 }, { "epoch": 0.08, "grad_norm": 0.5638003349304199, "learning_rate": 0.000196937282938618, "loss": 0.9227, "step": 935 }, { "epoch": 0.08, "grad_norm": 0.574424147605896, "learning_rate": 0.0001969046095165887, "loss": 1.0394, "step": 940 }, { "epoch": 0.08, "grad_norm": 0.5076507925987244, "learning_rate": 0.0001968717654755347, "loss": 0.9493, "step": 945 }, { "epoch": 0.08, "grad_norm": 0.5305479168891907, "learning_rate": 0.00019683875087328427, "loss": 0.9148, "step": 950 }, { "epoch": 0.08, "grad_norm": 0.7044605612754822, "learning_rate": 0.0001968055657679659, "loss": 0.9093, "step": 955 }, { "epoch": 0.08, "grad_norm": 0.6829433441162109, "learning_rate": 0.00019677221021800824, "loss": 0.9498, "step": 960 }, { "epoch": 0.08, "grad_norm": 0.691895067691803, "learning_rate": 0.00019673868428214016, "loss": 1.002, "step": 965 }, { "epoch": 0.08, "grad_norm": 0.5967201590538025, "learning_rate": 0.00019670498801939044, "loss": 0.9357, "step": 970 }, { "epoch": 0.08, "grad_norm": 0.6142829060554504, "learning_rate": 0.0001966711214890877, "loss": 1.0207, "step": 975 }, { "epoch": 0.08, "grad_norm": 0.5852668881416321, "learning_rate": 0.0001966370847508605, "loss": 1.0109, "step": 980 }, { "epoch": 0.08, "grad_norm": 0.572533369064331, "learning_rate": 0.00019660287786463698, "loss": 0.8034, "step": 985 }, { "epoch": 0.08, "grad_norm": 0.5755594968795776, "learning_rate": 0.00019656850089064484, "loss": 1.0993, "step": 990 }, { "epoch": 0.08, "grad_norm": 0.6088036298751831, "learning_rate": 0.00019653395388941137, "loss": 0.9437, "step": 995 }, { "epoch": 0.08, "grad_norm": 0.5511387586593628, "learning_rate": 0.00019649923692176304, "loss": 0.9882, "step": 1000 }, { "epoch": 0.08, "grad_norm": 0.5409395098686218, "learning_rate": 0.00019646435004882576, "loss": 1.038, "step": 1005 }, { "epoch": 0.09, "grad_norm": 0.629104495048523, "learning_rate": 0.00019642929333202452, "loss": 0.9842, "step": 1010 }, { "epoch": 0.09, "grad_norm": 0.5766791105270386, "learning_rate": 0.00019639406683308336, "loss": 1.0024, "step": 1015 }, { "epoch": 0.09, "grad_norm": 0.4806615114212036, "learning_rate": 0.00019635867061402516, "loss": 0.9551, "step": 1020 }, { "epoch": 0.09, "grad_norm": 0.5499693751335144, "learning_rate": 0.00019632310473717172, "loss": 0.9023, "step": 1025 }, { "epoch": 0.09, "grad_norm": 0.6102238893508911, "learning_rate": 0.00019628736926514365, "loss": 0.9405, "step": 1030 }, { "epoch": 0.09, "grad_norm": 0.5422660708427429, "learning_rate": 0.00019625146426085994, "loss": 1.0331, "step": 1035 }, { "epoch": 0.09, "grad_norm": 0.5721043944358826, "learning_rate": 0.00019621538978753823, "loss": 0.9257, "step": 1040 }, { "epoch": 0.09, "grad_norm": 0.6052049994468689, "learning_rate": 0.00019617914590869452, "loss": 0.9577, "step": 1045 }, { "epoch": 0.09, "grad_norm": 0.5600439310073853, "learning_rate": 0.00019614273268814305, "loss": 0.8236, "step": 1050 }, { "epoch": 0.09, "grad_norm": 0.5360490083694458, "learning_rate": 0.00019610615018999622, "loss": 1.0483, "step": 1055 }, { "epoch": 0.09, "grad_norm": 0.6123297810554504, "learning_rate": 0.0001960693984786645, "loss": 0.9726, "step": 1060 }, { "epoch": 0.09, "grad_norm": 0.5726840496063232, "learning_rate": 0.00019603247761885629, "loss": 1.0083, "step": 1065 }, { "epoch": 0.09, "grad_norm": 0.5117479562759399, "learning_rate": 0.00019599538767557775, "loss": 0.8722, "step": 1070 }, { "epoch": 0.09, "grad_norm": 0.5932919979095459, "learning_rate": 0.00019595812871413281, "loss": 1.0538, "step": 1075 }, { "epoch": 0.09, "grad_norm": 0.5998363494873047, "learning_rate": 0.00019592070080012302, "loss": 0.9474, "step": 1080 }, { "epoch": 0.09, "grad_norm": 0.5751232504844666, "learning_rate": 0.00019588310399944726, "loss": 0.9461, "step": 1085 }, { "epoch": 0.09, "grad_norm": 0.6181055903434753, "learning_rate": 0.00019584533837830196, "loss": 0.9216, "step": 1090 }, { "epoch": 0.09, "grad_norm": 0.5857065916061401, "learning_rate": 0.00019580740400318062, "loss": 0.8235, "step": 1095 }, { "epoch": 0.09, "grad_norm": 0.6219877600669861, "learning_rate": 0.00019576930094087396, "loss": 1.0098, "step": 1100 }, { "epoch": 0.09, "grad_norm": 0.5622178316116333, "learning_rate": 0.00019573102925846968, "loss": 0.8896, "step": 1105 }, { "epoch": 0.09, "grad_norm": 0.4788983464241028, "learning_rate": 0.00019569258902335236, "loss": 0.9877, "step": 1110 }, { "epoch": 0.09, "grad_norm": 0.5626773238182068, "learning_rate": 0.00019565398030320336, "loss": 0.9384, "step": 1115 }, { "epoch": 0.09, "grad_norm": 0.6275051236152649, "learning_rate": 0.0001956152031660007, "loss": 0.9299, "step": 1120 }, { "epoch": 0.1, "grad_norm": 0.5800061821937561, "learning_rate": 0.00019557625768001886, "loss": 0.825, "step": 1125 }, { "epoch": 0.1, "grad_norm": 0.717424213886261, "learning_rate": 0.00019553714391382887, "loss": 0.93, "step": 1130 }, { "epoch": 0.1, "grad_norm": 0.5426806211471558, "learning_rate": 0.0001954978619362979, "loss": 1.007, "step": 1135 }, { "epoch": 0.1, "grad_norm": 0.578883945941925, "learning_rate": 0.00019545841181658943, "loss": 1.0774, "step": 1140 }, { "epoch": 0.1, "grad_norm": 0.5970021486282349, "learning_rate": 0.0001954187936241628, "loss": 0.9603, "step": 1145 }, { "epoch": 0.1, "grad_norm": 0.669302761554718, "learning_rate": 0.00019537900742877344, "loss": 1.0047, "step": 1150 }, { "epoch": 0.1, "grad_norm": 0.5312142968177795, "learning_rate": 0.00019533905330047256, "loss": 0.9533, "step": 1155 }, { "epoch": 0.1, "grad_norm": 0.5459814667701721, "learning_rate": 0.000195298931309607, "loss": 1.0127, "step": 1160 }, { "epoch": 0.1, "grad_norm": 0.6206276416778564, "learning_rate": 0.00019525864152681913, "loss": 0.8151, "step": 1165 }, { "epoch": 0.1, "grad_norm": 0.6036674380302429, "learning_rate": 0.00019521818402304681, "loss": 0.9973, "step": 1170 }, { "epoch": 0.1, "grad_norm": 0.5393249988555908, "learning_rate": 0.0001951775588695232, "loss": 1.0176, "step": 1175 }, { "epoch": 0.1, "grad_norm": 0.6075353026390076, "learning_rate": 0.0001951367661377766, "loss": 1.0152, "step": 1180 }, { "epoch": 0.1, "grad_norm": 0.5007694363594055, "learning_rate": 0.00019509580589963034, "loss": 0.9276, "step": 1185 }, { "epoch": 0.1, "grad_norm": 0.5564121007919312, "learning_rate": 0.0001950546782272028, "loss": 0.9572, "step": 1190 }, { "epoch": 0.1, "grad_norm": 0.5426845550537109, "learning_rate": 0.00019501338319290708, "loss": 0.8693, "step": 1195 }, { "epoch": 0.1, "grad_norm": 0.6294403076171875, "learning_rate": 0.00019497192086945093, "loss": 1.0606, "step": 1200 }, { "epoch": 0.1, "grad_norm": 0.5286447405815125, "learning_rate": 0.00019493029132983662, "loss": 0.9979, "step": 1205 }, { "epoch": 0.1, "grad_norm": 0.4923780858516693, "learning_rate": 0.00019488849464736096, "loss": 1.07, "step": 1210 }, { "epoch": 0.1, "grad_norm": 0.5261374711990356, "learning_rate": 0.00019484653089561494, "loss": 1.0666, "step": 1215 }, { "epoch": 0.1, "grad_norm": 0.5969892740249634, "learning_rate": 0.00019480440014848377, "loss": 0.8119, "step": 1220 }, { "epoch": 0.1, "grad_norm": 0.5572890043258667, "learning_rate": 0.00019476210248014656, "loss": 0.9422, "step": 1225 }, { "epoch": 0.1, "grad_norm": 0.7353805303573608, "learning_rate": 0.0001947196379650765, "loss": 0.9641, "step": 1230 }, { "epoch": 0.1, "grad_norm": 0.5229889750480652, "learning_rate": 0.00019467700667804048, "loss": 0.9063, "step": 1235 }, { "epoch": 0.1, "grad_norm": 0.4695720672607422, "learning_rate": 0.00019463420869409893, "loss": 1.0855, "step": 1240 }, { "epoch": 0.11, "grad_norm": 0.5980749726295471, "learning_rate": 0.00019459124408860586, "loss": 0.9487, "step": 1245 }, { "epoch": 0.11, "grad_norm": 0.5264121294021606, "learning_rate": 0.0001945481129372087, "loss": 0.9755, "step": 1250 }, { "epoch": 0.11, "grad_norm": 0.5308972001075745, "learning_rate": 0.000194504815315848, "loss": 0.9853, "step": 1255 }, { "epoch": 0.11, "grad_norm": 0.5365180373191833, "learning_rate": 0.0001944613513007575, "loss": 0.9656, "step": 1260 }, { "epoch": 0.11, "grad_norm": 0.5717142820358276, "learning_rate": 0.00019441772096846384, "loss": 0.952, "step": 1265 }, { "epoch": 0.11, "grad_norm": 0.6359343528747559, "learning_rate": 0.0001943739243957866, "loss": 0.8685, "step": 1270 }, { "epoch": 0.11, "grad_norm": 0.4931330382823944, "learning_rate": 0.00019432996165983797, "loss": 0.9423, "step": 1275 }, { "epoch": 0.11, "grad_norm": 0.6247501373291016, "learning_rate": 0.00019428583283802265, "loss": 1.0109, "step": 1280 }, { "epoch": 0.11, "grad_norm": 0.5827012062072754, "learning_rate": 0.0001942415380080379, "loss": 0.9511, "step": 1285 }, { "epoch": 0.11, "grad_norm": 0.5535582304000854, "learning_rate": 0.00019419707724787323, "loss": 0.9364, "step": 1290 }, { "epoch": 0.11, "grad_norm": 0.5828814506530762, "learning_rate": 0.00019415245063581025, "loss": 0.9613, "step": 1295 }, { "epoch": 0.11, "grad_norm": 0.5350412726402283, "learning_rate": 0.00019410765825042257, "loss": 0.9448, "step": 1300 }, { "epoch": 0.11, "grad_norm": 0.7406845092773438, "learning_rate": 0.00019406270017057576, "loss": 0.9062, "step": 1305 }, { "epoch": 0.11, "grad_norm": 0.5344066023826599, "learning_rate": 0.00019401757647542707, "loss": 0.8728, "step": 1310 }, { "epoch": 0.11, "grad_norm": 0.6511924862861633, "learning_rate": 0.00019397228724442537, "loss": 0.871, "step": 1315 }, { "epoch": 0.11, "grad_norm": 0.5950039625167847, "learning_rate": 0.00019392683255731096, "loss": 1.0393, "step": 1320 }, { "epoch": 0.11, "grad_norm": 0.8080087304115295, "learning_rate": 0.00019388121249411553, "loss": 1.0526, "step": 1325 }, { "epoch": 0.11, "grad_norm": 0.5649018883705139, "learning_rate": 0.0001938354271351618, "loss": 0.9657, "step": 1330 }, { "epoch": 0.11, "grad_norm": 0.6691799163818359, "learning_rate": 0.00019378947656106373, "loss": 0.9644, "step": 1335 }, { "epoch": 0.11, "grad_norm": 0.6107922792434692, "learning_rate": 0.00019374336085272595, "loss": 0.9491, "step": 1340 }, { "epoch": 0.11, "grad_norm": 0.5547528862953186, "learning_rate": 0.000193697080091344, "loss": 1.004, "step": 1345 }, { "epoch": 0.11, "grad_norm": 0.6795949935913086, "learning_rate": 0.000193650634358404, "loss": 1.0367, "step": 1350 }, { "epoch": 0.11, "grad_norm": 0.6405168175697327, "learning_rate": 0.00019360402373568247, "loss": 0.9986, "step": 1355 }, { "epoch": 0.11, "grad_norm": 0.5481475591659546, "learning_rate": 0.0001935572483052463, "loss": 1.0394, "step": 1360 }, { "epoch": 0.12, "grad_norm": 0.5024700164794922, "learning_rate": 0.00019351030814945255, "loss": 0.9751, "step": 1365 }, { "epoch": 0.12, "grad_norm": 0.5783774852752686, "learning_rate": 0.0001934632033509483, "loss": 0.7837, "step": 1370 }, { "epoch": 0.12, "grad_norm": 0.6434618830680847, "learning_rate": 0.00019341593399267053, "loss": 0.8806, "step": 1375 }, { "epoch": 0.12, "grad_norm": 0.6476423740386963, "learning_rate": 0.00019336850015784594, "loss": 1.0671, "step": 1380 }, { "epoch": 0.12, "grad_norm": 0.6362334489822388, "learning_rate": 0.00019332090192999087, "loss": 0.9831, "step": 1385 }, { "epoch": 0.12, "grad_norm": 0.5181714296340942, "learning_rate": 0.000193273139392911, "loss": 0.8729, "step": 1390 }, { "epoch": 0.12, "grad_norm": 0.5226086378097534, "learning_rate": 0.0001932252126307014, "loss": 0.9101, "step": 1395 }, { "epoch": 0.12, "grad_norm": 0.5152550339698792, "learning_rate": 0.00019317712172774632, "loss": 0.8719, "step": 1400 }, { "epoch": 0.12, "grad_norm": 0.5058336853981018, "learning_rate": 0.00019312886676871888, "loss": 0.8942, "step": 1405 }, { "epoch": 0.12, "grad_norm": 0.5755054354667664, "learning_rate": 0.00019308044783858115, "loss": 0.9656, "step": 1410 }, { "epoch": 0.12, "grad_norm": 0.6656663417816162, "learning_rate": 0.0001930318650225839, "loss": 1.0673, "step": 1415 }, { "epoch": 0.12, "grad_norm": 0.5695021152496338, "learning_rate": 0.0001929831184062664, "loss": 1.0599, "step": 1420 }, { "epoch": 0.12, "grad_norm": 0.520157516002655, "learning_rate": 0.0001929342080754564, "loss": 1.0071, "step": 1425 }, { "epoch": 0.12, "grad_norm": 0.5740432739257812, "learning_rate": 0.00019288513411626983, "loss": 0.9698, "step": 1430 }, { "epoch": 0.12, "grad_norm": 0.5819209218025208, "learning_rate": 0.00019283589661511072, "loss": 0.8553, "step": 1435 }, { "epoch": 0.12, "grad_norm": 0.6635054349899292, "learning_rate": 0.0001927864956586711, "loss": 0.9225, "step": 1440 }, { "epoch": 0.12, "grad_norm": 0.6044415235519409, "learning_rate": 0.00019273693133393076, "loss": 1.0187, "step": 1445 }, { "epoch": 0.12, "grad_norm": 0.5304946899414062, "learning_rate": 0.00019268720372815713, "loss": 0.9037, "step": 1450 }, { "epoch": 0.12, "grad_norm": 0.5266116857528687, "learning_rate": 0.00019263731292890515, "loss": 0.9544, "step": 1455 }, { "epoch": 0.12, "grad_norm": 0.5947398543357849, "learning_rate": 0.00019258725902401703, "loss": 1.1737, "step": 1460 }, { "epoch": 0.12, "grad_norm": 0.5687413215637207, "learning_rate": 0.00019253704210162224, "loss": 0.8613, "step": 1465 }, { "epoch": 0.12, "grad_norm": 0.6292214393615723, "learning_rate": 0.00019248666225013726, "loss": 1.0017, "step": 1470 }, { "epoch": 0.12, "grad_norm": 0.5356031656265259, "learning_rate": 0.00019243611955826537, "loss": 0.855, "step": 1475 }, { "epoch": 0.13, "grad_norm": 0.6292675733566284, "learning_rate": 0.00019238541411499663, "loss": 0.9568, "step": 1480 }, { "epoch": 0.13, "grad_norm": 0.562177836894989, "learning_rate": 0.0001923345460096076, "loss": 0.9332, "step": 1485 }, { "epoch": 0.13, "grad_norm": 0.6126812696456909, "learning_rate": 0.00019228351533166134, "loss": 1.0263, "step": 1490 }, { "epoch": 0.13, "grad_norm": 0.6032052636146545, "learning_rate": 0.000192232322171007, "loss": 0.9851, "step": 1495 }, { "epoch": 0.13, "grad_norm": 0.5387076735496521, "learning_rate": 0.00019218096661777992, "loss": 0.913, "step": 1500 }, { "epoch": 0.13, "grad_norm": 0.5938889980316162, "learning_rate": 0.00019212944876240137, "loss": 0.9152, "step": 1505 }, { "epoch": 0.13, "grad_norm": 0.5894052982330322, "learning_rate": 0.00019207776869557833, "loss": 0.9353, "step": 1510 }, { "epoch": 0.13, "grad_norm": 0.6678389310836792, "learning_rate": 0.00019202592650830337, "loss": 0.992, "step": 1515 }, { "epoch": 0.13, "grad_norm": 0.5318301916122437, "learning_rate": 0.00019197392229185453, "loss": 0.9352, "step": 1520 }, { "epoch": 0.13, "grad_norm": 0.6742917895317078, "learning_rate": 0.0001919217561377952, "loss": 0.8734, "step": 1525 }, { "epoch": 0.13, "grad_norm": 0.7230522632598877, "learning_rate": 0.0001918694281379738, "loss": 1.0778, "step": 1530 }, { "epoch": 0.13, "grad_norm": 0.6975288391113281, "learning_rate": 0.0001918169383845237, "loss": 1.0108, "step": 1535 }, { "epoch": 0.13, "grad_norm": 0.5289484858512878, "learning_rate": 0.0001917642869698632, "loss": 0.9754, "step": 1540 }, { "epoch": 0.13, "grad_norm": 0.5557776093482971, "learning_rate": 0.0001917114739866951, "loss": 1.016, "step": 1545 }, { "epoch": 0.13, "grad_norm": 0.5662341117858887, "learning_rate": 0.00019165849952800667, "loss": 0.7972, "step": 1550 }, { "epoch": 0.13, "grad_norm": 0.6539111733436584, "learning_rate": 0.0001916053636870696, "loss": 1.0211, "step": 1555 }, { "epoch": 0.13, "grad_norm": 0.6270111203193665, "learning_rate": 0.00019155206655743965, "loss": 0.9429, "step": 1560 }, { "epoch": 0.13, "grad_norm": 0.633138120174408, "learning_rate": 0.00019149860823295656, "loss": 1.1118, "step": 1565 }, { "epoch": 0.13, "grad_norm": 0.5337046384811401, "learning_rate": 0.00019144498880774386, "loss": 0.8627, "step": 1570 }, { "epoch": 0.13, "grad_norm": 0.7054827809333801, "learning_rate": 0.00019139120837620882, "loss": 1.0627, "step": 1575 }, { "epoch": 0.13, "grad_norm": 0.7774983644485474, "learning_rate": 0.00019133726703304208, "loss": 0.9882, "step": 1580 }, { "epoch": 0.13, "grad_norm": 0.571898877620697, "learning_rate": 0.00019128316487321772, "loss": 0.988, "step": 1585 }, { "epoch": 0.13, "grad_norm": 0.5544953942298889, "learning_rate": 0.00019122890199199284, "loss": 0.8627, "step": 1590 }, { "epoch": 0.13, "grad_norm": 0.6004731059074402, "learning_rate": 0.0001911744784849076, "loss": 0.8951, "step": 1595 }, { "epoch": 0.14, "grad_norm": 0.526996910572052, "learning_rate": 0.00019111989444778492, "loss": 1.012, "step": 1600 }, { "epoch": 0.14, "grad_norm": 0.68979811668396, "learning_rate": 0.00019106514997673047, "loss": 0.9303, "step": 1605 }, { "epoch": 0.14, "grad_norm": 0.6508918404579163, "learning_rate": 0.00019101024516813224, "loss": 1.0252, "step": 1610 }, { "epoch": 0.14, "grad_norm": 0.5442551374435425, "learning_rate": 0.00019095518011866063, "loss": 1.0041, "step": 1615 }, { "epoch": 0.14, "grad_norm": 0.6386384963989258, "learning_rate": 0.0001908999549252682, "loss": 1.0107, "step": 1620 }, { "epoch": 0.14, "grad_norm": 0.6549636125564575, "learning_rate": 0.0001908445696851893, "loss": 0.9851, "step": 1625 }, { "epoch": 0.14, "grad_norm": 0.5537642240524292, "learning_rate": 0.00019078902449594032, "loss": 0.9206, "step": 1630 }, { "epoch": 0.14, "grad_norm": 0.5848385095596313, "learning_rate": 0.00019073331945531908, "loss": 0.9487, "step": 1635 }, { "epoch": 0.14, "grad_norm": 0.6486941576004028, "learning_rate": 0.00019067745466140495, "loss": 0.8375, "step": 1640 }, { "epoch": 0.14, "grad_norm": 0.5679529905319214, "learning_rate": 0.0001906214302125586, "loss": 0.99, "step": 1645 }, { "epoch": 0.14, "grad_norm": 0.5364441275596619, "learning_rate": 0.00019056524620742157, "loss": 0.9085, "step": 1650 }, { "epoch": 0.14, "grad_norm": 0.5058781504631042, "learning_rate": 0.00019050890274491665, "loss": 0.9123, "step": 1655 }, { "epoch": 0.14, "grad_norm": 0.820630669593811, "learning_rate": 0.00019045239992424717, "loss": 1.0015, "step": 1660 }, { "epoch": 0.14, "grad_norm": 0.5033766031265259, "learning_rate": 0.00019039573784489716, "loss": 0.937, "step": 1665 }, { "epoch": 0.14, "grad_norm": 0.585077166557312, "learning_rate": 0.00019033891660663098, "loss": 1.0317, "step": 1670 }, { "epoch": 0.14, "grad_norm": 0.5304557681083679, "learning_rate": 0.00019028193630949323, "loss": 0.9043, "step": 1675 }, { "epoch": 0.14, "grad_norm": 0.6207611560821533, "learning_rate": 0.00019022479705380857, "loss": 0.9307, "step": 1680 }, { "epoch": 0.14, "grad_norm": 0.7342296242713928, "learning_rate": 0.0001901674989401816, "loss": 0.8608, "step": 1685 }, { "epoch": 0.14, "grad_norm": 0.6580296158790588, "learning_rate": 0.00019011004206949652, "loss": 0.9218, "step": 1690 }, { "epoch": 0.14, "grad_norm": 0.5700413584709167, "learning_rate": 0.00019005242654291708, "loss": 1.0436, "step": 1695 }, { "epoch": 0.14, "grad_norm": 0.6400768756866455, "learning_rate": 0.00018999465246188644, "loss": 1.0718, "step": 1700 }, { "epoch": 0.14, "grad_norm": 0.5975671410560608, "learning_rate": 0.00018993671992812683, "loss": 0.9152, "step": 1705 }, { "epoch": 0.14, "grad_norm": 0.5661541223526001, "learning_rate": 0.00018987862904363954, "loss": 1.0337, "step": 1710 }, { "epoch": 0.14, "grad_norm": 0.7099330425262451, "learning_rate": 0.00018982037991070462, "loss": 0.9411, "step": 1715 }, { "epoch": 0.15, "grad_norm": 0.5101850628852844, "learning_rate": 0.00018976197263188079, "loss": 0.9404, "step": 1720 }, { "epoch": 0.15, "grad_norm": 0.5761948823928833, "learning_rate": 0.00018970340731000516, "loss": 0.9742, "step": 1725 }, { "epoch": 0.15, "grad_norm": 0.6656047105789185, "learning_rate": 0.00018964468404819313, "loss": 0.8846, "step": 1730 }, { "epoch": 0.15, "grad_norm": 0.6487800478935242, "learning_rate": 0.00018958580294983822, "loss": 1.0617, "step": 1735 }, { "epoch": 0.15, "grad_norm": 0.549220621585846, "learning_rate": 0.00018952676411861184, "loss": 0.943, "step": 1740 }, { "epoch": 0.15, "grad_norm": 0.5477379560470581, "learning_rate": 0.00018946756765846304, "loss": 0.9236, "step": 1745 }, { "epoch": 0.15, "grad_norm": 0.618018627166748, "learning_rate": 0.00018940821367361847, "loss": 0.9085, "step": 1750 }, { "epoch": 0.15, "grad_norm": 0.7567707300186157, "learning_rate": 0.00018934870226858217, "loss": 0.9261, "step": 1755 }, { "epoch": 0.15, "grad_norm": 0.6278589963912964, "learning_rate": 0.0001892890335481353, "loss": 0.8587, "step": 1760 }, { "epoch": 0.15, "grad_norm": 0.5153677463531494, "learning_rate": 0.00018922920761733596, "loss": 0.9223, "step": 1765 }, { "epoch": 0.15, "grad_norm": 0.5698601007461548, "learning_rate": 0.00018916922458151914, "loss": 0.9802, "step": 1770 }, { "epoch": 0.15, "grad_norm": 0.548460066318512, "learning_rate": 0.0001891090845462964, "loss": 0.8717, "step": 1775 }, { "epoch": 0.15, "grad_norm": 0.5761381983757019, "learning_rate": 0.00018904878761755569, "loss": 0.8788, "step": 1780 }, { "epoch": 0.15, "grad_norm": 0.6153483986854553, "learning_rate": 0.0001889883339014613, "loss": 0.9666, "step": 1785 }, { "epoch": 0.15, "grad_norm": 0.6116446256637573, "learning_rate": 0.00018892772350445345, "loss": 0.9085, "step": 1790 }, { "epoch": 0.15, "grad_norm": 0.6375726461410522, "learning_rate": 0.00018886695653324832, "loss": 0.9871, "step": 1795 }, { "epoch": 0.15, "grad_norm": 0.6123708486557007, "learning_rate": 0.00018880603309483776, "loss": 0.8711, "step": 1800 }, { "epoch": 0.15, "grad_norm": 0.6581029295921326, "learning_rate": 0.00018874495329648908, "loss": 1.0394, "step": 1805 }, { "epoch": 0.15, "grad_norm": 0.6889798641204834, "learning_rate": 0.00018868371724574488, "loss": 0.8321, "step": 1810 }, { "epoch": 0.15, "grad_norm": 0.5388063788414001, "learning_rate": 0.00018862232505042288, "loss": 0.9827, "step": 1815 }, { "epoch": 0.15, "grad_norm": 0.7022605538368225, "learning_rate": 0.00018856077681861578, "loss": 0.9111, "step": 1820 }, { "epoch": 0.15, "grad_norm": 0.6853319406509399, "learning_rate": 0.0001884990726586909, "loss": 0.9423, "step": 1825 }, { "epoch": 0.15, "grad_norm": 0.5777489542961121, "learning_rate": 0.00018843721267929023, "loss": 0.8929, "step": 1830 }, { "epoch": 0.16, "grad_norm": 0.8301356434822083, "learning_rate": 0.00018837519698933002, "loss": 1.0039, "step": 1835 }, { "epoch": 0.16, "grad_norm": 0.6395474076271057, "learning_rate": 0.00018831302569800073, "loss": 0.8821, "step": 1840 }, { "epoch": 0.16, "grad_norm": 0.7763014435768127, "learning_rate": 0.00018825069891476671, "loss": 0.9198, "step": 1845 }, { "epoch": 0.16, "grad_norm": 0.583583652973175, "learning_rate": 0.00018818821674936623, "loss": 0.9855, "step": 1850 }, { "epoch": 0.16, "grad_norm": 0.5729549527168274, "learning_rate": 0.00018812557931181093, "loss": 0.9374, "step": 1855 }, { "epoch": 0.16, "grad_norm": 0.6349241733551025, "learning_rate": 0.000188062786712386, "loss": 0.9622, "step": 1860 }, { "epoch": 0.16, "grad_norm": 0.49465417861938477, "learning_rate": 0.00018799983906164983, "loss": 0.9116, "step": 1865 }, { "epoch": 0.16, "grad_norm": 0.678386390209198, "learning_rate": 0.00018793673647043364, "loss": 0.9196, "step": 1870 }, { "epoch": 0.16, "grad_norm": 0.6616655588150024, "learning_rate": 0.00018787347904984165, "loss": 1.0232, "step": 1875 }, { "epoch": 0.16, "grad_norm": 0.512854278087616, "learning_rate": 0.00018781006691125053, "loss": 0.8166, "step": 1880 }, { "epoch": 0.16, "grad_norm": 0.6445545554161072, "learning_rate": 0.0001877465001663095, "loss": 1.0117, "step": 1885 }, { "epoch": 0.16, "grad_norm": 0.7158140540122986, "learning_rate": 0.0001876827789269399, "loss": 0.9406, "step": 1890 }, { "epoch": 0.16, "grad_norm": 0.6912984848022461, "learning_rate": 0.0001876189033053351, "loss": 1.0287, "step": 1895 }, { "epoch": 0.16, "grad_norm": 0.7598811388015747, "learning_rate": 0.00018755487341396028, "loss": 1.0113, "step": 1900 }, { "epoch": 0.16, "grad_norm": 0.6084948778152466, "learning_rate": 0.00018749068936555228, "loss": 1.0907, "step": 1905 }, { "epoch": 0.16, "grad_norm": 0.625379741191864, "learning_rate": 0.00018742635127311935, "loss": 0.9019, "step": 1910 }, { "epoch": 0.16, "grad_norm": 0.5288641452789307, "learning_rate": 0.00018736185924994096, "loss": 0.8875, "step": 1915 }, { "epoch": 0.16, "grad_norm": 0.5389487147331238, "learning_rate": 0.00018729721340956758, "loss": 0.9304, "step": 1920 }, { "epoch": 0.16, "grad_norm": 0.6083305478096008, "learning_rate": 0.0001872324138658206, "loss": 0.966, "step": 1925 }, { "epoch": 0.16, "grad_norm": 0.5934799909591675, "learning_rate": 0.00018716746073279184, "loss": 0.8495, "step": 1930 }, { "epoch": 0.16, "grad_norm": 0.5639356970787048, "learning_rate": 0.00018710235412484373, "loss": 0.9915, "step": 1935 }, { "epoch": 0.16, "grad_norm": 0.5736001133918762, "learning_rate": 0.00018703709415660887, "loss": 0.8608, "step": 1940 }, { "epoch": 0.16, "grad_norm": 0.8066298365592957, "learning_rate": 0.00018697168094298984, "loss": 0.9448, "step": 1945 }, { "epoch": 0.16, "grad_norm": 0.5450537204742432, "learning_rate": 0.00018690611459915908, "loss": 0.9935, "step": 1950 }, { "epoch": 0.17, "grad_norm": 0.6609448194503784, "learning_rate": 0.00018684039524055862, "loss": 1.0216, "step": 1955 }, { "epoch": 0.17, "grad_norm": 0.6248445510864258, "learning_rate": 0.0001867745229828999, "loss": 0.9785, "step": 1960 }, { "epoch": 0.17, "grad_norm": 0.7231089472770691, "learning_rate": 0.00018670849794216355, "loss": 0.8399, "step": 1965 }, { "epoch": 0.17, "grad_norm": 0.5523176193237305, "learning_rate": 0.00018664232023459933, "loss": 1.0047, "step": 1970 }, { "epoch": 0.17, "grad_norm": 0.662384569644928, "learning_rate": 0.00018657598997672562, "loss": 1.1134, "step": 1975 }, { "epoch": 0.17, "grad_norm": 0.6732993125915527, "learning_rate": 0.00018650950728532948, "loss": 1.0003, "step": 1980 }, { "epoch": 0.17, "grad_norm": 0.6265495419502258, "learning_rate": 0.00018644287227746636, "loss": 0.9312, "step": 1985 }, { "epoch": 0.17, "grad_norm": 0.6594348549842834, "learning_rate": 0.0001863760850704599, "loss": 1.1153, "step": 1990 }, { "epoch": 0.17, "grad_norm": 0.5969790816307068, "learning_rate": 0.0001863091457819017, "loss": 0.87, "step": 1995 }, { "epoch": 0.17, "grad_norm": 0.6349937319755554, "learning_rate": 0.00018624205452965112, "loss": 1.0112, "step": 2000 }, { "epoch": 0.17, "grad_norm": 0.5987355709075928, "learning_rate": 0.00018617481143183508, "loss": 1.0038, "step": 2005 }, { "epoch": 0.17, "grad_norm": 0.6250818371772766, "learning_rate": 0.00018610741660684784, "loss": 0.9675, "step": 2010 }, { "epoch": 0.17, "grad_norm": 0.6271193623542786, "learning_rate": 0.00018603987017335092, "loss": 0.9785, "step": 2015 }, { "epoch": 0.17, "grad_norm": 0.7074173092842102, "learning_rate": 0.0001859721722502726, "loss": 0.8998, "step": 2020 }, { "epoch": 0.17, "grad_norm": 0.5771471261978149, "learning_rate": 0.000185904322956808, "loss": 0.9635, "step": 2025 }, { "epoch": 0.17, "grad_norm": 0.5951381325721741, "learning_rate": 0.0001858363224124187, "loss": 0.8828, "step": 2030 }, { "epoch": 0.17, "grad_norm": 0.8378333449363708, "learning_rate": 0.0001857681707368326, "loss": 1.0037, "step": 2035 }, { "epoch": 0.17, "grad_norm": 0.5882629752159119, "learning_rate": 0.0001856998680500438, "loss": 0.993, "step": 2040 }, { "epoch": 0.17, "grad_norm": 0.571729302406311, "learning_rate": 0.00018563141447231211, "loss": 1.0208, "step": 2045 }, { "epoch": 0.17, "grad_norm": 0.5603118538856506, "learning_rate": 0.0001855628101241631, "loss": 0.9141, "step": 2050 }, { "epoch": 0.17, "grad_norm": 0.6984039545059204, "learning_rate": 0.00018549405512638783, "loss": 1.0206, "step": 2055 }, { "epoch": 0.17, "grad_norm": 0.5990726947784424, "learning_rate": 0.00018542514960004253, "loss": 0.9379, "step": 2060 }, { "epoch": 0.17, "grad_norm": 0.6375360488891602, "learning_rate": 0.0001853560936664485, "loss": 0.9176, "step": 2065 }, { "epoch": 0.17, "grad_norm": 0.629737377166748, "learning_rate": 0.00018528688744719193, "loss": 0.8489, "step": 2070 }, { "epoch": 0.18, "grad_norm": 0.60643470287323, "learning_rate": 0.0001852175310641235, "loss": 0.9451, "step": 2075 }, { "epoch": 0.18, "grad_norm": 0.6629886031150818, "learning_rate": 0.00018514802463935834, "loss": 1.0165, "step": 2080 }, { "epoch": 0.18, "grad_norm": 0.49199485778808594, "learning_rate": 0.00018507836829527574, "loss": 0.9393, "step": 2085 }, { "epoch": 0.18, "grad_norm": 0.6570359468460083, "learning_rate": 0.000185008562154519, "loss": 0.924, "step": 2090 }, { "epoch": 0.18, "grad_norm": 0.6161836981773376, "learning_rate": 0.00018493860633999508, "loss": 0.8037, "step": 2095 }, { "epoch": 0.18, "grad_norm": 0.697126567363739, "learning_rate": 0.00018486850097487457, "loss": 0.9855, "step": 2100 }, { "epoch": 0.18, "grad_norm": 0.6809502243995667, "learning_rate": 0.00018479824618259128, "loss": 1.0517, "step": 2105 }, { "epoch": 0.18, "grad_norm": 0.61480313539505, "learning_rate": 0.0001847278420868422, "loss": 0.9386, "step": 2110 }, { "epoch": 0.18, "grad_norm": 0.5264391899108887, "learning_rate": 0.00018465728881158708, "loss": 0.8437, "step": 2115 }, { "epoch": 0.18, "grad_norm": 0.559967577457428, "learning_rate": 0.00018458658648104844, "loss": 1.0144, "step": 2120 }, { "epoch": 0.18, "grad_norm": 0.5891844034194946, "learning_rate": 0.00018451573521971123, "loss": 0.9301, "step": 2125 }, { "epoch": 0.18, "grad_norm": 0.6855583190917969, "learning_rate": 0.00018444473515232256, "loss": 0.8999, "step": 2130 }, { "epoch": 0.18, "grad_norm": 0.547498881816864, "learning_rate": 0.00018437358640389158, "loss": 0.8687, "step": 2135 }, { "epoch": 0.18, "grad_norm": 0.565273642539978, "learning_rate": 0.00018430228909968921, "loss": 0.9559, "step": 2140 }, { "epoch": 0.18, "grad_norm": 0.5726015567779541, "learning_rate": 0.00018423084336524793, "loss": 0.9186, "step": 2145 }, { "epoch": 0.18, "grad_norm": 0.6429528594017029, "learning_rate": 0.00018415924932636157, "loss": 0.7512, "step": 2150 }, { "epoch": 0.18, "grad_norm": 0.6005961298942566, "learning_rate": 0.0001840875071090851, "loss": 0.9153, "step": 2155 }, { "epoch": 0.18, "grad_norm": 0.5854561924934387, "learning_rate": 0.00018401561683973434, "loss": 0.9366, "step": 2160 }, { "epoch": 0.18, "grad_norm": 0.6333120465278625, "learning_rate": 0.0001839435786448858, "loss": 0.9863, "step": 2165 }, { "epoch": 0.18, "grad_norm": 0.6162759065628052, "learning_rate": 0.00018387139265137642, "loss": 0.9348, "step": 2170 }, { "epoch": 0.18, "grad_norm": 0.5720029473304749, "learning_rate": 0.00018379905898630345, "loss": 0.8502, "step": 2175 }, { "epoch": 0.18, "grad_norm": 0.5871778130531311, "learning_rate": 0.00018372657777702406, "loss": 0.9709, "step": 2180 }, { "epoch": 0.18, "grad_norm": 0.6709279417991638, "learning_rate": 0.00018365394915115517, "loss": 1.149, "step": 2185 }, { "epoch": 0.18, "grad_norm": 0.5620378851890564, "learning_rate": 0.0001835811732365734, "loss": 0.9973, "step": 2190 }, { "epoch": 0.19, "grad_norm": 0.758850634098053, "learning_rate": 0.00018350825016141457, "loss": 0.9039, "step": 2195 }, { "epoch": 0.19, "grad_norm": 0.6607004404067993, "learning_rate": 0.00018343518005407367, "loss": 0.8812, "step": 2200 }, { "epoch": 0.19, "grad_norm": 0.6985838413238525, "learning_rate": 0.0001833619630432045, "loss": 1.0202, "step": 2205 }, { "epoch": 0.19, "grad_norm": 0.809180736541748, "learning_rate": 0.00018328859925771958, "loss": 0.9662, "step": 2210 }, { "epoch": 0.19, "grad_norm": 0.6806530952453613, "learning_rate": 0.0001832150888267898, "loss": 0.8784, "step": 2215 }, { "epoch": 0.19, "grad_norm": 0.5656675100326538, "learning_rate": 0.00018314143187984433, "loss": 0.905, "step": 2220 }, { "epoch": 0.19, "grad_norm": 0.694346010684967, "learning_rate": 0.00018306762854657023, "loss": 1.007, "step": 2225 }, { "epoch": 0.19, "grad_norm": 0.6591660976409912, "learning_rate": 0.00018299367895691234, "loss": 0.8863, "step": 2230 }, { "epoch": 0.19, "grad_norm": 0.6169084310531616, "learning_rate": 0.00018291958324107298, "loss": 0.8985, "step": 2235 }, { "epoch": 0.19, "grad_norm": 0.7212360501289368, "learning_rate": 0.00018284534152951176, "loss": 0.9613, "step": 2240 }, { "epoch": 0.19, "grad_norm": 0.5280630588531494, "learning_rate": 0.00018277095395294538, "loss": 0.9924, "step": 2245 }, { "epoch": 0.19, "grad_norm": 0.5625287890434265, "learning_rate": 0.00018269642064234733, "loss": 0.8689, "step": 2250 }, { "epoch": 0.19, "grad_norm": 0.6884534955024719, "learning_rate": 0.0001826217417289477, "loss": 0.9303, "step": 2255 }, { "epoch": 0.19, "grad_norm": 0.5620412230491638, "learning_rate": 0.00018254691734423295, "loss": 0.8627, "step": 2260 }, { "epoch": 0.19, "grad_norm": 0.5788276195526123, "learning_rate": 0.00018247194761994567, "loss": 1.0121, "step": 2265 }, { "epoch": 0.19, "grad_norm": 0.5471813082695007, "learning_rate": 0.00018239683268808432, "loss": 1.0408, "step": 2270 }, { "epoch": 0.19, "grad_norm": 0.5952843427658081, "learning_rate": 0.00018232157268090307, "loss": 1.019, "step": 2275 }, { "epoch": 0.19, "grad_norm": 0.5923483967781067, "learning_rate": 0.00018224616773091147, "loss": 0.9152, "step": 2280 }, { "epoch": 0.19, "grad_norm": 0.7464591860771179, "learning_rate": 0.00018217061797087434, "loss": 0.8466, "step": 2285 }, { "epoch": 0.19, "grad_norm": 0.7097582221031189, "learning_rate": 0.00018209492353381138, "loss": 0.9672, "step": 2290 }, { "epoch": 0.19, "grad_norm": 0.6305751800537109, "learning_rate": 0.00018201908455299707, "loss": 0.9242, "step": 2295 }, { "epoch": 0.19, "grad_norm": 0.657504141330719, "learning_rate": 0.00018194310116196043, "loss": 0.9943, "step": 2300 }, { "epoch": 0.19, "grad_norm": 0.5786561965942383, "learning_rate": 0.00018186697349448463, "loss": 0.9458, "step": 2305 }, { "epoch": 0.2, "grad_norm": 0.6431105732917786, "learning_rate": 0.000181790701684607, "loss": 0.9829, "step": 2310 }, { "epoch": 0.2, "grad_norm": 0.6315768957138062, "learning_rate": 0.0001817142858666185, "loss": 1.018, "step": 2315 }, { "epoch": 0.2, "grad_norm": 0.6315834522247314, "learning_rate": 0.00018163772617506383, "loss": 0.9245, "step": 2320 }, { "epoch": 0.2, "grad_norm": 0.6845858097076416, "learning_rate": 0.00018156102274474086, "loss": 0.9734, "step": 2325 }, { "epoch": 0.2, "grad_norm": 0.5911626815795898, "learning_rate": 0.00018148417571070056, "loss": 0.8466, "step": 2330 }, { "epoch": 0.2, "grad_norm": 0.7361390590667725, "learning_rate": 0.00018140718520824684, "loss": 0.8586, "step": 2335 }, { "epoch": 0.2, "grad_norm": 0.6635876297950745, "learning_rate": 0.0001813300513729361, "loss": 1.0269, "step": 2340 }, { "epoch": 0.2, "grad_norm": 0.6086567640304565, "learning_rate": 0.0001812527743405772, "loss": 0.9184, "step": 2345 }, { "epoch": 0.2, "grad_norm": 0.5980998277664185, "learning_rate": 0.00018117535424723102, "loss": 0.9805, "step": 2350 }, { "epoch": 0.2, "grad_norm": 0.5727255344390869, "learning_rate": 0.0001810977912292104, "loss": 1.0672, "step": 2355 }, { "epoch": 0.2, "grad_norm": 0.6550287008285522, "learning_rate": 0.00018102008542307982, "loss": 1.0177, "step": 2360 }, { "epoch": 0.2, "grad_norm": 0.5286814570426941, "learning_rate": 0.00018094223696565512, "loss": 0.7283, "step": 2365 }, { "epoch": 0.2, "grad_norm": 0.7307307124137878, "learning_rate": 0.0001808642459940034, "loss": 0.9459, "step": 2370 }, { "epoch": 0.2, "grad_norm": 0.7078204154968262, "learning_rate": 0.0001807861126454426, "loss": 0.9447, "step": 2375 }, { "epoch": 0.2, "grad_norm": 0.6107788681983948, "learning_rate": 0.00018070783705754134, "loss": 1.0948, "step": 2380 }, { "epoch": 0.2, "grad_norm": 0.5652554035186768, "learning_rate": 0.00018062941936811868, "loss": 0.9937, "step": 2385 }, { "epoch": 0.2, "grad_norm": 0.5260435342788696, "learning_rate": 0.00018055085971524398, "loss": 0.9008, "step": 2390 }, { "epoch": 0.2, "grad_norm": 0.6040837168693542, "learning_rate": 0.0001804721582372364, "loss": 0.9693, "step": 2395 }, { "epoch": 0.2, "grad_norm": 0.5115237832069397, "learning_rate": 0.00018039331507266492, "loss": 0.9628, "step": 2400 }, { "epoch": 0.2, "grad_norm": 0.6985922455787659, "learning_rate": 0.00018031433036034793, "loss": 0.9271, "step": 2405 }, { "epoch": 0.2, "grad_norm": 0.9261385798454285, "learning_rate": 0.0001802352042393531, "loss": 0.898, "step": 2410 }, { "epoch": 0.2, "grad_norm": 0.5070507526397705, "learning_rate": 0.00018015593684899702, "loss": 0.9087, "step": 2415 }, { "epoch": 0.2, "grad_norm": 0.5075459480285645, "learning_rate": 0.000180076528328845, "loss": 0.909, "step": 2420 }, { "epoch": 0.2, "grad_norm": 0.5567365288734436, "learning_rate": 0.0001799969788187109, "loss": 0.912, "step": 2425 }, { "epoch": 0.21, "grad_norm": 0.6508195996284485, "learning_rate": 0.0001799172884586568, "loss": 0.8513, "step": 2430 }, { "epoch": 0.21, "grad_norm": 0.7663083672523499, "learning_rate": 0.0001798374573889927, "loss": 0.9042, "step": 2435 }, { "epoch": 0.21, "grad_norm": 0.5899822115898132, "learning_rate": 0.00017975748575027646, "loss": 0.9742, "step": 2440 }, { "epoch": 0.21, "grad_norm": 0.5543133616447449, "learning_rate": 0.00017967737368331337, "loss": 0.9616, "step": 2445 }, { "epoch": 0.21, "grad_norm": 0.5752557516098022, "learning_rate": 0.00017959712132915599, "loss": 0.9027, "step": 2450 }, { "epoch": 0.21, "grad_norm": 0.7131261825561523, "learning_rate": 0.00017951672882910385, "loss": 0.9206, "step": 2455 }, { "epoch": 0.21, "grad_norm": 0.7709518671035767, "learning_rate": 0.0001794361963247033, "loss": 0.934, "step": 2460 }, { "epoch": 0.21, "grad_norm": 0.5232674479484558, "learning_rate": 0.00017935552395774708, "loss": 0.9652, "step": 2465 }, { "epoch": 0.21, "grad_norm": 0.6537283658981323, "learning_rate": 0.00017927471187027436, "loss": 0.9757, "step": 2470 }, { "epoch": 0.21, "grad_norm": 0.5960981249809265, "learning_rate": 0.00017919376020457018, "loss": 0.9607, "step": 2475 }, { "epoch": 0.21, "grad_norm": 0.5479128956794739, "learning_rate": 0.0001791126691031653, "loss": 0.9325, "step": 2480 }, { "epoch": 0.21, "grad_norm": 0.8228976726531982, "learning_rate": 0.00017903143870883615, "loss": 0.9168, "step": 2485 }, { "epoch": 0.21, "grad_norm": 0.6928779482841492, "learning_rate": 0.00017895006916460426, "loss": 0.9469, "step": 2490 }, { "epoch": 0.21, "grad_norm": 0.6461488008499146, "learning_rate": 0.00017886856061373623, "loss": 0.8883, "step": 2495 }, { "epoch": 0.21, "grad_norm": 0.5686217546463013, "learning_rate": 0.00017878691319974337, "loss": 0.8932, "step": 2500 }, { "epoch": 0.21, "grad_norm": 0.6429763436317444, "learning_rate": 0.00017870512706638148, "loss": 0.9782, "step": 2505 }, { "epoch": 0.21, "grad_norm": 0.645390510559082, "learning_rate": 0.0001786232023576507, "loss": 0.9944, "step": 2510 }, { "epoch": 0.21, "grad_norm": 0.6108589768409729, "learning_rate": 0.00017854113921779509, "loss": 1.0078, "step": 2515 }, { "epoch": 0.21, "grad_norm": 0.5913373231887817, "learning_rate": 0.00017845893779130237, "loss": 0.8979, "step": 2520 }, { "epoch": 0.21, "grad_norm": 0.711692750453949, "learning_rate": 0.00017837659822290386, "loss": 1.014, "step": 2525 }, { "epoch": 0.21, "grad_norm": 0.5642191767692566, "learning_rate": 0.00017829412065757398, "loss": 1.0521, "step": 2530 }, { "epoch": 0.21, "grad_norm": 0.5885759592056274, "learning_rate": 0.0001782115052405303, "loss": 0.9956, "step": 2535 }, { "epoch": 0.21, "grad_norm": 0.6232182383537292, "learning_rate": 0.00017812875211723291, "loss": 0.9338, "step": 2540 }, { "epoch": 0.21, "grad_norm": 0.6030421257019043, "learning_rate": 0.00017804586143338455, "loss": 0.9516, "step": 2545 }, { "epoch": 0.22, "grad_norm": 0.7771492004394531, "learning_rate": 0.00017796283333492997, "loss": 1.0976, "step": 2550 }, { "epoch": 0.22, "grad_norm": 0.5960396528244019, "learning_rate": 0.00017787966796805596, "loss": 0.9811, "step": 2555 }, { "epoch": 0.22, "grad_norm": 0.6035930514335632, "learning_rate": 0.00017779636547919102, "loss": 0.9693, "step": 2560 }, { "epoch": 0.22, "grad_norm": 0.6715995669364929, "learning_rate": 0.00017771292601500505, "loss": 1.0308, "step": 2565 }, { "epoch": 0.22, "grad_norm": 0.5918522477149963, "learning_rate": 0.00017762934972240913, "loss": 0.8584, "step": 2570 }, { "epoch": 0.22, "grad_norm": 0.5988922715187073, "learning_rate": 0.0001775456367485552, "loss": 0.8162, "step": 2575 }, { "epoch": 0.22, "grad_norm": 0.5941436886787415, "learning_rate": 0.00017746178724083593, "loss": 0.8879, "step": 2580 }, { "epoch": 0.22, "grad_norm": 0.697307288646698, "learning_rate": 0.00017737780134688435, "loss": 1.0048, "step": 2585 }, { "epoch": 0.22, "grad_norm": 0.5817750692367554, "learning_rate": 0.00017729367921457363, "loss": 0.9586, "step": 2590 }, { "epoch": 0.22, "grad_norm": 0.6275402307510376, "learning_rate": 0.00017720942099201678, "loss": 0.9415, "step": 2595 }, { "epoch": 0.22, "grad_norm": 0.5959211587905884, "learning_rate": 0.00017712502682756646, "loss": 0.8882, "step": 2600 }, { "epoch": 0.22, "grad_norm": 0.5991825461387634, "learning_rate": 0.00017704049686981471, "loss": 1.0279, "step": 2605 }, { "epoch": 0.22, "grad_norm": 0.6534688472747803, "learning_rate": 0.0001769558312675926, "loss": 0.9533, "step": 2610 }, { "epoch": 0.22, "grad_norm": 0.56687331199646, "learning_rate": 0.00017687103016997003, "loss": 0.8215, "step": 2615 }, { "epoch": 0.22, "grad_norm": 0.7316619753837585, "learning_rate": 0.0001767860937262555, "loss": 0.9756, "step": 2620 }, { "epoch": 0.22, "grad_norm": 0.580111026763916, "learning_rate": 0.0001767010220859958, "loss": 0.909, "step": 2625 }, { "epoch": 0.22, "grad_norm": 0.6079258918762207, "learning_rate": 0.00017661581539897577, "loss": 0.8558, "step": 2630 }, { "epoch": 0.22, "grad_norm": 0.5412486791610718, "learning_rate": 0.000176530473815218, "loss": 0.9592, "step": 2635 }, { "epoch": 0.22, "grad_norm": 0.5887441635131836, "learning_rate": 0.00017644499748498263, "loss": 0.8299, "step": 2640 }, { "epoch": 0.22, "grad_norm": 0.62538743019104, "learning_rate": 0.000176359386558767, "loss": 0.8315, "step": 2645 }, { "epoch": 0.22, "grad_norm": 0.4921663999557495, "learning_rate": 0.00017627364118730544, "loss": 0.7318, "step": 2650 }, { "epoch": 0.22, "grad_norm": 0.6708532571792603, "learning_rate": 0.00017618776152156901, "loss": 0.9884, "step": 2655 }, { "epoch": 0.22, "grad_norm": 0.5910497903823853, "learning_rate": 0.00017610174771276525, "loss": 0.905, "step": 2660 }, { "epoch": 0.23, "grad_norm": 0.6253485679626465, "learning_rate": 0.0001760155999123378, "loss": 0.9262, "step": 2665 }, { "epoch": 0.23, "grad_norm": 0.6524810194969177, "learning_rate": 0.0001759293182719664, "loss": 1.0608, "step": 2670 }, { "epoch": 0.23, "grad_norm": 0.5854840874671936, "learning_rate": 0.00017584290294356616, "loss": 0.9515, "step": 2675 }, { "epoch": 0.23, "grad_norm": 0.645991325378418, "learning_rate": 0.00017575635407928784, "loss": 0.9688, "step": 2680 }, { "epoch": 0.23, "grad_norm": 0.56763756275177, "learning_rate": 0.00017566967183151714, "loss": 0.9068, "step": 2685 }, { "epoch": 0.23, "grad_norm": 0.5318624377250671, "learning_rate": 0.00017558285635287465, "loss": 0.9443, "step": 2690 }, { "epoch": 0.23, "grad_norm": 0.7761218547821045, "learning_rate": 0.00017549590779621563, "loss": 0.9784, "step": 2695 }, { "epoch": 0.23, "grad_norm": 0.6218341588973999, "learning_rate": 0.00017540882631462954, "loss": 0.9407, "step": 2700 }, { "epoch": 0.23, "grad_norm": 0.6817795634269714, "learning_rate": 0.00017532161206143993, "loss": 0.9117, "step": 2705 }, { "epoch": 0.23, "grad_norm": 0.6102976202964783, "learning_rate": 0.0001752342651902041, "loss": 0.9124, "step": 2710 }, { "epoch": 0.23, "grad_norm": 0.5011729001998901, "learning_rate": 0.00017514678585471284, "loss": 0.8357, "step": 2715 }, { "epoch": 0.23, "grad_norm": 0.8257537484169006, "learning_rate": 0.00017505917420899018, "loss": 0.8757, "step": 2720 }, { "epoch": 0.23, "grad_norm": 0.6271941065788269, "learning_rate": 0.00017497143040729314, "loss": 0.9015, "step": 2725 }, { "epoch": 0.23, "grad_norm": 0.6705794334411621, "learning_rate": 0.0001748835546041114, "loss": 0.9526, "step": 2730 }, { "epoch": 0.23, "grad_norm": 0.5870561003684998, "learning_rate": 0.000174795546954167, "loss": 0.9221, "step": 2735 }, { "epoch": 0.23, "grad_norm": 0.6101082563400269, "learning_rate": 0.00017470740761241422, "loss": 0.886, "step": 2740 }, { "epoch": 0.23, "grad_norm": 0.6358086466789246, "learning_rate": 0.00017461913673403915, "loss": 1.0226, "step": 2745 }, { "epoch": 0.23, "grad_norm": 0.4912102222442627, "learning_rate": 0.00017453073447445952, "loss": 0.8895, "step": 2750 }, { "epoch": 0.23, "grad_norm": 0.6197881698608398, "learning_rate": 0.0001744422009893243, "loss": 0.8461, "step": 2755 }, { "epoch": 0.23, "grad_norm": 0.6709913015365601, "learning_rate": 0.00017435353643451357, "loss": 0.9809, "step": 2760 }, { "epoch": 0.23, "grad_norm": 0.6350414156913757, "learning_rate": 0.00017426474096613812, "loss": 0.9783, "step": 2765 }, { "epoch": 0.23, "grad_norm": 0.5429471135139465, "learning_rate": 0.00017417581474053938, "loss": 0.8892, "step": 2770 }, { "epoch": 0.23, "grad_norm": 0.6633498668670654, "learning_rate": 0.00017408675791428886, "loss": 1.0242, "step": 2775 }, { "epoch": 0.23, "grad_norm": 0.5957458019256592, "learning_rate": 0.00017399757064418805, "loss": 0.8556, "step": 2780 }, { "epoch": 0.24, "grad_norm": 0.6234441995620728, "learning_rate": 0.00017390825308726817, "loss": 1.1179, "step": 2785 }, { "epoch": 0.24, "grad_norm": 0.6558435559272766, "learning_rate": 0.00017381880540078974, "loss": 1.0413, "step": 2790 }, { "epoch": 0.24, "grad_norm": 0.618256151676178, "learning_rate": 0.0001737292277422425, "loss": 0.9414, "step": 2795 }, { "epoch": 0.24, "grad_norm": 0.5883316397666931, "learning_rate": 0.000173639520269345, "loss": 0.8783, "step": 2800 }, { "epoch": 0.24, "grad_norm": 0.7670148015022278, "learning_rate": 0.0001735496831400443, "loss": 0.8774, "step": 2805 }, { "epoch": 0.24, "grad_norm": 0.7569119334220886, "learning_rate": 0.00017345971651251576, "loss": 1.0018, "step": 2810 }, { "epoch": 0.24, "grad_norm": 0.6054137945175171, "learning_rate": 0.00017336962054516277, "loss": 0.9388, "step": 2815 }, { "epoch": 0.24, "grad_norm": 0.7382391691207886, "learning_rate": 0.0001732793953966165, "loss": 0.8181, "step": 2820 }, { "epoch": 0.24, "grad_norm": 0.6390617489814758, "learning_rate": 0.00017318904122573542, "loss": 0.9499, "step": 2825 }, { "epoch": 0.24, "grad_norm": 0.606440007686615, "learning_rate": 0.00017309855819160535, "loss": 0.8691, "step": 2830 }, { "epoch": 0.24, "grad_norm": 0.5001400709152222, "learning_rate": 0.00017300794645353884, "loss": 0.9139, "step": 2835 }, { "epoch": 0.24, "grad_norm": 0.627774178981781, "learning_rate": 0.00017291720617107516, "loss": 0.9617, "step": 2840 }, { "epoch": 0.24, "grad_norm": 0.6031743288040161, "learning_rate": 0.00017282633750397984, "loss": 0.9019, "step": 2845 }, { "epoch": 0.24, "grad_norm": 0.6420722007751465, "learning_rate": 0.0001727353406122445, "loss": 1.001, "step": 2850 }, { "epoch": 0.24, "grad_norm": 0.7209452390670776, "learning_rate": 0.00017264421565608648, "loss": 1.0402, "step": 2855 }, { "epoch": 0.24, "grad_norm": 0.6446462869644165, "learning_rate": 0.00017255296279594862, "loss": 0.9832, "step": 2860 }, { "epoch": 0.24, "grad_norm": 0.6604474782943726, "learning_rate": 0.000172461582192499, "loss": 0.9241, "step": 2865 }, { "epoch": 0.24, "grad_norm": 0.6114985942840576, "learning_rate": 0.00017237007400663053, "loss": 0.8937, "step": 2870 }, { "epoch": 0.24, "grad_norm": 0.6553681492805481, "learning_rate": 0.0001722784383994608, "loss": 0.9854, "step": 2875 }, { "epoch": 0.24, "grad_norm": 0.5967362523078918, "learning_rate": 0.00017218667553233182, "loss": 0.9486, "step": 2880 }, { "epoch": 0.24, "grad_norm": 0.5432732105255127, "learning_rate": 0.00017209478556680957, "loss": 0.9295, "step": 2885 }, { "epoch": 0.24, "grad_norm": 0.579897940158844, "learning_rate": 0.00017200276866468375, "loss": 0.8984, "step": 2890 }, { "epoch": 0.24, "grad_norm": 0.5317714214324951, "learning_rate": 0.0001719106249879678, "loss": 0.8364, "step": 2895 }, { "epoch": 0.24, "grad_norm": 0.7333313226699829, "learning_rate": 0.00017181835469889812, "loss": 0.9368, "step": 2900 }, { "epoch": 0.25, "grad_norm": 0.663776159286499, "learning_rate": 0.00017172595795993413, "loss": 0.9061, "step": 2905 }, { "epoch": 0.25, "grad_norm": 0.6138433814048767, "learning_rate": 0.0001716334349337579, "loss": 0.8793, "step": 2910 }, { "epoch": 0.25, "grad_norm": 0.7783580422401428, "learning_rate": 0.00017154078578327387, "loss": 0.9532, "step": 2915 }, { "epoch": 0.25, "grad_norm": 0.6159131526947021, "learning_rate": 0.00017144801067160844, "loss": 0.96, "step": 2920 }, { "epoch": 0.25, "grad_norm": 0.6060130596160889, "learning_rate": 0.00017135510976211, "loss": 0.8548, "step": 2925 }, { "epoch": 0.25, "grad_norm": 0.6942853927612305, "learning_rate": 0.0001712620832183482, "loss": 0.9283, "step": 2930 }, { "epoch": 0.25, "grad_norm": 0.489607036113739, "learning_rate": 0.00017116893120411398, "loss": 0.9536, "step": 2935 }, { "epoch": 0.25, "grad_norm": 0.6349008679389954, "learning_rate": 0.00017107565388341925, "loss": 0.8852, "step": 2940 }, { "epoch": 0.25, "grad_norm": 0.6873804926872253, "learning_rate": 0.0001709822514204965, "loss": 0.915, "step": 2945 }, { "epoch": 0.25, "grad_norm": 0.5652430653572083, "learning_rate": 0.00017088872397979854, "loss": 0.9261, "step": 2950 }, { "epoch": 0.25, "grad_norm": 0.6061018109321594, "learning_rate": 0.00017079507172599828, "loss": 0.9668, "step": 2955 }, { "epoch": 0.25, "grad_norm": 0.5947713851928711, "learning_rate": 0.00017070129482398832, "loss": 0.9097, "step": 2960 }, { "epoch": 0.25, "grad_norm": 0.6620383858680725, "learning_rate": 0.00017060739343888076, "loss": 0.9397, "step": 2965 }, { "epoch": 0.25, "grad_norm": 0.7726333141326904, "learning_rate": 0.00017051336773600686, "loss": 0.9941, "step": 2970 }, { "epoch": 0.25, "grad_norm": 0.6241410970687866, "learning_rate": 0.00017041921788091684, "loss": 0.9903, "step": 2975 }, { "epoch": 0.25, "grad_norm": 0.6833446025848389, "learning_rate": 0.0001703249440393794, "loss": 1.0074, "step": 2980 }, { "epoch": 0.25, "grad_norm": 0.6586493253707886, "learning_rate": 0.0001702305463773816, "loss": 0.8399, "step": 2985 }, { "epoch": 0.25, "grad_norm": 0.6630938053131104, "learning_rate": 0.00017013602506112853, "loss": 0.8064, "step": 2990 }, { "epoch": 0.25, "grad_norm": 0.6912885904312134, "learning_rate": 0.00017004138025704298, "loss": 0.957, "step": 2995 }, { "epoch": 0.25, "grad_norm": 0.6541727185249329, "learning_rate": 0.00016994661213176512, "loss": 0.9641, "step": 3000 }, { "epoch": 0.25, "grad_norm": 0.6963586211204529, "learning_rate": 0.00016985172085215235, "loss": 0.9264, "step": 3005 }, { "epoch": 0.25, "grad_norm": 0.6190909147262573, "learning_rate": 0.00016975670658527875, "loss": 0.9477, "step": 3010 }, { "epoch": 0.25, "grad_norm": 0.7124261260032654, "learning_rate": 0.00016966156949843513, "loss": 0.8615, "step": 3015 }, { "epoch": 0.26, "grad_norm": 0.5455434918403625, "learning_rate": 0.0001695663097591284, "loss": 0.8761, "step": 3020 }, { "epoch": 0.26, "grad_norm": 0.6497594118118286, "learning_rate": 0.00016947092753508147, "loss": 0.9232, "step": 3025 }, { "epoch": 0.26, "grad_norm": 0.6465092301368713, "learning_rate": 0.00016937542299423294, "loss": 0.9114, "step": 3030 }, { "epoch": 0.26, "grad_norm": 0.5808166861534119, "learning_rate": 0.00016927979630473677, "loss": 0.9503, "step": 3035 }, { "epoch": 0.26, "grad_norm": 0.5648312568664551, "learning_rate": 0.0001691840476349619, "loss": 0.9362, "step": 3040 }, { "epoch": 0.26, "grad_norm": 0.714506983757019, "learning_rate": 0.00016908817715349217, "loss": 0.9906, "step": 3045 }, { "epoch": 0.26, "grad_norm": 0.5561381578445435, "learning_rate": 0.00016899218502912578, "loss": 0.8099, "step": 3050 }, { "epoch": 0.26, "grad_norm": 0.5070639848709106, "learning_rate": 0.00016889607143087516, "loss": 1.008, "step": 3055 }, { "epoch": 0.26, "grad_norm": 0.6084549427032471, "learning_rate": 0.0001687998365279666, "loss": 1.0026, "step": 3060 }, { "epoch": 0.26, "grad_norm": 0.633202850818634, "learning_rate": 0.00016870348048984, "loss": 0.9752, "step": 3065 }, { "epoch": 0.26, "grad_norm": 0.7638620138168335, "learning_rate": 0.0001686070034861485, "loss": 0.7598, "step": 3070 }, { "epoch": 0.26, "grad_norm": 0.6333314776420593, "learning_rate": 0.0001685104056867583, "loss": 0.9456, "step": 3075 }, { "epoch": 0.26, "grad_norm": 0.6526273488998413, "learning_rate": 0.00016841368726174812, "loss": 0.9099, "step": 3080 }, { "epoch": 0.26, "grad_norm": 0.6543826460838318, "learning_rate": 0.00016831684838140927, "loss": 0.9487, "step": 3085 }, { "epoch": 0.26, "grad_norm": 0.5424016714096069, "learning_rate": 0.00016821988921624499, "loss": 0.8815, "step": 3090 }, { "epoch": 0.26, "grad_norm": 0.6248295307159424, "learning_rate": 0.00016812280993697037, "loss": 0.9128, "step": 3095 }, { "epoch": 0.26, "grad_norm": 0.6224409341812134, "learning_rate": 0.000168025610714512, "loss": 0.9597, "step": 3100 }, { "epoch": 0.26, "grad_norm": 0.6363341212272644, "learning_rate": 0.0001679282917200076, "loss": 0.9314, "step": 3105 }, { "epoch": 0.26, "grad_norm": 0.6113150119781494, "learning_rate": 0.00016783085312480585, "loss": 1.0532, "step": 3110 }, { "epoch": 0.26, "grad_norm": 0.6174533367156982, "learning_rate": 0.00016773329510046586, "loss": 1.0273, "step": 3115 }, { "epoch": 0.26, "grad_norm": 0.6600282788276672, "learning_rate": 0.0001676356178187572, "loss": 0.958, "step": 3120 }, { "epoch": 0.26, "grad_norm": 0.5333192944526672, "learning_rate": 0.0001675378214516593, "loss": 0.9824, "step": 3125 }, { "epoch": 0.26, "grad_norm": 0.745349109172821, "learning_rate": 0.00016743990617136128, "loss": 1.0886, "step": 3130 }, { "epoch": 0.26, "grad_norm": 0.6083282232284546, "learning_rate": 0.00016734187215026167, "loss": 1.0196, "step": 3135 }, { "epoch": 0.27, "grad_norm": 0.5257988572120667, "learning_rate": 0.000167243719560968, "loss": 0.813, "step": 3140 }, { "epoch": 0.27, "grad_norm": 0.6657525300979614, "learning_rate": 0.00016714544857629666, "loss": 0.9309, "step": 3145 }, { "epoch": 0.27, "grad_norm": 0.7205111980438232, "learning_rate": 0.00016704705936927244, "loss": 0.8655, "step": 3150 }, { "epoch": 0.27, "grad_norm": 1.0305330753326416, "learning_rate": 0.00016694855211312818, "loss": 1.0213, "step": 3155 }, { "epoch": 0.27, "grad_norm": 0.7004939317703247, "learning_rate": 0.00016684992698130476, "loss": 0.9316, "step": 3160 }, { "epoch": 0.27, "grad_norm": 0.663589358329773, "learning_rate": 0.00016675118414745052, "loss": 0.8924, "step": 3165 }, { "epoch": 0.27, "grad_norm": 0.5968930125236511, "learning_rate": 0.000166652323785421, "loss": 0.926, "step": 3170 }, { "epoch": 0.27, "grad_norm": 0.6623892784118652, "learning_rate": 0.00016655334606927865, "loss": 0.937, "step": 3175 }, { "epoch": 0.27, "grad_norm": 0.7068220376968384, "learning_rate": 0.00016645425117329268, "loss": 1.0194, "step": 3180 }, { "epoch": 0.27, "grad_norm": 0.5999795794487, "learning_rate": 0.0001663550392719385, "loss": 0.9363, "step": 3185 }, { "epoch": 0.27, "grad_norm": 0.5697855949401855, "learning_rate": 0.00016625571053989754, "loss": 0.8442, "step": 3190 }, { "epoch": 0.27, "grad_norm": 0.5499809384346008, "learning_rate": 0.00016615626515205695, "loss": 0.8832, "step": 3195 }, { "epoch": 0.27, "grad_norm": 0.780519425868988, "learning_rate": 0.00016605670328350932, "loss": 0.9941, "step": 3200 }, { "epoch": 0.27, "grad_norm": 0.710145890712738, "learning_rate": 0.00016595702510955227, "loss": 1.0341, "step": 3205 }, { "epoch": 0.27, "grad_norm": 0.7597246766090393, "learning_rate": 0.00016585723080568817, "loss": 0.8778, "step": 3210 }, { "epoch": 0.27, "grad_norm": 0.662965714931488, "learning_rate": 0.00016575732054762397, "loss": 0.8733, "step": 3215 }, { "epoch": 0.27, "grad_norm": 0.6497308015823364, "learning_rate": 0.00016565729451127067, "loss": 0.8922, "step": 3220 }, { "epoch": 0.27, "grad_norm": 0.7098811864852905, "learning_rate": 0.00016555715287274318, "loss": 0.8309, "step": 3225 }, { "epoch": 0.27, "grad_norm": 0.5802999138832092, "learning_rate": 0.00016545689580835994, "loss": 0.9676, "step": 3230 }, { "epoch": 0.27, "grad_norm": 0.6192752718925476, "learning_rate": 0.00016535652349464254, "loss": 0.8422, "step": 3235 }, { "epoch": 0.27, "grad_norm": 0.604736864566803, "learning_rate": 0.00016525603610831566, "loss": 0.8476, "step": 3240 }, { "epoch": 0.27, "grad_norm": 0.6643623113632202, "learning_rate": 0.0001651554338263064, "loss": 1.0216, "step": 3245 }, { "epoch": 0.27, "grad_norm": 0.7391632199287415, "learning_rate": 0.0001650547168257443, "loss": 0.864, "step": 3250 }, { "epoch": 0.27, "grad_norm": 0.7031934857368469, "learning_rate": 0.0001649538852839608, "loss": 0.9264, "step": 3255 }, { "epoch": 0.28, "grad_norm": 0.7183375954627991, "learning_rate": 0.00016485293937848903, "loss": 0.9041, "step": 3260 }, { "epoch": 0.28, "grad_norm": 0.7928013801574707, "learning_rate": 0.0001647518792870635, "loss": 1.0241, "step": 3265 }, { "epoch": 0.28, "grad_norm": 0.7501674294471741, "learning_rate": 0.00016465070518761977, "loss": 0.9598, "step": 3270 }, { "epoch": 0.28, "grad_norm": 0.7421630024909973, "learning_rate": 0.00016454941725829405, "loss": 0.9176, "step": 3275 }, { "epoch": 0.28, "grad_norm": 0.6832653284072876, "learning_rate": 0.0001644480156774231, "loss": 0.8812, "step": 3280 }, { "epoch": 0.28, "grad_norm": 0.6840265989303589, "learning_rate": 0.0001643465006235437, "loss": 0.9848, "step": 3285 }, { "epoch": 0.28, "grad_norm": 0.6492328643798828, "learning_rate": 0.00016424487227539243, "loss": 0.9241, "step": 3290 }, { "epoch": 0.28, "grad_norm": 0.5492807626724243, "learning_rate": 0.00016414313081190537, "loss": 0.8956, "step": 3295 }, { "epoch": 0.28, "grad_norm": 0.6606726050376892, "learning_rate": 0.00016404127641221774, "loss": 0.8719, "step": 3300 }, { "epoch": 0.28, "grad_norm": 0.6319718360900879, "learning_rate": 0.00016393930925566358, "loss": 0.9931, "step": 3305 }, { "epoch": 0.28, "grad_norm": 0.6968238949775696, "learning_rate": 0.00016383722952177557, "loss": 0.9831, "step": 3310 }, { "epoch": 0.28, "grad_norm": 0.7028079032897949, "learning_rate": 0.00016373503739028448, "loss": 1.0404, "step": 3315 }, { "epoch": 0.28, "grad_norm": 0.7885096073150635, "learning_rate": 0.00016363273304111902, "loss": 0.9998, "step": 3320 }, { "epoch": 0.28, "grad_norm": 0.7284576892852783, "learning_rate": 0.00016353031665440547, "loss": 0.8875, "step": 3325 }, { "epoch": 0.28, "grad_norm": 0.9729665517807007, "learning_rate": 0.00016342778841046745, "loss": 0.9376, "step": 3330 }, { "epoch": 0.28, "grad_norm": 0.4611142575740814, "learning_rate": 0.00016332514848982542, "loss": 0.8302, "step": 3335 }, { "epoch": 0.28, "grad_norm": 0.6475715041160583, "learning_rate": 0.00016322239707319648, "loss": 0.9361, "step": 3340 }, { "epoch": 0.28, "grad_norm": 0.771458625793457, "learning_rate": 0.00016311953434149413, "loss": 0.8875, "step": 3345 }, { "epoch": 0.28, "grad_norm": 0.6289612054824829, "learning_rate": 0.0001630165604758278, "loss": 0.8626, "step": 3350 }, { "epoch": 0.28, "grad_norm": 0.659892737865448, "learning_rate": 0.00016291347565750255, "loss": 0.8926, "step": 3355 }, { "epoch": 0.28, "grad_norm": 0.5767649412155151, "learning_rate": 0.00016281028006801887, "loss": 0.9045, "step": 3360 }, { "epoch": 0.28, "grad_norm": 0.5347493886947632, "learning_rate": 0.0001627069738890723, "loss": 0.83, "step": 3365 }, { "epoch": 0.28, "grad_norm": 0.6171711087226868, "learning_rate": 0.00016260355730255297, "loss": 0.8179, "step": 3370 }, { "epoch": 0.29, "grad_norm": 0.7306368350982666, "learning_rate": 0.0001625000304905455, "loss": 0.9047, "step": 3375 }, { "epoch": 0.29, "grad_norm": 0.5920233130455017, "learning_rate": 0.00016239639363532858, "loss": 0.9198, "step": 3380 }, { "epoch": 0.29, "grad_norm": 0.6208842992782593, "learning_rate": 0.00016229264691937462, "loss": 0.9004, "step": 3385 }, { "epoch": 0.29, "grad_norm": 0.5397033095359802, "learning_rate": 0.00016218879052534949, "loss": 1.021, "step": 3390 }, { "epoch": 0.29, "grad_norm": 0.6394155025482178, "learning_rate": 0.0001620848246361122, "loss": 0.9549, "step": 3395 }, { "epoch": 0.29, "grad_norm": 0.6426846981048584, "learning_rate": 0.0001619807494347144, "loss": 0.8002, "step": 3400 }, { "epoch": 0.29, "grad_norm": 0.6755315065383911, "learning_rate": 0.0001618765651044004, "loss": 0.8586, "step": 3405 }, { "epoch": 0.29, "grad_norm": 0.6798297762870789, "learning_rate": 0.00016177227182860647, "loss": 0.8918, "step": 3410 }, { "epoch": 0.29, "grad_norm": 0.6921480298042297, "learning_rate": 0.00016166786979096088, "loss": 0.8891, "step": 3415 }, { "epoch": 0.29, "grad_norm": 0.521027684211731, "learning_rate": 0.00016156335917528325, "loss": 0.8009, "step": 3420 }, { "epoch": 0.29, "grad_norm": 0.6095343232154846, "learning_rate": 0.00016145874016558443, "loss": 0.9309, "step": 3425 }, { "epoch": 0.29, "grad_norm": 0.6683770418167114, "learning_rate": 0.00016135401294606618, "loss": 0.9387, "step": 3430 }, { "epoch": 0.29, "grad_norm": 0.9191362261772156, "learning_rate": 0.0001612491777011206, "loss": 0.7803, "step": 3435 }, { "epoch": 0.29, "grad_norm": 0.7780088186264038, "learning_rate": 0.00016114423461533026, "loss": 0.9855, "step": 3440 }, { "epoch": 0.29, "grad_norm": 0.5860457420349121, "learning_rate": 0.00016103918387346732, "loss": 0.8831, "step": 3445 }, { "epoch": 0.29, "grad_norm": 0.7714948654174805, "learning_rate": 0.00016093402566049367, "loss": 1.0446, "step": 3450 }, { "epoch": 0.29, "grad_norm": 0.6364210844039917, "learning_rate": 0.0001608287601615604, "loss": 0.8378, "step": 3455 }, { "epoch": 0.29, "grad_norm": 0.6578925251960754, "learning_rate": 0.00016072338756200746, "loss": 0.9447, "step": 3460 }, { "epoch": 0.29, "grad_norm": 0.5990071296691895, "learning_rate": 0.00016061790804736332, "loss": 0.9604, "step": 3465 }, { "epoch": 0.29, "grad_norm": 0.6787405610084534, "learning_rate": 0.00016051232180334485, "loss": 0.8815, "step": 3470 }, { "epoch": 0.29, "grad_norm": 0.5929312705993652, "learning_rate": 0.00016040662901585674, "loss": 0.917, "step": 3475 }, { "epoch": 0.29, "grad_norm": 0.6015139818191528, "learning_rate": 0.00016030082987099123, "loss": 0.948, "step": 3480 }, { "epoch": 0.29, "grad_norm": 0.6716448068618774, "learning_rate": 0.00016019492455502787, "loss": 0.9984, "step": 3485 }, { "epoch": 0.29, "grad_norm": 0.6419260501861572, "learning_rate": 0.00016008891325443317, "loss": 0.9077, "step": 3490 }, { "epoch": 0.3, "grad_norm": 0.7909138202667236, "learning_rate": 0.0001599827961558602, "loss": 0.9885, "step": 3495 }, { "epoch": 0.3, "grad_norm": 0.889897346496582, "learning_rate": 0.00015987657344614835, "loss": 0.8763, "step": 3500 }, { "epoch": 0.3, "grad_norm": 0.6767850518226624, "learning_rate": 0.0001597702453123229, "loss": 0.935, "step": 3505 }, { "epoch": 0.3, "grad_norm": 0.6611208915710449, "learning_rate": 0.00015966381194159482, "loss": 0.9498, "step": 3510 }, { "epoch": 0.3, "grad_norm": 0.7573800683021545, "learning_rate": 0.0001595572735213603, "loss": 0.8646, "step": 3515 }, { "epoch": 0.3, "grad_norm": 0.6432080268859863, "learning_rate": 0.00015945063023920056, "loss": 1.0834, "step": 3520 }, { "epoch": 0.3, "grad_norm": 0.6003210544586182, "learning_rate": 0.00015934388228288138, "loss": 0.8283, "step": 3525 }, { "epoch": 0.3, "grad_norm": 0.7602962255477905, "learning_rate": 0.00015923702984035288, "loss": 0.913, "step": 3530 }, { "epoch": 0.3, "grad_norm": 0.6201465725898743, "learning_rate": 0.00015913007309974916, "loss": 0.9628, "step": 3535 }, { "epoch": 0.3, "grad_norm": 0.6988320350646973, "learning_rate": 0.00015902301224938792, "loss": 0.7929, "step": 3540 }, { "epoch": 0.3, "grad_norm": 0.7519627809524536, "learning_rate": 0.00015891584747777018, "loss": 0.966, "step": 3545 }, { "epoch": 0.3, "grad_norm": 0.7300755977630615, "learning_rate": 0.00015880857897357994, "loss": 0.9138, "step": 3550 }, { "epoch": 0.3, "grad_norm": 0.682937502861023, "learning_rate": 0.00015870120692568383, "loss": 0.9415, "step": 3555 }, { "epoch": 0.3, "grad_norm": 0.6300337314605713, "learning_rate": 0.00015859373152313078, "loss": 0.957, "step": 3560 }, { "epoch": 0.3, "grad_norm": 0.6108503937721252, "learning_rate": 0.00015848615295515175, "loss": 0.8324, "step": 3565 }, { "epoch": 0.3, "grad_norm": 0.7553655505180359, "learning_rate": 0.00015837847141115927, "loss": 0.8505, "step": 3570 }, { "epoch": 0.3, "grad_norm": 0.6497313380241394, "learning_rate": 0.00015827068708074724, "loss": 0.8825, "step": 3575 }, { "epoch": 0.3, "grad_norm": 0.8078399896621704, "learning_rate": 0.00015816280015369045, "loss": 0.9456, "step": 3580 }, { "epoch": 0.3, "grad_norm": 0.6483976244926453, "learning_rate": 0.00015805481081994444, "loss": 1.0213, "step": 3585 }, { "epoch": 0.3, "grad_norm": 0.6289666891098022, "learning_rate": 0.00015794671926964497, "loss": 0.8518, "step": 3590 }, { "epoch": 0.3, "grad_norm": 0.577955424785614, "learning_rate": 0.00015783852569310785, "loss": 0.8843, "step": 3595 }, { "epoch": 0.3, "grad_norm": 0.5437608361244202, "learning_rate": 0.00015773023028082842, "loss": 0.8382, "step": 3600 }, { "epoch": 0.3, "grad_norm": 0.6673296093940735, "learning_rate": 0.00015762183322348144, "loss": 0.9322, "step": 3605 }, { "epoch": 0.3, "grad_norm": 0.7770671844482422, "learning_rate": 0.0001575133347119205, "loss": 1.0315, "step": 3610 }, { "epoch": 0.31, "grad_norm": 0.7004712820053101, "learning_rate": 0.00015740473493717802, "loss": 0.8411, "step": 3615 }, { "epoch": 0.31, "grad_norm": 0.595714807510376, "learning_rate": 0.00015729603409046447, "loss": 0.9721, "step": 3620 }, { "epoch": 0.31, "grad_norm": 0.5591194033622742, "learning_rate": 0.00015718723236316846, "loss": 0.9229, "step": 3625 }, { "epoch": 0.31, "grad_norm": 0.7704307436943054, "learning_rate": 0.0001570783299468562, "loss": 0.9729, "step": 3630 }, { "epoch": 0.31, "grad_norm": 0.535123884677887, "learning_rate": 0.000156969327033271, "loss": 0.8177, "step": 3635 }, { "epoch": 0.31, "grad_norm": 0.59185791015625, "learning_rate": 0.00015686022381433337, "loss": 0.8999, "step": 3640 }, { "epoch": 0.31, "grad_norm": 0.5567179322242737, "learning_rate": 0.00015675102048214027, "loss": 0.8699, "step": 3645 }, { "epoch": 0.31, "grad_norm": 0.6161515712738037, "learning_rate": 0.000156641717228965, "loss": 0.9498, "step": 3650 }, { "epoch": 0.31, "grad_norm": 0.8555355668067932, "learning_rate": 0.00015653231424725671, "loss": 0.9246, "step": 3655 }, { "epoch": 0.31, "grad_norm": 0.5881509780883789, "learning_rate": 0.00015642281172964024, "loss": 0.8512, "step": 3660 }, { "epoch": 0.31, "grad_norm": 0.5532040596008301, "learning_rate": 0.0001563132098689156, "loss": 0.8884, "step": 3665 }, { "epoch": 0.31, "grad_norm": 0.6380072832107544, "learning_rate": 0.00015620350885805774, "loss": 0.8272, "step": 3670 }, { "epoch": 0.31, "grad_norm": 0.6400458812713623, "learning_rate": 0.00015609370889021617, "loss": 0.9215, "step": 3675 }, { "epoch": 0.31, "grad_norm": 0.6543514132499695, "learning_rate": 0.00015598381015871472, "loss": 0.8601, "step": 3680 }, { "epoch": 0.31, "grad_norm": 0.5944799184799194, "learning_rate": 0.000155873812857051, "loss": 0.8123, "step": 3685 }, { "epoch": 0.31, "grad_norm": 0.6395347714424133, "learning_rate": 0.0001557637171788962, "loss": 0.8114, "step": 3690 }, { "epoch": 0.31, "grad_norm": 0.6879188418388367, "learning_rate": 0.00015565352331809473, "loss": 0.8653, "step": 3695 }, { "epoch": 0.31, "grad_norm": 0.6839675307273865, "learning_rate": 0.0001555432314686639, "loss": 0.868, "step": 3700 }, { "epoch": 0.31, "grad_norm": 0.6724333167076111, "learning_rate": 0.00015543284182479352, "loss": 0.9477, "step": 3705 }, { "epoch": 0.31, "grad_norm": 0.5998671054840088, "learning_rate": 0.00015532235458084554, "loss": 0.8883, "step": 3710 }, { "epoch": 0.31, "grad_norm": 0.7111408710479736, "learning_rate": 0.00015521176993135388, "loss": 0.9072, "step": 3715 }, { "epoch": 0.31, "grad_norm": 0.6554681658744812, "learning_rate": 0.00015510108807102383, "loss": 0.9257, "step": 3720 }, { "epoch": 0.31, "grad_norm": 0.6711482405662537, "learning_rate": 0.00015499030919473186, "loss": 0.9075, "step": 3725 }, { "epoch": 0.32, "grad_norm": 0.6953566670417786, "learning_rate": 0.00015487943349752533, "loss": 0.9182, "step": 3730 }, { "epoch": 0.32, "grad_norm": 0.5130366086959839, "learning_rate": 0.00015476846117462204, "loss": 0.9534, "step": 3735 }, { "epoch": 0.32, "grad_norm": 0.7102193236351013, "learning_rate": 0.00015465739242140987, "loss": 0.8824, "step": 3740 }, { "epoch": 0.32, "grad_norm": 0.6412502527236938, "learning_rate": 0.0001545462274334465, "loss": 0.9035, "step": 3745 }, { "epoch": 0.32, "grad_norm": 0.5918426513671875, "learning_rate": 0.00015443496640645915, "loss": 0.9179, "step": 3750 }, { "epoch": 0.32, "grad_norm": 0.6007682681083679, "learning_rate": 0.00015432360953634397, "loss": 0.9842, "step": 3755 }, { "epoch": 0.32, "grad_norm": 0.7916061878204346, "learning_rate": 0.00015421215701916596, "loss": 0.9593, "step": 3760 }, { "epoch": 0.32, "grad_norm": 0.8170971870422363, "learning_rate": 0.00015410060905115852, "loss": 0.8976, "step": 3765 }, { "epoch": 0.32, "grad_norm": 0.6541162729263306, "learning_rate": 0.0001539889658287231, "loss": 0.929, "step": 3770 }, { "epoch": 0.32, "grad_norm": 0.5680133104324341, "learning_rate": 0.00015387722754842885, "loss": 0.9046, "step": 3775 }, { "epoch": 0.32, "grad_norm": 0.7456454634666443, "learning_rate": 0.0001537653944070123, "loss": 0.9795, "step": 3780 }, { "epoch": 0.32, "grad_norm": 0.571003794670105, "learning_rate": 0.00015365346660137702, "loss": 0.9768, "step": 3785 }, { "epoch": 0.32, "grad_norm": 0.6417176127433777, "learning_rate": 0.0001535414443285932, "loss": 0.8871, "step": 3790 }, { "epoch": 0.32, "grad_norm": 0.7255185842514038, "learning_rate": 0.0001534293277858974, "loss": 0.9026, "step": 3795 }, { "epoch": 0.32, "grad_norm": 0.6499106287956238, "learning_rate": 0.00015331711717069216, "loss": 0.9168, "step": 3800 }, { "epoch": 0.32, "grad_norm": 0.6854183077812195, "learning_rate": 0.0001532048126805456, "loss": 1.0304, "step": 3805 }, { "epoch": 0.32, "grad_norm": 0.618883490562439, "learning_rate": 0.00015309241451319126, "loss": 0.8583, "step": 3810 }, { "epoch": 0.32, "grad_norm": 0.613739550113678, "learning_rate": 0.00015297992286652745, "loss": 0.932, "step": 3815 }, { "epoch": 0.32, "grad_norm": 0.8370459675788879, "learning_rate": 0.0001528673379386172, "loss": 1.0117, "step": 3820 }, { "epoch": 0.32, "grad_norm": 0.5930368304252625, "learning_rate": 0.0001527546599276876, "loss": 0.9328, "step": 3825 }, { "epoch": 0.32, "grad_norm": 0.5250547528266907, "learning_rate": 0.00015264188903212991, "loss": 0.8252, "step": 3830 }, { "epoch": 0.32, "grad_norm": 0.7919967174530029, "learning_rate": 0.00015252902545049866, "loss": 0.9526, "step": 3835 }, { "epoch": 0.32, "grad_norm": 0.7373855710029602, "learning_rate": 0.00015241606938151177, "loss": 0.8234, "step": 3840 }, { "epoch": 0.32, "grad_norm": 0.8167823553085327, "learning_rate": 0.00015230302102404986, "loss": 0.9571, "step": 3845 }, { "epoch": 0.33, "grad_norm": 0.6310274004936218, "learning_rate": 0.0001521898805771561, "loss": 1.0162, "step": 3850 }, { "epoch": 0.33, "grad_norm": 0.7180379629135132, "learning_rate": 0.0001520766482400358, "loss": 0.8484, "step": 3855 }, { "epoch": 0.33, "grad_norm": 0.5930597186088562, "learning_rate": 0.0001519633242120561, "loss": 0.8296, "step": 3860 }, { "epoch": 0.33, "grad_norm": 0.6432483792304993, "learning_rate": 0.0001518499086927455, "loss": 0.659, "step": 3865 }, { "epoch": 0.33, "grad_norm": 0.611929714679718, "learning_rate": 0.00015173640188179363, "loss": 0.8142, "step": 3870 }, { "epoch": 0.33, "grad_norm": 0.6128609776496887, "learning_rate": 0.00015162280397905086, "loss": 0.9515, "step": 3875 }, { "epoch": 0.33, "grad_norm": 0.6003267765045166, "learning_rate": 0.00015150911518452793, "loss": 0.9143, "step": 3880 }, { "epoch": 0.33, "grad_norm": 0.6806387305259705, "learning_rate": 0.00015139533569839565, "loss": 0.8744, "step": 3885 }, { "epoch": 0.33, "grad_norm": 0.7915828824043274, "learning_rate": 0.00015128146572098442, "loss": 0.9343, "step": 3890 }, { "epoch": 0.33, "grad_norm": 0.5177380442619324, "learning_rate": 0.00015116750545278408, "loss": 0.876, "step": 3895 }, { "epoch": 0.33, "grad_norm": 0.7162129878997803, "learning_rate": 0.00015105345509444336, "loss": 0.8494, "step": 3900 }, { "epoch": 0.33, "grad_norm": 0.7143672108650208, "learning_rate": 0.00015093931484676967, "loss": 0.8876, "step": 3905 }, { "epoch": 0.33, "grad_norm": 0.6772016286849976, "learning_rate": 0.00015082508491072864, "loss": 0.9315, "step": 3910 }, { "epoch": 0.33, "grad_norm": 0.6652882099151611, "learning_rate": 0.00015071076548744386, "loss": 0.9208, "step": 3915 }, { "epoch": 0.33, "grad_norm": 0.743068277835846, "learning_rate": 0.00015059635677819636, "loss": 1.0118, "step": 3920 }, { "epoch": 0.33, "grad_norm": 0.6887498497962952, "learning_rate": 0.00015048185898442463, "loss": 0.8486, "step": 3925 }, { "epoch": 0.33, "grad_norm": 0.8008378744125366, "learning_rate": 0.00015036727230772367, "loss": 0.9585, "step": 3930 }, { "epoch": 0.33, "grad_norm": 0.7885955572128296, "learning_rate": 0.00015025259694984524, "loss": 0.9206, "step": 3935 }, { "epoch": 0.33, "grad_norm": 0.5348213315010071, "learning_rate": 0.0001501378331126972, "loss": 0.9397, "step": 3940 }, { "epoch": 0.33, "grad_norm": 0.6782258152961731, "learning_rate": 0.00015002298099834303, "loss": 0.8909, "step": 3945 }, { "epoch": 0.33, "grad_norm": 0.7077736854553223, "learning_rate": 0.00014990804080900185, "loss": 0.9807, "step": 3950 }, { "epoch": 0.33, "grad_norm": 0.7034088969230652, "learning_rate": 0.0001497930127470477, "loss": 1.0362, "step": 3955 }, { "epoch": 0.33, "grad_norm": 0.5973007082939148, "learning_rate": 0.00014967789701500944, "loss": 0.9119, "step": 3960 }, { "epoch": 0.33, "grad_norm": 0.7092791199684143, "learning_rate": 0.00014956269381557024, "loss": 0.8564, "step": 3965 }, { "epoch": 0.34, "grad_norm": 0.5920495986938477, "learning_rate": 0.00014944740335156724, "loss": 0.9436, "step": 3970 }, { "epoch": 0.34, "grad_norm": 0.7596008777618408, "learning_rate": 0.0001493320258259913, "loss": 1.0043, "step": 3975 }, { "epoch": 0.34, "grad_norm": 0.6508396863937378, "learning_rate": 0.00014921656144198652, "loss": 0.8831, "step": 3980 }, { "epoch": 0.34, "grad_norm": 0.5840360522270203, "learning_rate": 0.00014910101040284992, "loss": 0.9207, "step": 3985 }, { "epoch": 0.34, "grad_norm": 0.7185476422309875, "learning_rate": 0.00014898537291203117, "loss": 0.8468, "step": 3990 }, { "epoch": 0.34, "grad_norm": 0.6996331810951233, "learning_rate": 0.00014886964917313207, "loss": 1.0592, "step": 3995 }, { "epoch": 0.34, "grad_norm": 0.599885106086731, "learning_rate": 0.00014875383938990627, "loss": 0.8687, "step": 4000 }, { "epoch": 0.34, "grad_norm": 0.7138045430183411, "learning_rate": 0.00014863794376625904, "loss": 0.8685, "step": 4005 }, { "epoch": 0.34, "grad_norm": 0.6436852216720581, "learning_rate": 0.00014852196250624662, "loss": 0.9592, "step": 4010 }, { "epoch": 0.34, "grad_norm": 0.6193510293960571, "learning_rate": 0.00014840589581407616, "loss": 0.928, "step": 4015 }, { "epoch": 0.34, "grad_norm": 0.7025611996650696, "learning_rate": 0.00014828974389410516, "loss": 0.9371, "step": 4020 }, { "epoch": 0.34, "grad_norm": 0.7858377695083618, "learning_rate": 0.0001481735069508412, "loss": 0.8292, "step": 4025 }, { "epoch": 0.34, "grad_norm": 0.6432536840438843, "learning_rate": 0.00014805718518894157, "loss": 0.8523, "step": 4030 }, { "epoch": 0.34, "grad_norm": 0.6322362422943115, "learning_rate": 0.00014794077881321292, "loss": 0.9446, "step": 4035 }, { "epoch": 0.34, "grad_norm": 0.6036275625228882, "learning_rate": 0.0001478242880286108, "loss": 0.9576, "step": 4040 }, { "epoch": 0.34, "grad_norm": 0.611020565032959, "learning_rate": 0.00014770771304023942, "loss": 0.8126, "step": 4045 }, { "epoch": 0.34, "grad_norm": 0.5951793789863586, "learning_rate": 0.00014759105405335132, "loss": 0.8255, "step": 4050 }, { "epoch": 0.34, "grad_norm": 0.6406493782997131, "learning_rate": 0.00014747431127334678, "loss": 0.9738, "step": 4055 }, { "epoch": 0.34, "grad_norm": 0.7525074481964111, "learning_rate": 0.0001473574849057738, "loss": 1.0128, "step": 4060 }, { "epoch": 0.34, "grad_norm": 0.5915870070457458, "learning_rate": 0.00014724057515632738, "loss": 0.9119, "step": 4065 }, { "epoch": 0.34, "grad_norm": 0.8320327997207642, "learning_rate": 0.00014712358223084942, "loss": 0.8072, "step": 4070 }, { "epoch": 0.34, "grad_norm": 0.5680391192436218, "learning_rate": 0.00014700650633532827, "loss": 0.8269, "step": 4075 }, { "epoch": 0.34, "grad_norm": 0.7768528461456299, "learning_rate": 0.00014688934767589833, "loss": 0.9261, "step": 4080 }, { "epoch": 0.35, "grad_norm": 0.6114219427108765, "learning_rate": 0.00014677210645883977, "loss": 0.9493, "step": 4085 }, { "epoch": 0.35, "grad_norm": 0.5603370070457458, "learning_rate": 0.00014665478289057805, "loss": 0.876, "step": 4090 }, { "epoch": 0.35, "grad_norm": 0.718364417552948, "learning_rate": 0.00014653737717768367, "loss": 0.9818, "step": 4095 }, { "epoch": 0.35, "grad_norm": 0.6249436140060425, "learning_rate": 0.00014641988952687177, "loss": 0.9, "step": 4100 }, { "epoch": 0.35, "grad_norm": 0.5478988885879517, "learning_rate": 0.0001463023201450017, "loss": 0.8955, "step": 4105 }, { "epoch": 0.35, "grad_norm": 0.6322693824768066, "learning_rate": 0.00014618466923907678, "loss": 0.996, "step": 4110 }, { "epoch": 0.35, "grad_norm": 0.5370429754257202, "learning_rate": 0.00014606693701624385, "loss": 0.8568, "step": 4115 }, { "epoch": 0.35, "grad_norm": 0.5983506441116333, "learning_rate": 0.0001459491236837929, "loss": 0.8785, "step": 4120 }, { "epoch": 0.35, "grad_norm": 0.490751177072525, "learning_rate": 0.00014583122944915672, "loss": 0.9129, "step": 4125 }, { "epoch": 0.35, "grad_norm": 0.6703299283981323, "learning_rate": 0.00014571325451991066, "loss": 0.9941, "step": 4130 }, { "epoch": 0.35, "grad_norm": 0.6486307978630066, "learning_rate": 0.00014559519910377193, "loss": 0.794, "step": 4135 }, { "epoch": 0.35, "grad_norm": 0.6234001517295837, "learning_rate": 0.0001454770634085997, "loss": 0.9608, "step": 4140 }, { "epoch": 0.35, "grad_norm": 0.6722338199615479, "learning_rate": 0.00014535884764239424, "loss": 1.0127, "step": 4145 }, { "epoch": 0.35, "grad_norm": 0.7016012668609619, "learning_rate": 0.00014524055201329704, "loss": 0.9298, "step": 4150 }, { "epoch": 0.35, "grad_norm": 0.536430835723877, "learning_rate": 0.00014512217672959003, "loss": 0.895, "step": 4155 }, { "epoch": 0.35, "grad_norm": 0.6761835217475891, "learning_rate": 0.00014500372199969546, "loss": 0.9557, "step": 4160 }, { "epoch": 0.35, "grad_norm": 0.6276063323020935, "learning_rate": 0.00014488518803217542, "loss": 1.0102, "step": 4165 }, { "epoch": 0.35, "grad_norm": 0.6394610404968262, "learning_rate": 0.0001447665750357316, "loss": 0.8033, "step": 4170 }, { "epoch": 0.35, "grad_norm": 0.7304995656013489, "learning_rate": 0.00014464788321920472, "loss": 0.9172, "step": 4175 }, { "epoch": 0.35, "grad_norm": 0.6288149952888489, "learning_rate": 0.00014452911279157435, "loss": 0.8689, "step": 4180 }, { "epoch": 0.35, "grad_norm": 0.6916682720184326, "learning_rate": 0.0001444102639619585, "loss": 0.9401, "step": 4185 }, { "epoch": 0.35, "grad_norm": 0.709947407245636, "learning_rate": 0.00014429133693961304, "loss": 0.9208, "step": 4190 }, { "epoch": 0.35, "grad_norm": 0.5847663283348083, "learning_rate": 0.0001441723319339318, "loss": 1.025, "step": 4195 }, { "epoch": 0.35, "grad_norm": 0.6856701374053955, "learning_rate": 0.00014405324915444572, "loss": 0.8597, "step": 4200 }, { "epoch": 0.36, "grad_norm": 0.5357947945594788, "learning_rate": 0.00014393408881082265, "loss": 0.916, "step": 4205 }, { "epoch": 0.36, "grad_norm": 0.6699780821800232, "learning_rate": 0.00014381485111286714, "loss": 0.8708, "step": 4210 }, { "epoch": 0.36, "grad_norm": 0.6534074544906616, "learning_rate": 0.00014369553627051982, "loss": 0.9067, "step": 4215 }, { "epoch": 0.36, "grad_norm": 0.7450271844863892, "learning_rate": 0.0001435761444938573, "loss": 0.9721, "step": 4220 }, { "epoch": 0.36, "grad_norm": 0.6965327858924866, "learning_rate": 0.00014345667599309142, "loss": 0.909, "step": 4225 }, { "epoch": 0.36, "grad_norm": 0.6757059097290039, "learning_rate": 0.0001433371309785693, "loss": 0.929, "step": 4230 }, { "epoch": 0.36, "grad_norm": 0.8920908570289612, "learning_rate": 0.0001432175096607727, "loss": 0.957, "step": 4235 }, { "epoch": 0.36, "grad_norm": 0.6721371412277222, "learning_rate": 0.00014309781225031778, "loss": 0.9679, "step": 4240 }, { "epoch": 0.36, "grad_norm": 0.5155514478683472, "learning_rate": 0.00014297803895795455, "loss": 0.9219, "step": 4245 }, { "epoch": 0.36, "grad_norm": 0.7284682393074036, "learning_rate": 0.00014285818999456676, "loss": 0.8668, "step": 4250 }, { "epoch": 0.36, "grad_norm": 0.6953817009925842, "learning_rate": 0.0001427382655711713, "loss": 0.9595, "step": 4255 }, { "epoch": 0.36, "grad_norm": 0.5577906966209412, "learning_rate": 0.000142618265898918, "loss": 0.8521, "step": 4260 }, { "epoch": 0.36, "grad_norm": 0.6189967393875122, "learning_rate": 0.00014249819118908915, "loss": 0.9177, "step": 4265 }, { "epoch": 0.36, "grad_norm": 0.6514103412628174, "learning_rate": 0.00014237804165309913, "loss": 0.9944, "step": 4270 }, { "epoch": 0.36, "grad_norm": 0.5756633281707764, "learning_rate": 0.0001422578175024941, "loss": 0.9258, "step": 4275 }, { "epoch": 0.36, "grad_norm": 0.5479834675788879, "learning_rate": 0.00014213751894895154, "loss": 0.9599, "step": 4280 }, { "epoch": 0.36, "grad_norm": 0.6857898235321045, "learning_rate": 0.00014201714620428, "loss": 0.9585, "step": 4285 }, { "epoch": 0.36, "grad_norm": 0.7923577427864075, "learning_rate": 0.00014189669948041863, "loss": 0.899, "step": 4290 }, { "epoch": 0.36, "grad_norm": 0.7185238003730774, "learning_rate": 0.00014177617898943683, "loss": 0.8412, "step": 4295 }, { "epoch": 0.36, "grad_norm": 0.6681710481643677, "learning_rate": 0.00014165558494353385, "loss": 0.9985, "step": 4300 }, { "epoch": 0.36, "grad_norm": 0.7086035013198853, "learning_rate": 0.00014153491755503853, "loss": 1.0187, "step": 4305 }, { "epoch": 0.36, "grad_norm": 0.806403636932373, "learning_rate": 0.00014141417703640875, "loss": 0.9801, "step": 4310 }, { "epoch": 0.36, "grad_norm": 0.7036173343658447, "learning_rate": 0.0001412933636002312, "loss": 0.8904, "step": 4315 }, { "epoch": 0.36, "grad_norm": 0.6700453162193298, "learning_rate": 0.00014117247745922101, "loss": 0.955, "step": 4320 }, { "epoch": 0.37, "grad_norm": 0.6932754516601562, "learning_rate": 0.00014105151882622122, "loss": 0.8445, "step": 4325 }, { "epoch": 0.37, "grad_norm": 0.6237407326698303, "learning_rate": 0.00014093048791420252, "loss": 0.7346, "step": 4330 }, { "epoch": 0.37, "grad_norm": 0.7434117197990417, "learning_rate": 0.00014080938493626286, "loss": 0.9259, "step": 4335 }, { "epoch": 0.37, "grad_norm": 0.8616409301757812, "learning_rate": 0.00014068821010562718, "loss": 0.9227, "step": 4340 }, { "epoch": 0.37, "grad_norm": 0.8434512615203857, "learning_rate": 0.00014056696363564682, "loss": 0.9353, "step": 4345 }, { "epoch": 0.37, "grad_norm": 0.6269782781600952, "learning_rate": 0.00014044564573979925, "loss": 0.9082, "step": 4350 }, { "epoch": 0.37, "grad_norm": 0.7604944705963135, "learning_rate": 0.0001403242566316878, "loss": 0.9702, "step": 4355 }, { "epoch": 0.37, "grad_norm": 0.5997477769851685, "learning_rate": 0.0001402027965250411, "loss": 0.9641, "step": 4360 }, { "epoch": 0.37, "grad_norm": 0.7834346294403076, "learning_rate": 0.00014008126563371274, "loss": 0.9176, "step": 4365 }, { "epoch": 0.37, "grad_norm": 0.734910249710083, "learning_rate": 0.0001399596641716811, "loss": 1.0047, "step": 4370 }, { "epoch": 0.37, "grad_norm": 0.6782670617103577, "learning_rate": 0.0001398379923530487, "loss": 0.8528, "step": 4375 }, { "epoch": 0.37, "grad_norm": 0.7385916113853455, "learning_rate": 0.0001397162503920419, "loss": 0.9069, "step": 4380 }, { "epoch": 0.37, "grad_norm": 0.7298621535301208, "learning_rate": 0.00013959443850301061, "loss": 0.8389, "step": 4385 }, { "epoch": 0.37, "grad_norm": 0.65964674949646, "learning_rate": 0.00013947255690042795, "loss": 0.9972, "step": 4390 }, { "epoch": 0.37, "grad_norm": 0.6677912473678589, "learning_rate": 0.00013935060579888962, "loss": 0.8049, "step": 4395 }, { "epoch": 0.37, "grad_norm": 0.5836318731307983, "learning_rate": 0.00013922858541311382, "loss": 0.8543, "step": 4400 }, { "epoch": 0.37, "grad_norm": 0.6943433284759521, "learning_rate": 0.00013910649595794058, "loss": 0.9893, "step": 4405 }, { "epoch": 0.37, "grad_norm": 0.5648766160011292, "learning_rate": 0.00013898433764833178, "loss": 0.8044, "step": 4410 }, { "epoch": 0.37, "grad_norm": 0.5739986896514893, "learning_rate": 0.00013886211069937034, "loss": 1.0803, "step": 4415 }, { "epoch": 0.37, "grad_norm": 0.6268223524093628, "learning_rate": 0.00013873981532626007, "loss": 0.8361, "step": 4420 }, { "epoch": 0.37, "grad_norm": 0.9593233466148376, "learning_rate": 0.00013861745174432525, "loss": 0.9178, "step": 4425 }, { "epoch": 0.37, "grad_norm": 0.622471272945404, "learning_rate": 0.00013849502016901035, "loss": 0.9039, "step": 4430 }, { "epoch": 0.37, "grad_norm": 0.6628968715667725, "learning_rate": 0.00013837252081587938, "loss": 0.9192, "step": 4435 }, { "epoch": 0.38, "grad_norm": 0.7090737223625183, "learning_rate": 0.0001382499539006159, "loss": 0.8897, "step": 4440 }, { "epoch": 0.38, "grad_norm": 0.6674631237983704, "learning_rate": 0.00013812731963902224, "loss": 0.864, "step": 4445 }, { "epoch": 0.38, "grad_norm": 0.8230262398719788, "learning_rate": 0.0001380046182470194, "loss": 0.8783, "step": 4450 }, { "epoch": 0.38, "grad_norm": 0.6914052367210388, "learning_rate": 0.0001378818499406465, "loss": 0.9236, "step": 4455 }, { "epoch": 0.38, "grad_norm": 0.672724187374115, "learning_rate": 0.00013775901493606063, "loss": 0.8779, "step": 4460 }, { "epoch": 0.38, "grad_norm": 0.6276265978813171, "learning_rate": 0.0001376361134495361, "loss": 0.9381, "step": 4465 }, { "epoch": 0.38, "grad_norm": 0.6782820820808411, "learning_rate": 0.0001375131456974645, "loss": 0.8978, "step": 4470 }, { "epoch": 0.38, "grad_norm": 0.711754322052002, "learning_rate": 0.0001373901118963539, "loss": 0.9513, "step": 4475 }, { "epoch": 0.38, "grad_norm": 0.5582084655761719, "learning_rate": 0.00013726701226282885, "loss": 0.9309, "step": 4480 }, { "epoch": 0.38, "grad_norm": 0.8418028950691223, "learning_rate": 0.00013714384701362956, "loss": 0.963, "step": 4485 }, { "epoch": 0.38, "grad_norm": 0.6022467017173767, "learning_rate": 0.000137020616365612, "loss": 0.7766, "step": 4490 }, { "epoch": 0.38, "grad_norm": 0.8316811323165894, "learning_rate": 0.0001368973205357472, "loss": 0.9274, "step": 4495 }, { "epoch": 0.38, "grad_norm": 0.7477040886878967, "learning_rate": 0.00013677395974112094, "loss": 0.9448, "step": 4500 }, { "epoch": 0.38, "grad_norm": 0.8349044919013977, "learning_rate": 0.00013665053419893337, "loss": 0.8744, "step": 4505 }, { "epoch": 0.38, "grad_norm": 0.7245243787765503, "learning_rate": 0.0001365270441264987, "loss": 0.8248, "step": 4510 }, { "epoch": 0.38, "grad_norm": 0.5794562101364136, "learning_rate": 0.00013640348974124474, "loss": 0.8832, "step": 4515 }, { "epoch": 0.38, "grad_norm": 0.7254599928855896, "learning_rate": 0.0001362798712607125, "loss": 0.9828, "step": 4520 }, { "epoch": 0.38, "grad_norm": 0.7025798559188843, "learning_rate": 0.00013615618890255589, "loss": 0.916, "step": 4525 }, { "epoch": 0.38, "grad_norm": 0.6569787263870239, "learning_rate": 0.0001360324428845412, "loss": 0.9896, "step": 4530 }, { "epoch": 0.38, "grad_norm": 0.6779191493988037, "learning_rate": 0.00013590863342454693, "loss": 0.8824, "step": 4535 }, { "epoch": 0.38, "grad_norm": 0.5817300081253052, "learning_rate": 0.0001357847607405632, "loss": 0.8415, "step": 4540 }, { "epoch": 0.38, "grad_norm": 0.7714138627052307, "learning_rate": 0.00013566082505069143, "loss": 0.9976, "step": 4545 }, { "epoch": 0.38, "grad_norm": 0.5546395778656006, "learning_rate": 0.00013553682657314412, "loss": 0.8445, "step": 4550 }, { "epoch": 0.38, "grad_norm": 0.6671056747436523, "learning_rate": 0.00013541276552624405, "loss": 0.8457, "step": 4555 }, { "epoch": 0.39, "grad_norm": 0.5993417501449585, "learning_rate": 0.00013528864212842444, "loss": 0.8144, "step": 4560 }, { "epoch": 0.39, "grad_norm": 0.847915530204773, "learning_rate": 0.00013516445659822815, "loss": 0.9305, "step": 4565 }, { "epoch": 0.39, "grad_norm": 0.7033442258834839, "learning_rate": 0.00013504020915430746, "loss": 0.9296, "step": 4570 }, { "epoch": 0.39, "grad_norm": 0.5511518716812134, "learning_rate": 0.00013491590001542367, "loss": 0.8731, "step": 4575 }, { "epoch": 0.39, "grad_norm": 0.7261431813240051, "learning_rate": 0.00013479152940044665, "loss": 0.9634, "step": 4580 }, { "epoch": 0.39, "grad_norm": 0.7022355198860168, "learning_rate": 0.00013466709752835466, "loss": 0.9566, "step": 4585 }, { "epoch": 0.39, "grad_norm": 0.7419948577880859, "learning_rate": 0.00013454260461823365, "loss": 0.8646, "step": 4590 }, { "epoch": 0.39, "grad_norm": 0.7006118893623352, "learning_rate": 0.00013441805088927706, "loss": 0.9215, "step": 4595 }, { "epoch": 0.39, "grad_norm": 0.5963191390037537, "learning_rate": 0.00013429343656078555, "loss": 0.8042, "step": 4600 }, { "epoch": 0.39, "grad_norm": 0.5838218927383423, "learning_rate": 0.0001341687618521663, "loss": 0.8131, "step": 4605 }, { "epoch": 0.39, "grad_norm": 0.750098705291748, "learning_rate": 0.00013404402698293294, "loss": 0.8713, "step": 4610 }, { "epoch": 0.39, "grad_norm": 0.6926030516624451, "learning_rate": 0.00013391923217270497, "loss": 0.9185, "step": 4615 }, { "epoch": 0.39, "grad_norm": 0.6723347902297974, "learning_rate": 0.00013379437764120738, "loss": 0.8885, "step": 4620 }, { "epoch": 0.39, "grad_norm": 0.6616873145103455, "learning_rate": 0.00013366946360827037, "loss": 0.9178, "step": 4625 }, { "epoch": 0.39, "grad_norm": 0.7160006165504456, "learning_rate": 0.00013354449029382893, "loss": 0.7647, "step": 4630 }, { "epoch": 0.39, "grad_norm": 0.7113422751426697, "learning_rate": 0.00013341945791792238, "loss": 0.9592, "step": 4635 }, { "epoch": 0.39, "grad_norm": 0.7037850618362427, "learning_rate": 0.00013329436670069395, "loss": 0.8489, "step": 4640 }, { "epoch": 0.39, "grad_norm": 0.6213445067405701, "learning_rate": 0.0001331692168623907, "loss": 0.9049, "step": 4645 }, { "epoch": 0.39, "grad_norm": 0.6793341636657715, "learning_rate": 0.00013304400862336263, "loss": 0.7977, "step": 4650 }, { "epoch": 0.39, "grad_norm": 0.7716271877288818, "learning_rate": 0.00013291874220406274, "loss": 0.9861, "step": 4655 }, { "epoch": 0.39, "grad_norm": 0.6577031016349792, "learning_rate": 0.00013279341782504645, "loss": 0.9055, "step": 4660 }, { "epoch": 0.39, "grad_norm": 0.6821931004524231, "learning_rate": 0.00013266803570697116, "loss": 0.8969, "step": 4665 }, { "epoch": 0.39, "grad_norm": 0.6055404543876648, "learning_rate": 0.00013254259607059605, "loss": 0.9834, "step": 4670 }, { "epoch": 0.39, "grad_norm": 0.6545614004135132, "learning_rate": 0.0001324170991367814, "loss": 0.8664, "step": 4675 }, { "epoch": 0.4, "grad_norm": 0.6915416121482849, "learning_rate": 0.0001322915451264885, "loss": 0.879, "step": 4680 }, { "epoch": 0.4, "grad_norm": 0.6689884066581726, "learning_rate": 0.00013216593426077918, "loss": 0.9359, "step": 4685 }, { "epoch": 0.4, "grad_norm": 0.7084957361221313, "learning_rate": 0.00013204026676081517, "loss": 0.9283, "step": 4690 }, { "epoch": 0.4, "grad_norm": 0.8455063700675964, "learning_rate": 0.0001319145428478581, "loss": 0.8446, "step": 4695 }, { "epoch": 0.4, "grad_norm": 0.6622646450996399, "learning_rate": 0.0001317887627432689, "loss": 0.7989, "step": 4700 }, { "epoch": 0.4, "grad_norm": 0.5944858193397522, "learning_rate": 0.00013166292666850734, "loss": 0.8847, "step": 4705 }, { "epoch": 0.4, "grad_norm": 0.7730399966239929, "learning_rate": 0.00013153703484513186, "loss": 0.9543, "step": 4710 }, { "epoch": 0.4, "grad_norm": 0.6421423554420471, "learning_rate": 0.00013141108749479898, "loss": 0.8916, "step": 4715 }, { "epoch": 0.4, "grad_norm": 0.6441932916641235, "learning_rate": 0.00013128508483926298, "loss": 0.8689, "step": 4720 }, { "epoch": 0.4, "grad_norm": 0.8238367438316345, "learning_rate": 0.00013115902710037554, "loss": 0.8758, "step": 4725 }, { "epoch": 0.4, "grad_norm": 0.634405791759491, "learning_rate": 0.00013103291450008533, "loss": 0.8721, "step": 4730 }, { "epoch": 0.4, "grad_norm": 0.8031609654426575, "learning_rate": 0.00013090674726043766, "loss": 0.8235, "step": 4735 }, { "epoch": 0.4, "grad_norm": 0.6754260063171387, "learning_rate": 0.0001307805256035739, "loss": 0.8455, "step": 4740 }, { "epoch": 0.4, "grad_norm": 0.7197476029396057, "learning_rate": 0.00013065424975173135, "loss": 0.8448, "step": 4745 }, { "epoch": 0.4, "grad_norm": 0.9335188269615173, "learning_rate": 0.00013052791992724275, "loss": 0.9965, "step": 4750 }, { "epoch": 0.4, "grad_norm": 0.5767316222190857, "learning_rate": 0.00013040153635253575, "loss": 0.9352, "step": 4755 }, { "epoch": 0.4, "grad_norm": 0.6993480324745178, "learning_rate": 0.00013027509925013275, "loss": 0.9312, "step": 4760 }, { "epoch": 0.4, "grad_norm": 0.7889806628227234, "learning_rate": 0.00013014860884265036, "loss": 0.961, "step": 4765 }, { "epoch": 0.4, "grad_norm": 0.6709961891174316, "learning_rate": 0.000130022065352799, "loss": 0.8681, "step": 4770 }, { "epoch": 0.4, "grad_norm": 0.7165763974189758, "learning_rate": 0.00012989546900338264, "loss": 0.9219, "step": 4775 }, { "epoch": 0.4, "grad_norm": 0.8095049262046814, "learning_rate": 0.00012976882001729823, "loss": 0.8742, "step": 4780 }, { "epoch": 0.4, "grad_norm": 0.7213955521583557, "learning_rate": 0.00012964211861753543, "loss": 0.826, "step": 4785 }, { "epoch": 0.4, "grad_norm": 0.7863954305648804, "learning_rate": 0.00012951536502717623, "loss": 0.8956, "step": 4790 }, { "epoch": 0.41, "grad_norm": 0.7114418745040894, "learning_rate": 0.00012938855946939443, "loss": 0.9338, "step": 4795 }, { "epoch": 0.41, "grad_norm": 0.7426603436470032, "learning_rate": 0.0001292617021674554, "loss": 0.9268, "step": 4800 }, { "epoch": 0.41, "grad_norm": 0.7291275858879089, "learning_rate": 0.00012913479334471557, "loss": 0.919, "step": 4805 }, { "epoch": 0.41, "grad_norm": 0.6532735824584961, "learning_rate": 0.0001290078332246221, "loss": 0.9307, "step": 4810 }, { "epoch": 0.41, "grad_norm": 0.6177434325218201, "learning_rate": 0.0001288808220307125, "loss": 0.9095, "step": 4815 }, { "epoch": 0.41, "grad_norm": 0.6966139674186707, "learning_rate": 0.0001287537599866141, "loss": 0.7596, "step": 4820 }, { "epoch": 0.41, "grad_norm": 0.7126907110214233, "learning_rate": 0.00012862664731604388, "loss": 0.8657, "step": 4825 }, { "epoch": 0.41, "grad_norm": 0.5501668453216553, "learning_rate": 0.0001284994842428079, "loss": 0.8841, "step": 4830 }, { "epoch": 0.41, "grad_norm": 0.7405208945274353, "learning_rate": 0.00012837227099080098, "loss": 1.028, "step": 4835 }, { "epoch": 0.41, "grad_norm": 0.674213171005249, "learning_rate": 0.00012824500778400627, "loss": 0.8924, "step": 4840 }, { "epoch": 0.41, "grad_norm": 0.6531869769096375, "learning_rate": 0.00012811769484649492, "loss": 0.9886, "step": 4845 }, { "epoch": 0.41, "grad_norm": 0.7602865695953369, "learning_rate": 0.0001279903324024256, "loss": 0.9032, "step": 4850 }, { "epoch": 0.41, "grad_norm": 0.6832072734832764, "learning_rate": 0.0001278629206760441, "loss": 0.8728, "step": 4855 }, { "epoch": 0.41, "grad_norm": 0.6938978433609009, "learning_rate": 0.0001277354598916831, "loss": 0.977, "step": 4860 }, { "epoch": 0.41, "grad_norm": 0.6449591517448425, "learning_rate": 0.00012760795027376158, "loss": 0.8576, "step": 4865 }, { "epoch": 0.41, "grad_norm": 0.7969632744789124, "learning_rate": 0.00012748039204678446, "loss": 0.9209, "step": 4870 }, { "epoch": 0.41, "grad_norm": 0.6520712971687317, "learning_rate": 0.00012735278543534243, "loss": 0.8105, "step": 4875 }, { "epoch": 0.41, "grad_norm": 0.7720639705657959, "learning_rate": 0.00012722513066411103, "loss": 0.9094, "step": 4880 }, { "epoch": 0.41, "grad_norm": 0.6899474859237671, "learning_rate": 0.00012709742795785097, "loss": 0.8983, "step": 4885 }, { "epoch": 0.41, "grad_norm": 0.7263126969337463, "learning_rate": 0.00012696967754140714, "loss": 0.9146, "step": 4890 }, { "epoch": 0.41, "grad_norm": 1.0260558128356934, "learning_rate": 0.00012684187963970847, "loss": 0.9221, "step": 4895 }, { "epoch": 0.41, "grad_norm": 0.6547735333442688, "learning_rate": 0.00012671403447776753, "loss": 0.8363, "step": 4900 }, { "epoch": 0.41, "grad_norm": 0.6629674434661865, "learning_rate": 0.00012658614228068003, "loss": 0.8795, "step": 4905 }, { "epoch": 0.41, "grad_norm": 0.6511769890785217, "learning_rate": 0.00012645820327362466, "loss": 1.0364, "step": 4910 }, { "epoch": 0.42, "grad_norm": 0.8055089712142944, "learning_rate": 0.0001263302176818623, "loss": 1.0052, "step": 4915 }, { "epoch": 0.42, "grad_norm": 0.6773073673248291, "learning_rate": 0.000126202185730736, "loss": 1.0077, "step": 4920 }, { "epoch": 0.42, "grad_norm": 0.669937789440155, "learning_rate": 0.00012607410764567045, "loss": 0.8325, "step": 4925 }, { "epoch": 0.42, "grad_norm": 0.7064899206161499, "learning_rate": 0.00012594598365217144, "loss": 0.8735, "step": 4930 }, { "epoch": 0.42, "grad_norm": 0.8059523105621338, "learning_rate": 0.00012581781397582567, "loss": 0.9312, "step": 4935 }, { "epoch": 0.42, "grad_norm": 0.6511587500572205, "learning_rate": 0.00012568959884230036, "loss": 0.8524, "step": 4940 }, { "epoch": 0.42, "grad_norm": 0.623746931552887, "learning_rate": 0.0001255613384773426, "loss": 0.887, "step": 4945 }, { "epoch": 0.42, "grad_norm": 0.593783438205719, "learning_rate": 0.0001254330331067792, "loss": 1.0126, "step": 4950 }, { "epoch": 0.42, "grad_norm": 0.6896436214447021, "learning_rate": 0.00012530468295651617, "loss": 1.0481, "step": 4955 }, { "epoch": 0.42, "grad_norm": 0.6880872249603271, "learning_rate": 0.00012517628825253852, "loss": 0.9641, "step": 4960 }, { "epoch": 0.42, "grad_norm": 0.6226217150688171, "learning_rate": 0.00012504784922090945, "loss": 0.9193, "step": 4965 }, { "epoch": 0.42, "grad_norm": 0.6571257710456848, "learning_rate": 0.00012491936608777045, "loss": 0.7745, "step": 4970 }, { "epoch": 0.42, "grad_norm": 0.6938855648040771, "learning_rate": 0.00012479083907934052, "loss": 0.8427, "step": 4975 }, { "epoch": 0.42, "grad_norm": 0.6288595199584961, "learning_rate": 0.00012466226842191587, "loss": 0.8178, "step": 4980 }, { "epoch": 0.42, "grad_norm": 0.6910018920898438, "learning_rate": 0.00012453365434186975, "loss": 0.9649, "step": 4985 }, { "epoch": 0.42, "grad_norm": 0.766521692276001, "learning_rate": 0.00012440499706565164, "loss": 0.8424, "step": 4990 }, { "epoch": 0.42, "grad_norm": 0.6587792038917542, "learning_rate": 0.00012427629681978724, "loss": 0.7928, "step": 4995 }, { "epoch": 0.42, "grad_norm": 0.6307712197303772, "learning_rate": 0.00012414755383087785, "loss": 0.8884, "step": 5000 }, { "epoch": 0.42, "grad_norm": 0.6760114431381226, "learning_rate": 0.0001240187683256, "loss": 0.8033, "step": 5005 }, { "epoch": 0.42, "grad_norm": 0.6159674525260925, "learning_rate": 0.00012388994053070512, "loss": 0.7698, "step": 5010 }, { "epoch": 0.42, "grad_norm": 0.6530436873435974, "learning_rate": 0.00012376107067301912, "loss": 0.9561, "step": 5015 }, { "epoch": 0.42, "grad_norm": 0.7369070053100586, "learning_rate": 0.00012363215897944187, "loss": 0.9346, "step": 5020 }, { "epoch": 0.42, "grad_norm": 0.6479840874671936, "learning_rate": 0.000123503205676947, "loss": 0.8693, "step": 5025 }, { "epoch": 0.42, "grad_norm": 0.7007189989089966, "learning_rate": 0.00012337421099258133, "loss": 0.8471, "step": 5030 }, { "epoch": 0.43, "grad_norm": 0.5950848460197449, "learning_rate": 0.00012324517515346467, "loss": 0.8664, "step": 5035 }, { "epoch": 0.43, "grad_norm": 0.6795806288719177, "learning_rate": 0.00012311609838678905, "loss": 0.9208, "step": 5040 }, { "epoch": 0.43, "grad_norm": 0.7374679446220398, "learning_rate": 0.0001229869809198188, "loss": 0.9093, "step": 5045 }, { "epoch": 0.43, "grad_norm": 0.8739282488822937, "learning_rate": 0.00012285782297988984, "loss": 0.8876, "step": 5050 }, { "epoch": 0.43, "grad_norm": 0.6189777851104736, "learning_rate": 0.00012272862479440922, "loss": 0.8096, "step": 5055 }, { "epoch": 0.43, "grad_norm": 0.7174555063247681, "learning_rate": 0.00012259938659085504, "loss": 0.9407, "step": 5060 }, { "epoch": 0.43, "grad_norm": 0.6594709157943726, "learning_rate": 0.00012247010859677576, "loss": 0.8707, "step": 5065 }, { "epoch": 0.43, "grad_norm": 0.6535437703132629, "learning_rate": 0.00012234079103978993, "loss": 0.9787, "step": 5070 }, { "epoch": 0.43, "grad_norm": 0.6512961387634277, "learning_rate": 0.00012221143414758572, "loss": 0.921, "step": 5075 }, { "epoch": 0.43, "grad_norm": 0.6845799684524536, "learning_rate": 0.00012208203814792056, "loss": 0.8912, "step": 5080 }, { "epoch": 0.43, "grad_norm": 0.6525848507881165, "learning_rate": 0.00012195260326862081, "loss": 0.9101, "step": 5085 }, { "epoch": 0.43, "grad_norm": 0.9541882872581482, "learning_rate": 0.00012182312973758118, "loss": 0.9397, "step": 5090 }, { "epoch": 0.43, "grad_norm": 0.6034583449363708, "learning_rate": 0.00012169361778276451, "loss": 0.9326, "step": 5095 }, { "epoch": 0.43, "grad_norm": 0.623876690864563, "learning_rate": 0.00012156406763220128, "loss": 1.0362, "step": 5100 }, { "epoch": 0.43, "grad_norm": 0.8122617602348328, "learning_rate": 0.0001214344795139892, "loss": 0.8876, "step": 5105 }, { "epoch": 0.43, "grad_norm": 0.6952608823776245, "learning_rate": 0.0001213048536562928, "loss": 0.9326, "step": 5110 }, { "epoch": 0.43, "grad_norm": 0.5716097950935364, "learning_rate": 0.00012117519028734317, "loss": 0.9267, "step": 5115 }, { "epoch": 0.43, "grad_norm": 0.6167184710502625, "learning_rate": 0.00012104548963543729, "loss": 0.9353, "step": 5120 }, { "epoch": 0.43, "grad_norm": 0.8996965289115906, "learning_rate": 0.00012091575192893789, "loss": 0.8414, "step": 5125 }, { "epoch": 0.43, "grad_norm": 0.6290978193283081, "learning_rate": 0.00012078597739627297, "loss": 0.9101, "step": 5130 }, { "epoch": 0.43, "grad_norm": 0.6652675271034241, "learning_rate": 0.00012065616626593528, "loss": 0.9483, "step": 5135 }, { "epoch": 0.43, "grad_norm": 0.7232987880706787, "learning_rate": 0.00012052631876648199, "loss": 1.0557, "step": 5140 }, { "epoch": 0.43, "grad_norm": 0.6346669793128967, "learning_rate": 0.00012039643512653444, "loss": 0.8926, "step": 5145 }, { "epoch": 0.44, "grad_norm": 0.7574824690818787, "learning_rate": 0.00012026651557477745, "loss": 1.0143, "step": 5150 }, { "epoch": 0.44, "grad_norm": 0.6413111090660095, "learning_rate": 0.00012013656033995921, "loss": 1.0192, "step": 5155 }, { "epoch": 0.44, "grad_norm": 0.8342952132225037, "learning_rate": 0.00012000656965089063, "loss": 0.8629, "step": 5160 }, { "epoch": 0.44, "grad_norm": 0.6431555151939392, "learning_rate": 0.00011987654373644506, "loss": 0.8238, "step": 5165 }, { "epoch": 0.44, "grad_norm": 0.5522921085357666, "learning_rate": 0.00011974648282555794, "loss": 0.8706, "step": 5170 }, { "epoch": 0.44, "grad_norm": 0.7213382720947266, "learning_rate": 0.00011961638714722623, "loss": 0.9221, "step": 5175 }, { "epoch": 0.44, "grad_norm": 1.0782811641693115, "learning_rate": 0.00011948625693050816, "loss": 0.8955, "step": 5180 }, { "epoch": 0.44, "grad_norm": 0.6480096578598022, "learning_rate": 0.00011935609240452281, "loss": 0.7231, "step": 5185 }, { "epoch": 0.44, "grad_norm": 0.7860197424888611, "learning_rate": 0.00011922589379844961, "loss": 0.9138, "step": 5190 }, { "epoch": 0.44, "grad_norm": 0.7582351565361023, "learning_rate": 0.00011909566134152794, "loss": 0.9187, "step": 5195 }, { "epoch": 0.44, "grad_norm": 0.6495263576507568, "learning_rate": 0.00011896539526305694, "loss": 0.8453, "step": 5200 }, { "epoch": 0.44, "grad_norm": 0.7486453652381897, "learning_rate": 0.00011883509579239482, "loss": 0.9441, "step": 5205 }, { "epoch": 0.44, "grad_norm": 0.5885391235351562, "learning_rate": 0.0001187047631589586, "loss": 0.8987, "step": 5210 }, { "epoch": 0.44, "grad_norm": 0.709578275680542, "learning_rate": 0.00011857439759222373, "loss": 0.9492, "step": 5215 }, { "epoch": 0.44, "grad_norm": 0.7070071697235107, "learning_rate": 0.00011844399932172362, "loss": 0.9171, "step": 5220 }, { "epoch": 0.44, "grad_norm": 0.7506661415100098, "learning_rate": 0.00011831356857704927, "loss": 0.9367, "step": 5225 }, { "epoch": 0.44, "grad_norm": 0.6146754622459412, "learning_rate": 0.00011818310558784882, "loss": 0.8276, "step": 5230 }, { "epoch": 0.44, "grad_norm": 0.7746636271476746, "learning_rate": 0.00011805261058382723, "loss": 0.9692, "step": 5235 }, { "epoch": 0.44, "grad_norm": 0.7707207202911377, "learning_rate": 0.0001179220837947459, "loss": 1.0199, "step": 5240 }, { "epoch": 0.44, "grad_norm": 0.7371374368667603, "learning_rate": 0.00011779152545042195, "loss": 0.9595, "step": 5245 }, { "epoch": 0.44, "grad_norm": 0.7845306396484375, "learning_rate": 0.00011766093578072832, "loss": 0.9082, "step": 5250 }, { "epoch": 0.44, "grad_norm": 0.7142476439476013, "learning_rate": 0.00011753031501559298, "loss": 0.9966, "step": 5255 }, { "epoch": 0.44, "grad_norm": 0.7987679243087769, "learning_rate": 0.00011739966338499866, "loss": 0.9473, "step": 5260 }, { "epoch": 0.44, "grad_norm": 0.6516871452331543, "learning_rate": 0.00011726898111898246, "loss": 0.9249, "step": 5265 }, { "epoch": 0.45, "grad_norm": 0.6980127692222595, "learning_rate": 0.00011713826844763538, "loss": 0.8802, "step": 5270 }, { "epoch": 0.45, "grad_norm": 0.8179835081100464, "learning_rate": 0.000117007525601102, "loss": 0.9815, "step": 5275 }, { "epoch": 0.45, "grad_norm": 0.6411282420158386, "learning_rate": 0.00011687675280958, "loss": 0.82, "step": 5280 }, { "epoch": 0.45, "grad_norm": 0.6856908202171326, "learning_rate": 0.00011674595030331974, "loss": 0.92, "step": 5285 }, { "epoch": 0.45, "grad_norm": 0.7004132270812988, "learning_rate": 0.00011661511831262401, "loss": 0.9501, "step": 5290 }, { "epoch": 0.45, "grad_norm": 0.6341331005096436, "learning_rate": 0.0001164842570678475, "loss": 0.8728, "step": 5295 }, { "epoch": 0.45, "grad_norm": 0.6990119218826294, "learning_rate": 0.00011635336679939624, "loss": 0.8573, "step": 5300 }, { "epoch": 0.45, "grad_norm": 0.740168571472168, "learning_rate": 0.00011622244773772755, "loss": 1.1156, "step": 5305 }, { "epoch": 0.45, "grad_norm": 0.676529049873352, "learning_rate": 0.00011609150011334937, "loss": 0.9396, "step": 5310 }, { "epoch": 0.45, "grad_norm": 0.7612829804420471, "learning_rate": 0.00011596052415681992, "loss": 0.9255, "step": 5315 }, { "epoch": 0.45, "grad_norm": 0.7749308943748474, "learning_rate": 0.00011582952009874737, "loss": 1.0113, "step": 5320 }, { "epoch": 0.45, "grad_norm": 0.6618918180465698, "learning_rate": 0.00011569848816978924, "loss": 0.8917, "step": 5325 }, { "epoch": 0.45, "grad_norm": 0.7212786078453064, "learning_rate": 0.00011556742860065226, "loss": 0.854, "step": 5330 }, { "epoch": 0.45, "grad_norm": 0.6731553077697754, "learning_rate": 0.00011543634162209178, "loss": 0.9732, "step": 5335 }, { "epoch": 0.45, "grad_norm": 0.6743349432945251, "learning_rate": 0.00011530522746491132, "loss": 0.8256, "step": 5340 }, { "epoch": 0.45, "grad_norm": 0.6633926630020142, "learning_rate": 0.00011517408635996241, "loss": 0.834, "step": 5345 }, { "epoch": 0.45, "grad_norm": 0.6572647094726562, "learning_rate": 0.00011504291853814393, "loss": 0.9172, "step": 5350 }, { "epoch": 0.45, "grad_norm": 0.6950198411941528, "learning_rate": 0.00011491172423040178, "loss": 0.8839, "step": 5355 }, { "epoch": 0.45, "grad_norm": 0.6339337229728699, "learning_rate": 0.00011478050366772855, "loss": 0.9509, "step": 5360 }, { "epoch": 0.45, "grad_norm": 0.7393227815628052, "learning_rate": 0.00011464925708116306, "loss": 0.9535, "step": 5365 }, { "epoch": 0.45, "grad_norm": 0.6855883598327637, "learning_rate": 0.00011451798470178988, "loss": 0.9199, "step": 5370 }, { "epoch": 0.45, "grad_norm": 0.6756834983825684, "learning_rate": 0.0001143866867607391, "loss": 0.8526, "step": 5375 }, { "epoch": 0.45, "grad_norm": 0.6416776180267334, "learning_rate": 0.0001142553634891857, "loss": 0.8788, "step": 5380 }, { "epoch": 0.45, "grad_norm": 0.7423108220100403, "learning_rate": 0.00011412401511834934, "loss": 0.9431, "step": 5385 }, { "epoch": 0.46, "grad_norm": 0.6564056277275085, "learning_rate": 0.00011399264187949385, "loss": 0.9038, "step": 5390 }, { "epoch": 0.46, "grad_norm": 0.6751819849014282, "learning_rate": 0.00011386124400392686, "loss": 0.8355, "step": 5395 }, { "epoch": 0.46, "grad_norm": 0.7076970338821411, "learning_rate": 0.0001137298217229993, "loss": 0.9295, "step": 5400 }, { "epoch": 0.46, "grad_norm": 0.8646618723869324, "learning_rate": 0.00011359837526810521, "loss": 0.9086, "step": 5405 }, { "epoch": 0.46, "grad_norm": 0.7255219221115112, "learning_rate": 0.00011346690487068103, "loss": 0.9413, "step": 5410 }, { "epoch": 0.46, "grad_norm": 0.6536005139350891, "learning_rate": 0.00011333541076220555, "loss": 0.8992, "step": 5415 }, { "epoch": 0.46, "grad_norm": 0.5964111685752869, "learning_rate": 0.00011320389317419908, "loss": 0.8439, "step": 5420 }, { "epoch": 0.46, "grad_norm": 0.7618805766105652, "learning_rate": 0.00011307235233822345, "loss": 0.9356, "step": 5425 }, { "epoch": 0.46, "grad_norm": 0.712472677230835, "learning_rate": 0.00011294078848588136, "loss": 0.8599, "step": 5430 }, { "epoch": 0.46, "grad_norm": 0.5790109038352966, "learning_rate": 0.00011280920184881598, "loss": 0.805, "step": 5435 }, { "epoch": 0.46, "grad_norm": 0.6265473961830139, "learning_rate": 0.0001126775926587107, "loss": 0.8728, "step": 5440 }, { "epoch": 0.46, "grad_norm": 0.7416415810585022, "learning_rate": 0.00011254596114728859, "loss": 0.8682, "step": 5445 }, { "epoch": 0.46, "grad_norm": 0.6786869764328003, "learning_rate": 0.00011241430754631194, "loss": 0.9832, "step": 5450 }, { "epoch": 0.46, "grad_norm": 0.6256182193756104, "learning_rate": 0.00011228263208758206, "loss": 0.8253, "step": 5455 }, { "epoch": 0.46, "grad_norm": 0.6844419836997986, "learning_rate": 0.0001121509350029386, "loss": 0.778, "step": 5460 }, { "epoch": 0.46, "grad_norm": 0.7649736404418945, "learning_rate": 0.00011201921652425945, "loss": 0.8338, "step": 5465 }, { "epoch": 0.46, "grad_norm": 0.6669993996620178, "learning_rate": 0.00011188747688346002, "loss": 0.9117, "step": 5470 }, { "epoch": 0.46, "grad_norm": 0.7160723209381104, "learning_rate": 0.00011175571631249305, "loss": 0.9093, "step": 5475 }, { "epoch": 0.46, "grad_norm": 0.6720280647277832, "learning_rate": 0.00011162393504334814, "loss": 0.8271, "step": 5480 }, { "epoch": 0.46, "grad_norm": 0.7730690836906433, "learning_rate": 0.00011149213330805135, "loss": 0.9728, "step": 5485 }, { "epoch": 0.46, "grad_norm": 0.6851432919502258, "learning_rate": 0.00011136031133866467, "loss": 0.9282, "step": 5490 }, { "epoch": 0.46, "grad_norm": 0.691317081451416, "learning_rate": 0.00011122846936728584, "loss": 0.9255, "step": 5495 }, { "epoch": 0.46, "grad_norm": 0.681551456451416, "learning_rate": 0.00011109660762604774, "loss": 0.8205, "step": 5500 }, { "epoch": 0.47, "grad_norm": 0.6922058463096619, "learning_rate": 0.0001109647263471181, "loss": 0.7905, "step": 5505 }, { "epoch": 0.47, "grad_norm": 0.720058798789978, "learning_rate": 0.00011083282576269905, "loss": 0.9443, "step": 5510 }, { "epoch": 0.47, "grad_norm": 0.7844532132148743, "learning_rate": 0.00011070090610502663, "loss": 1.046, "step": 5515 }, { "epoch": 0.47, "grad_norm": 0.7096543312072754, "learning_rate": 0.00011056896760637063, "loss": 0.9262, "step": 5520 }, { "epoch": 0.47, "grad_norm": 0.5779085755348206, "learning_rate": 0.00011043701049903381, "loss": 0.9182, "step": 5525 }, { "epoch": 0.47, "grad_norm": 0.658515214920044, "learning_rate": 0.00011030503501535186, "loss": 0.9063, "step": 5530 }, { "epoch": 0.47, "grad_norm": 0.8104361891746521, "learning_rate": 0.00011017304138769272, "loss": 1.0538, "step": 5535 }, { "epoch": 0.47, "grad_norm": 0.7393267750740051, "learning_rate": 0.00011004102984845635, "loss": 0.855, "step": 5540 }, { "epoch": 0.47, "grad_norm": 0.7669159173965454, "learning_rate": 0.00010990900063007414, "loss": 0.8585, "step": 5545 }, { "epoch": 0.47, "grad_norm": 0.7517720460891724, "learning_rate": 0.00010977695396500878, "loss": 0.926, "step": 5550 }, { "epoch": 0.47, "grad_norm": 0.7571786046028137, "learning_rate": 0.00010964489008575354, "loss": 0.9036, "step": 5555 }, { "epoch": 0.47, "grad_norm": 0.7351516485214233, "learning_rate": 0.00010951280922483198, "loss": 0.8452, "step": 5560 }, { "epoch": 0.47, "grad_norm": 0.6579244136810303, "learning_rate": 0.0001093807116147977, "loss": 0.8769, "step": 5565 }, { "epoch": 0.47, "grad_norm": 0.6526955366134644, "learning_rate": 0.00010924859748823366, "loss": 1.0031, "step": 5570 }, { "epoch": 0.47, "grad_norm": 0.632077693939209, "learning_rate": 0.00010911646707775194, "loss": 0.9269, "step": 5575 }, { "epoch": 0.47, "grad_norm": 0.6900189518928528, "learning_rate": 0.00010898432061599333, "loss": 0.912, "step": 5580 }, { "epoch": 0.47, "grad_norm": 0.7273942828178406, "learning_rate": 0.00010885215833562683, "loss": 0.9147, "step": 5585 }, { "epoch": 0.47, "grad_norm": 0.7947009205818176, "learning_rate": 0.00010871998046934928, "loss": 0.9692, "step": 5590 }, { "epoch": 0.47, "grad_norm": 0.7955752015113831, "learning_rate": 0.00010858778724988506, "loss": 0.9498, "step": 5595 }, { "epoch": 0.47, "grad_norm": 0.6939447522163391, "learning_rate": 0.00010845557890998545, "loss": 0.7881, "step": 5600 }, { "epoch": 0.47, "grad_norm": 0.6717495322227478, "learning_rate": 0.00010832335568242851, "loss": 1.052, "step": 5605 }, { "epoch": 0.47, "grad_norm": 0.7081969380378723, "learning_rate": 0.0001081911178000183, "loss": 0.8961, "step": 5610 }, { "epoch": 0.47, "grad_norm": 0.7849963307380676, "learning_rate": 0.00010805886549558484, "loss": 0.782, "step": 5615 }, { "epoch": 0.47, "grad_norm": 0.6348875164985657, "learning_rate": 0.00010792659900198359, "loss": 0.9086, "step": 5620 }, { "epoch": 0.48, "grad_norm": 0.6073439717292786, "learning_rate": 0.00010779431855209478, "loss": 0.8713, "step": 5625 }, { "epoch": 0.48, "grad_norm": 0.7204303741455078, "learning_rate": 0.0001076620243788234, "loss": 0.9258, "step": 5630 }, { "epoch": 0.48, "grad_norm": 0.6874275207519531, "learning_rate": 0.00010752971671509857, "loss": 0.9345, "step": 5635 }, { "epoch": 0.48, "grad_norm": 0.6767924427986145, "learning_rate": 0.00010739739579387311, "loss": 0.9967, "step": 5640 }, { "epoch": 0.48, "grad_norm": 0.7352017760276794, "learning_rate": 0.00010726506184812322, "loss": 0.9132, "step": 5645 }, { "epoch": 0.48, "grad_norm": 0.5592889189720154, "learning_rate": 0.00010713271511084797, "loss": 0.8286, "step": 5650 }, { "epoch": 0.48, "grad_norm": 0.6905889511108398, "learning_rate": 0.00010700035581506908, "loss": 0.9153, "step": 5655 }, { "epoch": 0.48, "grad_norm": 0.7167163491249084, "learning_rate": 0.00010686798419383027, "loss": 0.8604, "step": 5660 }, { "epoch": 0.48, "grad_norm": 0.6322777271270752, "learning_rate": 0.00010673560048019693, "loss": 0.8144, "step": 5665 }, { "epoch": 0.48, "grad_norm": 0.6241506338119507, "learning_rate": 0.0001066032049072559, "loss": 0.8855, "step": 5670 }, { "epoch": 0.48, "grad_norm": 0.7140413522720337, "learning_rate": 0.00010647079770811479, "loss": 0.8696, "step": 5675 }, { "epoch": 0.48, "grad_norm": 0.9030040502548218, "learning_rate": 0.00010633837911590163, "loss": 0.8033, "step": 5680 }, { "epoch": 0.48, "grad_norm": 0.7416821718215942, "learning_rate": 0.00010620594936376466, "loss": 0.9086, "step": 5685 }, { "epoch": 0.48, "grad_norm": 0.5464077591896057, "learning_rate": 0.00010607350868487165, "loss": 0.8224, "step": 5690 }, { "epoch": 0.48, "grad_norm": 0.8908843994140625, "learning_rate": 0.00010594105731240961, "loss": 0.872, "step": 5695 }, { "epoch": 0.48, "grad_norm": 0.6819363832473755, "learning_rate": 0.00010580859547958448, "loss": 0.7992, "step": 5700 }, { "epoch": 0.48, "grad_norm": 0.9416067004203796, "learning_rate": 0.00010567612341962048, "loss": 0.9973, "step": 5705 }, { "epoch": 0.48, "grad_norm": 0.7403356432914734, "learning_rate": 0.00010554364136575998, "loss": 0.9134, "step": 5710 }, { "epoch": 0.48, "grad_norm": 0.6867555379867554, "learning_rate": 0.00010541114955126284, "loss": 0.917, "step": 5715 }, { "epoch": 0.48, "grad_norm": 0.6694294214248657, "learning_rate": 0.00010527864820940608, "loss": 0.8229, "step": 5720 }, { "epoch": 0.48, "grad_norm": 0.7279865741729736, "learning_rate": 0.00010514613757348364, "loss": 0.9539, "step": 5725 }, { "epoch": 0.48, "grad_norm": 0.5869626998901367, "learning_rate": 0.0001050136178768057, "loss": 0.8783, "step": 5730 }, { "epoch": 0.48, "grad_norm": 0.8429718017578125, "learning_rate": 0.00010488108935269843, "loss": 1.0312, "step": 5735 }, { "epoch": 0.48, "grad_norm": 0.6295071244239807, "learning_rate": 0.00010474855223450355, "loss": 0.8995, "step": 5740 }, { "epoch": 0.49, "grad_norm": 0.5459900498390198, "learning_rate": 0.0001046160067555779, "loss": 0.9544, "step": 5745 }, { "epoch": 0.49, "grad_norm": 0.7036649584770203, "learning_rate": 0.00010448345314929301, "loss": 0.947, "step": 5750 }, { "epoch": 0.49, "grad_norm": 0.6790043115615845, "learning_rate": 0.00010435089164903484, "loss": 0.9549, "step": 5755 }, { "epoch": 0.49, "grad_norm": 0.7928381562232971, "learning_rate": 0.00010421832248820309, "loss": 0.9555, "step": 5760 }, { "epoch": 0.49, "grad_norm": 0.6902924180030823, "learning_rate": 0.00010408574590021101, "loss": 0.9362, "step": 5765 }, { "epoch": 0.49, "grad_norm": 0.8154725432395935, "learning_rate": 0.000103953162118485, "loss": 0.9168, "step": 5770 }, { "epoch": 0.49, "grad_norm": 0.7690364122390747, "learning_rate": 0.00010382057137646401, "loss": 0.984, "step": 5775 }, { "epoch": 0.49, "grad_norm": 0.6170430183410645, "learning_rate": 0.00010368797390759937, "loss": 0.7565, "step": 5780 }, { "epoch": 0.49, "grad_norm": 0.7296398878097534, "learning_rate": 0.0001035553699453541, "loss": 0.971, "step": 5785 }, { "epoch": 0.49, "grad_norm": 0.6446324586868286, "learning_rate": 0.00010342275972320276, "loss": 0.9776, "step": 5790 }, { "epoch": 0.49, "grad_norm": 0.6967552304267883, "learning_rate": 0.00010329014347463097, "loss": 0.9204, "step": 5795 }, { "epoch": 0.49, "grad_norm": 0.7746196985244751, "learning_rate": 0.00010315752143313479, "loss": 0.887, "step": 5800 }, { "epoch": 0.49, "grad_norm": 0.7225843667984009, "learning_rate": 0.00010302489383222065, "loss": 0.8241, "step": 5805 }, { "epoch": 0.49, "grad_norm": 0.6819685101509094, "learning_rate": 0.00010289226090540473, "loss": 0.7739, "step": 5810 }, { "epoch": 0.49, "grad_norm": 0.8176074028015137, "learning_rate": 0.00010275962288621251, "loss": 0.9995, "step": 5815 }, { "epoch": 0.49, "grad_norm": 0.7226637005805969, "learning_rate": 0.00010262698000817852, "loss": 0.8244, "step": 5820 }, { "epoch": 0.49, "grad_norm": 0.6456871032714844, "learning_rate": 0.00010249433250484579, "loss": 0.8483, "step": 5825 }, { "epoch": 0.49, "grad_norm": 0.7270181775093079, "learning_rate": 0.00010236168060976555, "loss": 0.8909, "step": 5830 }, { "epoch": 0.49, "grad_norm": 0.8558886647224426, "learning_rate": 0.00010222902455649673, "loss": 0.8855, "step": 5835 }, { "epoch": 0.49, "grad_norm": 0.706078052520752, "learning_rate": 0.00010209636457860552, "loss": 0.9707, "step": 5840 }, { "epoch": 0.49, "grad_norm": 0.6462787985801697, "learning_rate": 0.00010196370090966516, "loss": 0.8618, "step": 5845 }, { "epoch": 0.49, "grad_norm": 0.6713019013404846, "learning_rate": 0.0001018310337832553, "loss": 0.8665, "step": 5850 }, { "epoch": 0.49, "grad_norm": 0.7476385831832886, "learning_rate": 0.00010169836343296162, "loss": 0.885, "step": 5855 }, { "epoch": 0.5, "grad_norm": 0.769382655620575, "learning_rate": 0.0001015656900923756, "loss": 0.8916, "step": 5860 }, { "epoch": 0.5, "grad_norm": 0.8415050506591797, "learning_rate": 0.00010143301399509395, "loss": 0.9444, "step": 5865 }, { "epoch": 0.5, "grad_norm": 0.5454522371292114, "learning_rate": 0.00010130033537471815, "loss": 0.8983, "step": 5870 }, { "epoch": 0.5, "grad_norm": 0.6528340578079224, "learning_rate": 0.00010116765446485423, "loss": 0.9029, "step": 5875 }, { "epoch": 0.5, "grad_norm": 0.6366670727729797, "learning_rate": 0.0001010349714991122, "loss": 0.7826, "step": 5880 }, { "epoch": 0.5, "grad_norm": 0.7491776943206787, "learning_rate": 0.00010090228671110568, "loss": 1.0645, "step": 5885 }, { "epoch": 0.5, "grad_norm": 0.5836167335510254, "learning_rate": 0.00010076960033445155, "loss": 0.883, "step": 5890 }, { "epoch": 0.5, "grad_norm": 0.7268155217170715, "learning_rate": 0.0001006369126027694, "loss": 0.8976, "step": 5895 }, { "epoch": 0.5, "grad_norm": 0.7044017910957336, "learning_rate": 0.00010050422374968131, "loss": 0.777, "step": 5900 }, { "epoch": 0.5, "grad_norm": 0.6972168684005737, "learning_rate": 0.00010037153400881126, "loss": 0.9469, "step": 5905 }, { "epoch": 0.5, "grad_norm": 0.7724279165267944, "learning_rate": 0.00010023884361378477, "loss": 0.9001, "step": 5910 }, { "epoch": 0.5, "grad_norm": 0.7246273159980774, "learning_rate": 0.00010010615279822865, "loss": 0.9022, "step": 5915 }, { "epoch": 0.5, "grad_norm": 0.7706332802772522, "learning_rate": 9.99734617957703e-05, "loss": 0.8842, "step": 5920 }, { "epoch": 0.5, "grad_norm": 0.632017195224762, "learning_rate": 9.984077084003752e-05, "loss": 0.9489, "step": 5925 }, { "epoch": 0.5, "grad_norm": 0.7416812181472778, "learning_rate": 9.970808016465797e-05, "loss": 1.0136, "step": 5930 }, { "epoch": 0.5, "grad_norm": 0.6422529816627502, "learning_rate": 9.957539000325893e-05, "loss": 1.0206, "step": 5935 }, { "epoch": 0.5, "grad_norm": 0.9573733806610107, "learning_rate": 9.944270058946666e-05, "loss": 1.0644, "step": 5940 }, { "epoch": 0.5, "grad_norm": 0.7484020590782166, "learning_rate": 9.931001215690616e-05, "loss": 0.9469, "step": 5945 }, { "epoch": 0.5, "grad_norm": 0.6472415924072266, "learning_rate": 9.917732493920071e-05, "loss": 0.7979, "step": 5950 }, { "epoch": 0.5, "grad_norm": 0.6799381971359253, "learning_rate": 9.90446391699714e-05, "loss": 1.0575, "step": 5955 }, { "epoch": 0.5, "grad_norm": 0.7971736192703247, "learning_rate": 9.891195508283684e-05, "loss": 0.8398, "step": 5960 }, { "epoch": 0.5, "grad_norm": 0.60069340467453, "learning_rate": 9.877927291141261e-05, "loss": 0.82, "step": 5965 }, { "epoch": 0.5, "grad_norm": 0.7374914884567261, "learning_rate": 9.864659288931095e-05, "loss": 0.8881, "step": 5970 }, { "epoch": 0.5, "grad_norm": 0.6885590553283691, "learning_rate": 9.851391525014035e-05, "loss": 0.9565, "step": 5975 }, { "epoch": 0.51, "grad_norm": 0.7123907804489136, "learning_rate": 9.838124022750502e-05, "loss": 0.9049, "step": 5980 }, { "epoch": 0.51, "grad_norm": 0.6635785102844238, "learning_rate": 9.824856805500462e-05, "loss": 0.9667, "step": 5985 }, { "epoch": 0.51, "grad_norm": 0.63397616147995, "learning_rate": 9.811589896623382e-05, "loss": 0.9595, "step": 5990 }, { "epoch": 0.51, "grad_norm": 1.2405569553375244, "learning_rate": 9.798323319478178e-05, "loss": 0.7308, "step": 5995 }, { "epoch": 0.51, "grad_norm": 0.7486843466758728, "learning_rate": 9.785057097423186e-05, "loss": 0.8563, "step": 6000 }, { "epoch": 0.51, "grad_norm": 0.7018646597862244, "learning_rate": 9.771791253816123e-05, "loss": 0.8959, "step": 6005 }, { "epoch": 0.51, "grad_norm": 0.6461020112037659, "learning_rate": 9.758525812014029e-05, "loss": 0.8633, "step": 6010 }, { "epoch": 0.51, "grad_norm": 0.7459319829940796, "learning_rate": 9.745260795373239e-05, "loss": 0.8983, "step": 6015 }, { "epoch": 0.51, "grad_norm": 0.6706824898719788, "learning_rate": 9.731996227249347e-05, "loss": 0.9012, "step": 6020 }, { "epoch": 0.51, "grad_norm": 0.8766348958015442, "learning_rate": 9.718732130997148e-05, "loss": 0.9232, "step": 6025 }, { "epoch": 0.51, "grad_norm": 0.6813793778419495, "learning_rate": 9.705468529970613e-05, "loss": 0.9458, "step": 6030 }, { "epoch": 0.51, "grad_norm": 0.6572619676589966, "learning_rate": 9.692205447522837e-05, "loss": 1.0518, "step": 6035 }, { "epoch": 0.51, "grad_norm": 0.8391721844673157, "learning_rate": 9.678942907006002e-05, "loss": 0.8991, "step": 6040 }, { "epoch": 0.51, "grad_norm": 0.7584266662597656, "learning_rate": 9.665680931771341e-05, "loss": 0.949, "step": 6045 }, { "epoch": 0.51, "grad_norm": 0.6266197562217712, "learning_rate": 9.652419545169083e-05, "loss": 0.8782, "step": 6050 }, { "epoch": 0.51, "grad_norm": 0.7364962697029114, "learning_rate": 9.639158770548426e-05, "loss": 0.898, "step": 6055 }, { "epoch": 0.51, "grad_norm": 0.8394407033920288, "learning_rate": 9.625898631257492e-05, "loss": 0.9739, "step": 6060 }, { "epoch": 0.51, "grad_norm": 0.7688999772071838, "learning_rate": 9.612639150643282e-05, "loss": 0.8576, "step": 6065 }, { "epoch": 0.51, "grad_norm": 0.7517114877700806, "learning_rate": 9.599380352051633e-05, "loss": 0.9308, "step": 6070 }, { "epoch": 0.51, "grad_norm": 0.5838608741760254, "learning_rate": 9.586122258827193e-05, "loss": 0.9275, "step": 6075 }, { "epoch": 0.51, "grad_norm": 0.831892192363739, "learning_rate": 9.572864894313357e-05, "loss": 0.8508, "step": 6080 }, { "epoch": 0.51, "grad_norm": 0.8587386608123779, "learning_rate": 9.559608281852238e-05, "loss": 0.9304, "step": 6085 }, { "epoch": 0.51, "grad_norm": 0.7076795697212219, "learning_rate": 9.546352444784632e-05, "loss": 0.8549, "step": 6090 }, { "epoch": 0.51, "grad_norm": 0.7201245427131653, "learning_rate": 9.533097406449962e-05, "loss": 0.953, "step": 6095 }, { "epoch": 0.52, "grad_norm": 0.7607429027557373, "learning_rate": 9.519843190186249e-05, "loss": 0.923, "step": 6100 }, { "epoch": 0.52, "grad_norm": 0.7092984914779663, "learning_rate": 9.506589819330069e-05, "loss": 0.8955, "step": 6105 }, { "epoch": 0.52, "grad_norm": 0.7726672887802124, "learning_rate": 9.493337317216498e-05, "loss": 0.8895, "step": 6110 }, { "epoch": 0.52, "grad_norm": 0.6731693148612976, "learning_rate": 9.4800857071791e-05, "loss": 0.8785, "step": 6115 }, { "epoch": 0.52, "grad_norm": 0.6858357787132263, "learning_rate": 9.466835012549855e-05, "loss": 0.7771, "step": 6120 }, { "epoch": 0.52, "grad_norm": 0.8080103397369385, "learning_rate": 9.453585256659127e-05, "loss": 0.8201, "step": 6125 }, { "epoch": 0.52, "grad_norm": 0.7510241270065308, "learning_rate": 9.440336462835648e-05, "loss": 0.8998, "step": 6130 }, { "epoch": 0.52, "grad_norm": 0.7484809160232544, "learning_rate": 9.42708865440644e-05, "loss": 0.9297, "step": 6135 }, { "epoch": 0.52, "grad_norm": 0.7201412916183472, "learning_rate": 9.413841854696785e-05, "loss": 0.8524, "step": 6140 }, { "epoch": 0.52, "grad_norm": 0.7931877970695496, "learning_rate": 9.400596087030207e-05, "loss": 0.8292, "step": 6145 }, { "epoch": 0.52, "grad_norm": 0.7390592694282532, "learning_rate": 9.387351374728403e-05, "loss": 0.8944, "step": 6150 }, { "epoch": 0.52, "grad_norm": 0.6792050004005432, "learning_rate": 9.3741077411112e-05, "loss": 0.9021, "step": 6155 }, { "epoch": 0.52, "grad_norm": 0.73079913854599, "learning_rate": 9.360865209496554e-05, "loss": 0.8946, "step": 6160 }, { "epoch": 0.52, "grad_norm": 0.7437912225723267, "learning_rate": 9.347623803200456e-05, "loss": 1.0276, "step": 6165 }, { "epoch": 0.52, "grad_norm": 0.5533177852630615, "learning_rate": 9.334383545536918e-05, "loss": 0.871, "step": 6170 }, { "epoch": 0.52, "grad_norm": 0.6979454159736633, "learning_rate": 9.321144459817952e-05, "loss": 0.9913, "step": 6175 }, { "epoch": 0.52, "grad_norm": 0.8437588214874268, "learning_rate": 9.307906569353474e-05, "loss": 0.942, "step": 6180 }, { "epoch": 0.52, "grad_norm": 0.7341928482055664, "learning_rate": 9.294669897451324e-05, "loss": 0.9076, "step": 6185 }, { "epoch": 0.52, "grad_norm": 0.7048665285110474, "learning_rate": 9.281434467417181e-05, "loss": 0.8918, "step": 6190 }, { "epoch": 0.52, "grad_norm": 0.6566076874732971, "learning_rate": 9.268200302554533e-05, "loss": 0.8851, "step": 6195 }, { "epoch": 0.52, "grad_norm": 0.7581920623779297, "learning_rate": 9.254967426164661e-05, "loss": 0.8965, "step": 6200 }, { "epoch": 0.52, "grad_norm": 1.0405224561691284, "learning_rate": 9.241735861546555e-05, "loss": 0.9108, "step": 6205 }, { "epoch": 0.52, "grad_norm": 0.6762104034423828, "learning_rate": 9.228505631996905e-05, "loss": 1.009, "step": 6210 }, { "epoch": 0.53, "grad_norm": 0.7440530061721802, "learning_rate": 9.215276760810061e-05, "loss": 0.9062, "step": 6215 }, { "epoch": 0.53, "grad_norm": 0.673608124256134, "learning_rate": 9.202049271277961e-05, "loss": 0.8629, "step": 6220 }, { "epoch": 0.53, "grad_norm": 0.8083750009536743, "learning_rate": 9.188823186690117e-05, "loss": 0.8885, "step": 6225 }, { "epoch": 0.53, "grad_norm": 0.5504729151725769, "learning_rate": 9.175598530333582e-05, "loss": 0.8642, "step": 6230 }, { "epoch": 0.53, "grad_norm": 0.6912506222724915, "learning_rate": 9.162375325492875e-05, "loss": 1.0152, "step": 6235 }, { "epoch": 0.53, "grad_norm": 0.667316198348999, "learning_rate": 9.149153595449968e-05, "loss": 0.8585, "step": 6240 }, { "epoch": 0.53, "grad_norm": 0.7056398391723633, "learning_rate": 9.135933363484236e-05, "loss": 0.9625, "step": 6245 }, { "epoch": 0.53, "grad_norm": 0.8069510459899902, "learning_rate": 9.122714652872412e-05, "loss": 0.9227, "step": 6250 }, { "epoch": 0.53, "grad_norm": 0.7210214734077454, "learning_rate": 9.109497486888564e-05, "loss": 0.8105, "step": 6255 }, { "epoch": 0.53, "grad_norm": 0.6201779246330261, "learning_rate": 9.096281888804022e-05, "loss": 0.9688, "step": 6260 }, { "epoch": 0.53, "grad_norm": 0.7237870693206787, "learning_rate": 9.083067881887365e-05, "loss": 0.8219, "step": 6265 }, { "epoch": 0.53, "grad_norm": 0.721929132938385, "learning_rate": 9.069855489404372e-05, "loss": 0.8314, "step": 6270 }, { "epoch": 0.53, "grad_norm": 2.525771379470825, "learning_rate": 9.056644734617975e-05, "loss": 0.9326, "step": 6275 }, { "epoch": 0.53, "grad_norm": 0.9808894991874695, "learning_rate": 9.043435640788222e-05, "loss": 0.9461, "step": 6280 }, { "epoch": 0.53, "grad_norm": 0.7714335322380066, "learning_rate": 9.030228231172245e-05, "loss": 0.9285, "step": 6285 }, { "epoch": 0.53, "grad_norm": 0.7612080574035645, "learning_rate": 9.0170225290242e-05, "loss": 0.8963, "step": 6290 }, { "epoch": 0.53, "grad_norm": 0.7176976203918457, "learning_rate": 9.003818557595241e-05, "loss": 0.8336, "step": 6295 }, { "epoch": 0.53, "grad_norm": 0.6766675114631653, "learning_rate": 8.990616340133478e-05, "loss": 0.8465, "step": 6300 }, { "epoch": 0.53, "grad_norm": 0.7587583065032959, "learning_rate": 8.977415899883928e-05, "loss": 0.8742, "step": 6305 }, { "epoch": 0.53, "grad_norm": 0.7511966228485107, "learning_rate": 8.964217260088479e-05, "loss": 0.821, "step": 6310 }, { "epoch": 0.53, "grad_norm": 0.7103337049484253, "learning_rate": 8.951020443985854e-05, "loss": 0.9149, "step": 6315 }, { "epoch": 0.53, "grad_norm": 0.7293053269386292, "learning_rate": 8.937825474811558e-05, "loss": 0.7952, "step": 6320 }, { "epoch": 0.53, "grad_norm": 0.7592235803604126, "learning_rate": 8.924632375797852e-05, "loss": 0.8964, "step": 6325 }, { "epoch": 0.53, "grad_norm": 0.5273784399032593, "learning_rate": 8.911441170173698e-05, "loss": 0.7355, "step": 6330 }, { "epoch": 0.54, "grad_norm": 0.7110022306442261, "learning_rate": 8.898251881164723e-05, "loss": 0.9297, "step": 6335 }, { "epoch": 0.54, "grad_norm": 0.7399802207946777, "learning_rate": 8.88506453199319e-05, "loss": 0.8804, "step": 6340 }, { "epoch": 0.54, "grad_norm": 0.7043013572692871, "learning_rate": 8.871879145877933e-05, "loss": 0.882, "step": 6345 }, { "epoch": 0.54, "grad_norm": 0.673186182975769, "learning_rate": 8.858695746034336e-05, "loss": 0.8835, "step": 6350 }, { "epoch": 0.54, "grad_norm": 0.710834264755249, "learning_rate": 8.84551435567429e-05, "loss": 0.8945, "step": 6355 }, { "epoch": 0.54, "grad_norm": 0.6442700624465942, "learning_rate": 8.832334998006143e-05, "loss": 0.8458, "step": 6360 }, { "epoch": 0.54, "grad_norm": 0.6050325632095337, "learning_rate": 8.819157696234659e-05, "loss": 0.8123, "step": 6365 }, { "epoch": 0.54, "grad_norm": 0.8802506923675537, "learning_rate": 8.805982473560996e-05, "loss": 0.8687, "step": 6370 }, { "epoch": 0.54, "grad_norm": 0.760107696056366, "learning_rate": 8.792809353182638e-05, "loss": 0.9708, "step": 6375 }, { "epoch": 0.54, "grad_norm": 0.6955555081367493, "learning_rate": 8.779638358293374e-05, "loss": 0.9172, "step": 6380 }, { "epoch": 0.54, "grad_norm": 0.5892151594161987, "learning_rate": 8.766469512083251e-05, "loss": 0.8301, "step": 6385 }, { "epoch": 0.54, "grad_norm": 0.6933249831199646, "learning_rate": 8.753302837738527e-05, "loss": 0.8659, "step": 6390 }, { "epoch": 0.54, "grad_norm": 0.7141648530960083, "learning_rate": 8.740138358441648e-05, "loss": 0.9506, "step": 6395 }, { "epoch": 0.54, "grad_norm": 0.7855405211448669, "learning_rate": 8.72697609737118e-05, "loss": 0.8757, "step": 6400 }, { "epoch": 0.54, "grad_norm": 0.6324602365493774, "learning_rate": 8.713816077701792e-05, "loss": 0.8307, "step": 6405 }, { "epoch": 0.54, "grad_norm": 0.6647676229476929, "learning_rate": 8.700658322604211e-05, "loss": 0.8077, "step": 6410 }, { "epoch": 0.54, "grad_norm": 0.7383860945701599, "learning_rate": 8.687502855245169e-05, "loss": 0.9212, "step": 6415 }, { "epoch": 0.54, "grad_norm": 0.6514068841934204, "learning_rate": 8.674349698787366e-05, "loss": 0.9188, "step": 6420 }, { "epoch": 0.54, "grad_norm": 0.8000485301017761, "learning_rate": 8.661198876389448e-05, "loss": 0.9233, "step": 6425 }, { "epoch": 0.54, "grad_norm": 0.5793482065200806, "learning_rate": 8.64805041120594e-05, "loss": 0.9497, "step": 6430 }, { "epoch": 0.54, "grad_norm": 0.7336552143096924, "learning_rate": 8.634904326387216e-05, "loss": 0.8945, "step": 6435 }, { "epoch": 0.54, "grad_norm": 0.9774885773658752, "learning_rate": 8.621760645079468e-05, "loss": 0.954, "step": 6440 }, { "epoch": 0.54, "grad_norm": 0.6644349694252014, "learning_rate": 8.608619390424648e-05, "loss": 0.669, "step": 6445 }, { "epoch": 0.54, "grad_norm": 0.6961809396743774, "learning_rate": 8.595480585560438e-05, "loss": 0.8038, "step": 6450 }, { "epoch": 0.55, "grad_norm": 0.7896516919136047, "learning_rate": 8.582344253620208e-05, "loss": 0.9361, "step": 6455 }, { "epoch": 0.55, "grad_norm": 0.5924127101898193, "learning_rate": 8.569210417732975e-05, "loss": 0.6845, "step": 6460 }, { "epoch": 0.55, "grad_norm": 0.69370037317276, "learning_rate": 8.556079101023348e-05, "loss": 0.8415, "step": 6465 }, { "epoch": 0.55, "grad_norm": 0.8123331665992737, "learning_rate": 8.542950326611525e-05, "loss": 0.8595, "step": 6470 }, { "epoch": 0.55, "grad_norm": 0.8372954726219177, "learning_rate": 8.529824117613208e-05, "loss": 0.9186, "step": 6475 }, { "epoch": 0.55, "grad_norm": 0.8233723640441895, "learning_rate": 8.516700497139589e-05, "loss": 0.9458, "step": 6480 }, { "epoch": 0.55, "grad_norm": 0.7458873391151428, "learning_rate": 8.503579488297304e-05, "loss": 0.7842, "step": 6485 }, { "epoch": 0.55, "grad_norm": 0.7014817595481873, "learning_rate": 8.490461114188383e-05, "loss": 0.9634, "step": 6490 }, { "epoch": 0.55, "grad_norm": 0.5969160795211792, "learning_rate": 8.477345397910229e-05, "loss": 0.9442, "step": 6495 }, { "epoch": 0.55, "grad_norm": 0.6557583808898926, "learning_rate": 8.464232362555557e-05, "loss": 0.8548, "step": 6500 }, { "epoch": 0.55, "grad_norm": 0.6432305574417114, "learning_rate": 8.451122031212357e-05, "loss": 0.9555, "step": 6505 }, { "epoch": 0.55, "grad_norm": 0.6296955347061157, "learning_rate": 8.438014426963874e-05, "loss": 0.9523, "step": 6510 }, { "epoch": 0.55, "grad_norm": 0.6959449052810669, "learning_rate": 8.424909572888542e-05, "loss": 0.8639, "step": 6515 }, { "epoch": 0.55, "grad_norm": 0.7091439366340637, "learning_rate": 8.411807492059944e-05, "loss": 0.8885, "step": 6520 }, { "epoch": 0.55, "grad_norm": 0.7139841914176941, "learning_rate": 8.398708207546797e-05, "loss": 0.9521, "step": 6525 }, { "epoch": 0.55, "grad_norm": 0.5753167867660522, "learning_rate": 8.385611742412887e-05, "loss": 0.8778, "step": 6530 }, { "epoch": 0.55, "grad_norm": 0.8551105260848999, "learning_rate": 8.372518119717027e-05, "loss": 1.0675, "step": 6535 }, { "epoch": 0.55, "grad_norm": 0.6479779481887817, "learning_rate": 8.359427362513046e-05, "loss": 0.8902, "step": 6540 }, { "epoch": 0.55, "grad_norm": 0.690642774105072, "learning_rate": 8.346339493849704e-05, "loss": 0.8771, "step": 6545 }, { "epoch": 0.55, "grad_norm": 0.7777910232543945, "learning_rate": 8.333254536770696e-05, "loss": 0.8605, "step": 6550 }, { "epoch": 0.55, "grad_norm": 0.6432963013648987, "learning_rate": 8.320172514314581e-05, "loss": 0.8891, "step": 6555 }, { "epoch": 0.55, "grad_norm": 0.7652024626731873, "learning_rate": 8.307093449514743e-05, "loss": 0.8381, "step": 6560 }, { "epoch": 0.55, "grad_norm": 0.6508191227912903, "learning_rate": 8.294017365399377e-05, "loss": 0.941, "step": 6565 }, { "epoch": 0.55, "grad_norm": 0.674426257610321, "learning_rate": 8.280944284991418e-05, "loss": 0.9117, "step": 6570 }, { "epoch": 0.56, "grad_norm": 0.7115979790687561, "learning_rate": 8.267874231308506e-05, "loss": 1.003, "step": 6575 }, { "epoch": 0.56, "grad_norm": 0.6742101311683655, "learning_rate": 8.254807227362973e-05, "loss": 0.7705, "step": 6580 }, { "epoch": 0.56, "grad_norm": 0.6757171750068665, "learning_rate": 8.241743296161759e-05, "loss": 0.8727, "step": 6585 }, { "epoch": 0.56, "grad_norm": 0.5816550850868225, "learning_rate": 8.228682460706403e-05, "loss": 1.0165, "step": 6590 }, { "epoch": 0.56, "grad_norm": 0.7802658081054688, "learning_rate": 8.215624743993003e-05, "loss": 0.8722, "step": 6595 }, { "epoch": 0.56, "grad_norm": 0.7401361465454102, "learning_rate": 8.20257016901215e-05, "loss": 0.8048, "step": 6600 }, { "epoch": 0.56, "grad_norm": 0.8161288499832153, "learning_rate": 8.189518758748908e-05, "loss": 0.8837, "step": 6605 }, { "epoch": 0.56, "grad_norm": 0.729274332523346, "learning_rate": 8.176470536182777e-05, "loss": 0.864, "step": 6610 }, { "epoch": 0.56, "grad_norm": 0.8532578349113464, "learning_rate": 8.163425524287628e-05, "loss": 0.9062, "step": 6615 }, { "epoch": 0.56, "grad_norm": 0.8236483931541443, "learning_rate": 8.150383746031707e-05, "loss": 0.9666, "step": 6620 }, { "epoch": 0.56, "grad_norm": 0.655390202999115, "learning_rate": 8.137345224377536e-05, "loss": 0.9652, "step": 6625 }, { "epoch": 0.56, "grad_norm": 0.8593419790267944, "learning_rate": 8.124309982281914e-05, "loss": 0.9262, "step": 6630 }, { "epoch": 0.56, "grad_norm": 0.6646649241447449, "learning_rate": 8.111278042695881e-05, "loss": 0.7556, "step": 6635 }, { "epoch": 0.56, "grad_norm": 0.7062471508979797, "learning_rate": 8.098249428564635e-05, "loss": 0.9019, "step": 6640 }, { "epoch": 0.56, "grad_norm": 0.7289904356002808, "learning_rate": 8.08522416282754e-05, "loss": 0.9905, "step": 6645 }, { "epoch": 0.56, "grad_norm": 0.5822698473930359, "learning_rate": 8.072202268418057e-05, "loss": 0.8327, "step": 6650 }, { "epoch": 0.56, "grad_norm": 1.0038517713546753, "learning_rate": 8.059183768263712e-05, "loss": 0.7722, "step": 6655 }, { "epoch": 0.56, "grad_norm": 0.6496167182922363, "learning_rate": 8.046168685286052e-05, "loss": 0.8617, "step": 6660 }, { "epoch": 0.56, "grad_norm": 0.6460272073745728, "learning_rate": 8.033157042400613e-05, "loss": 0.7342, "step": 6665 }, { "epoch": 0.56, "grad_norm": 0.7813682556152344, "learning_rate": 8.02014886251687e-05, "loss": 0.8798, "step": 6670 }, { "epoch": 0.56, "grad_norm": 0.724125862121582, "learning_rate": 8.007144168538198e-05, "loss": 0.9017, "step": 6675 }, { "epoch": 0.56, "grad_norm": 0.7241107821464539, "learning_rate": 7.994142983361843e-05, "loss": 0.8341, "step": 6680 }, { "epoch": 0.56, "grad_norm": 0.7320983409881592, "learning_rate": 7.981145329878867e-05, "loss": 0.9171, "step": 6685 }, { "epoch": 0.57, "grad_norm": 0.7066484689712524, "learning_rate": 7.96815123097411e-05, "loss": 0.789, "step": 6690 }, { "epoch": 0.57, "grad_norm": 0.6992103457450867, "learning_rate": 7.955160709526167e-05, "loss": 0.9138, "step": 6695 }, { "epoch": 0.57, "grad_norm": 0.7294855713844299, "learning_rate": 7.942173788407318e-05, "loss": 0.8713, "step": 6700 }, { "epoch": 0.57, "grad_norm": 0.9073478579521179, "learning_rate": 7.929190490483517e-05, "loss": 0.9354, "step": 6705 }, { "epoch": 0.57, "grad_norm": 0.7209857702255249, "learning_rate": 7.916210838614331e-05, "loss": 0.9218, "step": 6710 }, { "epoch": 0.57, "grad_norm": 0.7639252543449402, "learning_rate": 7.903234855652907e-05, "loss": 0.9264, "step": 6715 }, { "epoch": 0.57, "grad_norm": 0.8197429776191711, "learning_rate": 7.890262564445939e-05, "loss": 0.8179, "step": 6720 }, { "epoch": 0.57, "grad_norm": 0.6600883603096008, "learning_rate": 7.877293987833617e-05, "loss": 0.8916, "step": 6725 }, { "epoch": 0.57, "grad_norm": 0.7015143632888794, "learning_rate": 7.864329148649584e-05, "loss": 0.8594, "step": 6730 }, { "epoch": 0.57, "grad_norm": 0.7198832035064697, "learning_rate": 7.851368069720917e-05, "loss": 0.8914, "step": 6735 }, { "epoch": 0.57, "grad_norm": 0.7876847386360168, "learning_rate": 7.838410773868061e-05, "loss": 0.7863, "step": 6740 }, { "epoch": 0.57, "grad_norm": 0.8481395840644836, "learning_rate": 7.825457283904802e-05, "loss": 0.9198, "step": 6745 }, { "epoch": 0.57, "grad_norm": 0.7551649212837219, "learning_rate": 7.81250762263823e-05, "loss": 0.944, "step": 6750 }, { "epoch": 0.57, "grad_norm": 0.6790037751197815, "learning_rate": 7.799561812868691e-05, "loss": 0.8491, "step": 6755 }, { "epoch": 0.57, "grad_norm": 0.789820671081543, "learning_rate": 7.786619877389742e-05, "loss": 0.9247, "step": 6760 }, { "epoch": 0.57, "grad_norm": 0.6964678168296814, "learning_rate": 7.773681838988136e-05, "loss": 0.7134, "step": 6765 }, { "epoch": 0.57, "grad_norm": 0.8050416707992554, "learning_rate": 7.760747720443744e-05, "loss": 0.8758, "step": 6770 }, { "epoch": 0.57, "grad_norm": 0.764661431312561, "learning_rate": 7.747817544529555e-05, "loss": 0.8431, "step": 6775 }, { "epoch": 0.57, "grad_norm": 0.7200865745544434, "learning_rate": 7.7348913340116e-05, "loss": 0.7752, "step": 6780 }, { "epoch": 0.57, "grad_norm": 0.8310842514038086, "learning_rate": 7.721969111648936e-05, "loss": 0.9438, "step": 6785 }, { "epoch": 0.57, "grad_norm": 0.8838992714881897, "learning_rate": 7.709050900193601e-05, "loss": 0.9612, "step": 6790 }, { "epoch": 0.57, "grad_norm": 0.6813116073608398, "learning_rate": 7.696136722390566e-05, "loss": 0.7167, "step": 6795 }, { "epoch": 0.57, "grad_norm": 0.8536310791969299, "learning_rate": 7.683226600977695e-05, "loss": 1.0073, "step": 6800 }, { "epoch": 0.57, "grad_norm": 0.5965607762336731, "learning_rate": 7.670320558685724e-05, "loss": 0.7397, "step": 6805 }, { "epoch": 0.58, "grad_norm": 0.7263707518577576, "learning_rate": 7.657418618238196e-05, "loss": 0.8697, "step": 6810 }, { "epoch": 0.58, "grad_norm": 0.7662990689277649, "learning_rate": 7.644520802351431e-05, "loss": 0.9073, "step": 6815 }, { "epoch": 0.58, "grad_norm": 0.659342885017395, "learning_rate": 7.631627133734497e-05, "loss": 0.8674, "step": 6820 }, { "epoch": 0.58, "grad_norm": 0.737581729888916, "learning_rate": 7.61873763508915e-05, "loss": 0.9284, "step": 6825 }, { "epoch": 0.58, "grad_norm": 0.7625646591186523, "learning_rate": 7.605852329109808e-05, "loss": 0.9188, "step": 6830 }, { "epoch": 0.58, "grad_norm": 0.7271213531494141, "learning_rate": 7.592971238483508e-05, "loss": 0.7946, "step": 6835 }, { "epoch": 0.58, "grad_norm": 0.7164754867553711, "learning_rate": 7.580094385889862e-05, "loss": 0.9425, "step": 6840 }, { "epoch": 0.58, "grad_norm": 0.6694819331169128, "learning_rate": 7.567221794001025e-05, "loss": 0.8806, "step": 6845 }, { "epoch": 0.58, "grad_norm": 0.7427958846092224, "learning_rate": 7.554353485481646e-05, "loss": 0.8411, "step": 6850 }, { "epoch": 0.58, "grad_norm": 0.7842068076133728, "learning_rate": 7.54148948298883e-05, "loss": 0.9323, "step": 6855 }, { "epoch": 0.58, "grad_norm": 0.7708226442337036, "learning_rate": 7.528629809172109e-05, "loss": 0.8336, "step": 6860 }, { "epoch": 0.58, "grad_norm": 0.6797932982444763, "learning_rate": 7.515774486673386e-05, "loss": 0.9175, "step": 6865 }, { "epoch": 0.58, "grad_norm": 0.772397518157959, "learning_rate": 7.502923538126903e-05, "loss": 1.0511, "step": 6870 }, { "epoch": 0.58, "grad_norm": 0.7092522382736206, "learning_rate": 7.490076986159207e-05, "loss": 0.9162, "step": 6875 }, { "epoch": 0.58, "grad_norm": 0.841741144657135, "learning_rate": 7.477234853389099e-05, "loss": 0.9273, "step": 6880 }, { "epoch": 0.58, "grad_norm": 0.682362973690033, "learning_rate": 7.464397162427595e-05, "loss": 0.8971, "step": 6885 }, { "epoch": 0.58, "grad_norm": 0.7055883407592773, "learning_rate": 7.451563935877901e-05, "loss": 0.9621, "step": 6890 }, { "epoch": 0.58, "grad_norm": 0.6486619114875793, "learning_rate": 7.438735196335361e-05, "loss": 0.9299, "step": 6895 }, { "epoch": 0.58, "grad_norm": 0.6966800093650818, "learning_rate": 7.425910966387399e-05, "loss": 0.8397, "step": 6900 }, { "epoch": 0.58, "grad_norm": 0.6951152086257935, "learning_rate": 7.413091268613535e-05, "loss": 0.8891, "step": 6905 }, { "epoch": 0.58, "grad_norm": 0.7122839689254761, "learning_rate": 7.400276125585275e-05, "loss": 0.8917, "step": 6910 }, { "epoch": 0.58, "grad_norm": 0.6868377923965454, "learning_rate": 7.387465559866118e-05, "loss": 0.9443, "step": 6915 }, { "epoch": 0.58, "grad_norm": 0.8608435988426208, "learning_rate": 7.374659594011519e-05, "loss": 0.744, "step": 6920 }, { "epoch": 0.58, "grad_norm": 0.7597634196281433, "learning_rate": 7.361858250568805e-05, "loss": 0.9243, "step": 6925 }, { "epoch": 0.59, "grad_norm": 0.6974212527275085, "learning_rate": 7.34906155207719e-05, "loss": 1.0219, "step": 6930 }, { "epoch": 0.59, "grad_norm": 0.7687626481056213, "learning_rate": 7.3362695210677e-05, "loss": 0.8733, "step": 6935 }, { "epoch": 0.59, "grad_norm": 0.7128692865371704, "learning_rate": 7.32348218006313e-05, "loss": 0.8652, "step": 6940 }, { "epoch": 0.59, "grad_norm": 0.6069349646568298, "learning_rate": 7.310699551578045e-05, "loss": 0.8382, "step": 6945 }, { "epoch": 0.59, "grad_norm": 0.985689103603363, "learning_rate": 7.29792165811869e-05, "loss": 0.8298, "step": 6950 }, { "epoch": 0.59, "grad_norm": 0.8701884746551514, "learning_rate": 7.285148522182975e-05, "loss": 0.9955, "step": 6955 }, { "epoch": 0.59, "grad_norm": 0.834453821182251, "learning_rate": 7.272380166260453e-05, "loss": 0.8658, "step": 6960 }, { "epoch": 0.59, "grad_norm": 0.664952278137207, "learning_rate": 7.259616612832237e-05, "loss": 0.8893, "step": 6965 }, { "epoch": 0.59, "grad_norm": 0.8810997009277344, "learning_rate": 7.24685788437099e-05, "loss": 0.9426, "step": 6970 }, { "epoch": 0.59, "grad_norm": 0.6937991380691528, "learning_rate": 7.234104003340898e-05, "loss": 0.9102, "step": 6975 }, { "epoch": 0.59, "grad_norm": 0.6798984408378601, "learning_rate": 7.221354992197587e-05, "loss": 0.7986, "step": 6980 }, { "epoch": 0.59, "grad_norm": 0.811714768409729, "learning_rate": 7.208610873388122e-05, "loss": 0.9457, "step": 6985 }, { "epoch": 0.59, "grad_norm": 0.7090261578559875, "learning_rate": 7.195871669350953e-05, "loss": 0.9537, "step": 6990 }, { "epoch": 0.59, "grad_norm": 0.6901644468307495, "learning_rate": 7.183137402515872e-05, "loss": 0.8441, "step": 6995 }, { "epoch": 0.59, "grad_norm": 0.9163976907730103, "learning_rate": 7.170408095303992e-05, "loss": 0.9279, "step": 7000 }, { "epoch": 0.59, "grad_norm": 0.6931922435760498, "learning_rate": 7.157683770127671e-05, "loss": 0.8883, "step": 7005 }, { "epoch": 0.59, "grad_norm": 0.6846193075180054, "learning_rate": 7.14496444939051e-05, "loss": 0.9294, "step": 7010 }, { "epoch": 0.59, "grad_norm": 0.7656509876251221, "learning_rate": 7.132250155487304e-05, "loss": 0.833, "step": 7015 }, { "epoch": 0.59, "grad_norm": 0.7121906280517578, "learning_rate": 7.119540910803982e-05, "loss": 0.7782, "step": 7020 }, { "epoch": 0.59, "grad_norm": 0.6763817667961121, "learning_rate": 7.106836737717589e-05, "loss": 0.8981, "step": 7025 }, { "epoch": 0.59, "grad_norm": 0.7830772995948792, "learning_rate": 7.094137658596247e-05, "loss": 0.8412, "step": 7030 }, { "epoch": 0.59, "grad_norm": 0.6828163862228394, "learning_rate": 7.081443695799102e-05, "loss": 0.7903, "step": 7035 }, { "epoch": 0.59, "grad_norm": 0.7466477155685425, "learning_rate": 7.068754871676291e-05, "loss": 0.9161, "step": 7040 }, { "epoch": 0.6, "grad_norm": 0.7557064294815063, "learning_rate": 7.056071208568911e-05, "loss": 0.9285, "step": 7045 }, { "epoch": 0.6, "grad_norm": 0.6851228475570679, "learning_rate": 7.043392728808962e-05, "loss": 0.7728, "step": 7050 }, { "epoch": 0.6, "grad_norm": 0.6479991674423218, "learning_rate": 7.030719454719325e-05, "loss": 0.7594, "step": 7055 }, { "epoch": 0.6, "grad_norm": 0.8061099648475647, "learning_rate": 7.018051408613715e-05, "loss": 0.921, "step": 7060 }, { "epoch": 0.6, "grad_norm": 0.7075207233428955, "learning_rate": 7.005388612796635e-05, "loss": 0.8041, "step": 7065 }, { "epoch": 0.6, "grad_norm": 0.7294245362281799, "learning_rate": 6.992731089563356e-05, "loss": 0.9568, "step": 7070 }, { "epoch": 0.6, "grad_norm": 0.7740755081176758, "learning_rate": 6.980078861199854e-05, "loss": 0.8527, "step": 7075 }, { "epoch": 0.6, "grad_norm": 0.7101545929908752, "learning_rate": 6.967431949982789e-05, "loss": 0.9794, "step": 7080 }, { "epoch": 0.6, "grad_norm": 0.728982150554657, "learning_rate": 6.954790378179459e-05, "loss": 0.8613, "step": 7085 }, { "epoch": 0.6, "grad_norm": 0.6854989528656006, "learning_rate": 6.942154168047756e-05, "loss": 0.8637, "step": 7090 }, { "epoch": 0.6, "grad_norm": 0.7399042248725891, "learning_rate": 6.929523341836133e-05, "loss": 0.9954, "step": 7095 }, { "epoch": 0.6, "grad_norm": 0.9394546151161194, "learning_rate": 6.916897921783574e-05, "loss": 0.784, "step": 7100 }, { "epoch": 0.6, "grad_norm": 0.6976795792579651, "learning_rate": 6.904277930119529e-05, "loss": 0.896, "step": 7105 }, { "epoch": 0.6, "grad_norm": 0.905849277973175, "learning_rate": 6.891663389063898e-05, "loss": 0.8973, "step": 7110 }, { "epoch": 0.6, "grad_norm": 0.742719292640686, "learning_rate": 6.879054320826988e-05, "loss": 0.8795, "step": 7115 }, { "epoch": 0.6, "grad_norm": 0.6740474104881287, "learning_rate": 6.866450747609461e-05, "loss": 0.8453, "step": 7120 }, { "epoch": 0.6, "grad_norm": 0.7123729586601257, "learning_rate": 6.853852691602309e-05, "loss": 0.867, "step": 7125 }, { "epoch": 0.6, "grad_norm": 0.8164270520210266, "learning_rate": 6.841260174986811e-05, "loss": 0.9143, "step": 7130 }, { "epoch": 0.6, "grad_norm": 0.7992966175079346, "learning_rate": 6.828673219934491e-05, "loss": 0.9265, "step": 7135 }, { "epoch": 0.6, "grad_norm": 0.7531195878982544, "learning_rate": 6.816091848607081e-05, "loss": 0.8229, "step": 7140 }, { "epoch": 0.6, "grad_norm": 0.8514467477798462, "learning_rate": 6.80351608315648e-05, "loss": 0.697, "step": 7145 }, { "epoch": 0.6, "grad_norm": 0.6383096575737, "learning_rate": 6.790945945724721e-05, "loss": 0.9276, "step": 7150 }, { "epoch": 0.6, "grad_norm": 0.8250300288200378, "learning_rate": 6.778381458443925e-05, "loss": 0.8385, "step": 7155 }, { "epoch": 0.6, "grad_norm": 0.692227303981781, "learning_rate": 6.765822643436267e-05, "loss": 0.8599, "step": 7160 }, { "epoch": 0.61, "grad_norm": 0.6989490985870361, "learning_rate": 6.753269522813929e-05, "loss": 0.7171, "step": 7165 }, { "epoch": 0.61, "grad_norm": 0.7054385542869568, "learning_rate": 6.740722118679075e-05, "loss": 0.7917, "step": 7170 }, { "epoch": 0.61, "grad_norm": 0.7425069212913513, "learning_rate": 6.728180453123798e-05, "loss": 0.8909, "step": 7175 }, { "epoch": 0.61, "grad_norm": 0.7305125594139099, "learning_rate": 6.715644548230086e-05, "loss": 0.8232, "step": 7180 }, { "epoch": 0.61, "grad_norm": 0.7323471307754517, "learning_rate": 6.703114426069797e-05, "loss": 0.8548, "step": 7185 }, { "epoch": 0.61, "grad_norm": 0.8324387669563293, "learning_rate": 6.69059010870459e-05, "loss": 0.878, "step": 7190 }, { "epoch": 0.61, "grad_norm": 0.678524374961853, "learning_rate": 6.678071618185913e-05, "loss": 0.8011, "step": 7195 }, { "epoch": 0.61, "grad_norm": 0.6823922395706177, "learning_rate": 6.665558976554957e-05, "loss": 0.8471, "step": 7200 }, { "epoch": 0.61, "grad_norm": 0.7461297512054443, "learning_rate": 6.653052205842609e-05, "loss": 0.895, "step": 7205 }, { "epoch": 0.61, "grad_norm": 0.6897065043449402, "learning_rate": 6.640551328069414e-05, "loss": 0.9243, "step": 7210 }, { "epoch": 0.61, "grad_norm": 0.7159562706947327, "learning_rate": 6.628056365245561e-05, "loss": 0.8941, "step": 7215 }, { "epoch": 0.61, "grad_norm": 0.8460803627967834, "learning_rate": 6.615567339370803e-05, "loss": 0.8553, "step": 7220 }, { "epoch": 0.61, "grad_norm": 0.7752892971038818, "learning_rate": 6.603084272434455e-05, "loss": 0.836, "step": 7225 }, { "epoch": 0.61, "grad_norm": 0.9097653031349182, "learning_rate": 6.59060718641533e-05, "loss": 0.9746, "step": 7230 }, { "epoch": 0.61, "grad_norm": 0.8083062767982483, "learning_rate": 6.578136103281717e-05, "loss": 0.9358, "step": 7235 }, { "epoch": 0.61, "grad_norm": 0.8664425015449524, "learning_rate": 6.565671044991335e-05, "loss": 0.998, "step": 7240 }, { "epoch": 0.61, "grad_norm": 0.7137695550918579, "learning_rate": 6.553212033491291e-05, "loss": 0.8507, "step": 7245 }, { "epoch": 0.61, "grad_norm": 0.6330139636993408, "learning_rate": 6.540759090718047e-05, "loss": 0.8012, "step": 7250 }, { "epoch": 0.61, "grad_norm": 0.8543661832809448, "learning_rate": 6.528312238597382e-05, "loss": 0.8562, "step": 7255 }, { "epoch": 0.61, "grad_norm": 0.7509561777114868, "learning_rate": 6.515871499044358e-05, "loss": 0.7816, "step": 7260 }, { "epoch": 0.61, "grad_norm": 0.7578710317611694, "learning_rate": 6.50343689396325e-05, "loss": 0.8554, "step": 7265 }, { "epoch": 0.61, "grad_norm": 0.7192575335502625, "learning_rate": 6.491008445247563e-05, "loss": 0.8258, "step": 7270 }, { "epoch": 0.61, "grad_norm": 0.7355043292045593, "learning_rate": 6.478586174779947e-05, "loss": 0.7551, "step": 7275 }, { "epoch": 0.61, "grad_norm": 0.8419028520584106, "learning_rate": 6.466170104432166e-05, "loss": 0.9343, "step": 7280 }, { "epoch": 0.62, "grad_norm": 0.7209092378616333, "learning_rate": 6.453760256065091e-05, "loss": 0.9588, "step": 7285 }, { "epoch": 0.62, "grad_norm": 0.8162868618965149, "learning_rate": 6.441356651528609e-05, "loss": 0.9452, "step": 7290 }, { "epoch": 0.62, "grad_norm": 0.7459548711776733, "learning_rate": 6.428959312661642e-05, "loss": 0.8994, "step": 7295 }, { "epoch": 0.62, "grad_norm": 0.7295629382133484, "learning_rate": 6.416568261292062e-05, "loss": 0.8829, "step": 7300 }, { "epoch": 0.62, "grad_norm": 0.7880662083625793, "learning_rate": 6.404183519236669e-05, "loss": 0.8597, "step": 7305 }, { "epoch": 0.62, "grad_norm": 0.6742045879364014, "learning_rate": 6.391805108301167e-05, "loss": 0.9528, "step": 7310 }, { "epoch": 0.62, "grad_norm": 0.7547880411148071, "learning_rate": 6.37943305028011e-05, "loss": 0.9334, "step": 7315 }, { "epoch": 0.62, "grad_norm": 0.7758142352104187, "learning_rate": 6.367067366956854e-05, "loss": 0.9598, "step": 7320 }, { "epoch": 0.62, "grad_norm": 0.7337788939476013, "learning_rate": 6.354708080103548e-05, "loss": 1.0353, "step": 7325 }, { "epoch": 0.62, "grad_norm": 0.7313909530639648, "learning_rate": 6.342355211481065e-05, "loss": 0.9179, "step": 7330 }, { "epoch": 0.62, "grad_norm": 0.735283613204956, "learning_rate": 6.33000878283898e-05, "loss": 0.9905, "step": 7335 }, { "epoch": 0.62, "grad_norm": 0.714606761932373, "learning_rate": 6.317668815915547e-05, "loss": 0.8085, "step": 7340 }, { "epoch": 0.62, "grad_norm": 0.7330592274665833, "learning_rate": 6.305335332437617e-05, "loss": 0.7654, "step": 7345 }, { "epoch": 0.62, "grad_norm": 0.6161234974861145, "learning_rate": 6.293008354120635e-05, "loss": 0.9045, "step": 7350 }, { "epoch": 0.62, "grad_norm": 0.7620159387588501, "learning_rate": 6.280687902668604e-05, "loss": 0.9067, "step": 7355 }, { "epoch": 0.62, "grad_norm": 0.6839978694915771, "learning_rate": 6.26837399977402e-05, "loss": 0.8462, "step": 7360 }, { "epoch": 0.62, "grad_norm": 0.6819284558296204, "learning_rate": 6.256066667117855e-05, "loss": 0.824, "step": 7365 }, { "epoch": 0.62, "grad_norm": 0.7531672716140747, "learning_rate": 6.243765926369513e-05, "loss": 1.0016, "step": 7370 }, { "epoch": 0.62, "grad_norm": 0.6551452875137329, "learning_rate": 6.231471799186788e-05, "loss": 0.8784, "step": 7375 }, { "epoch": 0.62, "grad_norm": 0.7985334396362305, "learning_rate": 6.219184307215843e-05, "loss": 0.8138, "step": 7380 }, { "epoch": 0.62, "grad_norm": 0.8459438681602478, "learning_rate": 6.206903472091139e-05, "loss": 0.9275, "step": 7385 }, { "epoch": 0.62, "grad_norm": 0.6439704298973083, "learning_rate": 6.194629315435426e-05, "loss": 0.9211, "step": 7390 }, { "epoch": 0.62, "grad_norm": 0.6462088227272034, "learning_rate": 6.182361858859699e-05, "loss": 0.9553, "step": 7395 }, { "epoch": 0.63, "grad_norm": 0.7741279006004333, "learning_rate": 6.170101123963152e-05, "loss": 0.8159, "step": 7400 }, { "epoch": 0.63, "grad_norm": 0.810332179069519, "learning_rate": 6.157847132333138e-05, "loss": 0.9315, "step": 7405 }, { "epoch": 0.63, "grad_norm": 0.6735566854476929, "learning_rate": 6.145599905545151e-05, "loss": 0.9615, "step": 7410 }, { "epoch": 0.63, "grad_norm": 0.7178578972816467, "learning_rate": 6.133359465162767e-05, "loss": 0.8496, "step": 7415 }, { "epoch": 0.63, "grad_norm": 0.8918485045433044, "learning_rate": 6.121125832737605e-05, "loss": 0.8955, "step": 7420 }, { "epoch": 0.63, "grad_norm": 0.6969971060752869, "learning_rate": 6.108899029809313e-05, "loss": 0.8493, "step": 7425 }, { "epoch": 0.63, "grad_norm": 0.7077623605728149, "learning_rate": 6.0966790779055036e-05, "loss": 0.7798, "step": 7430 }, { "epoch": 0.63, "grad_norm": 0.6919378042221069, "learning_rate": 6.0844659985417285e-05, "loss": 0.829, "step": 7435 }, { "epoch": 0.63, "grad_norm": 1.2355577945709229, "learning_rate": 6.0722598132214445e-05, "loss": 0.8555, "step": 7440 }, { "epoch": 0.63, "grad_norm": 0.6976816654205322, "learning_rate": 6.060060543435961e-05, "loss": 0.897, "step": 7445 }, { "epoch": 0.63, "grad_norm": 0.6664767861366272, "learning_rate": 6.0478682106644225e-05, "loss": 0.815, "step": 7450 }, { "epoch": 0.63, "grad_norm": 0.6910175085067749, "learning_rate": 6.0356828363737484e-05, "loss": 0.7017, "step": 7455 }, { "epoch": 0.63, "grad_norm": 0.7369092702865601, "learning_rate": 6.0235044420186125e-05, "loss": 0.8366, "step": 7460 }, { "epoch": 0.63, "grad_norm": 0.7320170998573303, "learning_rate": 6.0113330490413985e-05, "loss": 0.8964, "step": 7465 }, { "epoch": 0.63, "grad_norm": 0.6991029977798462, "learning_rate": 5.9991686788721646e-05, "loss": 0.7967, "step": 7470 }, { "epoch": 0.63, "grad_norm": 0.5753344297409058, "learning_rate": 5.9870113529285956e-05, "loss": 0.9125, "step": 7475 }, { "epoch": 0.63, "grad_norm": 0.7163155674934387, "learning_rate": 5.974861092615985e-05, "loss": 0.8387, "step": 7480 }, { "epoch": 0.63, "grad_norm": 0.7662732601165771, "learning_rate": 5.96271791932718e-05, "loss": 0.9932, "step": 7485 }, { "epoch": 0.63, "grad_norm": 0.8402357697486877, "learning_rate": 5.950581854442547e-05, "loss": 0.8119, "step": 7490 }, { "epoch": 0.63, "grad_norm": 0.7967143654823303, "learning_rate": 5.9384529193299444e-05, "loss": 0.945, "step": 7495 }, { "epoch": 0.63, "grad_norm": 0.7467031478881836, "learning_rate": 5.926331135344671e-05, "loss": 0.8427, "step": 7500 }, { "epoch": 0.63, "grad_norm": 0.6836127042770386, "learning_rate": 5.9142165238294344e-05, "loss": 0.885, "step": 7505 }, { "epoch": 0.63, "grad_norm": 0.8548532128334045, "learning_rate": 5.9021091061143194e-05, "loss": 0.9242, "step": 7510 }, { "epoch": 0.63, "grad_norm": 0.6110352873802185, "learning_rate": 5.89000890351674e-05, "loss": 0.9173, "step": 7515 }, { "epoch": 0.64, "grad_norm": 0.7760581970214844, "learning_rate": 5.877915937341407e-05, "loss": 0.9793, "step": 7520 }, { "epoch": 0.64, "grad_norm": 0.6202065944671631, "learning_rate": 5.865830228880294e-05, "loss": 0.8601, "step": 7525 }, { "epoch": 0.64, "grad_norm": 0.9943656921386719, "learning_rate": 5.8537517994125876e-05, "loss": 0.7854, "step": 7530 }, { "epoch": 0.64, "grad_norm": 0.7132951617240906, "learning_rate": 5.84168067020467e-05, "loss": 0.9711, "step": 7535 }, { "epoch": 0.64, "grad_norm": 0.8035358786582947, "learning_rate": 5.829616862510059e-05, "loss": 0.8294, "step": 7540 }, { "epoch": 0.64, "grad_norm": 0.7671014070510864, "learning_rate": 5.817560397569385e-05, "loss": 0.8559, "step": 7545 }, { "epoch": 0.64, "grad_norm": 0.6550133228302002, "learning_rate": 5.805511296610362e-05, "loss": 0.8136, "step": 7550 }, { "epoch": 0.64, "grad_norm": 0.594433069229126, "learning_rate": 5.793469580847714e-05, "loss": 0.8446, "step": 7555 }, { "epoch": 0.64, "grad_norm": 0.7266082167625427, "learning_rate": 5.7814352714831774e-05, "loss": 0.9847, "step": 7560 }, { "epoch": 0.64, "grad_norm": 0.8068563342094421, "learning_rate": 5.769408389705453e-05, "loss": 0.8966, "step": 7565 }, { "epoch": 0.64, "grad_norm": 0.7309244871139526, "learning_rate": 5.757388956690155e-05, "loss": 0.926, "step": 7570 }, { "epoch": 0.64, "grad_norm": 0.6696233749389648, "learning_rate": 5.7453769935997825e-05, "loss": 0.9283, "step": 7575 }, { "epoch": 0.64, "grad_norm": 0.7572994828224182, "learning_rate": 5.733372521583686e-05, "loss": 1.004, "step": 7580 }, { "epoch": 0.64, "grad_norm": 0.7896938920021057, "learning_rate": 5.721375561778026e-05, "loss": 0.881, "step": 7585 }, { "epoch": 0.64, "grad_norm": 0.6995894908905029, "learning_rate": 5.70938613530573e-05, "loss": 0.8758, "step": 7590 }, { "epoch": 0.64, "grad_norm": 0.8638085722923279, "learning_rate": 5.697404263276476e-05, "loss": 0.8162, "step": 7595 }, { "epoch": 0.64, "grad_norm": 0.7239465117454529, "learning_rate": 5.685429966786628e-05, "loss": 0.8784, "step": 7600 }, { "epoch": 0.64, "grad_norm": 0.7965220808982849, "learning_rate": 5.673463266919216e-05, "loss": 0.7788, "step": 7605 }, { "epoch": 0.64, "grad_norm": 0.7969363927841187, "learning_rate": 5.661504184743895e-05, "loss": 0.9163, "step": 7610 }, { "epoch": 0.64, "grad_norm": 0.7120541930198669, "learning_rate": 5.6495527413169026e-05, "loss": 0.9791, "step": 7615 }, { "epoch": 0.64, "grad_norm": 0.7569546103477478, "learning_rate": 5.6376089576810396e-05, "loss": 0.8799, "step": 7620 }, { "epoch": 0.64, "grad_norm": 0.6639888286590576, "learning_rate": 5.625672854865609e-05, "loss": 0.8349, "step": 7625 }, { "epoch": 0.64, "grad_norm": 0.8096963763237, "learning_rate": 5.613744453886394e-05, "loss": 0.8983, "step": 7630 }, { "epoch": 0.64, "grad_norm": 0.7809593677520752, "learning_rate": 5.6018237757456163e-05, "loss": 0.8812, "step": 7635 }, { "epoch": 0.65, "grad_norm": 0.8204253315925598, "learning_rate": 5.5899108414318994e-05, "loss": 0.862, "step": 7640 }, { "epoch": 0.65, "grad_norm": 0.7801651358604431, "learning_rate": 5.5780056719202304e-05, "loss": 0.873, "step": 7645 }, { "epoch": 0.65, "grad_norm": 0.9668874144554138, "learning_rate": 5.566108288171936e-05, "loss": 0.8918, "step": 7650 }, { "epoch": 0.65, "grad_norm": 0.7693642377853394, "learning_rate": 5.5542187111346224e-05, "loss": 0.8444, "step": 7655 }, { "epoch": 0.65, "grad_norm": 0.6818474531173706, "learning_rate": 5.5423369617421564e-05, "loss": 0.7371, "step": 7660 }, { "epoch": 0.65, "grad_norm": 0.748066246509552, "learning_rate": 5.530463060914619e-05, "loss": 0.8924, "step": 7665 }, { "epoch": 0.65, "grad_norm": 0.6938193440437317, "learning_rate": 5.5185970295582726e-05, "loss": 0.762, "step": 7670 }, { "epoch": 0.65, "grad_norm": 0.7946302890777588, "learning_rate": 5.50673888856553e-05, "loss": 0.8746, "step": 7675 }, { "epoch": 0.65, "grad_norm": 0.6880958080291748, "learning_rate": 5.494888658814907e-05, "loss": 0.8095, "step": 7680 }, { "epoch": 0.65, "grad_norm": 0.9050551652908325, "learning_rate": 5.483046361170992e-05, "loss": 0.9121, "step": 7685 }, { "epoch": 0.65, "grad_norm": 0.7461682558059692, "learning_rate": 5.471212016484399e-05, "loss": 0.9817, "step": 7690 }, { "epoch": 0.65, "grad_norm": 0.819770872592926, "learning_rate": 5.4593856455917536e-05, "loss": 0.8736, "step": 7695 }, { "epoch": 0.65, "grad_norm": 0.8093503713607788, "learning_rate": 5.447567269315627e-05, "loss": 0.9012, "step": 7700 }, { "epoch": 0.65, "grad_norm": 0.6116926074028015, "learning_rate": 5.435756908464529e-05, "loss": 0.8433, "step": 7705 }, { "epoch": 0.65, "grad_norm": 0.8973492383956909, "learning_rate": 5.4239545838328475e-05, "loss": 0.8598, "step": 7710 }, { "epoch": 0.65, "grad_norm": 0.7746146321296692, "learning_rate": 5.4121603162008226e-05, "loss": 0.904, "step": 7715 }, { "epoch": 0.65, "grad_norm": 0.867224395275116, "learning_rate": 5.400374126334511e-05, "loss": 0.8575, "step": 7720 }, { "epoch": 0.65, "grad_norm": 0.6685112714767456, "learning_rate": 5.388596034985742e-05, "loss": 0.9585, "step": 7725 }, { "epoch": 0.65, "grad_norm": 0.7715603113174438, "learning_rate": 5.376826062892086e-05, "loss": 0.8547, "step": 7730 }, { "epoch": 0.65, "grad_norm": 0.8174254894256592, "learning_rate": 5.365064230776831e-05, "loss": 0.8832, "step": 7735 }, { "epoch": 0.65, "grad_norm": 0.7603541016578674, "learning_rate": 5.3533105593489163e-05, "loss": 0.931, "step": 7740 }, { "epoch": 0.65, "grad_norm": 0.6725593209266663, "learning_rate": 5.3415650693029205e-05, "loss": 0.8013, "step": 7745 }, { "epoch": 0.65, "grad_norm": 0.9110146164894104, "learning_rate": 5.329827781319018e-05, "loss": 0.9516, "step": 7750 }, { "epoch": 0.66, "grad_norm": 0.8404948711395264, "learning_rate": 5.318098716062934e-05, "loss": 0.9361, "step": 7755 }, { "epoch": 0.66, "grad_norm": 0.7202340364456177, "learning_rate": 5.30637789418593e-05, "loss": 0.9126, "step": 7760 }, { "epoch": 0.66, "grad_norm": 0.7240476608276367, "learning_rate": 5.294665336324742e-05, "loss": 0.9588, "step": 7765 }, { "epoch": 0.66, "grad_norm": 0.7280794382095337, "learning_rate": 5.2829610631015606e-05, "loss": 0.9005, "step": 7770 }, { "epoch": 0.66, "grad_norm": 0.8151552677154541, "learning_rate": 5.271265095123987e-05, "loss": 0.9236, "step": 7775 }, { "epoch": 0.66, "grad_norm": 0.7294365763664246, "learning_rate": 5.2595774529850006e-05, "loss": 0.8351, "step": 7780 }, { "epoch": 0.66, "grad_norm": 0.7628800868988037, "learning_rate": 5.24789815726292e-05, "loss": 0.8084, "step": 7785 }, { "epoch": 0.66, "grad_norm": 0.7836165428161621, "learning_rate": 5.2362272285213756e-05, "loss": 0.8823, "step": 7790 }, { "epoch": 0.66, "grad_norm": 0.7417895793914795, "learning_rate": 5.224564687309261e-05, "loss": 1.0214, "step": 7795 }, { "epoch": 0.66, "grad_norm": 0.7525092363357544, "learning_rate": 5.2129105541606916e-05, "loss": 0.945, "step": 7800 }, { "epoch": 0.66, "grad_norm": 0.7636805176734924, "learning_rate": 5.2012648495949976e-05, "loss": 0.8494, "step": 7805 }, { "epoch": 0.66, "grad_norm": 0.8036842942237854, "learning_rate": 5.189627594116657e-05, "loss": 0.872, "step": 7810 }, { "epoch": 0.66, "grad_norm": 0.8050177693367004, "learning_rate": 5.1779988082152786e-05, "loss": 0.896, "step": 7815 }, { "epoch": 0.66, "grad_norm": 0.7123312950134277, "learning_rate": 5.166378512365552e-05, "loss": 0.8844, "step": 7820 }, { "epoch": 0.66, "grad_norm": 0.6821147799491882, "learning_rate": 5.1547667270272226e-05, "loss": 0.8998, "step": 7825 }, { "epoch": 0.66, "grad_norm": 0.7495633959770203, "learning_rate": 5.143163472645049e-05, "loss": 0.8506, "step": 7830 }, { "epoch": 0.66, "grad_norm": 0.7350967526435852, "learning_rate": 5.131568769648775e-05, "loss": 0.7543, "step": 7835 }, { "epoch": 0.66, "grad_norm": 0.7220240235328674, "learning_rate": 5.119982638453075e-05, "loss": 0.8647, "step": 7840 }, { "epoch": 0.66, "grad_norm": 0.6074078679084778, "learning_rate": 5.108405099457549e-05, "loss": 0.7925, "step": 7845 }, { "epoch": 0.66, "grad_norm": 0.7537201046943665, "learning_rate": 5.096836173046663e-05, "loss": 0.953, "step": 7850 }, { "epoch": 0.66, "grad_norm": 0.8651964068412781, "learning_rate": 5.0852758795897006e-05, "loss": 0.8693, "step": 7855 }, { "epoch": 0.66, "grad_norm": 0.6949556469917297, "learning_rate": 5.073724239440773e-05, "loss": 0.8184, "step": 7860 }, { "epoch": 0.66, "grad_norm": 0.8523896932601929, "learning_rate": 5.06218127293874e-05, "loss": 0.9198, "step": 7865 }, { "epoch": 0.66, "grad_norm": 0.7108052968978882, "learning_rate": 5.050647000407189e-05, "loss": 0.841, "step": 7870 }, { "epoch": 0.67, "grad_norm": 0.8096588850021362, "learning_rate": 5.039121442154415e-05, "loss": 0.9523, "step": 7875 }, { "epoch": 0.67, "grad_norm": 0.7509374618530273, "learning_rate": 5.027604618473347e-05, "loss": 0.8155, "step": 7880 }, { "epoch": 0.67, "grad_norm": 0.6036114692687988, "learning_rate": 5.016096549641549e-05, "loss": 0.7785, "step": 7885 }, { "epoch": 0.67, "grad_norm": 0.7784599661827087, "learning_rate": 5.004597255921174e-05, "loss": 0.9645, "step": 7890 }, { "epoch": 0.67, "grad_norm": 0.7571439146995544, "learning_rate": 4.993106757558912e-05, "loss": 0.9329, "step": 7895 }, { "epoch": 0.67, "grad_norm": 0.5869072079658508, "learning_rate": 4.981625074785986e-05, "loss": 0.809, "step": 7900 }, { "epoch": 0.67, "grad_norm": 0.8537865877151489, "learning_rate": 4.9701522278180736e-05, "loss": 0.9381, "step": 7905 }, { "epoch": 0.67, "grad_norm": 1.2609446048736572, "learning_rate": 4.958688236855308e-05, "loss": 0.899, "step": 7910 }, { "epoch": 0.67, "grad_norm": 0.8565067052841187, "learning_rate": 4.9472331220822366e-05, "loss": 1.046, "step": 7915 }, { "epoch": 0.67, "grad_norm": 0.9478633403778076, "learning_rate": 4.935786903667767e-05, "loss": 0.8329, "step": 7920 }, { "epoch": 0.67, "grad_norm": 0.7316033244132996, "learning_rate": 4.9243496017651434e-05, "loss": 0.8807, "step": 7925 }, { "epoch": 0.67, "grad_norm": 0.9690425395965576, "learning_rate": 4.912921236511927e-05, "loss": 0.8134, "step": 7930 }, { "epoch": 0.67, "grad_norm": 0.7974583506584167, "learning_rate": 4.901501828029919e-05, "loss": 0.991, "step": 7935 }, { "epoch": 0.67, "grad_norm": 0.8434284925460815, "learning_rate": 4.890091396425163e-05, "loss": 0.8801, "step": 7940 }, { "epoch": 0.67, "grad_norm": 0.7458634972572327, "learning_rate": 4.878689961787907e-05, "loss": 0.8981, "step": 7945 }, { "epoch": 0.67, "grad_norm": 0.6761431097984314, "learning_rate": 4.8672975441925425e-05, "loss": 0.9697, "step": 7950 }, { "epoch": 0.67, "grad_norm": 0.7856532335281372, "learning_rate": 4.8559141636975925e-05, "loss": 0.7743, "step": 7955 }, { "epoch": 0.67, "grad_norm": 0.7205869555473328, "learning_rate": 4.844539840345666e-05, "loss": 0.9266, "step": 7960 }, { "epoch": 0.67, "grad_norm": 0.7787695527076721, "learning_rate": 4.8331745941634235e-05, "loss": 0.8912, "step": 7965 }, { "epoch": 0.67, "grad_norm": 0.800104558467865, "learning_rate": 4.821818445161551e-05, "loss": 0.9171, "step": 7970 }, { "epoch": 0.67, "grad_norm": 0.6946626305580139, "learning_rate": 4.810471413334711e-05, "loss": 0.9681, "step": 7975 }, { "epoch": 0.67, "grad_norm": 0.8018020391464233, "learning_rate": 4.7991335186615126e-05, "loss": 0.9078, "step": 7980 }, { "epoch": 0.67, "grad_norm": 0.8994207978248596, "learning_rate": 4.78780478110448e-05, "loss": 0.9104, "step": 7985 }, { "epoch": 0.67, "grad_norm": 0.6811192631721497, "learning_rate": 4.776485220610014e-05, "loss": 0.8681, "step": 7990 }, { "epoch": 0.68, "grad_norm": 0.7817435264587402, "learning_rate": 4.765174857108352e-05, "loss": 0.8882, "step": 7995 }, { "epoch": 0.68, "grad_norm": 0.6679404377937317, "learning_rate": 4.7538737105135526e-05, "loss": 0.887, "step": 8000 }, { "epoch": 0.68, "grad_norm": 0.6360458135604858, "learning_rate": 4.7425818007234324e-05, "loss": 0.8876, "step": 8005 }, { "epoch": 0.68, "grad_norm": 0.8776546716690063, "learning_rate": 4.73129914761955e-05, "loss": 0.7742, "step": 8010 }, { "epoch": 0.68, "grad_norm": 0.6663452982902527, "learning_rate": 4.720025771067166e-05, "loss": 0.7151, "step": 8015 }, { "epoch": 0.68, "grad_norm": 0.6508984565734863, "learning_rate": 4.708761690915206e-05, "loss": 0.9013, "step": 8020 }, { "epoch": 0.68, "grad_norm": 0.8622150421142578, "learning_rate": 4.697506926996226e-05, "loss": 0.9069, "step": 8025 }, { "epoch": 0.68, "grad_norm": 0.759907603263855, "learning_rate": 4.686261499126389e-05, "loss": 0.9119, "step": 8030 }, { "epoch": 0.68, "grad_norm": 0.6916012763977051, "learning_rate": 4.6750254271054087e-05, "loss": 0.7963, "step": 8035 }, { "epoch": 0.68, "grad_norm": 0.8521272540092468, "learning_rate": 4.663798730716532e-05, "loss": 0.9725, "step": 8040 }, { "epoch": 0.68, "grad_norm": 0.7710113525390625, "learning_rate": 4.6525814297264945e-05, "loss": 0.9471, "step": 8045 }, { "epoch": 0.68, "grad_norm": 0.7958170175552368, "learning_rate": 4.641373543885489e-05, "loss": 0.8105, "step": 8050 }, { "epoch": 0.68, "grad_norm": 0.823060154914856, "learning_rate": 4.6301750929271404e-05, "loss": 0.8821, "step": 8055 }, { "epoch": 0.68, "grad_norm": 0.6921396851539612, "learning_rate": 4.61898609656845e-05, "loss": 0.8424, "step": 8060 }, { "epoch": 0.68, "grad_norm": 0.7996007800102234, "learning_rate": 4.607806574509781e-05, "loss": 0.8629, "step": 8065 }, { "epoch": 0.68, "grad_norm": 0.7803316116333008, "learning_rate": 4.596636546434807e-05, "loss": 0.9515, "step": 8070 }, { "epoch": 0.68, "grad_norm": 0.855900228023529, "learning_rate": 4.585476032010494e-05, "loss": 0.9281, "step": 8075 }, { "epoch": 0.68, "grad_norm": 0.9303337335586548, "learning_rate": 4.5743250508870475e-05, "loss": 0.9563, "step": 8080 }, { "epoch": 0.68, "grad_norm": 0.8133184313774109, "learning_rate": 4.5631836226979017e-05, "loss": 0.9454, "step": 8085 }, { "epoch": 0.68, "grad_norm": 0.7453633546829224, "learning_rate": 4.5520517670596607e-05, "loss": 0.8819, "step": 8090 }, { "epoch": 0.68, "grad_norm": 0.8156158924102783, "learning_rate": 4.540929503572077e-05, "loss": 0.874, "step": 8095 }, { "epoch": 0.68, "grad_norm": 0.7186123132705688, "learning_rate": 4.5298168518180115e-05, "loss": 0.8392, "step": 8100 }, { "epoch": 0.68, "grad_norm": 0.6981141567230225, "learning_rate": 4.518713831363408e-05, "loss": 0.887, "step": 8105 }, { "epoch": 0.69, "grad_norm": 0.9109984040260315, "learning_rate": 4.5076204617572425e-05, "loss": 0.9101, "step": 8110 }, { "epoch": 0.69, "grad_norm": 0.7632050514221191, "learning_rate": 4.4965367625315146e-05, "loss": 0.8918, "step": 8115 }, { "epoch": 0.69, "grad_norm": 0.9145953059196472, "learning_rate": 4.4854627532011836e-05, "loss": 0.9091, "step": 8120 }, { "epoch": 0.69, "grad_norm": 0.748193621635437, "learning_rate": 4.474398453264154e-05, "loss": 0.8552, "step": 8125 }, { "epoch": 0.69, "grad_norm": 0.7607966661453247, "learning_rate": 4.463343882201231e-05, "loss": 0.9048, "step": 8130 }, { "epoch": 0.69, "grad_norm": 0.8794971704483032, "learning_rate": 4.452299059476091e-05, "loss": 0.9144, "step": 8135 }, { "epoch": 0.69, "grad_norm": 0.8333990573883057, "learning_rate": 4.441264004535254e-05, "loss": 0.9613, "step": 8140 }, { "epoch": 0.69, "grad_norm": 0.8173981308937073, "learning_rate": 4.430238736808033e-05, "loss": 0.9441, "step": 8145 }, { "epoch": 0.69, "grad_norm": 0.8113160729408264, "learning_rate": 4.419223275706515e-05, "loss": 0.8207, "step": 8150 }, { "epoch": 0.69, "grad_norm": 0.6924260854721069, "learning_rate": 4.408217640625514e-05, "loss": 0.8045, "step": 8155 }, { "epoch": 0.69, "grad_norm": 0.8391642570495605, "learning_rate": 4.397221850942549e-05, "loss": 0.8219, "step": 8160 }, { "epoch": 0.69, "grad_norm": 0.8425463438034058, "learning_rate": 4.386235926017798e-05, "loss": 0.9439, "step": 8165 }, { "epoch": 0.69, "grad_norm": 0.745590090751648, "learning_rate": 4.3752598851940805e-05, "loss": 1.0404, "step": 8170 }, { "epoch": 0.69, "grad_norm": 0.6746026277542114, "learning_rate": 4.3642937477968105e-05, "loss": 0.8602, "step": 8175 }, { "epoch": 0.69, "grad_norm": 0.6796015501022339, "learning_rate": 4.3533375331339486e-05, "loss": 0.7994, "step": 8180 }, { "epoch": 0.69, "grad_norm": 0.6459152102470398, "learning_rate": 4.3423912604960095e-05, "loss": 0.7885, "step": 8185 }, { "epoch": 0.69, "grad_norm": 0.7240301966667175, "learning_rate": 4.331454949155983e-05, "loss": 0.914, "step": 8190 }, { "epoch": 0.69, "grad_norm": 0.7516999244689941, "learning_rate": 4.320528618369337e-05, "loss": 0.8743, "step": 8195 }, { "epoch": 0.69, "grad_norm": 0.7468701601028442, "learning_rate": 4.309612287373957e-05, "loss": 0.9048, "step": 8200 }, { "epoch": 0.69, "grad_norm": 0.7464987635612488, "learning_rate": 4.298705975390115e-05, "loss": 0.8997, "step": 8205 }, { "epoch": 0.69, "grad_norm": 0.7245876789093018, "learning_rate": 4.287809701620459e-05, "loss": 0.8149, "step": 8210 }, { "epoch": 0.69, "grad_norm": 0.7378844022750854, "learning_rate": 4.2769234852499505e-05, "loss": 0.9746, "step": 8215 }, { "epoch": 0.69, "grad_norm": 0.8340911865234375, "learning_rate": 4.266047345445846e-05, "loss": 0.8128, "step": 8220 }, { "epoch": 0.69, "grad_norm": 0.7118589878082275, "learning_rate": 4.255181301357668e-05, "loss": 0.8448, "step": 8225 }, { "epoch": 0.7, "grad_norm": 0.7692140340805054, "learning_rate": 4.244325372117156e-05, "loss": 0.8732, "step": 8230 }, { "epoch": 0.7, "grad_norm": 0.6922775506973267, "learning_rate": 4.2334795768382306e-05, "loss": 0.749, "step": 8235 }, { "epoch": 0.7, "grad_norm": 0.8570601344108582, "learning_rate": 4.2226439346169924e-05, "loss": 0.8071, "step": 8240 }, { "epoch": 0.7, "grad_norm": 0.7879267334938049, "learning_rate": 4.211818464531649e-05, "loss": 0.8962, "step": 8245 }, { "epoch": 0.7, "grad_norm": 0.8359821438789368, "learning_rate": 4.2010031856425e-05, "loss": 0.8961, "step": 8250 }, { "epoch": 0.7, "grad_norm": 0.8399404287338257, "learning_rate": 4.190198116991915e-05, "loss": 0.9703, "step": 8255 }, { "epoch": 0.7, "grad_norm": 0.7860932946205139, "learning_rate": 4.179403277604259e-05, "loss": 0.9162, "step": 8260 }, { "epoch": 0.7, "grad_norm": 0.7792479991912842, "learning_rate": 4.168618686485916e-05, "loss": 0.8027, "step": 8265 }, { "epoch": 0.7, "grad_norm": 0.7603457570075989, "learning_rate": 4.1578443626252094e-05, "loss": 0.7886, "step": 8270 }, { "epoch": 0.7, "grad_norm": 0.7225006818771362, "learning_rate": 4.147080324992384e-05, "loss": 0.8427, "step": 8275 }, { "epoch": 0.7, "grad_norm": 0.8202854990959167, "learning_rate": 4.136326592539591e-05, "loss": 0.716, "step": 8280 }, { "epoch": 0.7, "grad_norm": 0.7011755108833313, "learning_rate": 4.125583184200812e-05, "loss": 0.8174, "step": 8285 }, { "epoch": 0.7, "grad_norm": 0.8102407455444336, "learning_rate": 4.114850118891866e-05, "loss": 0.9644, "step": 8290 }, { "epoch": 0.7, "grad_norm": 0.7152637839317322, "learning_rate": 4.104127415510365e-05, "loss": 0.9355, "step": 8295 }, { "epoch": 0.7, "grad_norm": 0.766459584236145, "learning_rate": 4.093415092935667e-05, "loss": 0.8724, "step": 8300 }, { "epoch": 0.7, "grad_norm": 0.6977735757827759, "learning_rate": 4.082713170028858e-05, "loss": 0.8373, "step": 8305 }, { "epoch": 0.7, "grad_norm": 0.6987833380699158, "learning_rate": 4.0720216656327105e-05, "loss": 0.9344, "step": 8310 }, { "epoch": 0.7, "grad_norm": 0.7315753698348999, "learning_rate": 4.0613405985716554e-05, "loss": 0.8926, "step": 8315 }, { "epoch": 0.7, "grad_norm": 0.684941828250885, "learning_rate": 4.050669987651742e-05, "loss": 0.9252, "step": 8320 }, { "epoch": 0.7, "grad_norm": 0.7240857481956482, "learning_rate": 4.04000985166062e-05, "loss": 0.8779, "step": 8325 }, { "epoch": 0.7, "grad_norm": 0.7166629433631897, "learning_rate": 4.029360209367487e-05, "loss": 0.8747, "step": 8330 }, { "epoch": 0.7, "grad_norm": 0.7198646664619446, "learning_rate": 4.0187210795230677e-05, "loss": 0.8418, "step": 8335 }, { "epoch": 0.7, "grad_norm": 0.8439791202545166, "learning_rate": 4.008092480859574e-05, "loss": 0.8697, "step": 8340 }, { "epoch": 0.7, "grad_norm": 0.7171686291694641, "learning_rate": 3.997474432090679e-05, "loss": 0.8472, "step": 8345 }, { "epoch": 0.71, "grad_norm": 0.7596126198768616, "learning_rate": 3.986866951911483e-05, "loss": 0.8471, "step": 8350 }, { "epoch": 0.71, "grad_norm": 0.8424491882324219, "learning_rate": 3.9762700589984744e-05, "loss": 0.9175, "step": 8355 }, { "epoch": 0.71, "grad_norm": 0.9210354685783386, "learning_rate": 3.965683772009502e-05, "loss": 0.9507, "step": 8360 }, { "epoch": 0.71, "grad_norm": 0.7487869262695312, "learning_rate": 3.95510810958374e-05, "loss": 0.9011, "step": 8365 }, { "epoch": 0.71, "grad_norm": 0.7913604974746704, "learning_rate": 3.944543090341656e-05, "loss": 0.9093, "step": 8370 }, { "epoch": 0.71, "grad_norm": 0.6716275811195374, "learning_rate": 3.933988732884976e-05, "loss": 0.8307, "step": 8375 }, { "epoch": 0.71, "grad_norm": 0.8774741888046265, "learning_rate": 3.923445055796664e-05, "loss": 0.946, "step": 8380 }, { "epoch": 0.71, "grad_norm": 0.7749642729759216, "learning_rate": 3.912912077640869e-05, "loss": 1.0164, "step": 8385 }, { "epoch": 0.71, "grad_norm": 1.0464688539505005, "learning_rate": 3.9023898169629046e-05, "loss": 0.9048, "step": 8390 }, { "epoch": 0.71, "grad_norm": 0.715448796749115, "learning_rate": 3.891878292289216e-05, "loss": 0.9282, "step": 8395 }, { "epoch": 0.71, "grad_norm": 0.7132737040519714, "learning_rate": 3.881377522127343e-05, "loss": 0.7596, "step": 8400 }, { "epoch": 0.71, "grad_norm": 0.8384308218955994, "learning_rate": 3.8708875249658905e-05, "loss": 0.9182, "step": 8405 }, { "epoch": 0.71, "grad_norm": 1.0072834491729736, "learning_rate": 3.8604083192745036e-05, "loss": 0.818, "step": 8410 }, { "epoch": 0.71, "grad_norm": 0.8128643035888672, "learning_rate": 3.849939923503815e-05, "loss": 0.9118, "step": 8415 }, { "epoch": 0.71, "grad_norm": 0.7836415767669678, "learning_rate": 3.83948235608543e-05, "loss": 0.9227, "step": 8420 }, { "epoch": 0.71, "grad_norm": 0.8463979363441467, "learning_rate": 3.829035635431889e-05, "loss": 0.8403, "step": 8425 }, { "epoch": 0.71, "grad_norm": 0.7599585056304932, "learning_rate": 3.818599779936629e-05, "loss": 0.9183, "step": 8430 }, { "epoch": 0.71, "grad_norm": 0.7512611150741577, "learning_rate": 3.80817480797397e-05, "loss": 0.8014, "step": 8435 }, { "epoch": 0.71, "grad_norm": 0.7594391703605652, "learning_rate": 3.7977607378990574e-05, "loss": 0.8771, "step": 8440 }, { "epoch": 0.71, "grad_norm": 0.6978799104690552, "learning_rate": 3.787357588047844e-05, "loss": 0.7696, "step": 8445 }, { "epoch": 0.71, "grad_norm": 0.8497031927108765, "learning_rate": 3.7769653767370586e-05, "loss": 0.9166, "step": 8450 }, { "epoch": 0.71, "grad_norm": 0.8748128414154053, "learning_rate": 3.766584122264166e-05, "loss": 0.9172, "step": 8455 }, { "epoch": 0.71, "grad_norm": 0.797017514705658, "learning_rate": 3.7562138429073424e-05, "loss": 0.7935, "step": 8460 }, { "epoch": 0.72, "grad_norm": 0.7100562453269958, "learning_rate": 3.7458545569254445e-05, "loss": 0.8367, "step": 8465 }, { "epoch": 0.72, "grad_norm": 0.6721343994140625, "learning_rate": 3.735506282557967e-05, "loss": 0.7493, "step": 8470 }, { "epoch": 0.72, "grad_norm": 0.8016105890274048, "learning_rate": 3.725169038025016e-05, "loss": 0.8932, "step": 8475 }, { "epoch": 0.72, "grad_norm": 0.787413477897644, "learning_rate": 3.714842841527282e-05, "loss": 0.8338, "step": 8480 }, { "epoch": 0.72, "grad_norm": 0.7610762715339661, "learning_rate": 3.7045277112459954e-05, "loss": 0.9025, "step": 8485 }, { "epoch": 0.72, "grad_norm": 0.7663581371307373, "learning_rate": 3.694223665342915e-05, "loss": 0.8799, "step": 8490 }, { "epoch": 0.72, "grad_norm": 0.8082067966461182, "learning_rate": 3.683930721960276e-05, "loss": 0.8652, "step": 8495 }, { "epoch": 0.72, "grad_norm": 0.9462847113609314, "learning_rate": 3.6736488992207615e-05, "loss": 0.8302, "step": 8500 }, { "epoch": 0.72, "grad_norm": 0.7060618996620178, "learning_rate": 3.663378215227483e-05, "loss": 0.7578, "step": 8505 }, { "epoch": 0.72, "grad_norm": 0.8916406035423279, "learning_rate": 3.653118688063935e-05, "loss": 0.9445, "step": 8510 }, { "epoch": 0.72, "grad_norm": 0.7780551910400391, "learning_rate": 3.6428703357939644e-05, "loss": 0.9411, "step": 8515 }, { "epoch": 0.72, "grad_norm": 0.7502923607826233, "learning_rate": 3.632633176461755e-05, "loss": 0.8078, "step": 8520 }, { "epoch": 0.72, "grad_norm": 1.146773338317871, "learning_rate": 3.622407228091774e-05, "loss": 0.9264, "step": 8525 }, { "epoch": 0.72, "grad_norm": 0.7078317403793335, "learning_rate": 3.612192508688751e-05, "loss": 0.8356, "step": 8530 }, { "epoch": 0.72, "grad_norm": 0.7490567564964294, "learning_rate": 3.601989036237644e-05, "loss": 0.9093, "step": 8535 }, { "epoch": 0.72, "grad_norm": 0.7482819557189941, "learning_rate": 3.5917968287036104e-05, "loss": 0.7811, "step": 8540 }, { "epoch": 0.72, "grad_norm": 0.8336611390113831, "learning_rate": 3.5816159040319716e-05, "loss": 0.9027, "step": 8545 }, { "epoch": 0.72, "grad_norm": 0.9120607972145081, "learning_rate": 3.5714462801481895e-05, "loss": 0.8714, "step": 8550 }, { "epoch": 0.72, "grad_norm": 0.6962396502494812, "learning_rate": 3.5612879749578244e-05, "loss": 0.7858, "step": 8555 }, { "epoch": 0.72, "grad_norm": 0.7170752882957458, "learning_rate": 3.551141006346499e-05, "loss": 0.9845, "step": 8560 }, { "epoch": 0.72, "grad_norm": 0.821403443813324, "learning_rate": 3.5410053921798926e-05, "loss": 0.7884, "step": 8565 }, { "epoch": 0.72, "grad_norm": 0.825884997844696, "learning_rate": 3.530881150303679e-05, "loss": 0.8375, "step": 8570 }, { "epoch": 0.72, "grad_norm": 0.7249986529350281, "learning_rate": 3.5207682985435206e-05, "loss": 0.9092, "step": 8575 }, { "epoch": 0.72, "grad_norm": 0.6975976228713989, "learning_rate": 3.510666854705021e-05, "loss": 0.9401, "step": 8580 }, { "epoch": 0.73, "grad_norm": 0.7805627584457397, "learning_rate": 3.5005768365736855e-05, "loss": 0.8207, "step": 8585 }, { "epoch": 0.73, "grad_norm": 0.7622156143188477, "learning_rate": 3.490498261914923e-05, "loss": 0.8758, "step": 8590 }, { "epoch": 0.73, "grad_norm": 0.8169152736663818, "learning_rate": 3.48043114847398e-05, "loss": 0.9381, "step": 8595 }, { "epoch": 0.73, "grad_norm": 0.7095611691474915, "learning_rate": 3.470375513975925e-05, "loss": 0.8201, "step": 8600 }, { "epoch": 0.73, "grad_norm": 0.7938575744628906, "learning_rate": 3.460331376125624e-05, "loss": 0.8418, "step": 8605 }, { "epoch": 0.73, "grad_norm": 0.9179478287696838, "learning_rate": 3.450298752607696e-05, "loss": 0.9845, "step": 8610 }, { "epoch": 0.73, "grad_norm": 0.7515020966529846, "learning_rate": 3.440277661086475e-05, "loss": 0.8611, "step": 8615 }, { "epoch": 0.73, "grad_norm": 0.7615182399749756, "learning_rate": 3.4302681192060114e-05, "loss": 0.9925, "step": 8620 }, { "epoch": 0.73, "grad_norm": 1.1498469114303589, "learning_rate": 3.4202701445900085e-05, "loss": 0.8428, "step": 8625 }, { "epoch": 0.73, "grad_norm": 0.6935446858406067, "learning_rate": 3.410283754841801e-05, "loss": 0.8424, "step": 8630 }, { "epoch": 0.73, "grad_norm": 0.7446281313896179, "learning_rate": 3.40030896754434e-05, "loss": 0.9002, "step": 8635 }, { "epoch": 0.73, "grad_norm": 0.7671748399734497, "learning_rate": 3.390345800260125e-05, "loss": 0.8108, "step": 8640 }, { "epoch": 0.73, "grad_norm": 0.9478299021720886, "learning_rate": 3.380394270531221e-05, "loss": 0.9014, "step": 8645 }, { "epoch": 0.73, "grad_norm": 0.7975957989692688, "learning_rate": 3.370454395879188e-05, "loss": 0.7999, "step": 8650 }, { "epoch": 0.73, "grad_norm": 0.7455906271934509, "learning_rate": 3.360526193805065e-05, "loss": 0.903, "step": 8655 }, { "epoch": 0.73, "grad_norm": 0.8040593266487122, "learning_rate": 3.3506096817893526e-05, "loss": 0.9848, "step": 8660 }, { "epoch": 0.73, "grad_norm": 0.8673847913742065, "learning_rate": 3.3407048772919514e-05, "loss": 0.9344, "step": 8665 }, { "epoch": 0.73, "grad_norm": 0.8976979851722717, "learning_rate": 3.3308117977521544e-05, "loss": 0.8125, "step": 8670 }, { "epoch": 0.73, "grad_norm": 0.6757309436798096, "learning_rate": 3.32093046058862e-05, "loss": 0.8485, "step": 8675 }, { "epoch": 0.73, "grad_norm": 0.7074803709983826, "learning_rate": 3.311060883199323e-05, "loss": 0.9082, "step": 8680 }, { "epoch": 0.73, "grad_norm": 0.79212486743927, "learning_rate": 3.301203082961532e-05, "loss": 0.8276, "step": 8685 }, { "epoch": 0.73, "grad_norm": 0.8590596318244934, "learning_rate": 3.291357077231781e-05, "loss": 0.8158, "step": 8690 }, { "epoch": 0.73, "grad_norm": 0.8269075155258179, "learning_rate": 3.281522883345843e-05, "loss": 0.7806, "step": 8695 }, { "epoch": 0.73, "grad_norm": 0.8562278151512146, "learning_rate": 3.271700518618683e-05, "loss": 0.8634, "step": 8700 }, { "epoch": 0.74, "grad_norm": 0.8395061492919922, "learning_rate": 3.261890000344453e-05, "loss": 0.9034, "step": 8705 }, { "epoch": 0.74, "grad_norm": 0.7919216156005859, "learning_rate": 3.252091345796432e-05, "loss": 0.8768, "step": 8710 }, { "epoch": 0.74, "grad_norm": 0.706246554851532, "learning_rate": 3.2423045722270294e-05, "loss": 0.8709, "step": 8715 }, { "epoch": 0.74, "grad_norm": 0.8488343954086304, "learning_rate": 3.232529696867712e-05, "loss": 0.842, "step": 8720 }, { "epoch": 0.74, "grad_norm": 0.7069640159606934, "learning_rate": 3.222766736929013e-05, "loss": 0.7587, "step": 8725 }, { "epoch": 0.74, "grad_norm": 0.7644367218017578, "learning_rate": 3.2130157096004864e-05, "loss": 0.8415, "step": 8730 }, { "epoch": 0.74, "grad_norm": 0.6687289476394653, "learning_rate": 3.203276632050671e-05, "loss": 0.7619, "step": 8735 }, { "epoch": 0.74, "grad_norm": 0.864510178565979, "learning_rate": 3.1935495214270705e-05, "loss": 0.9298, "step": 8740 }, { "epoch": 0.74, "grad_norm": 0.8863157629966736, "learning_rate": 3.1838343948561136e-05, "loss": 0.851, "step": 8745 }, { "epoch": 0.74, "grad_norm": 0.7367367744445801, "learning_rate": 3.1741312694431315e-05, "loss": 0.8596, "step": 8750 }, { "epoch": 0.74, "grad_norm": 0.6925660371780396, "learning_rate": 3.164440162272322e-05, "loss": 0.9713, "step": 8755 }, { "epoch": 0.74, "grad_norm": 0.7945080399513245, "learning_rate": 3.1547610904067325e-05, "loss": 0.7486, "step": 8760 }, { "epoch": 0.74, "grad_norm": 0.9216223955154419, "learning_rate": 3.145094070888208e-05, "loss": 0.9921, "step": 8765 }, { "epoch": 0.74, "grad_norm": 0.9635785818099976, "learning_rate": 3.13543912073738e-05, "loss": 0.9232, "step": 8770 }, { "epoch": 0.74, "grad_norm": 0.8723745942115784, "learning_rate": 3.125796256953625e-05, "loss": 1.0093, "step": 8775 }, { "epoch": 0.74, "grad_norm": 0.713442325592041, "learning_rate": 3.1161654965150436e-05, "loss": 0.7681, "step": 8780 }, { "epoch": 0.74, "grad_norm": 0.8069811463356018, "learning_rate": 3.1065468563784196e-05, "loss": 0.9341, "step": 8785 }, { "epoch": 0.74, "grad_norm": 0.8023787140846252, "learning_rate": 3.096940353479208e-05, "loss": 0.8578, "step": 8790 }, { "epoch": 0.74, "grad_norm": 0.8353567719459534, "learning_rate": 3.087346004731485e-05, "loss": 0.9737, "step": 8795 }, { "epoch": 0.74, "grad_norm": 0.7994593977928162, "learning_rate": 3.077763827027929e-05, "loss": 0.845, "step": 8800 }, { "epoch": 0.74, "grad_norm": 0.6210119128227234, "learning_rate": 3.0681938372397865e-05, "loss": 0.9005, "step": 8805 }, { "epoch": 0.74, "grad_norm": 0.7582188844680786, "learning_rate": 3.0586360522168476e-05, "loss": 1.0126, "step": 8810 }, { "epoch": 0.74, "grad_norm": 0.8581645488739014, "learning_rate": 3.0490904887874183e-05, "loss": 0.929, "step": 8815 }, { "epoch": 0.75, "grad_norm": 0.752768874168396, "learning_rate": 3.039557163758279e-05, "loss": 0.9874, "step": 8820 }, { "epoch": 0.75, "grad_norm": 0.9083414673805237, "learning_rate": 3.030036093914663e-05, "loss": 0.923, "step": 8825 }, { "epoch": 0.75, "grad_norm": 0.6985150575637817, "learning_rate": 3.0205272960202292e-05, "loss": 0.8371, "step": 8830 }, { "epoch": 0.75, "grad_norm": 0.8764124512672424, "learning_rate": 3.0110307868170263e-05, "loss": 1.0147, "step": 8835 }, { "epoch": 0.75, "grad_norm": 0.8891944885253906, "learning_rate": 3.0015465830254663e-05, "loss": 0.9144, "step": 8840 }, { "epoch": 0.75, "grad_norm": 0.7759283185005188, "learning_rate": 2.9920747013443007e-05, "loss": 0.926, "step": 8845 }, { "epoch": 0.75, "grad_norm": 0.8171085715293884, "learning_rate": 2.98261515845058e-05, "loss": 0.8523, "step": 8850 }, { "epoch": 0.75, "grad_norm": 0.8419032692909241, "learning_rate": 2.9731679709996306e-05, "loss": 0.8821, "step": 8855 }, { "epoch": 0.75, "grad_norm": 0.8328216075897217, "learning_rate": 2.963733155625026e-05, "loss": 0.7794, "step": 8860 }, { "epoch": 0.75, "grad_norm": 0.8300039768218994, "learning_rate": 2.954310728938553e-05, "loss": 0.7377, "step": 8865 }, { "epoch": 0.75, "grad_norm": 0.8087813854217529, "learning_rate": 2.944900707530195e-05, "loss": 0.739, "step": 8870 }, { "epoch": 0.75, "grad_norm": 0.8231684565544128, "learning_rate": 2.9355031079680827e-05, "loss": 0.9466, "step": 8875 }, { "epoch": 0.75, "grad_norm": 0.7937616109848022, "learning_rate": 2.9261179467984822e-05, "loss": 0.9411, "step": 8880 }, { "epoch": 0.75, "grad_norm": 0.8774697780609131, "learning_rate": 2.9167452405457562e-05, "loss": 0.8134, "step": 8885 }, { "epoch": 0.75, "grad_norm": 0.7183670401573181, "learning_rate": 2.907385005712341e-05, "loss": 0.8746, "step": 8890 }, { "epoch": 0.75, "grad_norm": 0.921839714050293, "learning_rate": 2.8980372587787087e-05, "loss": 0.8537, "step": 8895 }, { "epoch": 0.75, "grad_norm": 0.6738794445991516, "learning_rate": 2.888702016203354e-05, "loss": 0.9334, "step": 8900 }, { "epoch": 0.75, "grad_norm": 0.7114542722702026, "learning_rate": 2.879379294422748e-05, "loss": 0.7629, "step": 8905 }, { "epoch": 0.75, "grad_norm": 0.9150660634040833, "learning_rate": 2.8700691098513188e-05, "loss": 0.8519, "step": 8910 }, { "epoch": 0.75, "grad_norm": 0.7252589464187622, "learning_rate": 2.8607714788814176e-05, "loss": 0.8837, "step": 8915 }, { "epoch": 0.75, "grad_norm": 0.7726362943649292, "learning_rate": 2.8514864178832967e-05, "loss": 0.8531, "step": 8920 }, { "epoch": 0.75, "grad_norm": 0.7412396669387817, "learning_rate": 2.842213943205072e-05, "loss": 0.8902, "step": 8925 }, { "epoch": 0.75, "grad_norm": 0.7849807739257812, "learning_rate": 2.8329540711727054e-05, "loss": 0.827, "step": 8930 }, { "epoch": 0.75, "grad_norm": 0.9399614930152893, "learning_rate": 2.823706818089965e-05, "loss": 0.8699, "step": 8935 }, { "epoch": 0.76, "grad_norm": 0.7663363814353943, "learning_rate": 2.8144722002383993e-05, "loss": 0.8914, "step": 8940 }, { "epoch": 0.76, "grad_norm": 0.7527604699134827, "learning_rate": 2.8052502338773146e-05, "loss": 0.81, "step": 8945 }, { "epoch": 0.76, "grad_norm": 0.8573604226112366, "learning_rate": 2.7960409352437333e-05, "loss": 0.9121, "step": 8950 }, { "epoch": 0.76, "grad_norm": 0.8559805750846863, "learning_rate": 2.7868443205523888e-05, "loss": 0.8892, "step": 8955 }, { "epoch": 0.76, "grad_norm": 0.8466255068778992, "learning_rate": 2.777660405995671e-05, "loss": 0.7399, "step": 8960 }, { "epoch": 0.76, "grad_norm": 0.6989338397979736, "learning_rate": 2.768489207743603e-05, "loss": 0.8253, "step": 8965 }, { "epoch": 0.76, "grad_norm": 0.7643797993659973, "learning_rate": 2.7593307419438354e-05, "loss": 0.8435, "step": 8970 }, { "epoch": 0.76, "grad_norm": 0.8357900381088257, "learning_rate": 2.7501850247215878e-05, "loss": 0.856, "step": 8975 }, { "epoch": 0.76, "grad_norm": 0.8707523345947266, "learning_rate": 2.741052072179636e-05, "loss": 0.8189, "step": 8980 }, { "epoch": 0.76, "grad_norm": 0.6612287759780884, "learning_rate": 2.7319319003982925e-05, "loss": 0.9007, "step": 8985 }, { "epoch": 0.76, "grad_norm": 0.8610745668411255, "learning_rate": 2.7228245254353444e-05, "loss": 0.8348, "step": 8990 }, { "epoch": 0.76, "grad_norm": 0.7897176742553711, "learning_rate": 2.7137299633260638e-05, "loss": 0.8131, "step": 8995 }, { "epoch": 0.76, "grad_norm": 0.8576532602310181, "learning_rate": 2.7046482300831642e-05, "loss": 1.0017, "step": 9000 }, { "epoch": 0.76, "grad_norm": 0.7004991769790649, "learning_rate": 2.6955793416967646e-05, "loss": 0.7315, "step": 9005 }, { "epoch": 0.76, "grad_norm": 0.9065749645233154, "learning_rate": 2.686523314134367e-05, "loss": 0.85, "step": 9010 }, { "epoch": 0.76, "grad_norm": 0.8746364116668701, "learning_rate": 2.6774801633408418e-05, "loss": 0.8827, "step": 9015 }, { "epoch": 0.76, "grad_norm": 0.5582964420318604, "learning_rate": 2.668449905238367e-05, "loss": 0.8187, "step": 9020 }, { "epoch": 0.76, "grad_norm": 0.7435264587402344, "learning_rate": 2.659432555726441e-05, "loss": 0.8926, "step": 9025 }, { "epoch": 0.76, "grad_norm": 0.7577239871025085, "learning_rate": 2.6504281306818225e-05, "loss": 0.7974, "step": 9030 }, { "epoch": 0.76, "grad_norm": 0.8755051493644714, "learning_rate": 2.641436645958515e-05, "loss": 0.8204, "step": 9035 }, { "epoch": 0.76, "grad_norm": 0.7866493463516235, "learning_rate": 2.6324581173877473e-05, "loss": 0.9189, "step": 9040 }, { "epoch": 0.76, "grad_norm": 0.6970306634902954, "learning_rate": 2.6234925607779215e-05, "loss": 0.8448, "step": 9045 }, { "epoch": 0.76, "grad_norm": 0.775654673576355, "learning_rate": 2.6145399919146086e-05, "loss": 0.8325, "step": 9050 }, { "epoch": 0.76, "grad_norm": 0.796474277973175, "learning_rate": 2.6056004265605148e-05, "loss": 0.9613, "step": 9055 }, { "epoch": 0.77, "grad_norm": 0.7175946235656738, "learning_rate": 2.596673880455448e-05, "loss": 0.8525, "step": 9060 }, { "epoch": 0.77, "grad_norm": 0.8016606569290161, "learning_rate": 2.587760369316291e-05, "loss": 0.8111, "step": 9065 }, { "epoch": 0.77, "grad_norm": 0.8967776894569397, "learning_rate": 2.578859908836979e-05, "loss": 0.8669, "step": 9070 }, { "epoch": 0.77, "grad_norm": 0.8003263473510742, "learning_rate": 2.569972514688468e-05, "loss": 0.8308, "step": 9075 }, { "epoch": 0.77, "grad_norm": 0.7419909834861755, "learning_rate": 2.5610982025187046e-05, "loss": 0.8653, "step": 9080 }, { "epoch": 0.77, "grad_norm": 0.7726119756698608, "learning_rate": 2.552236987952612e-05, "loss": 0.8031, "step": 9085 }, { "epoch": 0.77, "grad_norm": 0.7673059701919556, "learning_rate": 2.543388886592045e-05, "loss": 1.0341, "step": 9090 }, { "epoch": 0.77, "grad_norm": 0.7712087035179138, "learning_rate": 2.5345539140157705e-05, "loss": 0.811, "step": 9095 }, { "epoch": 0.77, "grad_norm": 0.7213789224624634, "learning_rate": 2.5257320857794397e-05, "loss": 0.7562, "step": 9100 }, { "epoch": 0.77, "grad_norm": 0.7315651178359985, "learning_rate": 2.5169234174155608e-05, "loss": 0.7916, "step": 9105 }, { "epoch": 0.77, "grad_norm": 0.807244598865509, "learning_rate": 2.5081279244334764e-05, "loss": 0.865, "step": 9110 }, { "epoch": 0.77, "grad_norm": 0.7596588134765625, "learning_rate": 2.4993456223193266e-05, "loss": 0.907, "step": 9115 }, { "epoch": 0.77, "grad_norm": 0.9872735142707825, "learning_rate": 2.490576526536025e-05, "loss": 0.9589, "step": 9120 }, { "epoch": 0.77, "grad_norm": 0.8341203927993774, "learning_rate": 2.4818206525232356e-05, "loss": 0.873, "step": 9125 }, { "epoch": 0.77, "grad_norm": 0.8057723045349121, "learning_rate": 2.4730780156973442e-05, "loss": 0.9904, "step": 9130 }, { "epoch": 0.77, "grad_norm": 0.8961916565895081, "learning_rate": 2.464348631451424e-05, "loss": 0.8134, "step": 9135 }, { "epoch": 0.77, "grad_norm": 1.030664086341858, "learning_rate": 2.455632515155224e-05, "loss": 0.8357, "step": 9140 }, { "epoch": 0.77, "grad_norm": 0.6567131280899048, "learning_rate": 2.4469296821551257e-05, "loss": 0.7706, "step": 9145 }, { "epoch": 0.77, "grad_norm": 0.74416583776474, "learning_rate": 2.4382401477741244e-05, "loss": 0.9082, "step": 9150 }, { "epoch": 0.77, "grad_norm": 0.9838562607765198, "learning_rate": 2.429563927311801e-05, "loss": 1.0202, "step": 9155 }, { "epoch": 0.77, "grad_norm": 0.7462790608406067, "learning_rate": 2.4209010360442896e-05, "loss": 0.9368, "step": 9160 }, { "epoch": 0.77, "grad_norm": 0.7337406277656555, "learning_rate": 2.4122514892242677e-05, "loss": 0.8163, "step": 9165 }, { "epoch": 0.77, "grad_norm": 0.6417105197906494, "learning_rate": 2.4036153020809072e-05, "loss": 0.833, "step": 9170 }, { "epoch": 0.78, "grad_norm": 0.9543890357017517, "learning_rate": 2.3949924898198604e-05, "loss": 0.8944, "step": 9175 }, { "epoch": 0.78, "grad_norm": 0.763455867767334, "learning_rate": 2.3863830676232313e-05, "loss": 0.8355, "step": 9180 }, { "epoch": 0.78, "grad_norm": 0.7808805108070374, "learning_rate": 2.377787050649547e-05, "loss": 0.9126, "step": 9185 }, { "epoch": 0.78, "grad_norm": 0.7466232180595398, "learning_rate": 2.36920445403373e-05, "loss": 0.9283, "step": 9190 }, { "epoch": 0.78, "grad_norm": 0.7460727095603943, "learning_rate": 2.3606352928870835e-05, "loss": 0.8184, "step": 9195 }, { "epoch": 0.78, "grad_norm": 0.8474850654602051, "learning_rate": 2.352079582297244e-05, "loss": 0.941, "step": 9200 }, { "epoch": 0.78, "grad_norm": 0.8749337792396545, "learning_rate": 2.34353733732817e-05, "loss": 0.873, "step": 9205 }, { "epoch": 0.78, "grad_norm": 0.8273049592971802, "learning_rate": 2.335008573020111e-05, "loss": 0.9853, "step": 9210 }, { "epoch": 0.78, "grad_norm": 0.7353382706642151, "learning_rate": 2.326493304389582e-05, "loss": 0.9153, "step": 9215 }, { "epoch": 0.78, "grad_norm": 0.8182071447372437, "learning_rate": 2.3179915464293323e-05, "loss": 0.8271, "step": 9220 }, { "epoch": 0.78, "grad_norm": 0.952183187007904, "learning_rate": 2.309503314108331e-05, "loss": 0.8598, "step": 9225 }, { "epoch": 0.78, "grad_norm": 0.8443077206611633, "learning_rate": 2.301028622371726e-05, "loss": 0.8799, "step": 9230 }, { "epoch": 0.78, "grad_norm": 0.7569529414176941, "learning_rate": 2.2925674861408264e-05, "loss": 0.8918, "step": 9235 }, { "epoch": 0.78, "grad_norm": 0.8164322376251221, "learning_rate": 2.2841199203130747e-05, "loss": 0.9618, "step": 9240 }, { "epoch": 0.78, "grad_norm": 0.883344829082489, "learning_rate": 2.2756859397620156e-05, "loss": 0.9899, "step": 9245 }, { "epoch": 0.78, "grad_norm": 0.713097095489502, "learning_rate": 2.267265559337286e-05, "loss": 0.8803, "step": 9250 }, { "epoch": 0.78, "grad_norm": 0.6419798135757446, "learning_rate": 2.2588587938645656e-05, "loss": 0.8278, "step": 9255 }, { "epoch": 0.78, "grad_norm": 0.75221186876297, "learning_rate": 2.2504656581455665e-05, "loss": 0.9868, "step": 9260 }, { "epoch": 0.78, "grad_norm": 0.8278515934944153, "learning_rate": 2.242086166958004e-05, "loss": 0.8268, "step": 9265 }, { "epoch": 0.78, "grad_norm": 0.9154884219169617, "learning_rate": 2.233720335055567e-05, "loss": 0.8536, "step": 9270 }, { "epoch": 0.78, "grad_norm": 0.7821744680404663, "learning_rate": 2.2253681771678946e-05, "loss": 0.9334, "step": 9275 }, { "epoch": 0.78, "grad_norm": 0.7095476984977722, "learning_rate": 2.2170297080005564e-05, "loss": 0.8938, "step": 9280 }, { "epoch": 0.78, "grad_norm": 0.7716023921966553, "learning_rate": 2.208704942235017e-05, "loss": 0.7645, "step": 9285 }, { "epoch": 0.78, "grad_norm": 0.8259437680244446, "learning_rate": 2.200393894528603e-05, "loss": 0.7961, "step": 9290 }, { "epoch": 0.79, "grad_norm": 0.7871156334877014, "learning_rate": 2.1920965795145054e-05, "loss": 0.9552, "step": 9295 }, { "epoch": 0.79, "grad_norm": 0.7442185878753662, "learning_rate": 2.1838130118017252e-05, "loss": 0.871, "step": 9300 }, { "epoch": 0.79, "grad_norm": 0.7882496118545532, "learning_rate": 2.175543205975059e-05, "loss": 0.883, "step": 9305 }, { "epoch": 0.79, "grad_norm": 0.7419693470001221, "learning_rate": 2.1672871765950808e-05, "loss": 0.8593, "step": 9310 }, { "epoch": 0.79, "grad_norm": 0.9180772304534912, "learning_rate": 2.1590449381980993e-05, "loss": 0.9116, "step": 9315 }, { "epoch": 0.79, "grad_norm": 0.7324300408363342, "learning_rate": 2.150816505296147e-05, "loss": 0.8584, "step": 9320 }, { "epoch": 0.79, "grad_norm": 0.8154258728027344, "learning_rate": 2.1426018923769464e-05, "loss": 0.869, "step": 9325 }, { "epoch": 0.79, "grad_norm": 0.7851940989494324, "learning_rate": 2.1344011139038843e-05, "loss": 0.8994, "step": 9330 }, { "epoch": 0.79, "grad_norm": 0.8780238628387451, "learning_rate": 2.126214184316002e-05, "loss": 0.8458, "step": 9335 }, { "epoch": 0.79, "grad_norm": 0.75881427526474, "learning_rate": 2.1180411180279458e-05, "loss": 0.9026, "step": 9340 }, { "epoch": 0.79, "grad_norm": 0.7425323128700256, "learning_rate": 2.1098819294299498e-05, "loss": 0.9413, "step": 9345 }, { "epoch": 0.79, "grad_norm": 0.7396215796470642, "learning_rate": 2.101736632887825e-05, "loss": 0.7693, "step": 9350 }, { "epoch": 0.79, "grad_norm": 0.7948014140129089, "learning_rate": 2.0936052427429186e-05, "loss": 0.8503, "step": 9355 }, { "epoch": 0.79, "grad_norm": 0.7535755634307861, "learning_rate": 2.085487773312086e-05, "loss": 0.9074, "step": 9360 }, { "epoch": 0.79, "grad_norm": 0.7665767073631287, "learning_rate": 2.0773842388876884e-05, "loss": 0.8667, "step": 9365 }, { "epoch": 0.79, "grad_norm": 0.7539588809013367, "learning_rate": 2.0692946537375336e-05, "loss": 0.8238, "step": 9370 }, { "epoch": 0.79, "grad_norm": 0.6851677894592285, "learning_rate": 2.0612190321048762e-05, "loss": 0.8384, "step": 9375 }, { "epoch": 0.79, "grad_norm": 0.9282670617103577, "learning_rate": 2.053157388208393e-05, "loss": 0.8211, "step": 9380 }, { "epoch": 0.79, "grad_norm": 0.897520899772644, "learning_rate": 2.0451097362421366e-05, "loss": 0.9642, "step": 9385 }, { "epoch": 0.79, "grad_norm": 0.8395950794219971, "learning_rate": 2.037076090375539e-05, "loss": 0.8843, "step": 9390 }, { "epoch": 0.79, "grad_norm": 0.8132792115211487, "learning_rate": 2.029056464753363e-05, "loss": 0.9268, "step": 9395 }, { "epoch": 0.79, "grad_norm": 0.6976551413536072, "learning_rate": 2.021050873495679e-05, "loss": 0.7874, "step": 9400 }, { "epoch": 0.79, "grad_norm": 0.8054299354553223, "learning_rate": 2.013059330697864e-05, "loss": 0.9546, "step": 9405 }, { "epoch": 0.79, "grad_norm": 0.8405972719192505, "learning_rate": 2.005081850430548e-05, "loss": 0.8365, "step": 9410 }, { "epoch": 0.8, "grad_norm": 0.6910131573677063, "learning_rate": 1.9971184467396022e-05, "loss": 0.9169, "step": 9415 }, { "epoch": 0.8, "grad_norm": 0.7588268518447876, "learning_rate": 1.989169133646124e-05, "loss": 0.8404, "step": 9420 }, { "epoch": 0.8, "grad_norm": 0.7150195240974426, "learning_rate": 1.981233925146385e-05, "loss": 0.8692, "step": 9425 }, { "epoch": 0.8, "grad_norm": 0.8912619948387146, "learning_rate": 1.9733128352118324e-05, "loss": 0.9376, "step": 9430 }, { "epoch": 0.8, "grad_norm": 0.8724376559257507, "learning_rate": 1.9654058777890573e-05, "loss": 0.9965, "step": 9435 }, { "epoch": 0.8, "grad_norm": 0.7318885326385498, "learning_rate": 1.9575130667997643e-05, "loss": 0.9009, "step": 9440 }, { "epoch": 0.8, "grad_norm": 0.8611711859703064, "learning_rate": 1.9496344161407487e-05, "loss": 1.0043, "step": 9445 }, { "epoch": 0.8, "grad_norm": 0.753633439540863, "learning_rate": 1.9417699396838764e-05, "loss": 0.8654, "step": 9450 }, { "epoch": 0.8, "grad_norm": 0.8039273619651794, "learning_rate": 1.9339196512760538e-05, "loss": 0.8386, "step": 9455 }, { "epoch": 0.8, "grad_norm": 0.760884165763855, "learning_rate": 1.926083564739215e-05, "loss": 0.8392, "step": 9460 }, { "epoch": 0.8, "grad_norm": 0.805150032043457, "learning_rate": 1.9182616938702792e-05, "loss": 0.8525, "step": 9465 }, { "epoch": 0.8, "grad_norm": 0.7307751774787903, "learning_rate": 1.910454052441141e-05, "loss": 0.8812, "step": 9470 }, { "epoch": 0.8, "grad_norm": 0.6862212419509888, "learning_rate": 1.9026606541986393e-05, "loss": 0.8585, "step": 9475 }, { "epoch": 0.8, "grad_norm": 0.7243665456771851, "learning_rate": 1.894881512864537e-05, "loss": 0.8165, "step": 9480 }, { "epoch": 0.8, "grad_norm": 0.6453014612197876, "learning_rate": 1.8871166421354924e-05, "loss": 0.8449, "step": 9485 }, { "epoch": 0.8, "grad_norm": 0.7892723679542542, "learning_rate": 1.879366055683044e-05, "loss": 0.9335, "step": 9490 }, { "epoch": 0.8, "grad_norm": 0.70809006690979, "learning_rate": 1.871629767153573e-05, "loss": 0.9325, "step": 9495 }, { "epoch": 0.8, "grad_norm": 0.816694438457489, "learning_rate": 1.863907790168289e-05, "loss": 0.7737, "step": 9500 }, { "epoch": 0.8, "grad_norm": 0.8121705055236816, "learning_rate": 1.8562001383232043e-05, "loss": 0.7726, "step": 9505 }, { "epoch": 0.8, "grad_norm": 0.8642139434814453, "learning_rate": 1.848506825189107e-05, "loss": 0.8563, "step": 9510 }, { "epoch": 0.8, "grad_norm": 0.7992377877235413, "learning_rate": 1.8408278643115384e-05, "loss": 0.9831, "step": 9515 }, { "epoch": 0.8, "grad_norm": 0.8327411413192749, "learning_rate": 1.833163269210777e-05, "loss": 0.8809, "step": 9520 }, { "epoch": 0.8, "grad_norm": 0.6830605268478394, "learning_rate": 1.825513053381801e-05, "loss": 0.7633, "step": 9525 }, { "epoch": 0.81, "grad_norm": 0.8190145492553711, "learning_rate": 1.8178772302942705e-05, "loss": 0.9256, "step": 9530 }, { "epoch": 0.81, "grad_norm": 0.857457160949707, "learning_rate": 1.8102558133925084e-05, "loss": 0.8816, "step": 9535 }, { "epoch": 0.81, "grad_norm": 0.7766903638839722, "learning_rate": 1.802648816095468e-05, "loss": 0.8836, "step": 9540 }, { "epoch": 0.81, "grad_norm": 0.7608227133750916, "learning_rate": 1.7950562517967217e-05, "loss": 0.9066, "step": 9545 }, { "epoch": 0.81, "grad_norm": 0.67827969789505, "learning_rate": 1.787478133864423e-05, "loss": 0.7088, "step": 9550 }, { "epoch": 0.81, "grad_norm": 0.8599781394004822, "learning_rate": 1.779914475641292e-05, "loss": 0.9545, "step": 9555 }, { "epoch": 0.81, "grad_norm": 0.7540460824966431, "learning_rate": 1.7723652904445907e-05, "loss": 0.8754, "step": 9560 }, { "epoch": 0.81, "grad_norm": 0.7129548192024231, "learning_rate": 1.7648305915660968e-05, "loss": 0.8777, "step": 9565 }, { "epoch": 0.81, "grad_norm": 0.7389048337936401, "learning_rate": 1.75731039227208e-05, "loss": 0.802, "step": 9570 }, { "epoch": 0.81, "grad_norm": 0.7623948454856873, "learning_rate": 1.7498047058032896e-05, "loss": 0.9349, "step": 9575 }, { "epoch": 0.81, "grad_norm": 0.8034424781799316, "learning_rate": 1.742313545374914e-05, "loss": 0.936, "step": 9580 }, { "epoch": 0.81, "grad_norm": 0.8239856958389282, "learning_rate": 1.7348369241765683e-05, "loss": 0.7999, "step": 9585 }, { "epoch": 0.81, "grad_norm": 0.8029961585998535, "learning_rate": 1.7273748553722668e-05, "loss": 0.8171, "step": 9590 }, { "epoch": 0.81, "grad_norm": 0.7298925518989563, "learning_rate": 1.7199273521004046e-05, "loss": 0.8307, "step": 9595 }, { "epoch": 0.81, "grad_norm": 0.7932624220848083, "learning_rate": 1.7124944274737274e-05, "loss": 1.0165, "step": 9600 }, { "epoch": 0.81, "grad_norm": 0.8447069525718689, "learning_rate": 1.7050760945793187e-05, "loss": 0.963, "step": 9605 }, { "epoch": 0.81, "grad_norm": 0.7499577403068542, "learning_rate": 1.6976723664785653e-05, "loss": 0.7964, "step": 9610 }, { "epoch": 0.81, "grad_norm": 0.7524833679199219, "learning_rate": 1.6902832562071404e-05, "loss": 0.7665, "step": 9615 }, { "epoch": 0.81, "grad_norm": 0.8401513695716858, "learning_rate": 1.682908776774981e-05, "loss": 0.8132, "step": 9620 }, { "epoch": 0.81, "grad_norm": 0.8104890584945679, "learning_rate": 1.6755489411662595e-05, "loss": 0.9056, "step": 9625 }, { "epoch": 0.81, "grad_norm": 0.7588112354278564, "learning_rate": 1.668203762339373e-05, "loss": 0.7794, "step": 9630 }, { "epoch": 0.81, "grad_norm": 0.8475006222724915, "learning_rate": 1.6608732532269077e-05, "loss": 0.7724, "step": 9635 }, { "epoch": 0.81, "grad_norm": 0.8192122578620911, "learning_rate": 1.6535574267356192e-05, "loss": 0.9124, "step": 9640 }, { "epoch": 0.81, "grad_norm": 0.7818514704704285, "learning_rate": 1.6462562957464132e-05, "loss": 0.8726, "step": 9645 }, { "epoch": 0.82, "grad_norm": 0.8724757432937622, "learning_rate": 1.6389698731143242e-05, "loss": 0.8193, "step": 9650 }, { "epoch": 0.82, "grad_norm": 0.8011074066162109, "learning_rate": 1.631698171668483e-05, "loss": 0.8256, "step": 9655 }, { "epoch": 0.82, "grad_norm": 1.0343480110168457, "learning_rate": 1.6244412042121105e-05, "loss": 0.9657, "step": 9660 }, { "epoch": 0.82, "grad_norm": 0.8504678010940552, "learning_rate": 1.61719898352248e-05, "loss": 0.9206, "step": 9665 }, { "epoch": 0.82, "grad_norm": 0.8102021813392639, "learning_rate": 1.6099715223508937e-05, "loss": 0.9319, "step": 9670 }, { "epoch": 0.82, "grad_norm": 0.8873084783554077, "learning_rate": 1.6027588334226807e-05, "loss": 0.927, "step": 9675 }, { "epoch": 0.82, "grad_norm": 0.7379308938980103, "learning_rate": 1.59556092943715e-05, "loss": 0.7433, "step": 9680 }, { "epoch": 0.82, "grad_norm": 0.8241307139396667, "learning_rate": 1.5883778230675862e-05, "loss": 0.8641, "step": 9685 }, { "epoch": 0.82, "grad_norm": 0.7821060419082642, "learning_rate": 1.5812095269612136e-05, "loss": 0.9231, "step": 9690 }, { "epoch": 0.82, "grad_norm": 0.7354114651679993, "learning_rate": 1.5740560537391858e-05, "loss": 0.8959, "step": 9695 }, { "epoch": 0.82, "grad_norm": 0.8831083178520203, "learning_rate": 1.5669174159965517e-05, "loss": 1.0867, "step": 9700 }, { "epoch": 0.82, "grad_norm": 0.7737128138542175, "learning_rate": 1.559793626302245e-05, "loss": 0.7872, "step": 9705 }, { "epoch": 0.82, "grad_norm": 0.9330393075942993, "learning_rate": 1.5526846971990505e-05, "loss": 0.7577, "step": 9710 }, { "epoch": 0.82, "grad_norm": 0.8273800611495972, "learning_rate": 1.545590641203599e-05, "loss": 0.867, "step": 9715 }, { "epoch": 0.82, "grad_norm": 0.8835793137550354, "learning_rate": 1.5385114708063265e-05, "loss": 0.8406, "step": 9720 }, { "epoch": 0.82, "grad_norm": 0.7108529806137085, "learning_rate": 1.531447198471453e-05, "loss": 0.8639, "step": 9725 }, { "epoch": 0.82, "grad_norm": 0.8037097454071045, "learning_rate": 1.5243978366369837e-05, "loss": 0.8974, "step": 9730 }, { "epoch": 0.82, "grad_norm": 0.8605689406394958, "learning_rate": 1.5173633977146595e-05, "loss": 0.8906, "step": 9735 }, { "epoch": 0.82, "grad_norm": 0.7327417731285095, "learning_rate": 1.5103438940899494e-05, "loss": 0.8373, "step": 9740 }, { "epoch": 0.82, "grad_norm": 0.8748462200164795, "learning_rate": 1.5033393381220329e-05, "loss": 0.8786, "step": 9745 }, { "epoch": 0.82, "grad_norm": 0.8213547468185425, "learning_rate": 1.4963497421437577e-05, "loss": 1.0147, "step": 9750 }, { "epoch": 0.82, "grad_norm": 0.8919892311096191, "learning_rate": 1.48937511846164e-05, "loss": 0.7929, "step": 9755 }, { "epoch": 0.82, "grad_norm": 0.8843607306480408, "learning_rate": 1.4824154793558375e-05, "loss": 0.8334, "step": 9760 }, { "epoch": 0.82, "grad_norm": 0.8356348872184753, "learning_rate": 1.4754708370801151e-05, "loss": 0.9379, "step": 9765 }, { "epoch": 0.83, "grad_norm": 0.7609606385231018, "learning_rate": 1.4685412038618473e-05, "loss": 0.7203, "step": 9770 }, { "epoch": 0.83, "grad_norm": 0.8738039135932922, "learning_rate": 1.4616265919019645e-05, "loss": 0.793, "step": 9775 }, { "epoch": 0.83, "grad_norm": 0.7854296565055847, "learning_rate": 1.454727013374959e-05, "loss": 0.8969, "step": 9780 }, { "epoch": 0.83, "grad_norm": 0.8933621048927307, "learning_rate": 1.4478424804288582e-05, "loss": 0.9128, "step": 9785 }, { "epoch": 0.83, "grad_norm": 0.9402585625648499, "learning_rate": 1.440973005185191e-05, "loss": 0.899, "step": 9790 }, { "epoch": 0.83, "grad_norm": 0.7768909931182861, "learning_rate": 1.434118599738975e-05, "loss": 0.8688, "step": 9795 }, { "epoch": 0.83, "grad_norm": 0.8614407777786255, "learning_rate": 1.427279276158704e-05, "loss": 1.0387, "step": 9800 }, { "epoch": 0.83, "grad_norm": 0.7587859034538269, "learning_rate": 1.4204550464863021e-05, "loss": 0.9036, "step": 9805 }, { "epoch": 0.83, "grad_norm": 0.8442284464836121, "learning_rate": 1.4136459227371269e-05, "loss": 0.9267, "step": 9810 }, { "epoch": 0.83, "grad_norm": 0.8466005921363831, "learning_rate": 1.4068519168999405e-05, "loss": 0.8621, "step": 9815 }, { "epoch": 0.83, "grad_norm": 0.8399500250816345, "learning_rate": 1.4000730409368845e-05, "loss": 0.8269, "step": 9820 }, { "epoch": 0.83, "grad_norm": 1.0434014797210693, "learning_rate": 1.3933093067834601e-05, "loss": 0.9427, "step": 9825 }, { "epoch": 0.83, "grad_norm": 0.8561471104621887, "learning_rate": 1.3865607263485091e-05, "loss": 0.8998, "step": 9830 }, { "epoch": 0.83, "grad_norm": 1.097008228302002, "learning_rate": 1.3798273115141912e-05, "loss": 0.8711, "step": 9835 }, { "epoch": 0.83, "grad_norm": 0.7780779600143433, "learning_rate": 1.373109074135972e-05, "loss": 0.838, "step": 9840 }, { "epoch": 0.83, "grad_norm": 0.9400659799575806, "learning_rate": 1.3664060260425827e-05, "loss": 0.8199, "step": 9845 }, { "epoch": 0.83, "grad_norm": 0.8247981071472168, "learning_rate": 1.359718179036019e-05, "loss": 0.8271, "step": 9850 }, { "epoch": 0.83, "grad_norm": 0.7556101679801941, "learning_rate": 1.353045544891508e-05, "loss": 0.9536, "step": 9855 }, { "epoch": 0.83, "grad_norm": 0.7069379687309265, "learning_rate": 1.3463881353574947e-05, "loss": 0.7715, "step": 9860 }, { "epoch": 0.83, "grad_norm": 0.7760348916053772, "learning_rate": 1.339745962155613e-05, "loss": 0.7853, "step": 9865 }, { "epoch": 0.83, "grad_norm": 0.8063364028930664, "learning_rate": 1.33311903698068e-05, "loss": 0.7978, "step": 9870 }, { "epoch": 0.83, "grad_norm": 0.7314766049385071, "learning_rate": 1.326507371500656e-05, "loss": 0.9842, "step": 9875 }, { "epoch": 0.83, "grad_norm": 0.7383533120155334, "learning_rate": 1.3199109773566387e-05, "loss": 0.9943, "step": 9880 }, { "epoch": 0.84, "grad_norm": 0.7715831995010376, "learning_rate": 1.3133298661628368e-05, "loss": 0.8209, "step": 9885 }, { "epoch": 0.84, "grad_norm": 0.7701368927955627, "learning_rate": 1.3067640495065492e-05, "loss": 0.8278, "step": 9890 }, { "epoch": 0.84, "grad_norm": 0.9878485798835754, "learning_rate": 1.3002135389481451e-05, "loss": 0.9727, "step": 9895 }, { "epoch": 0.84, "grad_norm": 0.8185439109802246, "learning_rate": 1.29367834602105e-05, "loss": 0.8967, "step": 9900 }, { "epoch": 0.84, "grad_norm": 0.7079369425773621, "learning_rate": 1.2871584822317151e-05, "loss": 0.8743, "step": 9905 }, { "epoch": 0.84, "grad_norm": 0.9138286113739014, "learning_rate": 1.2806539590596023e-05, "loss": 0.8651, "step": 9910 }, { "epoch": 0.84, "grad_norm": 0.7398014068603516, "learning_rate": 1.2741647879571627e-05, "loss": 0.9221, "step": 9915 }, { "epoch": 0.84, "grad_norm": 0.7842152118682861, "learning_rate": 1.2676909803498161e-05, "loss": 0.9457, "step": 9920 }, { "epoch": 0.84, "grad_norm": 1.0707106590270996, "learning_rate": 1.2612325476359388e-05, "loss": 1.025, "step": 9925 }, { "epoch": 0.84, "grad_norm": 0.802452564239502, "learning_rate": 1.2547895011868304e-05, "loss": 0.7072, "step": 9930 }, { "epoch": 0.84, "grad_norm": 0.7343943119049072, "learning_rate": 1.2483618523467e-05, "loss": 0.8953, "step": 9935 }, { "epoch": 0.84, "grad_norm": 0.7321301698684692, "learning_rate": 1.241949612432649e-05, "loss": 0.8973, "step": 9940 }, { "epoch": 0.84, "grad_norm": 0.7284245491027832, "learning_rate": 1.2355527927346478e-05, "loss": 0.8672, "step": 9945 }, { "epoch": 0.84, "grad_norm": 1.00664222240448, "learning_rate": 1.229171404515511e-05, "loss": 0.9601, "step": 9950 }, { "epoch": 0.84, "grad_norm": 0.7685894966125488, "learning_rate": 1.2228054590108962e-05, "loss": 0.9258, "step": 9955 }, { "epoch": 0.84, "grad_norm": 0.870555579662323, "learning_rate": 1.2164549674292581e-05, "loss": 0.9566, "step": 9960 }, { "epoch": 0.84, "grad_norm": 0.8318189382553101, "learning_rate": 1.2101199409518483e-05, "loss": 0.8342, "step": 9965 }, { "epoch": 0.84, "grad_norm": 0.9991118907928467, "learning_rate": 1.2038003907326867e-05, "loss": 0.8669, "step": 9970 }, { "epoch": 0.84, "grad_norm": 0.6647642254829407, "learning_rate": 1.1974963278985463e-05, "loss": 0.8771, "step": 9975 }, { "epoch": 0.84, "grad_norm": 0.8214685916900635, "learning_rate": 1.1912077635489282e-05, "loss": 0.9524, "step": 9980 }, { "epoch": 0.84, "grad_norm": 0.8654179573059082, "learning_rate": 1.1849347087560525e-05, "loss": 0.92, "step": 9985 }, { "epoch": 0.84, "grad_norm": 0.765496551990509, "learning_rate": 1.1786771745648229e-05, "loss": 0.8282, "step": 9990 }, { "epoch": 0.84, "grad_norm": 0.944416880607605, "learning_rate": 1.1724351719928228e-05, "loss": 0.8714, "step": 9995 }, { "epoch": 0.84, "grad_norm": 0.7935224771499634, "learning_rate": 1.1662087120302867e-05, "loss": 0.8278, "step": 10000 }, { "epoch": 0.85, "grad_norm": 0.806790292263031, "learning_rate": 1.1599978056400796e-05, "loss": 0.888, "step": 10005 }, { "epoch": 0.85, "grad_norm": 0.8951229453086853, "learning_rate": 1.1538024637576905e-05, "loss": 0.914, "step": 10010 }, { "epoch": 0.85, "grad_norm": 0.7594240307807922, "learning_rate": 1.1476226972911974e-05, "loss": 0.889, "step": 10015 }, { "epoch": 0.85, "grad_norm": 0.8180289268493652, "learning_rate": 1.1414585171212555e-05, "loss": 0.8932, "step": 10020 }, { "epoch": 0.85, "grad_norm": 0.7589259743690491, "learning_rate": 1.1353099341010786e-05, "loss": 0.8497, "step": 10025 }, { "epoch": 0.85, "grad_norm": 0.6496753096580505, "learning_rate": 1.1291769590564182e-05, "loss": 0.8868, "step": 10030 }, { "epoch": 0.85, "grad_norm": 0.7347110509872437, "learning_rate": 1.1230596027855434e-05, "loss": 0.8546, "step": 10035 }, { "epoch": 0.85, "grad_norm": 0.9040740132331848, "learning_rate": 1.1169578760592292e-05, "loss": 0.9046, "step": 10040 }, { "epoch": 0.85, "grad_norm": 1.1045706272125244, "learning_rate": 1.1108717896207276e-05, "loss": 0.958, "step": 10045 }, { "epoch": 0.85, "grad_norm": 0.7006523013114929, "learning_rate": 1.1048013541857472e-05, "loss": 0.8458, "step": 10050 }, { "epoch": 0.85, "grad_norm": 0.8821123838424683, "learning_rate": 1.0987465804424512e-05, "loss": 0.8466, "step": 10055 }, { "epoch": 0.85, "grad_norm": 0.8056004047393799, "learning_rate": 1.0927074790514203e-05, "loss": 0.8797, "step": 10060 }, { "epoch": 0.85, "grad_norm": 0.7699093222618103, "learning_rate": 1.0866840606456452e-05, "loss": 1.0301, "step": 10065 }, { "epoch": 0.85, "grad_norm": 0.7734396457672119, "learning_rate": 1.0806763358305005e-05, "loss": 0.7968, "step": 10070 }, { "epoch": 0.85, "grad_norm": 0.7000718116760254, "learning_rate": 1.074684315183727e-05, "loss": 0.8832, "step": 10075 }, { "epoch": 0.85, "grad_norm": 0.6902223825454712, "learning_rate": 1.0687080092554225e-05, "loss": 0.8448, "step": 10080 }, { "epoch": 0.85, "grad_norm": 1.0849089622497559, "learning_rate": 1.0627474285680105e-05, "loss": 0.8238, "step": 10085 }, { "epoch": 0.85, "grad_norm": 0.7399893999099731, "learning_rate": 1.0568025836162265e-05, "loss": 0.8334, "step": 10090 }, { "epoch": 0.85, "grad_norm": 0.6186020374298096, "learning_rate": 1.0508734848671064e-05, "loss": 0.7938, "step": 10095 }, { "epoch": 0.85, "grad_norm": 0.6724085807800293, "learning_rate": 1.0449601427599588e-05, "loss": 0.8982, "step": 10100 }, { "epoch": 0.85, "grad_norm": 0.7217341661453247, "learning_rate": 1.0390625677063415e-05, "loss": 0.8907, "step": 10105 }, { "epoch": 0.85, "grad_norm": 0.8844561576843262, "learning_rate": 1.0331807700900664e-05, "loss": 0.9729, "step": 10110 }, { "epoch": 0.85, "grad_norm": 0.9020631909370422, "learning_rate": 1.0273147602671562e-05, "loss": 0.9127, "step": 10115 }, { "epoch": 0.85, "grad_norm": 0.6652135252952576, "learning_rate": 1.0214645485658358e-05, "loss": 0.7979, "step": 10120 }, { "epoch": 0.86, "grad_norm": 0.8186726570129395, "learning_rate": 1.0156301452865246e-05, "loss": 0.795, "step": 10125 }, { "epoch": 0.86, "grad_norm": 0.9490936994552612, "learning_rate": 1.0098115607017922e-05, "loss": 0.7368, "step": 10130 }, { "epoch": 0.86, "grad_norm": 0.8582313060760498, "learning_rate": 1.00400880505637e-05, "loss": 0.7767, "step": 10135 }, { "epoch": 0.86, "grad_norm": 0.7754263877868652, "learning_rate": 9.982218885671158e-06, "loss": 0.8886, "step": 10140 }, { "epoch": 0.86, "grad_norm": 0.7403327822685242, "learning_rate": 9.924508214229933e-06, "loss": 0.8946, "step": 10145 }, { "epoch": 0.86, "grad_norm": 0.787521481513977, "learning_rate": 9.866956137850736e-06, "loss": 0.7958, "step": 10150 }, { "epoch": 0.86, "grad_norm": 0.8559316992759705, "learning_rate": 9.809562757864887e-06, "loss": 0.9273, "step": 10155 }, { "epoch": 0.86, "grad_norm": 0.6812841296195984, "learning_rate": 9.752328175324366e-06, "loss": 0.7358, "step": 10160 }, { "epoch": 0.86, "grad_norm": 0.9591403007507324, "learning_rate": 9.695252491001617e-06, "loss": 0.9783, "step": 10165 }, { "epoch": 0.86, "grad_norm": 0.6938751935958862, "learning_rate": 9.638335805389209e-06, "loss": 0.9006, "step": 10170 }, { "epoch": 0.86, "grad_norm": 0.7790308594703674, "learning_rate": 9.581578218699805e-06, "loss": 0.9647, "step": 10175 }, { "epoch": 0.86, "grad_norm": 0.7733349204063416, "learning_rate": 9.524979830865999e-06, "loss": 0.8754, "step": 10180 }, { "epoch": 0.86, "grad_norm": 0.7591155171394348, "learning_rate": 9.468540741539988e-06, "loss": 0.843, "step": 10185 }, { "epoch": 0.86, "grad_norm": 0.7738540768623352, "learning_rate": 9.41226105009353e-06, "loss": 0.8567, "step": 10190 }, { "epoch": 0.86, "grad_norm": 0.9674339294433594, "learning_rate": 9.356140855617778e-06, "loss": 0.8106, "step": 10195 }, { "epoch": 0.86, "grad_norm": 0.8703681826591492, "learning_rate": 9.30018025692302e-06, "loss": 0.8847, "step": 10200 }, { "epoch": 0.86, "grad_norm": 0.7185772657394409, "learning_rate": 9.244379352538535e-06, "loss": 0.8332, "step": 10205 }, { "epoch": 0.86, "grad_norm": 0.7870998382568359, "learning_rate": 9.188738240712447e-06, "loss": 0.8033, "step": 10210 }, { "epoch": 0.86, "grad_norm": 0.8352511525154114, "learning_rate": 9.133257019411524e-06, "loss": 0.7773, "step": 10215 }, { "epoch": 0.86, "grad_norm": 0.8999930620193481, "learning_rate": 9.077935786321045e-06, "loss": 0.8611, "step": 10220 }, { "epoch": 0.86, "grad_norm": 0.8633418083190918, "learning_rate": 9.022774638844588e-06, "loss": 0.9518, "step": 10225 }, { "epoch": 0.86, "grad_norm": 0.8065072298049927, "learning_rate": 8.96777367410383e-06, "loss": 0.9339, "step": 10230 }, { "epoch": 0.86, "grad_norm": 0.7075096964836121, "learning_rate": 8.912932988938472e-06, "loss": 0.7301, "step": 10235 }, { "epoch": 0.87, "grad_norm": 0.7145190834999084, "learning_rate": 8.858252679905966e-06, "loss": 0.7512, "step": 10240 }, { "epoch": 0.87, "grad_norm": 0.8400455713272095, "learning_rate": 8.803732843281409e-06, "loss": 0.8201, "step": 10245 }, { "epoch": 0.87, "grad_norm": 0.839592695236206, "learning_rate": 8.749373575057384e-06, "loss": 1.0019, "step": 10250 }, { "epoch": 0.87, "grad_norm": 0.8692545890808105, "learning_rate": 8.695174970943732e-06, "loss": 0.9288, "step": 10255 }, { "epoch": 0.87, "grad_norm": 0.8020569086074829, "learning_rate": 8.641137126367416e-06, "loss": 0.8789, "step": 10260 }, { "epoch": 0.87, "grad_norm": 0.8302674293518066, "learning_rate": 8.587260136472353e-06, "loss": 0.8592, "step": 10265 }, { "epoch": 0.87, "grad_norm": 0.8927515745162964, "learning_rate": 8.53354409611924e-06, "loss": 0.7836, "step": 10270 }, { "epoch": 0.87, "grad_norm": 0.8325286507606506, "learning_rate": 8.479989099885388e-06, "loss": 0.8751, "step": 10275 }, { "epoch": 0.87, "grad_norm": 0.808384895324707, "learning_rate": 8.426595242064606e-06, "loss": 0.9749, "step": 10280 }, { "epoch": 0.87, "grad_norm": 0.8181533813476562, "learning_rate": 8.373362616666936e-06, "loss": 1.0068, "step": 10285 }, { "epoch": 0.87, "grad_norm": 0.7378880381584167, "learning_rate": 8.320291317418549e-06, "loss": 0.9654, "step": 10290 }, { "epoch": 0.87, "grad_norm": 0.7099074721336365, "learning_rate": 8.26738143776159e-06, "loss": 0.9094, "step": 10295 }, { "epoch": 0.87, "grad_norm": 0.762239396572113, "learning_rate": 8.214633070853938e-06, "loss": 0.8217, "step": 10300 }, { "epoch": 0.87, "grad_norm": 0.7620306015014648, "learning_rate": 8.162046309569205e-06, "loss": 0.7677, "step": 10305 }, { "epoch": 0.87, "grad_norm": 0.8328052163124084, "learning_rate": 8.109621246496368e-06, "loss": 0.9343, "step": 10310 }, { "epoch": 0.87, "grad_norm": 0.8162596821784973, "learning_rate": 8.057357973939727e-06, "loss": 0.9812, "step": 10315 }, { "epoch": 0.87, "grad_norm": 0.8823369145393372, "learning_rate": 8.005256583918763e-06, "loss": 1.0342, "step": 10320 }, { "epoch": 0.87, "grad_norm": 0.7189355492591858, "learning_rate": 7.953317168167862e-06, "loss": 0.8651, "step": 10325 }, { "epoch": 0.87, "grad_norm": 0.7958583235740662, "learning_rate": 7.901539818136261e-06, "loss": 0.8575, "step": 10330 }, { "epoch": 0.87, "grad_norm": 0.8764750957489014, "learning_rate": 7.849924624987881e-06, "loss": 1.0525, "step": 10335 }, { "epoch": 0.87, "grad_norm": 0.8272556662559509, "learning_rate": 7.798471679601082e-06, "loss": 0.8466, "step": 10340 }, { "epoch": 0.87, "grad_norm": 0.8034465909004211, "learning_rate": 7.747181072568576e-06, "loss": 0.8984, "step": 10345 }, { "epoch": 0.87, "grad_norm": 0.8183860182762146, "learning_rate": 7.696052894197247e-06, "loss": 0.7763, "step": 10350 }, { "epoch": 0.87, "grad_norm": 0.7499911189079285, "learning_rate": 7.645087234507975e-06, "loss": 0.9159, "step": 10355 }, { "epoch": 0.88, "grad_norm": 0.7751929759979248, "learning_rate": 7.594284183235556e-06, "loss": 0.89, "step": 10360 }, { "epoch": 0.88, "grad_norm": 0.9942512512207031, "learning_rate": 7.543643829828406e-06, "loss": 0.9464, "step": 10365 }, { "epoch": 0.88, "grad_norm": 0.915164589881897, "learning_rate": 7.493166263448515e-06, "loss": 0.9092, "step": 10370 }, { "epoch": 0.88, "grad_norm": 0.8597160577774048, "learning_rate": 7.442851572971265e-06, "loss": 0.923, "step": 10375 }, { "epoch": 0.88, "grad_norm": 0.7946441173553467, "learning_rate": 7.392699846985263e-06, "loss": 0.8659, "step": 10380 }, { "epoch": 0.88, "grad_norm": 0.7779476642608643, "learning_rate": 7.342711173792127e-06, "loss": 0.8092, "step": 10385 }, { "epoch": 0.88, "grad_norm": 0.7479021549224854, "learning_rate": 7.2928856414064996e-06, "loss": 0.8293, "step": 10390 }, { "epoch": 0.88, "grad_norm": 0.8338636755943298, "learning_rate": 7.243223337555693e-06, "loss": 0.9277, "step": 10395 }, { "epoch": 0.88, "grad_norm": 0.7314404845237732, "learning_rate": 7.193724349679654e-06, "loss": 0.8412, "step": 10400 }, { "epoch": 0.88, "grad_norm": 0.8216047286987305, "learning_rate": 7.144388764930788e-06, "loss": 0.9765, "step": 10405 }, { "epoch": 0.88, "grad_norm": 0.7718465328216553, "learning_rate": 7.095216670173776e-06, "loss": 0.9366, "step": 10410 }, { "epoch": 0.88, "grad_norm": 0.7446076273918152, "learning_rate": 7.046208151985456e-06, "loss": 0.7936, "step": 10415 }, { "epoch": 0.88, "grad_norm": 0.7243603467941284, "learning_rate": 6.997363296654691e-06, "loss": 0.8252, "step": 10420 }, { "epoch": 0.88, "grad_norm": 0.8296288847923279, "learning_rate": 6.9486821901821435e-06, "loss": 0.7981, "step": 10425 }, { "epoch": 0.88, "grad_norm": 0.8079470992088318, "learning_rate": 6.900164918280128e-06, "loss": 0.9989, "step": 10430 }, { "epoch": 0.88, "grad_norm": 0.8123701214790344, "learning_rate": 6.851811566372601e-06, "loss": 0.8505, "step": 10435 }, { "epoch": 0.88, "grad_norm": 0.870762825012207, "learning_rate": 6.8036222195948075e-06, "loss": 0.8892, "step": 10440 }, { "epoch": 0.88, "grad_norm": 0.9291538596153259, "learning_rate": 6.755596962793309e-06, "loss": 0.8043, "step": 10445 }, { "epoch": 0.88, "grad_norm": 0.8943066596984863, "learning_rate": 6.707735880525723e-06, "loss": 0.8753, "step": 10450 }, { "epoch": 0.88, "grad_norm": 0.6992756724357605, "learning_rate": 6.660039057060552e-06, "loss": 0.8899, "step": 10455 }, { "epoch": 0.88, "grad_norm": 0.6595776677131653, "learning_rate": 6.612506576377175e-06, "loss": 0.957, "step": 10460 }, { "epoch": 0.88, "grad_norm": 0.871720016002655, "learning_rate": 6.565138522165581e-06, "loss": 0.8798, "step": 10465 }, { "epoch": 0.88, "grad_norm": 0.8589921593666077, "learning_rate": 6.517934977826223e-06, "loss": 0.7956, "step": 10470 }, { "epoch": 0.88, "grad_norm": 0.9183969497680664, "learning_rate": 6.4708960264699745e-06, "loss": 0.9662, "step": 10475 }, { "epoch": 0.89, "grad_norm": 0.7627586722373962, "learning_rate": 6.424021750917864e-06, "loss": 0.922, "step": 10480 }, { "epoch": 0.89, "grad_norm": 0.8349488973617554, "learning_rate": 6.377312233700938e-06, "loss": 0.8646, "step": 10485 }, { "epoch": 0.89, "grad_norm": 0.7143064141273499, "learning_rate": 6.3307675570602354e-06, "loss": 0.8173, "step": 10490 }, { "epoch": 0.89, "grad_norm": 0.8462641835212708, "learning_rate": 6.284387802946534e-06, "loss": 0.8991, "step": 10495 }, { "epoch": 0.89, "grad_norm": 0.8399547934532166, "learning_rate": 6.238173053020191e-06, "loss": 0.8205, "step": 10500 }, { "epoch": 0.89, "grad_norm": 0.8168681859970093, "learning_rate": 6.192123388651128e-06, "loss": 0.8276, "step": 10505 }, { "epoch": 0.89, "grad_norm": 0.8551720380783081, "learning_rate": 6.146238890918488e-06, "loss": 0.8583, "step": 10510 }, { "epoch": 0.89, "grad_norm": 0.7600584030151367, "learning_rate": 6.100519640610725e-06, "loss": 0.8404, "step": 10515 }, { "epoch": 0.89, "grad_norm": 0.7038664817810059, "learning_rate": 6.054965718225258e-06, "loss": 0.7928, "step": 10520 }, { "epoch": 0.89, "grad_norm": 0.7905198335647583, "learning_rate": 6.009577203968453e-06, "loss": 0.8677, "step": 10525 }, { "epoch": 0.89, "grad_norm": 0.8545170426368713, "learning_rate": 5.964354177755449e-06, "loss": 0.864, "step": 10530 }, { "epoch": 0.89, "grad_norm": 0.8659030795097351, "learning_rate": 5.919296719209988e-06, "loss": 0.8788, "step": 10535 }, { "epoch": 0.89, "grad_norm": 0.6199814677238464, "learning_rate": 5.874404907664277e-06, "loss": 0.8115, "step": 10540 }, { "epoch": 0.89, "grad_norm": 0.7567442655563354, "learning_rate": 5.8296788221589575e-06, "loss": 0.8215, "step": 10545 }, { "epoch": 0.89, "grad_norm": 0.8200069665908813, "learning_rate": 5.785118541442791e-06, "loss": 0.889, "step": 10550 }, { "epoch": 0.89, "grad_norm": 0.8444991707801819, "learning_rate": 5.740724143972642e-06, "loss": 0.8148, "step": 10555 }, { "epoch": 0.89, "grad_norm": 0.8217705488204956, "learning_rate": 5.6964957079133186e-06, "loss": 0.8814, "step": 10560 }, { "epoch": 0.89, "grad_norm": 0.8477857112884521, "learning_rate": 5.652433311137384e-06, "loss": 0.8157, "step": 10565 }, { "epoch": 0.89, "grad_norm": 0.8516268730163574, "learning_rate": 5.608537031225092e-06, "loss": 0.8576, "step": 10570 }, { "epoch": 0.89, "grad_norm": 0.7571528553962708, "learning_rate": 5.564806945464218e-06, "loss": 0.7577, "step": 10575 }, { "epoch": 0.89, "grad_norm": 0.7651041150093079, "learning_rate": 5.521243130849873e-06, "loss": 0.9706, "step": 10580 }, { "epoch": 0.89, "grad_norm": 0.9462747573852539, "learning_rate": 5.4778456640845135e-06, "loss": 0.8431, "step": 10585 }, { "epoch": 0.89, "grad_norm": 0.8326951861381531, "learning_rate": 5.434614621577594e-06, "loss": 0.7995, "step": 10590 }, { "epoch": 0.89, "grad_norm": 0.9692691564559937, "learning_rate": 5.391550079445606e-06, "loss": 0.9605, "step": 10595 }, { "epoch": 0.9, "grad_norm": 0.8991673588752747, "learning_rate": 5.348652113511898e-06, "loss": 0.866, "step": 10600 }, { "epoch": 0.9, "grad_norm": 0.8957405686378479, "learning_rate": 5.305920799306496e-06, "loss": 0.7906, "step": 10605 }, { "epoch": 0.9, "grad_norm": 0.7040393352508545, "learning_rate": 5.263356212066028e-06, "loss": 0.8215, "step": 10610 }, { "epoch": 0.9, "grad_norm": 0.8569772243499756, "learning_rate": 5.220958426733558e-06, "loss": 0.9202, "step": 10615 }, { "epoch": 0.9, "grad_norm": 1.0359724760055542, "learning_rate": 5.178727517958459e-06, "loss": 0.9172, "step": 10620 }, { "epoch": 0.9, "grad_norm": 0.7228214144706726, "learning_rate": 5.136663560096277e-06, "loss": 0.7619, "step": 10625 }, { "epoch": 0.9, "grad_norm": 0.7785528302192688, "learning_rate": 5.094766627208647e-06, "loss": 0.8318, "step": 10630 }, { "epoch": 0.9, "grad_norm": 0.9546996355056763, "learning_rate": 5.053036793063093e-06, "loss": 0.9362, "step": 10635 }, { "epoch": 0.9, "grad_norm": 0.841090738773346, "learning_rate": 5.011474131132931e-06, "loss": 0.8019, "step": 10640 }, { "epoch": 0.9, "grad_norm": 0.6441927552223206, "learning_rate": 4.970078714597149e-06, "loss": 0.8103, "step": 10645 }, { "epoch": 0.9, "grad_norm": 0.8087906241416931, "learning_rate": 4.928850616340252e-06, "loss": 0.9268, "step": 10650 }, { "epoch": 0.9, "grad_norm": 0.6910356283187866, "learning_rate": 4.887789908952178e-06, "loss": 0.952, "step": 10655 }, { "epoch": 0.9, "grad_norm": 0.7780956029891968, "learning_rate": 4.846896664728118e-06, "loss": 0.8688, "step": 10660 }, { "epoch": 0.9, "grad_norm": 0.7802953720092773, "learning_rate": 4.806170955668421e-06, "loss": 0.7813, "step": 10665 }, { "epoch": 0.9, "grad_norm": 0.8613384366035461, "learning_rate": 4.765612853478451e-06, "loss": 0.8596, "step": 10670 }, { "epoch": 0.9, "grad_norm": 0.990271806716919, "learning_rate": 4.725222429568477e-06, "loss": 0.8501, "step": 10675 }, { "epoch": 0.9, "grad_norm": 0.8030913472175598, "learning_rate": 4.68499975505351e-06, "loss": 0.9165, "step": 10680 }, { "epoch": 0.9, "grad_norm": 0.9801160097122192, "learning_rate": 4.644944900753278e-06, "loss": 0.9632, "step": 10685 }, { "epoch": 0.9, "grad_norm": 0.7507076263427734, "learning_rate": 4.605057937191947e-06, "loss": 1.0431, "step": 10690 }, { "epoch": 0.9, "grad_norm": 0.7139765620231628, "learning_rate": 4.565338934598129e-06, "loss": 0.9047, "step": 10695 }, { "epoch": 0.9, "grad_norm": 0.8364464044570923, "learning_rate": 4.525787962904682e-06, "loss": 0.738, "step": 10700 }, { "epoch": 0.9, "grad_norm": 0.9670020341873169, "learning_rate": 4.4864050917486355e-06, "loss": 0.9677, "step": 10705 }, { "epoch": 0.9, "grad_norm": 0.9008302092552185, "learning_rate": 4.447190390471024e-06, "loss": 0.9228, "step": 10710 }, { "epoch": 0.91, "grad_norm": 0.8564368486404419, "learning_rate": 4.408143928116815e-06, "loss": 0.9183, "step": 10715 }, { "epoch": 0.91, "grad_norm": 0.8975895047187805, "learning_rate": 4.369265773434739e-06, "loss": 0.7976, "step": 10720 }, { "epoch": 0.91, "grad_norm": 0.7680640816688538, "learning_rate": 4.330555994877195e-06, "loss": 0.8273, "step": 10725 }, { "epoch": 0.91, "grad_norm": 0.8345970511436462, "learning_rate": 4.292014660600119e-06, "loss": 0.8742, "step": 10730 }, { "epoch": 0.91, "grad_norm": 0.8377253413200378, "learning_rate": 4.253641838462852e-06, "loss": 0.9009, "step": 10735 }, { "epoch": 0.91, "grad_norm": 0.7228429317474365, "learning_rate": 4.2154375960280935e-06, "loss": 0.8395, "step": 10740 }, { "epoch": 0.91, "grad_norm": 0.8424662947654724, "learning_rate": 4.17740200056167e-06, "loss": 0.9902, "step": 10745 }, { "epoch": 0.91, "grad_norm": 0.72088623046875, "learning_rate": 4.139535119032501e-06, "loss": 0.9015, "step": 10750 }, { "epoch": 0.91, "grad_norm": 0.7103576064109802, "learning_rate": 4.1018370181124424e-06, "loss": 0.9158, "step": 10755 }, { "epoch": 0.91, "grad_norm": 0.7628330588340759, "learning_rate": 4.064307764176168e-06, "loss": 0.8489, "step": 10760 }, { "epoch": 0.91, "grad_norm": 0.7091038823127747, "learning_rate": 4.0269474233010865e-06, "loss": 0.8728, "step": 10765 }, { "epoch": 0.91, "grad_norm": 0.8292171359062195, "learning_rate": 3.9897560612672136e-06, "loss": 0.8284, "step": 10770 }, { "epoch": 0.91, "grad_norm": 0.6915936470031738, "learning_rate": 3.9527337435570025e-06, "loss": 0.811, "step": 10775 }, { "epoch": 0.91, "grad_norm": 0.9190654754638672, "learning_rate": 3.915880535355298e-06, "loss": 0.9775, "step": 10780 }, { "epoch": 0.91, "grad_norm": 0.9156875610351562, "learning_rate": 3.879196501549209e-06, "loss": 0.9032, "step": 10785 }, { "epoch": 0.91, "grad_norm": 0.7918872833251953, "learning_rate": 3.842681706727957e-06, "loss": 0.9325, "step": 10790 }, { "epoch": 0.91, "grad_norm": 0.7252178192138672, "learning_rate": 3.806336215182782e-06, "loss": 0.7474, "step": 10795 }, { "epoch": 0.91, "grad_norm": 0.8236010074615479, "learning_rate": 3.7701600909068714e-06, "loss": 0.9378, "step": 10800 }, { "epoch": 0.91, "grad_norm": 0.8983059525489807, "learning_rate": 3.734153397595164e-06, "loss": 0.8263, "step": 10805 }, { "epoch": 0.91, "grad_norm": 0.8560591340065002, "learning_rate": 3.6983161986443027e-06, "loss": 0.8704, "step": 10810 }, { "epoch": 0.91, "grad_norm": 0.9709807634353638, "learning_rate": 3.662648557152515e-06, "loss": 0.8396, "step": 10815 }, { "epoch": 0.91, "grad_norm": 0.7081771492958069, "learning_rate": 3.6271505359194547e-06, "loss": 0.8082, "step": 10820 }, { "epoch": 0.91, "grad_norm": 0.8575358390808105, "learning_rate": 3.591822197446182e-06, "loss": 0.8739, "step": 10825 }, { "epoch": 0.91, "grad_norm": 0.9410017728805542, "learning_rate": 3.556663603934951e-06, "loss": 0.8653, "step": 10830 }, { "epoch": 0.92, "grad_norm": 0.8749052882194519, "learning_rate": 3.5216748172891446e-06, "loss": 0.9879, "step": 10835 }, { "epoch": 0.92, "grad_norm": 0.7416231036186218, "learning_rate": 3.486855899113217e-06, "loss": 0.8421, "step": 10840 }, { "epoch": 0.92, "grad_norm": 0.9364011287689209, "learning_rate": 3.4522069107124966e-06, "loss": 0.866, "step": 10845 }, { "epoch": 0.92, "grad_norm": 0.774756669998169, "learning_rate": 3.4177279130931163e-06, "loss": 0.8056, "step": 10850 }, { "epoch": 0.92, "grad_norm": 1.0904358625411987, "learning_rate": 3.3834189669619377e-06, "loss": 0.8952, "step": 10855 }, { "epoch": 0.92, "grad_norm": 0.8134381771087646, "learning_rate": 3.3492801327263843e-06, "loss": 0.8085, "step": 10860 }, { "epoch": 0.92, "grad_norm": 0.7840288877487183, "learning_rate": 3.3153114704943756e-06, "loss": 1.0089, "step": 10865 }, { "epoch": 0.92, "grad_norm": 1.1242091655731201, "learning_rate": 3.2815130400742133e-06, "loss": 0.805, "step": 10870 }, { "epoch": 0.92, "grad_norm": 0.88893061876297, "learning_rate": 3.247884900974474e-06, "loss": 0.7861, "step": 10875 }, { "epoch": 0.92, "grad_norm": 0.7033008933067322, "learning_rate": 3.214427112403906e-06, "loss": 0.893, "step": 10880 }, { "epoch": 0.92, "grad_norm": 0.9423139691352844, "learning_rate": 3.181139733271332e-06, "loss": 0.8106, "step": 10885 }, { "epoch": 0.92, "grad_norm": 0.831916332244873, "learning_rate": 3.1480228221854923e-06, "loss": 0.7484, "step": 10890 }, { "epoch": 0.92, "grad_norm": 0.6366778612136841, "learning_rate": 3.1150764374550443e-06, "loss": 0.8293, "step": 10895 }, { "epoch": 0.92, "grad_norm": 0.787534773349762, "learning_rate": 3.0823006370883534e-06, "loss": 0.859, "step": 10900 }, { "epoch": 0.92, "grad_norm": 0.747705340385437, "learning_rate": 3.0496954787934684e-06, "loss": 0.8347, "step": 10905 }, { "epoch": 0.92, "grad_norm": 0.802186906337738, "learning_rate": 3.0172610199780017e-06, "loss": 0.7809, "step": 10910 }, { "epoch": 0.92, "grad_norm": 0.92281574010849, "learning_rate": 2.984997317748972e-06, "loss": 0.9261, "step": 10915 }, { "epoch": 0.92, "grad_norm": 0.7391403913497925, "learning_rate": 2.9529044289127726e-06, "loss": 0.6701, "step": 10920 }, { "epoch": 0.92, "grad_norm": 0.921058177947998, "learning_rate": 2.9209824099750595e-06, "loss": 0.8643, "step": 10925 }, { "epoch": 0.92, "grad_norm": 0.8696638345718384, "learning_rate": 2.889231317140617e-06, "loss": 0.9082, "step": 10930 }, { "epoch": 0.92, "grad_norm": 1.0707058906555176, "learning_rate": 2.857651206313305e-06, "loss": 0.8325, "step": 10935 }, { "epoch": 0.92, "grad_norm": 0.7705253958702087, "learning_rate": 2.8262421330959244e-06, "loss": 0.9603, "step": 10940 }, { "epoch": 0.92, "grad_norm": 0.8354006409645081, "learning_rate": 2.795004152790115e-06, "loss": 0.8568, "step": 10945 }, { "epoch": 0.92, "grad_norm": 0.9094474911689758, "learning_rate": 2.7639373203963036e-06, "loss": 0.9253, "step": 10950 }, { "epoch": 0.93, "grad_norm": 0.7950231432914734, "learning_rate": 2.7330416906135582e-06, "loss": 0.8863, "step": 10955 }, { "epoch": 0.93, "grad_norm": 0.8168395161628723, "learning_rate": 2.702317317839531e-06, "loss": 0.769, "step": 10960 }, { "epoch": 0.93, "grad_norm": 0.8256956934928894, "learning_rate": 2.6717642561703505e-06, "loss": 0.8708, "step": 10965 }, { "epoch": 0.93, "grad_norm": 0.7272620797157288, "learning_rate": 2.6413825594004625e-06, "loss": 0.847, "step": 10970 }, { "epoch": 0.93, "grad_norm": 0.8060124516487122, "learning_rate": 2.611172281022645e-06, "loss": 0.804, "step": 10975 }, { "epoch": 0.93, "grad_norm": 0.8608812689781189, "learning_rate": 2.5811334742278593e-06, "loss": 0.9412, "step": 10980 }, { "epoch": 0.93, "grad_norm": 0.7618398666381836, "learning_rate": 2.551266191905133e-06, "loss": 0.8519, "step": 10985 }, { "epoch": 0.93, "grad_norm": 0.7475120425224304, "learning_rate": 2.5215704866415224e-06, "loss": 0.708, "step": 10990 }, { "epoch": 0.93, "grad_norm": 0.7691943049430847, "learning_rate": 2.492046410721971e-06, "loss": 0.8495, "step": 10995 }, { "epoch": 0.93, "grad_norm": 0.8780038952827454, "learning_rate": 2.4626940161292187e-06, "loss": 1.1176, "step": 11000 }, { "epoch": 0.93, "grad_norm": 0.7955008745193481, "learning_rate": 2.4335133545437596e-06, "loss": 0.8303, "step": 11005 }, { "epoch": 0.93, "grad_norm": 0.7584604620933533, "learning_rate": 2.4045044773437163e-06, "loss": 0.8121, "step": 11010 }, { "epoch": 0.93, "grad_norm": 0.7853428721427917, "learning_rate": 2.3756674356047338e-06, "loss": 0.8703, "step": 11015 }, { "epoch": 0.93, "grad_norm": 0.8115850687026978, "learning_rate": 2.3470022800999193e-06, "loss": 0.8613, "step": 11020 }, { "epoch": 0.93, "grad_norm": 0.7144448161125183, "learning_rate": 2.318509061299745e-06, "loss": 0.8079, "step": 11025 }, { "epoch": 0.93, "grad_norm": 0.923337459564209, "learning_rate": 2.2901878293719257e-06, "loss": 0.9307, "step": 11030 }, { "epoch": 0.93, "grad_norm": 0.7687123417854309, "learning_rate": 2.2620386341814182e-06, "loss": 0.8111, "step": 11035 }, { "epoch": 0.93, "grad_norm": 0.949176549911499, "learning_rate": 2.234061525290232e-06, "loss": 0.8639, "step": 11040 }, { "epoch": 0.93, "grad_norm": 0.8756553530693054, "learning_rate": 2.2062565519573865e-06, "loss": 0.9837, "step": 11045 }, { "epoch": 0.93, "grad_norm": 0.8877837657928467, "learning_rate": 2.1786237631388428e-06, "loss": 0.9086, "step": 11050 }, { "epoch": 0.93, "grad_norm": 0.7283702492713928, "learning_rate": 2.1511632074873835e-06, "loss": 0.8531, "step": 11055 }, { "epoch": 0.93, "grad_norm": 1.1859776973724365, "learning_rate": 2.1238749333525543e-06, "loss": 0.8264, "step": 11060 }, { "epoch": 0.93, "grad_norm": 0.7941679358482361, "learning_rate": 2.096758988780556e-06, "loss": 0.9445, "step": 11065 }, { "epoch": 0.94, "grad_norm": 0.724950909614563, "learning_rate": 2.069815421514176e-06, "loss": 0.7968, "step": 11070 }, { "epoch": 0.94, "grad_norm": 0.7822914123535156, "learning_rate": 2.0430442789927007e-06, "loss": 0.8489, "step": 11075 }, { "epoch": 0.94, "grad_norm": 0.7305450439453125, "learning_rate": 2.0164456083518246e-06, "loss": 0.8252, "step": 11080 }, { "epoch": 0.94, "grad_norm": 0.9346940517425537, "learning_rate": 1.990019456423564e-06, "loss": 0.8682, "step": 11085 }, { "epoch": 0.94, "grad_norm": 0.9352343678474426, "learning_rate": 1.9637658697362003e-06, "loss": 0.9635, "step": 11090 }, { "epoch": 0.94, "grad_norm": 0.6705776453018188, "learning_rate": 1.93768489451418e-06, "loss": 0.7376, "step": 11095 }, { "epoch": 0.94, "grad_norm": 0.6965312361717224, "learning_rate": 1.911776576678015e-06, "loss": 0.802, "step": 11100 }, { "epoch": 0.94, "grad_norm": 0.8443041443824768, "learning_rate": 1.8860409618442488e-06, "loss": 0.8767, "step": 11105 }, { "epoch": 0.94, "grad_norm": 0.8731304407119751, "learning_rate": 1.8604780953253353e-06, "loss": 0.8648, "step": 11110 }, { "epoch": 0.94, "grad_norm": 0.8138821721076965, "learning_rate": 1.8350880221295496e-06, "loss": 0.7863, "step": 11115 }, { "epoch": 0.94, "grad_norm": 0.8005531430244446, "learning_rate": 1.8098707869609654e-06, "loss": 0.8392, "step": 11120 }, { "epoch": 0.94, "grad_norm": 0.7327057719230652, "learning_rate": 1.7848264342193333e-06, "loss": 1.0973, "step": 11125 }, { "epoch": 0.94, "grad_norm": 0.8505914807319641, "learning_rate": 1.7599550080000027e-06, "loss": 0.8634, "step": 11130 }, { "epoch": 0.94, "grad_norm": 0.8741824626922607, "learning_rate": 1.7352565520938558e-06, "loss": 0.8809, "step": 11135 }, { "epoch": 0.94, "grad_norm": 0.6897190809249878, "learning_rate": 1.7107311099872403e-06, "loss": 0.8575, "step": 11140 }, { "epoch": 0.94, "grad_norm": 0.772314190864563, "learning_rate": 1.6863787248618367e-06, "loss": 1.0116, "step": 11145 }, { "epoch": 0.94, "grad_norm": 0.7905736565589905, "learning_rate": 1.6621994395946916e-06, "loss": 0.8715, "step": 11150 }, { "epoch": 0.94, "grad_norm": 0.7269569039344788, "learning_rate": 1.6381932967580505e-06, "loss": 0.9212, "step": 11155 }, { "epoch": 0.94, "grad_norm": 0.7961433529853821, "learning_rate": 1.6143603386192474e-06, "loss": 0.8239, "step": 11160 }, { "epoch": 0.94, "grad_norm": 0.811042845249176, "learning_rate": 1.5907006071408049e-06, "loss": 0.9451, "step": 11165 }, { "epoch": 0.94, "grad_norm": 0.7868227958679199, "learning_rate": 1.5672141439801446e-06, "loss": 0.8004, "step": 11170 }, { "epoch": 0.94, "grad_norm": 0.7977198362350464, "learning_rate": 1.5439009904896773e-06, "loss": 0.9105, "step": 11175 }, { "epoch": 0.94, "grad_norm": 0.7236701250076294, "learning_rate": 1.5207611877166573e-06, "loss": 0.8506, "step": 11180 }, { "epoch": 0.94, "grad_norm": 0.6770230531692505, "learning_rate": 1.4977947764031053e-06, "loss": 0.7893, "step": 11185 }, { "epoch": 0.95, "grad_norm": 0.7709506154060364, "learning_rate": 1.4750017969857643e-06, "loss": 0.8009, "step": 11190 }, { "epoch": 0.95, "grad_norm": 0.8483154773712158, "learning_rate": 1.4523822895960216e-06, "loss": 0.7705, "step": 11195 }, { "epoch": 0.95, "grad_norm": 0.8976624608039856, "learning_rate": 1.4299362940598194e-06, "loss": 0.9007, "step": 11200 }, { "epoch": 0.95, "grad_norm": 0.9494867324829102, "learning_rate": 1.4076638498976113e-06, "loss": 0.8144, "step": 11205 }, { "epoch": 0.95, "grad_norm": 0.7936916351318359, "learning_rate": 1.3855649963242957e-06, "loss": 0.8789, "step": 11210 }, { "epoch": 0.95, "grad_norm": 0.7598190307617188, "learning_rate": 1.3636397722490813e-06, "loss": 0.8265, "step": 11215 }, { "epoch": 0.95, "grad_norm": 0.7455407977104187, "learning_rate": 1.3418882162755219e-06, "loss": 0.8494, "step": 11220 }, { "epoch": 0.95, "grad_norm": 0.8700529336929321, "learning_rate": 1.3203103667013827e-06, "loss": 0.9347, "step": 11225 }, { "epoch": 0.95, "grad_norm": 0.8811212778091431, "learning_rate": 1.298906261518551e-06, "loss": 0.7147, "step": 11230 }, { "epoch": 0.95, "grad_norm": 0.8739584684371948, "learning_rate": 1.2776759384130698e-06, "loss": 0.8998, "step": 11235 }, { "epoch": 0.95, "grad_norm": 0.8095049858093262, "learning_rate": 1.2566194347649385e-06, "loss": 0.9268, "step": 11240 }, { "epoch": 0.95, "grad_norm": 0.7221877574920654, "learning_rate": 1.2357367876481452e-06, "loss": 0.8186, "step": 11245 }, { "epoch": 0.95, "grad_norm": 0.8922355771064758, "learning_rate": 1.2150280338305787e-06, "loss": 0.8335, "step": 11250 }, { "epoch": 0.95, "grad_norm": 0.7169233560562134, "learning_rate": 1.194493209773928e-06, "loss": 0.7704, "step": 11255 }, { "epoch": 0.95, "grad_norm": 0.8172361254692078, "learning_rate": 1.1741323516336832e-06, "loss": 0.8524, "step": 11260 }, { "epoch": 0.95, "grad_norm": 0.7009252309799194, "learning_rate": 1.1539454952590123e-06, "loss": 0.8321, "step": 11265 }, { "epoch": 0.95, "grad_norm": 0.7184674143791199, "learning_rate": 1.133932676192695e-06, "loss": 0.8192, "step": 11270 }, { "epoch": 0.95, "grad_norm": 0.8454670906066895, "learning_rate": 1.114093929671145e-06, "loss": 0.9189, "step": 11275 }, { "epoch": 0.95, "grad_norm": 0.7921102046966553, "learning_rate": 1.0944292906242326e-06, "loss": 0.8797, "step": 11280 }, { "epoch": 0.95, "grad_norm": 0.7686296701431274, "learning_rate": 1.0749387936753064e-06, "loss": 0.9228, "step": 11285 }, { "epoch": 0.95, "grad_norm": 0.78754061460495, "learning_rate": 1.0556224731411157e-06, "loss": 0.857, "step": 11290 }, { "epoch": 0.95, "grad_norm": 0.749705970287323, "learning_rate": 1.0364803630316887e-06, "loss": 0.84, "step": 11295 }, { "epoch": 0.95, "grad_norm": 0.751549482345581, "learning_rate": 1.017512497050377e-06, "loss": 0.7576, "step": 11300 }, { "epoch": 0.95, "grad_norm": 0.6023859977722168, "learning_rate": 9.98718908593732e-07, "loss": 0.8114, "step": 11305 }, { "epoch": 0.96, "grad_norm": 0.9303193092346191, "learning_rate": 9.8009963075143e-07, "loss": 0.8892, "step": 11310 }, { "epoch": 0.96, "grad_norm": 0.9002019166946411, "learning_rate": 9.61654696306258e-07, "loss": 0.8828, "step": 11315 }, { "epoch": 0.96, "grad_norm": 0.8884360790252686, "learning_rate": 9.43384137734038e-07, "loss": 0.8621, "step": 11320 }, { "epoch": 0.96, "grad_norm": 0.8538162708282471, "learning_rate": 9.252879872035713e-07, "loss": 0.7332, "step": 11325 }, { "epoch": 0.96, "grad_norm": 0.7713654041290283, "learning_rate": 9.073662765765823e-07, "loss": 0.754, "step": 11330 }, { "epoch": 0.96, "grad_norm": 0.7992544174194336, "learning_rate": 8.896190374076518e-07, "loss": 0.9635, "step": 11335 }, { "epoch": 0.96, "grad_norm": 0.85658198595047, "learning_rate": 8.720463009441626e-07, "loss": 0.8007, "step": 11340 }, { "epoch": 0.96, "grad_norm": 0.8936882615089417, "learning_rate": 8.546480981262872e-07, "loss": 0.9472, "step": 11345 }, { "epoch": 0.96, "grad_norm": 0.8276568651199341, "learning_rate": 8.374244595868664e-07, "loss": 0.978, "step": 11350 }, { "epoch": 0.96, "grad_norm": 0.8250508308410645, "learning_rate": 8.203754156513865e-07, "loss": 0.9266, "step": 11355 }, { "epoch": 0.96, "grad_norm": 0.7581374049186707, "learning_rate": 8.03500996337958e-07, "loss": 0.8211, "step": 11360 }, { "epoch": 0.96, "grad_norm": 1.0592025518417358, "learning_rate": 7.868012313571927e-07, "loss": 0.9002, "step": 11365 }, { "epoch": 0.96, "grad_norm": 0.9346907138824463, "learning_rate": 7.702761501122147e-07, "loss": 0.8058, "step": 11370 }, { "epoch": 0.96, "grad_norm": 0.866581916809082, "learning_rate": 7.539257816985835e-07, "loss": 0.8184, "step": 11375 }, { "epoch": 0.96, "grad_norm": 0.8073518872261047, "learning_rate": 7.377501549042265e-07, "loss": 0.8043, "step": 11380 }, { "epoch": 0.96, "grad_norm": 0.8920919895172119, "learning_rate": 7.217492982094176e-07, "loss": 0.8727, "step": 11385 }, { "epoch": 0.96, "grad_norm": 0.7557914853096008, "learning_rate": 7.059232397867099e-07, "loss": 0.8605, "step": 11390 }, { "epoch": 0.96, "grad_norm": 0.7152570486068726, "learning_rate": 6.902720075009139e-07, "loss": 0.8231, "step": 11395 }, { "epoch": 0.96, "grad_norm": 0.7431889772415161, "learning_rate": 6.747956289089863e-07, "loss": 0.9424, "step": 11400 }, { "epoch": 0.96, "grad_norm": 0.7541787624359131, "learning_rate": 6.594941312600411e-07, "loss": 0.9519, "step": 11405 }, { "epoch": 0.96, "grad_norm": 0.7901094555854797, "learning_rate": 6.443675414952833e-07, "loss": 0.9928, "step": 11410 }, { "epoch": 0.96, "grad_norm": 1.0786751508712769, "learning_rate": 6.294158862479527e-07, "loss": 0.8042, "step": 11415 }, { "epoch": 0.96, "grad_norm": 0.8062751293182373, "learning_rate": 6.146391918433026e-07, "loss": 0.8099, "step": 11420 }, { "epoch": 0.97, "grad_norm": 0.8185136318206787, "learning_rate": 6.000374842984991e-07, "loss": 1.0152, "step": 11425 }, { "epoch": 0.97, "grad_norm": 0.7455396056175232, "learning_rate": 5.856107893226325e-07, "loss": 0.8298, "step": 11430 }, { "epoch": 0.97, "grad_norm": 0.8589451909065247, "learning_rate": 5.713591323166622e-07, "loss": 0.8908, "step": 11435 }, { "epoch": 0.97, "grad_norm": 1.0530104637145996, "learning_rate": 5.57282538373316e-07, "loss": 0.9245, "step": 11440 }, { "epoch": 0.97, "grad_norm": 0.8141350746154785, "learning_rate": 5.433810322771571e-07, "loss": 0.861, "step": 11445 }, { "epoch": 0.97, "grad_norm": 0.7331737875938416, "learning_rate": 5.296546385044065e-07, "loss": 0.8259, "step": 11450 }, { "epoch": 0.97, "grad_norm": 0.8267128467559814, "learning_rate": 5.161033812229987e-07, "loss": 0.9983, "step": 11455 }, { "epoch": 0.97, "grad_norm": 0.7879713177680969, "learning_rate": 5.027272842925146e-07, "loss": 0.8164, "step": 11460 }, { "epoch": 0.97, "grad_norm": 0.83629310131073, "learning_rate": 4.895263712641151e-07, "loss": 0.947, "step": 11465 }, { "epoch": 0.97, "grad_norm": 0.8497070074081421, "learning_rate": 4.7650066538051927e-07, "loss": 0.8704, "step": 11470 }, { "epoch": 0.97, "grad_norm": 0.8327845931053162, "learning_rate": 4.636501895759704e-07, "loss": 0.8605, "step": 11475 }, { "epoch": 0.97, "grad_norm": 0.8554712533950806, "learning_rate": 4.5097496647616977e-07, "loss": 0.8408, "step": 11480 }, { "epoch": 0.97, "grad_norm": 0.9654614329338074, "learning_rate": 4.3847501839827666e-07, "loss": 0.894, "step": 11485 }, { "epoch": 0.97, "grad_norm": 0.794713020324707, "learning_rate": 4.261503673508194e-07, "loss": 0.8114, "step": 11490 }, { "epoch": 0.97, "grad_norm": 0.8144010305404663, "learning_rate": 4.1400103503368425e-07, "loss": 0.8534, "step": 11495 }, { "epoch": 0.97, "grad_norm": 1.1428261995315552, "learning_rate": 4.0202704283810456e-07, "loss": 0.8794, "step": 11500 }, { "epoch": 0.97, "grad_norm": 0.9131525158882141, "learning_rate": 3.9022841184657155e-07, "loss": 0.7327, "step": 11505 }, { "epoch": 0.97, "grad_norm": 0.6796863675117493, "learning_rate": 3.7860516283282355e-07, "loss": 0.8537, "step": 11510 }, { "epoch": 0.97, "grad_norm": 0.7078248262405396, "learning_rate": 3.6715731626179027e-07, "loss": 0.8454, "step": 11515 }, { "epoch": 0.97, "grad_norm": 0.745050847530365, "learning_rate": 3.55884892289593e-07, "loss": 0.7683, "step": 11520 }, { "epoch": 0.97, "grad_norm": 0.64480060338974, "learning_rate": 3.447879107634888e-07, "loss": 0.768, "step": 11525 }, { "epoch": 0.97, "grad_norm": 0.8762190341949463, "learning_rate": 3.338663912218265e-07, "loss": 0.794, "step": 11530 }, { "epoch": 0.97, "grad_norm": 0.8294702172279358, "learning_rate": 3.23120352894013e-07, "loss": 0.914, "step": 11535 }, { "epoch": 0.97, "grad_norm": 0.7077884078025818, "learning_rate": 3.1254981470049126e-07, "loss": 0.8384, "step": 11540 }, { "epoch": 0.98, "grad_norm": 0.8789534568786621, "learning_rate": 3.021547952527293e-07, "loss": 0.8172, "step": 11545 }, { "epoch": 0.98, "grad_norm": 0.9262539744377136, "learning_rate": 2.9193531285311993e-07, "loss": 0.9344, "step": 11550 }, { "epoch": 0.98, "grad_norm": 0.981154203414917, "learning_rate": 2.818913854950256e-07, "loss": 0.8827, "step": 11555 }, { "epoch": 0.98, "grad_norm": 0.6264580488204956, "learning_rate": 2.720230308626781e-07, "loss": 0.7482, "step": 11560 }, { "epoch": 0.98, "grad_norm": 0.7378256320953369, "learning_rate": 2.6233026633118994e-07, "loss": 0.7769, "step": 11565 }, { "epoch": 0.98, "grad_norm": 0.7126420736312866, "learning_rate": 2.528131089665431e-07, "loss": 0.7937, "step": 11570 }, { "epoch": 0.98, "grad_norm": 0.800485372543335, "learning_rate": 2.4347157552548907e-07, "loss": 0.9828, "step": 11575 }, { "epoch": 0.98, "grad_norm": 0.8482642769813538, "learning_rate": 2.3430568245558227e-07, "loss": 1.0101, "step": 11580 }, { "epoch": 0.98, "grad_norm": 0.9000346064567566, "learning_rate": 2.2531544589512454e-07, "loss": 0.8043, "step": 11585 }, { "epoch": 0.98, "grad_norm": 0.6278972029685974, "learning_rate": 2.1650088167313177e-07, "loss": 0.6926, "step": 11590 }, { "epoch": 0.98, "grad_norm": 0.8249235153198242, "learning_rate": 2.0786200530933387e-07, "loss": 0.9562, "step": 11595 }, { "epoch": 0.98, "grad_norm": 0.7574777603149414, "learning_rate": 1.9939883201410826e-07, "loss": 0.8049, "step": 11600 }, { "epoch": 0.98, "grad_norm": 0.8108986020088196, "learning_rate": 1.911113766884909e-07, "loss": 0.8037, "step": 11605 }, { "epoch": 0.98, "grad_norm": 0.8171221613883972, "learning_rate": 1.8299965392413187e-07, "loss": 0.8173, "step": 11610 }, { "epoch": 0.98, "grad_norm": 0.8570646047592163, "learning_rate": 1.7506367800325108e-07, "loss": 0.8189, "step": 11615 }, { "epoch": 0.98, "grad_norm": 0.7071337699890137, "learning_rate": 1.6730346289864918e-07, "loss": 0.9277, "step": 11620 }, { "epoch": 0.98, "grad_norm": 0.8421007394790649, "learning_rate": 1.597190222736633e-07, "loss": 0.8913, "step": 11625 }, { "epoch": 0.98, "grad_norm": 0.746640682220459, "learning_rate": 1.5231036948215594e-07, "loss": 0.8051, "step": 11630 }, { "epoch": 0.98, "grad_norm": 1.10215163230896, "learning_rate": 1.4507751756845934e-07, "loss": 0.9606, "step": 11635 }, { "epoch": 0.98, "grad_norm": 0.7056140303611755, "learning_rate": 1.380204792673867e-07, "loss": 0.9655, "step": 11640 }, { "epoch": 0.98, "grad_norm": 0.896172046661377, "learning_rate": 1.3113926700420998e-07, "loss": 0.9775, "step": 11645 }, { "epoch": 0.98, "grad_norm": 0.7562682032585144, "learning_rate": 1.2443389289460427e-07, "loss": 0.7869, "step": 11650 }, { "epoch": 0.98, "grad_norm": 0.8313950300216675, "learning_rate": 1.1790436874465904e-07, "loss": 0.8413, "step": 11655 }, { "epoch": 0.98, "grad_norm": 0.9028708338737488, "learning_rate": 1.1155070605085583e-07, "loss": 0.8534, "step": 11660 }, { "epoch": 0.99, "grad_norm": 0.8687404990196228, "learning_rate": 1.0537291600000165e-07, "loss": 0.8209, "step": 11665 }, { "epoch": 0.99, "grad_norm": 0.8653462529182434, "learning_rate": 9.937100946930677e-08, "loss": 0.7397, "step": 11670 }, { "epoch": 0.99, "grad_norm": 0.7406777739524841, "learning_rate": 9.354499702625141e-08, "loss": 0.7934, "step": 11675 }, { "epoch": 0.99, "grad_norm": 0.6351541876792908, "learning_rate": 8.789488892864129e-08, "loss": 0.8001, "step": 11680 }, { "epoch": 0.99, "grad_norm": 0.8214691877365112, "learning_rate": 8.242069512456318e-08, "loss": 0.8547, "step": 11685 }, { "epoch": 0.99, "grad_norm": 0.8716254830360413, "learning_rate": 7.71224252523961e-08, "loss": 0.8056, "step": 11690 }, { "epoch": 0.99, "grad_norm": 0.8704701662063599, "learning_rate": 7.200008864073349e-08, "loss": 0.9058, "step": 11695 }, { "epoch": 0.99, "grad_norm": 0.8255096077919006, "learning_rate": 6.705369430843878e-08, "loss": 0.8133, "step": 11700 }, { "epoch": 0.99, "grad_norm": 0.6788018941879272, "learning_rate": 6.228325096457876e-08, "loss": 0.6706, "step": 11705 }, { "epoch": 0.99, "grad_norm": 1.2154595851898193, "learning_rate": 5.7688767008423627e-08, "loss": 0.9283, "step": 11710 }, { "epoch": 0.99, "grad_norm": 0.6604523062705994, "learning_rate": 5.327025052943579e-08, "loss": 0.7873, "step": 11715 }, { "epoch": 0.99, "grad_norm": 0.8354642987251282, "learning_rate": 4.902770930725886e-08, "loss": 0.8338, "step": 11720 }, { "epoch": 0.99, "grad_norm": 0.9577195048332214, "learning_rate": 4.4961150811695384e-08, "loss": 0.9615, "step": 11725 }, { "epoch": 0.99, "grad_norm": 0.7846125364303589, "learning_rate": 4.107058220270687e-08, "loss": 0.9433, "step": 11730 }, { "epoch": 0.99, "grad_norm": 0.8085949420928955, "learning_rate": 3.735601033035829e-08, "loss": 0.87, "step": 11735 }, { "epoch": 0.99, "grad_norm": 1.022504210472107, "learning_rate": 3.3817441734862455e-08, "loss": 0.8262, "step": 11740 }, { "epoch": 0.99, "grad_norm": 0.686873197555542, "learning_rate": 3.045488264656893e-08, "loss": 0.799, "step": 11745 }, { "epoch": 0.99, "grad_norm": 0.8503823280334473, "learning_rate": 2.7268338985875218e-08, "loss": 0.8444, "step": 11750 }, { "epoch": 0.99, "grad_norm": 0.7915425300598145, "learning_rate": 2.4257816363326692e-08, "loss": 0.8874, "step": 11755 }, { "epoch": 0.99, "grad_norm": 0.6971069574356079, "learning_rate": 2.1423320079494435e-08, "loss": 0.8429, "step": 11760 }, { "epoch": 0.99, "grad_norm": 0.8105065226554871, "learning_rate": 1.8764855125052993e-08, "loss": 0.9761, "step": 11765 }, { "epoch": 0.99, "grad_norm": 0.8171855211257935, "learning_rate": 1.6282426180758148e-08, "loss": 0.9214, "step": 11770 }, { "epoch": 0.99, "grad_norm": 1.099576711654663, "learning_rate": 1.3976037617380311e-08, "loss": 1.024, "step": 11775 }, { "epoch": 1.0, "grad_norm": 0.7364673614501953, "learning_rate": 1.1845693495760035e-08, "loss": 0.759, "step": 11780 }, { "epoch": 1.0, "grad_norm": 1.0711894035339355, "learning_rate": 9.891397566774708e-09, "loss": 0.7868, "step": 11785 }, { "epoch": 1.0, "grad_norm": 0.7905126214027405, "learning_rate": 8.113153271327446e-09, "loss": 0.8227, "step": 11790 }, { "epoch": 1.0, "grad_norm": 0.7753369808197021, "learning_rate": 6.510963740369303e-09, "loss": 0.7911, "step": 11795 }, { "epoch": 1.0, "grad_norm": 0.6878575086593628, "learning_rate": 5.08483179485486e-09, "loss": 0.9471, "step": 11800 }, { "epoch": 1.0, "grad_norm": 0.7836639285087585, "learning_rate": 3.83475994575333e-09, "loss": 0.8245, "step": 11805 }, { "epoch": 1.0, "grad_norm": 0.7580632567405701, "learning_rate": 2.7607503940707546e-09, "loss": 0.8384, "step": 11810 }, { "epoch": 1.0, "grad_norm": 0.7631229162216187, "learning_rate": 1.862805030783399e-09, "loss": 0.8937, "step": 11815 }, { "epoch": 1.0, "grad_norm": 0.764155387878418, "learning_rate": 1.1409254369154632e-09, "loss": 0.8793, "step": 11820 }, { "epoch": 1.0, "grad_norm": 0.8263822793960571, "learning_rate": 5.951128834613684e-10, "loss": 0.8754, "step": 11825 }, { "epoch": 1.0, "grad_norm": 0.8471561670303345, "learning_rate": 2.2536833143016467e-10, "loss": 0.8039, "step": 11830 }, { "epoch": 1.0, "grad_norm": 0.7613112330436707, "learning_rate": 3.169243183442916e-11, "loss": 1.0197, "step": 11835 }, { "epoch": 1.0, "step": 11838, "total_flos": 6.323834158736998e+16, "train_loss": 0.0, "train_runtime": 0.0141, "train_samples_per_second": 837064.766, "train_steps_per_second": 837064.766 } ], "logging_steps": 5, "max_steps": 11838, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 6.323834158736998e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }