| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.10252758988964045, |
| "eval_steps": 500, |
| "global_step": 1800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005695977216091135, |
| "grad_norm": 21.049573670903968, |
| "learning_rate": 3.4155597722960153e-09, |
| "loss": 0.4919, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001139195443218227, |
| "grad_norm": 20.80684714819707, |
| "learning_rate": 7.210626185958254e-09, |
| "loss": 0.4437, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0017087931648273407, |
| "grad_norm": 33.130290630821996, |
| "learning_rate": 1.1005692599620494e-08, |
| "loss": 0.5198, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.002278390886436454, |
| "grad_norm": 12.510945540397115, |
| "learning_rate": 1.4800759013282731e-08, |
| "loss": 0.4968, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.002847988608045568, |
| "grad_norm": 15.943493472335307, |
| "learning_rate": 1.859582542694497e-08, |
| "loss": 0.4409, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0034175863296546814, |
| "grad_norm": 42.406097387194166, |
| "learning_rate": 2.239089184060721e-08, |
| "loss": 0.4684, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.003987184051263795, |
| "grad_norm": 24.406933558360457, |
| "learning_rate": 2.6185958254269448e-08, |
| "loss": 0.478, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.004556781772872908, |
| "grad_norm": 29.842569661833632, |
| "learning_rate": 2.9981024667931685e-08, |
| "loss": 0.4511, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.005126379494482022, |
| "grad_norm": 42.41367434650834, |
| "learning_rate": 3.3776091081593926e-08, |
| "loss": 0.4326, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.005695977216091136, |
| "grad_norm": 19.885296598548557, |
| "learning_rate": 3.757115749525617e-08, |
| "loss": 0.4347, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.006265574937700249, |
| "grad_norm": 25.71810252475294, |
| "learning_rate": 4.13662239089184e-08, |
| "loss": 0.4735, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.006835172659309363, |
| "grad_norm": 30.792769140339338, |
| "learning_rate": 4.516129032258064e-08, |
| "loss": 0.4933, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.007404770380918476, |
| "grad_norm": 16.942087666916184, |
| "learning_rate": 4.8956356736242883e-08, |
| "loss": 0.508, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00797436810252759, |
| "grad_norm": 13.25121169703299, |
| "learning_rate": 5.275142314990512e-08, |
| "loss": 0.4854, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.008543965824136704, |
| "grad_norm": 26.66597693714079, |
| "learning_rate": 5.654648956356736e-08, |
| "loss": 0.4939, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.009113563545745816, |
| "grad_norm": 25.56128647346689, |
| "learning_rate": 6.03415559772296e-08, |
| "loss": 0.4886, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.00968316126735493, |
| "grad_norm": 24.66360202992372, |
| "learning_rate": 6.413662239089184e-08, |
| "loss": 0.5195, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.010252758988964043, |
| "grad_norm": 29.34378737951036, |
| "learning_rate": 6.793168880455408e-08, |
| "loss": 0.4434, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.010822356710573158, |
| "grad_norm": 56.85479253251122, |
| "learning_rate": 7.172675521821632e-08, |
| "loss": 0.421, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.011391954432182272, |
| "grad_norm": 32.3202020199265, |
| "learning_rate": 7.552182163187856e-08, |
| "loss": 0.447, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.011961552153791384, |
| "grad_norm": 30.909848483794523, |
| "learning_rate": 7.931688804554079e-08, |
| "loss": 0.4677, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.012531149875400499, |
| "grad_norm": 25.632205852181723, |
| "learning_rate": 8.311195445920303e-08, |
| "loss": 0.4739, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.013100747597009611, |
| "grad_norm": 17.49619627631964, |
| "learning_rate": 8.690702087286526e-08, |
| "loss": 0.4753, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.013670345318618726, |
| "grad_norm": 33.64843713209131, |
| "learning_rate": 9.07020872865275e-08, |
| "loss": 0.485, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.01423994304022784, |
| "grad_norm": 34.99600769438769, |
| "learning_rate": 9.449715370018974e-08, |
| "loss": 0.4711, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.014809540761836952, |
| "grad_norm": 29.524926487531715, |
| "learning_rate": 9.829222011385198e-08, |
| "loss": 0.4665, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.015379138483446067, |
| "grad_norm": 13.568503433545974, |
| "learning_rate": 1.0208728652751421e-07, |
| "loss": 0.4457, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.01594873620505518, |
| "grad_norm": 24.811742737462957, |
| "learning_rate": 1.0588235294117647e-07, |
| "loss": 0.4924, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.016518333926664294, |
| "grad_norm": 14.269729917230158, |
| "learning_rate": 1.0967741935483869e-07, |
| "loss": 0.4603, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.017087931648273408, |
| "grad_norm": 16.274688413588088, |
| "learning_rate": 1.1347248576850095e-07, |
| "loss": 0.5013, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.017657529369882522, |
| "grad_norm": 25.477411052433148, |
| "learning_rate": 1.1726755218216317e-07, |
| "loss": 0.4705, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.018227127091491633, |
| "grad_norm": 19.70072062267979, |
| "learning_rate": 1.2106261859582542e-07, |
| "loss": 0.4611, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.018796724813100747, |
| "grad_norm": 13.857865555631838, |
| "learning_rate": 1.2485768500948766e-07, |
| "loss": 0.4363, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.01936632253470986, |
| "grad_norm": 36.710229582900936, |
| "learning_rate": 1.286527514231499e-07, |
| "loss": 0.4937, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.019935920256318976, |
| "grad_norm": 16.21485980059241, |
| "learning_rate": 1.3244781783681214e-07, |
| "loss": 0.4843, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.020505517977928087, |
| "grad_norm": 18.51035631765946, |
| "learning_rate": 1.3624288425047438e-07, |
| "loss": 0.4953, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0210751156995372, |
| "grad_norm": 109.65864479439105, |
| "learning_rate": 1.4003795066413662e-07, |
| "loss": 0.4452, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.021644713421146315, |
| "grad_norm": 26.812069103996944, |
| "learning_rate": 1.4383301707779884e-07, |
| "loss": 0.4745, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.02221431114275543, |
| "grad_norm": 750.501418758497, |
| "learning_rate": 1.476280834914611e-07, |
| "loss": 0.4178, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.022783908864364544, |
| "grad_norm": 12.593443551819714, |
| "learning_rate": 1.5142314990512332e-07, |
| "loss": 0.4774, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.023353506585973655, |
| "grad_norm": 41.1730678527844, |
| "learning_rate": 1.5521821631878558e-07, |
| "loss": 0.5109, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.02392310430758277, |
| "grad_norm": 16.51074339271402, |
| "learning_rate": 1.590132827324478e-07, |
| "loss": 0.4692, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.024492702029191883, |
| "grad_norm": 11.523289642261012, |
| "learning_rate": 1.6280834914611007e-07, |
| "loss": 0.417, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.025062299750800997, |
| "grad_norm": 21.107219252953012, |
| "learning_rate": 1.6660341555977228e-07, |
| "loss": 0.3798, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.025631897472410112, |
| "grad_norm": 8.312935431045693, |
| "learning_rate": 1.7039848197343455e-07, |
| "loss": 0.4586, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.026201495194019223, |
| "grad_norm": 17.659714103179056, |
| "learning_rate": 1.7419354838709676e-07, |
| "loss": 0.4637, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.026771092915628337, |
| "grad_norm": 35.28046535756356, |
| "learning_rate": 1.77988614800759e-07, |
| "loss": 0.4591, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.02734069063723745, |
| "grad_norm": 17.475339931922075, |
| "learning_rate": 1.8178368121442124e-07, |
| "loss": 0.4884, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.027910288358846565, |
| "grad_norm": 12.775420545423671, |
| "learning_rate": 1.8557874762808349e-07, |
| "loss": 0.4355, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.02847988608045568, |
| "grad_norm": 22.58921652660408, |
| "learning_rate": 1.8937381404174573e-07, |
| "loss": 0.5028, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02904948380206479, |
| "grad_norm": 21.649711297403396, |
| "learning_rate": 1.9316888045540797e-07, |
| "loss": 0.4595, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.029619081523673905, |
| "grad_norm": 61.88775690696106, |
| "learning_rate": 1.969639468690702e-07, |
| "loss": 0.4419, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.03018867924528302, |
| "grad_norm": 18.72064581371778, |
| "learning_rate": 1.9999999319386685e-07, |
| "loss": 0.4346, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.030758276966892133, |
| "grad_norm": 20.753732350716753, |
| "learning_rate": 1.9999975497930434e-07, |
| "loss": 0.4233, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.03132787468850125, |
| "grad_norm": 50.13806900167064, |
| "learning_rate": 1.999991764590115e-07, |
| "loss": 0.4471, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.03189747241011036, |
| "grad_norm": 15.313720228827242, |
| "learning_rate": 1.99998257634957e-07, |
| "loss": 0.4284, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.032467070131719476, |
| "grad_norm": 13.936702397387482, |
| "learning_rate": 1.999969985102677e-07, |
| "loss": 0.4901, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.03303666785332859, |
| "grad_norm": 13.608501418916317, |
| "learning_rate": 1.9999539908922847e-07, |
| "loss": 0.4853, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0336062655749377, |
| "grad_norm": 15.107322868355896, |
| "learning_rate": 1.9999345937728225e-07, |
| "loss": 0.4361, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.034175863296546816, |
| "grad_norm": 32.40056901599672, |
| "learning_rate": 1.9999117938103e-07, |
| "loss": 0.5263, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.034745461018155926, |
| "grad_norm": 9.964058712960675, |
| "learning_rate": 1.9998855910823074e-07, |
| "loss": 0.4443, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.035315058739765044, |
| "grad_norm": 22.67319149302576, |
| "learning_rate": 1.999855985678014e-07, |
| "loss": 0.453, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.035884656461374155, |
| "grad_norm": 32.08276760500707, |
| "learning_rate": 1.9998229776981686e-07, |
| "loss": 0.4704, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.036454254182983266, |
| "grad_norm": 35.57871112406775, |
| "learning_rate": 1.9997865672551e-07, |
| "loss": 0.4567, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.037023851904592384, |
| "grad_norm": 30.629691553067705, |
| "learning_rate": 1.9997467544727151e-07, |
| "loss": 0.4657, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.037593449626201494, |
| "grad_norm": 16.79894739182948, |
| "learning_rate": 1.9997035394864997e-07, |
| "loss": 0.4889, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.03816304734781061, |
| "grad_norm": 24.064206535589015, |
| "learning_rate": 1.9996569224435164e-07, |
| "loss": 0.4541, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.03873264506941972, |
| "grad_norm": 15.092611591816881, |
| "learning_rate": 1.9996069035024073e-07, |
| "loss": 0.4437, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.039302242791028834, |
| "grad_norm": 10.567345786993243, |
| "learning_rate": 1.9995534828333894e-07, |
| "loss": 0.4567, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.03987184051263795, |
| "grad_norm": 15.772143297135706, |
| "learning_rate": 1.9994966606182567e-07, |
| "loss": 0.4429, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04044143823424706, |
| "grad_norm": 23.454081623401972, |
| "learning_rate": 1.9994364370503791e-07, |
| "loss": 0.4874, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.04101103595585617, |
| "grad_norm": 25.771274144252004, |
| "learning_rate": 1.9993728123347014e-07, |
| "loss": 0.4804, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.04158063367746529, |
| "grad_norm": 38.49240584512041, |
| "learning_rate": 1.9993057866877422e-07, |
| "loss": 0.4851, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0421502313990744, |
| "grad_norm": 14.490580753823508, |
| "learning_rate": 1.999235360337595e-07, |
| "loss": 0.5076, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.04271982912068352, |
| "grad_norm": 73.59426722231183, |
| "learning_rate": 1.999161533523925e-07, |
| "loss": 0.4716, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.04328942684229263, |
| "grad_norm": 85.76885571174373, |
| "learning_rate": 1.9990843064979692e-07, |
| "loss": 0.4993, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.04385902456390174, |
| "grad_norm": 14.903196926933902, |
| "learning_rate": 1.999003679522537e-07, |
| "loss": 0.4791, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.04442862228551086, |
| "grad_norm": 18.928061501180686, |
| "learning_rate": 1.9989196528720064e-07, |
| "loss": 0.4483, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.04499822000711997, |
| "grad_norm": 19.436313523994027, |
| "learning_rate": 1.9988322268323266e-07, |
| "loss": 0.4549, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.04556781772872909, |
| "grad_norm": 38.861669459737385, |
| "learning_rate": 1.9987414017010133e-07, |
| "loss": 0.4298, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0461374154503382, |
| "grad_norm": 24.571115668775043, |
| "learning_rate": 1.998647177787151e-07, |
| "loss": 0.4558, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.04670701317194731, |
| "grad_norm": 19.706898075912328, |
| "learning_rate": 1.9985495554113894e-07, |
| "loss": 0.4366, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.04727661089355643, |
| "grad_norm": 33.9617977848147, |
| "learning_rate": 1.998448534905944e-07, |
| "loss": 0.4577, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.04784620861516554, |
| "grad_norm": 42.96396193732204, |
| "learning_rate": 1.9983441166145946e-07, |
| "loss": 0.4851, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.048415806336774656, |
| "grad_norm": 21.29503243417614, |
| "learning_rate": 1.998236300892683e-07, |
| "loss": 0.4807, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.048985404058383766, |
| "grad_norm": 31.984418484238574, |
| "learning_rate": 1.9981250881071133e-07, |
| "loss": 0.4281, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.04955500177999288, |
| "grad_norm": 23.76695346707812, |
| "learning_rate": 1.9980104786363502e-07, |
| "loss": 0.4753, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.050124599501601995, |
| "grad_norm": 51.49266265495552, |
| "learning_rate": 1.9978924728704166e-07, |
| "loss": 0.4453, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.050694197223211106, |
| "grad_norm": 57.45228883254441, |
| "learning_rate": 1.997771071210895e-07, |
| "loss": 0.4547, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.051263794944820223, |
| "grad_norm": 582.5163847711418, |
| "learning_rate": 1.997646274070922e-07, |
| "loss": 0.4011, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.051833392666429334, |
| "grad_norm": 12.943726869700884, |
| "learning_rate": 1.9975180818751908e-07, |
| "loss": 0.4156, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.052402990388038445, |
| "grad_norm": 15.975864494882453, |
| "learning_rate": 1.997386495059948e-07, |
| "loss": 0.3637, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.05297258810964756, |
| "grad_norm": 20.386395091429563, |
| "learning_rate": 1.9972515140729928e-07, |
| "loss": 0.4506, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.053542185831256674, |
| "grad_norm": 16.014612208992087, |
| "learning_rate": 1.9971131393736732e-07, |
| "loss": 0.4581, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.05411178355286579, |
| "grad_norm": 13.034905667217853, |
| "learning_rate": 1.996971371432888e-07, |
| "loss": 0.4288, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.0546813812744749, |
| "grad_norm": 13.755268105445708, |
| "learning_rate": 1.996826210733083e-07, |
| "loss": 0.4704, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.05525097899608401, |
| "grad_norm": 14.709572404543191, |
| "learning_rate": 1.9966776577682488e-07, |
| "loss": 0.4514, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.05582057671769313, |
| "grad_norm": 267.85069242584797, |
| "learning_rate": 1.9965257130439217e-07, |
| "loss": 0.4667, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.05639017443930224, |
| "grad_norm": 36.2529434259595, |
| "learning_rate": 1.9963703770771795e-07, |
| "loss": 0.4929, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.05695977216091136, |
| "grad_norm": 138.25374119669067, |
| "learning_rate": 1.99621165039664e-07, |
| "loss": 0.4887, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05752936988252047, |
| "grad_norm": 37.65440236342901, |
| "learning_rate": 1.9960495335424615e-07, |
| "loss": 0.4702, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.05809896760412958, |
| "grad_norm": 20.49543757338681, |
| "learning_rate": 1.9958840270663377e-07, |
| "loss": 0.4421, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.0586685653257387, |
| "grad_norm": 135.86209503027462, |
| "learning_rate": 1.9957151315314977e-07, |
| "loss": 0.4665, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.05923816304734781, |
| "grad_norm": 16.112172807163798, |
| "learning_rate": 1.9955428475127049e-07, |
| "loss": 0.4627, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.05980776076895693, |
| "grad_norm": 20.582916516737605, |
| "learning_rate": 1.9953671755962525e-07, |
| "loss": 0.4728, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.06037735849056604, |
| "grad_norm": 60.603879240132336, |
| "learning_rate": 1.995188116379964e-07, |
| "loss": 0.4804, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.06094695621217515, |
| "grad_norm": 31.78120339860008, |
| "learning_rate": 1.995005670473189e-07, |
| "loss": 0.4671, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.06151655393378427, |
| "grad_norm": 24.087070640843212, |
| "learning_rate": 1.9948198384968038e-07, |
| "loss": 0.4299, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.06208615165539338, |
| "grad_norm": 78.72280950118618, |
| "learning_rate": 1.994630621083206e-07, |
| "loss": 0.4402, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.0626557493770025, |
| "grad_norm": 26.03024474381767, |
| "learning_rate": 1.9944380188763154e-07, |
| "loss": 0.4909, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0632253470986116, |
| "grad_norm": 19.38210538566336, |
| "learning_rate": 1.99424203253157e-07, |
| "loss": 0.4324, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.06379494482022072, |
| "grad_norm": 96.14283327480236, |
| "learning_rate": 1.9940426627159237e-07, |
| "loss": 0.3688, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.06436454254182983, |
| "grad_norm": 31.026454060990613, |
| "learning_rate": 1.9938399101078453e-07, |
| "loss": 0.5005, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.06493414026343895, |
| "grad_norm": 12.493715605004533, |
| "learning_rate": 1.9936337753973154e-07, |
| "loss": 0.4489, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.06550373798504806, |
| "grad_norm": 12.701919393018652, |
| "learning_rate": 1.9934242592858236e-07, |
| "loss": 0.4142, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.06607333570665717, |
| "grad_norm": 28.82714425455963, |
| "learning_rate": 1.9932113624863676e-07, |
| "loss": 0.4932, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.06664293342826628, |
| "grad_norm": 37.0311582587264, |
| "learning_rate": 1.9929950857234485e-07, |
| "loss": 0.4971, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.0672125311498754, |
| "grad_norm": 117.98131014467332, |
| "learning_rate": 1.9927754297330708e-07, |
| "loss": 0.4852, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.06778212887148452, |
| "grad_norm": 9.731916201335425, |
| "learning_rate": 1.9925523952627379e-07, |
| "loss": 0.4253, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.06835172659309363, |
| "grad_norm": 38.263236761476804, |
| "learning_rate": 1.992325983071451e-07, |
| "loss": 0.4321, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.06892132431470274, |
| "grad_norm": 15.381655794971056, |
| "learning_rate": 1.9920961939297058e-07, |
| "loss": 0.441, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.06949092203631185, |
| "grad_norm": 15.428597063217957, |
| "learning_rate": 1.991863028619489e-07, |
| "loss": 0.4968, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.07006051975792096, |
| "grad_norm": 45.071961716711186, |
| "learning_rate": 1.9916264879342785e-07, |
| "loss": 0.4305, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.07063011747953009, |
| "grad_norm": 27.232374368700274, |
| "learning_rate": 1.9913865726790373e-07, |
| "loss": 0.4366, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.0711997152011392, |
| "grad_norm": 21.18563628403933, |
| "learning_rate": 1.9911432836702127e-07, |
| "loss": 0.4833, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.07176931292274831, |
| "grad_norm": 13.543561740630265, |
| "learning_rate": 1.990896621735733e-07, |
| "loss": 0.4487, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.07233891064435742, |
| "grad_norm": 86.58996123562757, |
| "learning_rate": 1.9906465877150058e-07, |
| "loss": 0.4773, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.07290850836596653, |
| "grad_norm": 14.872934684284921, |
| "learning_rate": 1.9903931824589123e-07, |
| "loss": 0.4457, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.07347810608757566, |
| "grad_norm": 33.14051909456766, |
| "learning_rate": 1.9901364068298077e-07, |
| "loss": 0.4897, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.07404770380918477, |
| "grad_norm": 16.442246412948936, |
| "learning_rate": 1.989876261701516e-07, |
| "loss": 0.4477, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.07461730153079388, |
| "grad_norm": 15.10709676956974, |
| "learning_rate": 1.9896127479593287e-07, |
| "loss": 0.542, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.07518689925240299, |
| "grad_norm": 63.44702816378984, |
| "learning_rate": 1.9893458665000002e-07, |
| "loss": 0.4563, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.0757564969740121, |
| "grad_norm": 11.730796276591832, |
| "learning_rate": 1.9890756182317454e-07, |
| "loss": 0.4634, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.07632609469562122, |
| "grad_norm": 27.47043509263349, |
| "learning_rate": 1.9888020040742367e-07, |
| "loss": 0.5072, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.07689569241723034, |
| "grad_norm": 52.27539617258087, |
| "learning_rate": 1.9885250249586014e-07, |
| "loss": 0.4645, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.07746529013883945, |
| "grad_norm": 12.68105188761479, |
| "learning_rate": 1.9882446818274176e-07, |
| "loss": 0.4396, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.07803488786044856, |
| "grad_norm": 21.818578525864865, |
| "learning_rate": 1.9879609756347113e-07, |
| "loss": 0.4814, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.07860448558205767, |
| "grad_norm": 58.13255180180692, |
| "learning_rate": 1.987673907345953e-07, |
| "loss": 0.4867, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.07917408330366678, |
| "grad_norm": 14.770491903827162, |
| "learning_rate": 1.9873834779380556e-07, |
| "loss": 0.4893, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.0797436810252759, |
| "grad_norm": 16.635013592634976, |
| "learning_rate": 1.987089688399369e-07, |
| "loss": 0.4478, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.08031327874688501, |
| "grad_norm": 17.65859576674858, |
| "learning_rate": 1.9867925397296784e-07, |
| "loss": 0.4944, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.08088287646849412, |
| "grad_norm": 86.88472923093384, |
| "learning_rate": 1.9864920329402e-07, |
| "loss": 0.4616, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.08145247419010324, |
| "grad_norm": 25.736980272449554, |
| "learning_rate": 1.9861881690535784e-07, |
| "loss": 0.4348, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.08202207191171235, |
| "grad_norm": 21.180899350268074, |
| "learning_rate": 1.9858809491038823e-07, |
| "loss": 0.4156, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.08259166963332147, |
| "grad_norm": 94.81320893975935, |
| "learning_rate": 1.9855703741366013e-07, |
| "loss": 0.4449, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.08316126735493058, |
| "grad_norm": 15.599193486437276, |
| "learning_rate": 1.9852564452086424e-07, |
| "loss": 0.4869, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.08373086507653969, |
| "grad_norm": 20.953628168685903, |
| "learning_rate": 1.9849391633883262e-07, |
| "loss": 0.4373, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.0843004627981488, |
| "grad_norm": 38.52912326460994, |
| "learning_rate": 1.9846185297553842e-07, |
| "loss": 0.4605, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.08487006051975791, |
| "grad_norm": 22.139614672186013, |
| "learning_rate": 1.9842945454009527e-07, |
| "loss": 0.4333, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.08543965824136704, |
| "grad_norm": 15.268704177226823, |
| "learning_rate": 1.9839672114275726e-07, |
| "loss": 0.4497, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.08600925596297615, |
| "grad_norm": 29.168893226415655, |
| "learning_rate": 1.9836365289491823e-07, |
| "loss": 0.4613, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.08657885368458526, |
| "grad_norm": 21.606021719728066, |
| "learning_rate": 1.9833024990911165e-07, |
| "loss": 0.4617, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.08714845140619437, |
| "grad_norm": 32.88438245448298, |
| "learning_rate": 1.9829651229901004e-07, |
| "loss": 0.4377, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.08771804912780348, |
| "grad_norm": 14.247154488970322, |
| "learning_rate": 1.9826244017942467e-07, |
| "loss": 0.4355, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.08828764684941261, |
| "grad_norm": 20.872950619341626, |
| "learning_rate": 1.9822803366630527e-07, |
| "loss": 0.4497, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.08885724457102172, |
| "grad_norm": 17.850902788048966, |
| "learning_rate": 1.9819329287673946e-07, |
| "loss": 0.5261, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.08942684229263083, |
| "grad_norm": 16.138046631977943, |
| "learning_rate": 1.9815821792895235e-07, |
| "loss": 0.4969, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.08999644001423994, |
| "grad_norm": 19.458881544939857, |
| "learning_rate": 1.9812280894230636e-07, |
| "loss": 0.4784, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.09056603773584905, |
| "grad_norm": 17.653853307895435, |
| "learning_rate": 1.9808706603730057e-07, |
| "loss": 0.4463, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.09113563545745818, |
| "grad_norm": 15.623914718581604, |
| "learning_rate": 1.9805098933557044e-07, |
| "loss": 0.4121, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.09170523317906729, |
| "grad_norm": 102.23831398557063, |
| "learning_rate": 1.9801457895988732e-07, |
| "loss": 0.4202, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.0922748309006764, |
| "grad_norm": 18.72931059613502, |
| "learning_rate": 1.9797783503415818e-07, |
| "loss": 0.4243, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.09284442862228551, |
| "grad_norm": 237.94062671792636, |
| "learning_rate": 1.9794075768342494e-07, |
| "loss": 0.4527, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.09341402634389462, |
| "grad_norm": 20.831268870843843, |
| "learning_rate": 1.9790334703386428e-07, |
| "loss": 0.4888, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.09398362406550374, |
| "grad_norm": 20.77514897368684, |
| "learning_rate": 1.9786560321278714e-07, |
| "loss": 0.4432, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.09455322178711285, |
| "grad_norm": 30.08308934914739, |
| "learning_rate": 1.9782752634863814e-07, |
| "loss": 0.4323, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.09512281950872196, |
| "grad_norm": 34.03857180920022, |
| "learning_rate": 1.9778911657099544e-07, |
| "loss": 0.4347, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.09569241723033108, |
| "grad_norm": 41.38196703392111, |
| "learning_rate": 1.9775037401056998e-07, |
| "loss": 0.48, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.09626201495194019, |
| "grad_norm": 26.728838791837614, |
| "learning_rate": 1.9771129879920522e-07, |
| "loss": 0.4541, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.09683161267354931, |
| "grad_norm": 17.319090510690177, |
| "learning_rate": 1.976718910698767e-07, |
| "loss": 0.47, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.09740121039515842, |
| "grad_norm": 24.927565311503443, |
| "learning_rate": 1.9763215095669147e-07, |
| "loss": 0.4775, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.09797080811676753, |
| "grad_norm": 35.5217833486329, |
| "learning_rate": 1.9759207859488781e-07, |
| "loss": 0.4387, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.09854040583837664, |
| "grad_norm": 12.75295290738318, |
| "learning_rate": 1.975516741208346e-07, |
| "loss": 0.4733, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.09911000355998575, |
| "grad_norm": 23.901764027051986, |
| "learning_rate": 1.9751093767203084e-07, |
| "loss": 0.4549, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.09967960128159488, |
| "grad_norm": 16.507601695686727, |
| "learning_rate": 1.974698693871054e-07, |
| "loss": 0.4483, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.10024919900320399, |
| "grad_norm": 20.096118428482878, |
| "learning_rate": 1.9742846940581628e-07, |
| "loss": 0.4363, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.1008187967248131, |
| "grad_norm": 15.396477204440362, |
| "learning_rate": 1.9738673786905045e-07, |
| "loss": 0.4871, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.10138839444642221, |
| "grad_norm": 42.5121633336015, |
| "learning_rate": 1.9734467491882297e-07, |
| "loss": 0.5204, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.10195799216803132, |
| "grad_norm": 28.227316949939553, |
| "learning_rate": 1.9730228069827685e-07, |
| "loss": 0.4862, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.10252758988964045, |
| "grad_norm": 24.685923174663778, |
| "learning_rate": 1.972595553516824e-07, |
| "loss": 0.4545, |
| "step": 1800 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 17557, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 571832082890752.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|