{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.24249096418044722, "eval_steps": 500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.8498192836089445e-05, "grad_norm": 7.0137038230896, "learning_rate": 0.0002, "loss": 11.525, "step": 10 }, { "epoch": 9.699638567217889e-05, "grad_norm": 4.870274543762207, "learning_rate": 0.0002, "loss": 2.3744, "step": 20 }, { "epoch": 0.00014549457850826833, "grad_norm": 0.42298200726509094, "learning_rate": 0.0002, "loss": 0.2928, "step": 30 }, { "epoch": 0.00019399277134435778, "grad_norm": 0.08328046649694443, "learning_rate": 0.0002, "loss": 0.0285, "step": 40 }, { "epoch": 0.00024249096418044722, "grad_norm": 0.011150314472615719, "learning_rate": 0.0002, "loss": 0.002, "step": 50 }, { "epoch": 0.00029098915701653667, "grad_norm": 0.046085014939308167, "learning_rate": 0.0002, "loss": 0.0028, "step": 60 }, { "epoch": 0.0003394873498526261, "grad_norm": 0.007020743563771248, "learning_rate": 0.0002, "loss": 0.0008, "step": 70 }, { "epoch": 0.00038798554268871556, "grad_norm": 0.0030814874917268753, "learning_rate": 0.0002, "loss": 0.0005, "step": 80 }, { "epoch": 0.000436483735524805, "grad_norm": 0.0018209334230050445, "learning_rate": 0.0002, "loss": 0.0004, "step": 90 }, { "epoch": 0.00048498192836089445, "grad_norm": 0.001966217067092657, "learning_rate": 0.0002, "loss": 0.0002, "step": 100 }, { "epoch": 0.0005334801211969839, "grad_norm": 0.0011740890331566334, "learning_rate": 0.0002, "loss": 0.0003, "step": 110 }, { "epoch": 0.0005819783140330733, "grad_norm": 0.0010490730637684464, "learning_rate": 0.0002, "loss": 0.0002, "step": 120 }, { "epoch": 0.0006304765068691628, "grad_norm": 0.0010500391945242882, "learning_rate": 0.0002, "loss": 0.0002, "step": 130 }, { "epoch": 0.0006789746997052522, "grad_norm": 0.0014898321824148297, "learning_rate": 0.0002, "loss": 0.0001, "step": 140 }, { "epoch": 0.0007274728925413417, "grad_norm": 0.000751811487134546, "learning_rate": 0.0002, "loss": 0.0001, "step": 150 }, { "epoch": 0.0007759710853774311, "grad_norm": 0.000634036841802299, "learning_rate": 0.0002, "loss": 0.0001, "step": 160 }, { "epoch": 0.0008244692782135206, "grad_norm": 0.0006518703885376453, "learning_rate": 0.0002, "loss": 0.0001, "step": 170 }, { "epoch": 0.00087296747104961, "grad_norm": 0.0008479053503833711, "learning_rate": 0.0002, "loss": 0.0001, "step": 180 }, { "epoch": 0.0009214656638856995, "grad_norm": 0.0005388145218603313, "learning_rate": 0.0002, "loss": 0.0001, "step": 190 }, { "epoch": 0.0009699638567217889, "grad_norm": 0.0004428931570146233, "learning_rate": 0.0002, "loss": 0.0001, "step": 200 }, { "epoch": 0.0010184620495578783, "grad_norm": 0.0006309392629191279, "learning_rate": 0.0002, "loss": 0.0001, "step": 210 }, { "epoch": 0.0010669602423939678, "grad_norm": 0.0027992769610136747, "learning_rate": 0.0002, "loss": 0.0001, "step": 220 }, { "epoch": 0.0011154584352300572, "grad_norm": 0.0007598961819894612, "learning_rate": 0.0002, "loss": 0.0001, "step": 230 }, { "epoch": 0.0011639566280661467, "grad_norm": 0.00043805173481814563, "learning_rate": 0.0002, "loss": 0.0001, "step": 240 }, { "epoch": 0.0012124548209022361, "grad_norm": 0.00045929220505058765, "learning_rate": 0.0002, "loss": 0.0, "step": 250 }, { "epoch": 0.0012609530137383256, "grad_norm": 0.00045443125418387353, "learning_rate": 0.0002, "loss": 0.0001, "step": 260 }, { "epoch": 0.001309451206574415, "grad_norm": 0.0005330107524059713, "learning_rate": 0.0002, "loss": 0.0001, "step": 270 }, { "epoch": 0.0013579493994105045, "grad_norm": 0.0009763655834831297, "learning_rate": 0.0002, "loss": 0.0001, "step": 280 }, { "epoch": 0.001406447592246594, "grad_norm": 0.0004014769510831684, "learning_rate": 0.0002, "loss": 0.0001, "step": 290 }, { "epoch": 0.0014549457850826833, "grad_norm": 0.001088621444068849, "learning_rate": 0.0002, "loss": 0.0, "step": 300 }, { "epoch": 0.0015034439779187728, "grad_norm": 0.0003289405722171068, "learning_rate": 0.0002, "loss": 0.0001, "step": 310 }, { "epoch": 0.0015519421707548622, "grad_norm": 0.0002596142003312707, "learning_rate": 0.0002, "loss": 0.0001, "step": 320 }, { "epoch": 0.0016004403635909517, "grad_norm": 0.00040801282739266753, "learning_rate": 0.0002, "loss": 0.0003, "step": 330 }, { "epoch": 0.0016489385564270411, "grad_norm": 0.0004958392237313092, "learning_rate": 0.0002, "loss": 0.0001, "step": 340 }, { "epoch": 0.0016974367492631306, "grad_norm": 0.0005178772262297571, "learning_rate": 0.0002, "loss": 0.0, "step": 350 }, { "epoch": 0.00174593494209922, "grad_norm": 0.0002908179594669491, "learning_rate": 0.0002, "loss": 0.0001, "step": 360 }, { "epoch": 0.0017944331349353095, "grad_norm": 0.0002578392450232059, "learning_rate": 0.0002, "loss": 0.0001, "step": 370 }, { "epoch": 0.001842931327771399, "grad_norm": 0.0002733236469794065, "learning_rate": 0.0002, "loss": 0.0001, "step": 380 }, { "epoch": 0.0018914295206074884, "grad_norm": 0.00032467060373164713, "learning_rate": 0.0002, "loss": 0.0, "step": 390 }, { "epoch": 0.0019399277134435778, "grad_norm": 0.0005388385616242886, "learning_rate": 0.0002, "loss": 0.0, "step": 400 }, { "epoch": 0.0019884259062796675, "grad_norm": 0.00021177809685468674, "learning_rate": 0.0002, "loss": 0.0, "step": 410 }, { "epoch": 0.0020369240991157567, "grad_norm": 0.00022166680719237775, "learning_rate": 0.0002, "loss": 0.0, "step": 420 }, { "epoch": 0.0020854222919518463, "grad_norm": 0.00033918791450560093, "learning_rate": 0.0002, "loss": 0.0, "step": 430 }, { "epoch": 0.0021339204847879356, "grad_norm": 0.00044534917105920613, "learning_rate": 0.0002, "loss": 0.0, "step": 440 }, { "epoch": 0.0021824186776240252, "grad_norm": 0.0001890509156510234, "learning_rate": 0.0002, "loss": 0.0, "step": 450 }, { "epoch": 0.0022309168704601145, "grad_norm": 0.00018653202278073877, "learning_rate": 0.0002, "loss": 0.0, "step": 460 }, { "epoch": 0.002279415063296204, "grad_norm": 0.0001782204199116677, "learning_rate": 0.0002, "loss": 0.0, "step": 470 }, { "epoch": 0.0023279132561322934, "grad_norm": 0.0009645981481298804, "learning_rate": 0.0002, "loss": 0.0, "step": 480 }, { "epoch": 0.002376411448968383, "grad_norm": 0.000186074961675331, "learning_rate": 0.0002, "loss": 0.0, "step": 490 }, { "epoch": 0.0024249096418044722, "grad_norm": 0.00019125571998301893, "learning_rate": 0.0002, "loss": 0.0, "step": 500 }, { "epoch": 0.002473407834640562, "grad_norm": 0.00018144839850720018, "learning_rate": 0.0002, "loss": 0.0, "step": 510 }, { "epoch": 0.002521906027476651, "grad_norm": 0.0002118117845384404, "learning_rate": 0.0002, "loss": 0.0, "step": 520 }, { "epoch": 0.002570404220312741, "grad_norm": 0.00035008261329494417, "learning_rate": 0.0002, "loss": 0.0, "step": 530 }, { "epoch": 0.00261890241314883, "grad_norm": 0.00023883483663666993, "learning_rate": 0.0002, "loss": 0.0, "step": 540 }, { "epoch": 0.0026674006059849197, "grad_norm": 0.00014292067498899996, "learning_rate": 0.0002, "loss": 0.0, "step": 550 }, { "epoch": 0.002715898798821009, "grad_norm": 0.00016217175289057195, "learning_rate": 0.0002, "loss": 0.0, "step": 560 }, { "epoch": 0.0027643969916570986, "grad_norm": 0.00013914685405325145, "learning_rate": 0.0002, "loss": 0.0, "step": 570 }, { "epoch": 0.002812895184493188, "grad_norm": 0.00014768107212148607, "learning_rate": 0.0002, "loss": 0.0, "step": 580 }, { "epoch": 0.0028613933773292775, "grad_norm": 0.00016935243911575526, "learning_rate": 0.0002, "loss": 0.0, "step": 590 }, { "epoch": 0.0029098915701653667, "grad_norm": 0.00013394730922300369, "learning_rate": 0.0002, "loss": 0.0, "step": 600 }, { "epoch": 0.0029583897630014564, "grad_norm": 0.0001128869189415127, "learning_rate": 0.0002, "loss": 0.0, "step": 610 }, { "epoch": 0.0030068879558375456, "grad_norm": 0.00011974652443313971, "learning_rate": 0.0002, "loss": 0.0, "step": 620 }, { "epoch": 0.0030553861486736352, "grad_norm": 0.00011526222806423903, "learning_rate": 0.0002, "loss": 0.0, "step": 630 }, { "epoch": 0.0031038843415097245, "grad_norm": 0.00013400876196101308, "learning_rate": 0.0002, "loss": 0.0, "step": 640 }, { "epoch": 0.003152382534345814, "grad_norm": 0.00010777600982692093, "learning_rate": 0.0002, "loss": 0.0, "step": 650 }, { "epoch": 0.0032008807271819034, "grad_norm": 0.0026077264919877052, "learning_rate": 0.0002, "loss": 0.0, "step": 660 }, { "epoch": 0.003249378920017993, "grad_norm": 0.00012641074135899544, "learning_rate": 0.0002, "loss": 0.0, "step": 670 }, { "epoch": 0.0032978771128540823, "grad_norm": 0.00015046396583784372, "learning_rate": 0.0002, "loss": 0.0, "step": 680 }, { "epoch": 0.003346375305690172, "grad_norm": 0.00013536795449908823, "learning_rate": 0.0002, "loss": 0.0, "step": 690 }, { "epoch": 0.003394873498526261, "grad_norm": 9.661448711995035e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 700 }, { "epoch": 0.003443371691362351, "grad_norm": 0.00011039614037144929, "learning_rate": 0.0002, "loss": 0.0, "step": 710 }, { "epoch": 0.00349186988419844, "grad_norm": 0.00012099206651328132, "learning_rate": 0.0002, "loss": 0.0, "step": 720 }, { "epoch": 0.0035403680770345297, "grad_norm": 9.411859355168417e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 730 }, { "epoch": 0.003588866269870619, "grad_norm": 0.00010046812531072646, "learning_rate": 0.0002, "loss": 0.0, "step": 740 }, { "epoch": 0.0036373644627067086, "grad_norm": 0.0001594393397681415, "learning_rate": 0.0002, "loss": 0.0, "step": 750 }, { "epoch": 0.003685862655542798, "grad_norm": 9.023043094202876e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 760 }, { "epoch": 0.0037343608483788875, "grad_norm": 8.159194840118289e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 770 }, { "epoch": 0.0037828590412149767, "grad_norm": 0.00015648700355086476, "learning_rate": 0.0002, "loss": 0.0, "step": 780 }, { "epoch": 0.0038313572340510664, "grad_norm": 0.0002606350462883711, "learning_rate": 0.0002, "loss": 0.0, "step": 790 }, { "epoch": 0.0038798554268871556, "grad_norm": 0.00011527106107678264, "learning_rate": 0.0002, "loss": 0.0, "step": 800 }, { "epoch": 0.003928353619723245, "grad_norm": 7.355995330726728e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 810 }, { "epoch": 0.003976851812559335, "grad_norm": 7.027355604805052e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 820 }, { "epoch": 0.004025350005395424, "grad_norm": 8.775261085247621e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 830 }, { "epoch": 0.004073848198231513, "grad_norm": 7.290283974725753e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 840 }, { "epoch": 0.004122346391067603, "grad_norm": 0.00010316750558558851, "learning_rate": 0.0002, "loss": 0.0, "step": 850 }, { "epoch": 0.004170844583903693, "grad_norm": 9.193110599881038e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 860 }, { "epoch": 0.004219342776739782, "grad_norm": 6.270845915423706e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 870 }, { "epoch": 0.004267840969575871, "grad_norm": 0.00011484194692457095, "learning_rate": 0.0002, "loss": 0.0, "step": 880 }, { "epoch": 0.00431633916241196, "grad_norm": 6.079157174099237e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 890 }, { "epoch": 0.0043648373552480505, "grad_norm": 6.669562571914867e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 900 }, { "epoch": 0.00441333554808414, "grad_norm": 6.574439612450078e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 910 }, { "epoch": 0.004461833740920229, "grad_norm": 6.148052489152178e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 920 }, { "epoch": 0.004510331933756318, "grad_norm": 6.824133743066341e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 930 }, { "epoch": 0.004558830126592408, "grad_norm": 6.784526340197772e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 940 }, { "epoch": 0.0046073283194284975, "grad_norm": 9.633138688514009e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 950 }, { "epoch": 0.004655826512264587, "grad_norm": 6.294013292063028e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 960 }, { "epoch": 0.004704324705100676, "grad_norm": 0.0001298923307331279, "learning_rate": 0.0002, "loss": 0.0, "step": 970 }, { "epoch": 0.004752822897936766, "grad_norm": 5.6816734286258e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 980 }, { "epoch": 0.004801321090772855, "grad_norm": 7.590373570565134e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 990 }, { "epoch": 0.0048498192836089445, "grad_norm": 6.614408630412072e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1000 }, { "epoch": 0.004898317476445034, "grad_norm": 7.579354860354215e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1010 }, { "epoch": 0.004946815669281124, "grad_norm": 9.739511733641848e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1020 }, { "epoch": 0.004995313862117213, "grad_norm": 5.352884909370914e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1030 }, { "epoch": 0.005043812054953302, "grad_norm": 5.128383054398e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1040 }, { "epoch": 0.0050923102477893915, "grad_norm": 5.3756932175019756e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1050 }, { "epoch": 0.005140808440625482, "grad_norm": 7.623614510521293e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1060 }, { "epoch": 0.005189306633461571, "grad_norm": 6.302021938608959e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1070 }, { "epoch": 0.00523780482629766, "grad_norm": 5.5692566093057394e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1080 }, { "epoch": 0.005286303019133749, "grad_norm": 7.295329851331189e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1090 }, { "epoch": 0.005334801211969839, "grad_norm": 7.901657954789698e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1100 }, { "epoch": 0.005383299404805929, "grad_norm": 5.348257764126174e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1110 }, { "epoch": 0.005431797597642018, "grad_norm": 0.0003021352458745241, "learning_rate": 0.0002, "loss": 0.0, "step": 1120 }, { "epoch": 0.005480295790478107, "grad_norm": 4.370836541056633e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1130 }, { "epoch": 0.005528793983314197, "grad_norm": 0.00014496722724288702, "learning_rate": 0.0002, "loss": 0.0, "step": 1140 }, { "epoch": 0.005577292176150286, "grad_norm": 4.216847082716413e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1150 }, { "epoch": 0.005625790368986376, "grad_norm": 6.0267560911597684e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1160 }, { "epoch": 0.005674288561822465, "grad_norm": 8.198683644877747e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1170 }, { "epoch": 0.005722786754658555, "grad_norm": 6.159309123177081e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1180 }, { "epoch": 0.005771284947494644, "grad_norm": 3.536698568495922e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1190 }, { "epoch": 0.005819783140330733, "grad_norm": 4.879735206486657e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1200 }, { "epoch": 0.005868281333166823, "grad_norm": 5.184320252737962e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1210 }, { "epoch": 0.005916779526002913, "grad_norm": 4.038035331177525e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1220 }, { "epoch": 0.005965277718839002, "grad_norm": 7.10531385266222e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1230 }, { "epoch": 0.006013775911675091, "grad_norm": 5.235036587691866e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1240 }, { "epoch": 0.00606227410451118, "grad_norm": 4.658092075260356e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1250 }, { "epoch": 0.0061107722973472705, "grad_norm": 4.321910091675818e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1260 }, { "epoch": 0.00615927049018336, "grad_norm": 5.6633591157151386e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1270 }, { "epoch": 0.006207768683019449, "grad_norm": 4.830410398426466e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1280 }, { "epoch": 0.006256266875855538, "grad_norm": 0.00018114270642399788, "learning_rate": 0.0002, "loss": 0.0, "step": 1290 }, { "epoch": 0.006304765068691628, "grad_norm": 3.893003668054007e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1300 }, { "epoch": 0.0063532632615277175, "grad_norm": 3.8624912122031674e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1310 }, { "epoch": 0.006401761454363807, "grad_norm": 4.9101181502919644e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1320 }, { "epoch": 0.006450259647199896, "grad_norm": 7.203716813819483e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1330 }, { "epoch": 0.006498757840035986, "grad_norm": 4.329906005295925e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1340 }, { "epoch": 0.006547256032872075, "grad_norm": 4.686972897616215e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1350 }, { "epoch": 0.0065957542257081645, "grad_norm": 5.5464788601966575e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1360 }, { "epoch": 0.006644252418544254, "grad_norm": 4.181311305728741e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1370 }, { "epoch": 0.006692750611380344, "grad_norm": 3.026279046025593e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1380 }, { "epoch": 0.006741248804216433, "grad_norm": 3.0295126634882763e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1390 }, { "epoch": 0.006789746997052522, "grad_norm": 3.080913666053675e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1400 }, { "epoch": 0.0068382451898886115, "grad_norm": 3.442809611442499e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1410 }, { "epoch": 0.006886743382724702, "grad_norm": 3.190052302670665e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1420 }, { "epoch": 0.006935241575560791, "grad_norm": 3.954674320993945e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1430 }, { "epoch": 0.00698373976839688, "grad_norm": 3.099193418165669e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1440 }, { "epoch": 0.007032237961232969, "grad_norm": 5.248703746474348e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1450 }, { "epoch": 0.007080736154069059, "grad_norm": 3.08235757984221e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1460 }, { "epoch": 0.007129234346905149, "grad_norm": 2.9010943762841634e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1470 }, { "epoch": 0.007177732539741238, "grad_norm": 2.777119880192913e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1480 }, { "epoch": 0.007226230732577327, "grad_norm": 3.2253497920464724e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1490 }, { "epoch": 0.007274728925413417, "grad_norm": 3.854385795420967e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1500 }, { "epoch": 0.007323227118249506, "grad_norm": 3.146160452160984e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1510 }, { "epoch": 0.007371725311085596, "grad_norm": 3.8721493183402345e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1520 }, { "epoch": 0.007420223503921685, "grad_norm": 3.830649802694097e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1530 }, { "epoch": 0.007468721696757775, "grad_norm": 4.165820428170264e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1540 }, { "epoch": 0.007517219889593864, "grad_norm": 2.9074370104353875e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1550 }, { "epoch": 0.007565718082429953, "grad_norm": 3.981046756962314e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1560 }, { "epoch": 0.007614216275266043, "grad_norm": 2.3329279429162852e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1570 }, { "epoch": 0.007662714468102133, "grad_norm": 2.3728951418888755e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1580 }, { "epoch": 0.007711212660938222, "grad_norm": 2.7157115255249664e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1590 }, { "epoch": 0.007759710853774311, "grad_norm": 0.00021933612879365683, "learning_rate": 0.0002, "loss": 0.0, "step": 1600 }, { "epoch": 0.0078082090466104, "grad_norm": 2.6189622076344676e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1610 }, { "epoch": 0.00785670723944649, "grad_norm": 2.3484824851038866e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1620 }, { "epoch": 0.00790520543228258, "grad_norm": 6.653254968114197e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1630 }, { "epoch": 0.00795370362511867, "grad_norm": 3.896644193446264e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1640 }, { "epoch": 0.008002201817954758, "grad_norm": 2.1929208742221817e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1650 }, { "epoch": 0.008050700010790848, "grad_norm": 4.070538125233725e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1660 }, { "epoch": 0.008099198203626937, "grad_norm": 3.272448884672485e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1670 }, { "epoch": 0.008147696396463027, "grad_norm": 2.060471524600871e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1680 }, { "epoch": 0.008196194589299117, "grad_norm": 2.8271982955629937e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1690 }, { "epoch": 0.008244692782135205, "grad_norm": 2.653411138453521e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1700 }, { "epoch": 0.008293190974971295, "grad_norm": 2.1678171833627857e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1710 }, { "epoch": 0.008341689167807385, "grad_norm": 3.568627289496362e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1720 }, { "epoch": 0.008390187360643474, "grad_norm": 2.1752657630713657e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1730 }, { "epoch": 0.008438685553479564, "grad_norm": 2.143480560334865e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1740 }, { "epoch": 0.008487183746315652, "grad_norm": 2.1234794985502958e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1750 }, { "epoch": 0.008535681939151742, "grad_norm": 2.4433711587334983e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1760 }, { "epoch": 0.008584180131987832, "grad_norm": 2.3376820536213927e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1770 }, { "epoch": 0.00863267832482392, "grad_norm": 1.906247234728653e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1780 }, { "epoch": 0.00868117651766001, "grad_norm": 2.017230690398719e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1790 }, { "epoch": 0.008729674710496101, "grad_norm": 1.922561386891175e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1800 }, { "epoch": 0.00877817290333219, "grad_norm": 2.3250850063050166e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1810 }, { "epoch": 0.00882667109616828, "grad_norm": 3.5205499443691224e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1820 }, { "epoch": 0.008875169289004368, "grad_norm": 0.0002299403859069571, "learning_rate": 0.0002, "loss": 0.0, "step": 1830 }, { "epoch": 0.008923667481840458, "grad_norm": 2.2499156330013648e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1840 }, { "epoch": 0.008972165674676548, "grad_norm": 2.3464735932066105e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1850 }, { "epoch": 0.009020663867512636, "grad_norm": 1.929642712639179e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1860 }, { "epoch": 0.009069162060348726, "grad_norm": 2.0325765945017338e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1870 }, { "epoch": 0.009117660253184817, "grad_norm": 1.6705007510608993e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1880 }, { "epoch": 0.009166158446020905, "grad_norm": 1.7195268810610287e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1890 }, { "epoch": 0.009214656638856995, "grad_norm": 2.0616414985852316e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1900 }, { "epoch": 0.009263154831693083, "grad_norm": 1.885048004623968e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1910 }, { "epoch": 0.009311653024529173, "grad_norm": 1.868306935648434e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1920 }, { "epoch": 0.009360151217365264, "grad_norm": 1.898629852803424e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1930 }, { "epoch": 0.009408649410201352, "grad_norm": 1.6915073501877487e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1940 }, { "epoch": 0.009457147603037442, "grad_norm": 1.8064161849906668e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1950 }, { "epoch": 0.009505645795873532, "grad_norm": 1.5304469343391247e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1960 }, { "epoch": 0.00955414398870962, "grad_norm": 1.5944673577905633e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1970 }, { "epoch": 0.00960264218154571, "grad_norm": 1.9765251636272296e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1980 }, { "epoch": 0.009651140374381799, "grad_norm": 1.6660320397932082e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 1990 }, { "epoch": 0.009699638567217889, "grad_norm": 2.0551906345644966e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2000 }, { "epoch": 0.009748136760053979, "grad_norm": 1.7338046745862812e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2010 }, { "epoch": 0.009796634952890067, "grad_norm": 1.5409554180223495e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2020 }, { "epoch": 0.009845133145726158, "grad_norm": 1.82247440534411e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2030 }, { "epoch": 0.009893631338562248, "grad_norm": 1.5623136278009042e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2040 }, { "epoch": 0.009942129531398336, "grad_norm": 1.765242632245645e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2050 }, { "epoch": 0.009990627724234426, "grad_norm": 2.3168264306150377e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2060 }, { "epoch": 0.010039125917070514, "grad_norm": 1.4715008546772879e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2070 }, { "epoch": 0.010087624109906605, "grad_norm": 1.6390282326028682e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2080 }, { "epoch": 0.010136122302742695, "grad_norm": 1.8113227270077914e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2090 }, { "epoch": 0.010184620495578783, "grad_norm": 1.522900493000634e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2100 }, { "epoch": 0.010233118688414873, "grad_norm": 5.7907847804017365e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2110 }, { "epoch": 0.010281616881250963, "grad_norm": 4.280299981473945e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2120 }, { "epoch": 0.010330115074087052, "grad_norm": 2.561258770583663e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2130 }, { "epoch": 0.010378613266923142, "grad_norm": 1.683459777268581e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2140 }, { "epoch": 0.01042711145975923, "grad_norm": 1.6231466361205094e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2150 }, { "epoch": 0.01047560965259532, "grad_norm": 1.5644258382963017e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2160 }, { "epoch": 0.01052410784543141, "grad_norm": 1.526105188531801e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2170 }, { "epoch": 0.010572606038267499, "grad_norm": 1.3587435205408838e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2180 }, { "epoch": 0.010621104231103589, "grad_norm": 1.4808405467192642e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2190 }, { "epoch": 0.010669602423939679, "grad_norm": 1.6233525457209907e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2200 }, { "epoch": 0.010718100616775767, "grad_norm": 1.4166364053380676e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2210 }, { "epoch": 0.010766598809611857, "grad_norm": 1.7918455341714434e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2220 }, { "epoch": 0.010815097002447946, "grad_norm": 1.4236725291993935e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2230 }, { "epoch": 0.010863595195284036, "grad_norm": 2.258961285406258e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2240 }, { "epoch": 0.010912093388120126, "grad_norm": 6.816037785029039e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2250 }, { "epoch": 0.010960591580956214, "grad_norm": 1.4133715922071133e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2260 }, { "epoch": 0.011009089773792304, "grad_norm": 7.256287790369242e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2270 }, { "epoch": 0.011057587966628394, "grad_norm": 1.3184239833208267e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2280 }, { "epoch": 0.011106086159464483, "grad_norm": 1.8319497030461207e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2290 }, { "epoch": 0.011154584352300573, "grad_norm": 1.9117042029392906e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2300 }, { "epoch": 0.011203082545136661, "grad_norm": 1.691196666797623e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2310 }, { "epoch": 0.011251580737972751, "grad_norm": 1.245241492142668e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2320 }, { "epoch": 0.011300078930808841, "grad_norm": 1.2204306585772429e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2330 }, { "epoch": 0.01134857712364493, "grad_norm": 1.2878972484031692e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2340 }, { "epoch": 0.01139707531648102, "grad_norm": 1.7712654880597256e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2350 }, { "epoch": 0.01144557350931711, "grad_norm": 1.104198963730596e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2360 }, { "epoch": 0.011494071702153198, "grad_norm": 1.6224576029344462e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2370 }, { "epoch": 0.011542569894989288, "grad_norm": 4.50775733042974e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2380 }, { "epoch": 0.011591068087825377, "grad_norm": 1.3533247511077207e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2390 }, { "epoch": 0.011639566280661467, "grad_norm": 1.2716705896309577e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2400 }, { "epoch": 0.011688064473497557, "grad_norm": 1.6828880688990466e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2410 }, { "epoch": 0.011736562666333645, "grad_norm": 9.919780495692976e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2420 }, { "epoch": 0.011785060859169735, "grad_norm": 1.0682289939722978e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2430 }, { "epoch": 0.011833559052005825, "grad_norm": 1.1918985364900436e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2440 }, { "epoch": 0.011882057244841914, "grad_norm": 1.3779738765151706e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2450 }, { "epoch": 0.011930555437678004, "grad_norm": 1.4122771062829997e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2460 }, { "epoch": 0.011979053630514092, "grad_norm": 9.715343367133755e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2470 }, { "epoch": 0.012027551823350182, "grad_norm": 9.562658306094818e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2480 }, { "epoch": 0.012076050016186272, "grad_norm": 1.53866258187918e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2490 }, { "epoch": 0.01212454820902236, "grad_norm": 9.04327771422686e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2500 }, { "epoch": 0.012173046401858451, "grad_norm": 1.553094625705853e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2510 }, { "epoch": 0.012221544594694541, "grad_norm": 7.808428927091882e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2520 }, { "epoch": 0.01227004278753063, "grad_norm": 0.00012583464558701962, "learning_rate": 0.0002, "loss": 0.0, "step": 2530 }, { "epoch": 0.01231854098036672, "grad_norm": 4.390890899230726e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2540 }, { "epoch": 0.012367039173202808, "grad_norm": 1.4778895092604216e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2550 }, { "epoch": 0.012415537366038898, "grad_norm": 3.14268545480445e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2560 }, { "epoch": 0.012464035558874988, "grad_norm": 9.115723514696583e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2570 }, { "epoch": 0.012512533751711076, "grad_norm": 1.0017302884079982e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2580 }, { "epoch": 0.012561031944547166, "grad_norm": 1.0675325938791502e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2590 }, { "epoch": 0.012609530137383257, "grad_norm": 2.359724931011442e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2600 }, { "epoch": 0.012658028330219345, "grad_norm": 9.685149052529596e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2610 }, { "epoch": 0.012706526523055435, "grad_norm": 9.80174354481278e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2620 }, { "epoch": 0.012755024715891523, "grad_norm": 1.0028861652244814e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2630 }, { "epoch": 0.012803522908727613, "grad_norm": 8.712539965927135e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2640 }, { "epoch": 0.012852021101563704, "grad_norm": 1.604030330781825e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2650 }, { "epoch": 0.012900519294399792, "grad_norm": 8.729934052098542e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2660 }, { "epoch": 0.012949017487235882, "grad_norm": 1.3966686310595833e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2670 }, { "epoch": 0.012997515680071972, "grad_norm": 8.341104148712475e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2680 }, { "epoch": 0.01304601387290806, "grad_norm": 9.81849279924063e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2690 }, { "epoch": 0.01309451206574415, "grad_norm": 8.882200745574664e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2700 }, { "epoch": 0.013143010258580239, "grad_norm": 8.753276233619545e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2710 }, { "epoch": 0.013191508451416329, "grad_norm": 1.2641076864383649e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2720 }, { "epoch": 0.013240006644252419, "grad_norm": 0.00010245622252114117, "learning_rate": 0.0002, "loss": 0.0, "step": 2730 }, { "epoch": 0.013288504837088507, "grad_norm": 7.4595632213458885e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2740 }, { "epoch": 0.013337003029924598, "grad_norm": 7.881838428147603e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2750 }, { "epoch": 0.013385501222760688, "grad_norm": 7.692422514082864e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2760 }, { "epoch": 0.013433999415596776, "grad_norm": 0.0005643400945700705, "learning_rate": 0.0002, "loss": 0.0, "step": 2770 }, { "epoch": 0.013482497608432866, "grad_norm": 4.5699194743065163e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2780 }, { "epoch": 0.013530995801268954, "grad_norm": 9.086073987418786e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2790 }, { "epoch": 0.013579493994105045, "grad_norm": 8.126808097586036e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2800 }, { "epoch": 0.013627992186941135, "grad_norm": 7.87043336458737e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2810 }, { "epoch": 0.013676490379777223, "grad_norm": 7.0593073360214476e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2820 }, { "epoch": 0.013724988572613313, "grad_norm": 8.957466889114585e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2830 }, { "epoch": 0.013773486765449403, "grad_norm": 8.654357770865317e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2840 }, { "epoch": 0.013821984958285492, "grad_norm": 2.161105840059463e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2850 }, { "epoch": 0.013870483151121582, "grad_norm": 7.29957764633582e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2860 }, { "epoch": 0.01391898134395767, "grad_norm": 7.690146958339028e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2870 }, { "epoch": 0.01396747953679376, "grad_norm": 1.1637890565907583e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 2880 }, { "epoch": 0.01401597772962985, "grad_norm": 6.603559086215682e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2890 }, { "epoch": 0.014064475922465939, "grad_norm": 9.100453098653816e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2900 }, { "epoch": 0.014112974115302029, "grad_norm": 6.8998360802652314e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2910 }, { "epoch": 0.014161472308138119, "grad_norm": 7.798586921126116e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2920 }, { "epoch": 0.014209970500974207, "grad_norm": 7.484350135200657e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2930 }, { "epoch": 0.014258468693810297, "grad_norm": 7.186685706983553e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2940 }, { "epoch": 0.014306966886646386, "grad_norm": 6.433962425944628e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2950 }, { "epoch": 0.014355465079482476, "grad_norm": 7.992311111593153e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2960 }, { "epoch": 0.014403963272318566, "grad_norm": 7.124964668037137e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2970 }, { "epoch": 0.014452461465154654, "grad_norm": 6.7107043832947966e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2980 }, { "epoch": 0.014500959657990744, "grad_norm": 6.842409675300587e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 2990 }, { "epoch": 0.014549457850826834, "grad_norm": 9.519959348835982e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3000 }, { "epoch": 0.014597956043662923, "grad_norm": 7.3539745244488586e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3010 }, { "epoch": 0.014646454236499013, "grad_norm": 3.542835474945605e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3020 }, { "epoch": 0.014694952429335101, "grad_norm": 6.518949703604449e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3030 }, { "epoch": 0.014743450622171191, "grad_norm": 6.302459496509982e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3040 }, { "epoch": 0.014791948815007281, "grad_norm": 8.290636287711095e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3050 }, { "epoch": 0.01484044700784337, "grad_norm": 6.686010237899609e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3060 }, { "epoch": 0.01488894520067946, "grad_norm": 6.506842964881798e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3070 }, { "epoch": 0.01493744339351555, "grad_norm": 7.361989901255583e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3080 }, { "epoch": 0.014985941586351638, "grad_norm": 7.135491159715457e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3090 }, { "epoch": 0.015034439779187728, "grad_norm": 5.790159320895327e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3100 }, { "epoch": 0.015082937972023817, "grad_norm": 7.027762876532506e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3110 }, { "epoch": 0.015131436164859907, "grad_norm": 6.8007961999683175e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3120 }, { "epoch": 0.015179934357695997, "grad_norm": 5.944532404100755e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3130 }, { "epoch": 0.015228432550532085, "grad_norm": 7.326246759475907e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3140 }, { "epoch": 0.015276930743368175, "grad_norm": 6.906762337166583e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3150 }, { "epoch": 0.015325428936204265, "grad_norm": 8.032100595301017e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3160 }, { "epoch": 0.015373927129040354, "grad_norm": 6.6143284129793756e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3170 }, { "epoch": 0.015422425321876444, "grad_norm": 2.45353949139826e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3180 }, { "epoch": 0.015470923514712532, "grad_norm": 5.806380613648798e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3190 }, { "epoch": 0.015519421707548622, "grad_norm": 5.925382538407575e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3200 }, { "epoch": 0.015567919900384712, "grad_norm": 9.669285645941272e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3210 }, { "epoch": 0.0156164180932208, "grad_norm": 7.251103397720726e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3220 }, { "epoch": 0.01566491628605689, "grad_norm": 5.892952685826458e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3230 }, { "epoch": 0.01571341447889298, "grad_norm": 6.900150765432045e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3240 }, { "epoch": 0.01576191267172907, "grad_norm": 4.975919637217885e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3250 }, { "epoch": 0.01581041086456516, "grad_norm": 6.328052222670522e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3260 }, { "epoch": 0.015858909057401248, "grad_norm": 7.860479854571167e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3270 }, { "epoch": 0.01590740725023734, "grad_norm": 9.689323633210734e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3280 }, { "epoch": 0.015955905443073428, "grad_norm": 6.136925549071748e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3290 }, { "epoch": 0.016004403635909516, "grad_norm": 5.009130745747825e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3300 }, { "epoch": 0.016052901828745605, "grad_norm": 6.14729424341931e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3310 }, { "epoch": 0.016101400021581697, "grad_norm": 8.438675649813376e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3320 }, { "epoch": 0.016149898214417785, "grad_norm": 6.502316409751074e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3330 }, { "epoch": 0.016198396407253873, "grad_norm": 4.764503955811961e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3340 }, { "epoch": 0.016246894600089965, "grad_norm": 7.841614205972292e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3350 }, { "epoch": 0.016295392792926053, "grad_norm": 7.957663910929114e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3360 }, { "epoch": 0.016343890985762142, "grad_norm": 5.29365797774517e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3370 }, { "epoch": 0.016392389178598234, "grad_norm": 1.014600638882257e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3380 }, { "epoch": 0.016440887371434322, "grad_norm": 6.124740593804745e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3390 }, { "epoch": 0.01648938556427041, "grad_norm": 4.6603704504377674e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3400 }, { "epoch": 0.016537883757106502, "grad_norm": 5.970346592221176e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3410 }, { "epoch": 0.01658638194994259, "grad_norm": 8.478864401695319e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3420 }, { "epoch": 0.01663488014277868, "grad_norm": 5.140093435329618e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3430 }, { "epoch": 0.01668337833561477, "grad_norm": 4.593049197865184e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3440 }, { "epoch": 0.01673187652845086, "grad_norm": 6.244899395824177e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3450 }, { "epoch": 0.016780374721286948, "grad_norm": 5.985345978842815e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3460 }, { "epoch": 0.016828872914123036, "grad_norm": 1.1667018952721264e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3470 }, { "epoch": 0.016877371106959128, "grad_norm": 7.372989330178825e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3480 }, { "epoch": 0.016925869299795216, "grad_norm": 6.632786244153976e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3490 }, { "epoch": 0.016974367492631304, "grad_norm": 4.123858616367215e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3500 }, { "epoch": 0.017022865685467396, "grad_norm": 4.820563844987191e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3510 }, { "epoch": 0.017071363878303485, "grad_norm": 4.740771601063898e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3520 }, { "epoch": 0.017119862071139573, "grad_norm": 5.228375357546611e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3530 }, { "epoch": 0.017168360263975665, "grad_norm": 8.86427460500272e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3540 }, { "epoch": 0.017216858456811753, "grad_norm": 6.689768270007335e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3550 }, { "epoch": 0.01726535664964784, "grad_norm": 4.605703452398302e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3560 }, { "epoch": 0.017313854842483933, "grad_norm": 5.2545428843586706e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3570 }, { "epoch": 0.01736235303532002, "grad_norm": 4.553956387098879e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3580 }, { "epoch": 0.01741085122815611, "grad_norm": 6.510889306809986e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3590 }, { "epoch": 0.017459349420992202, "grad_norm": 1.626556877454277e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3600 }, { "epoch": 0.01750784761382829, "grad_norm": 2.5818522772169672e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3610 }, { "epoch": 0.01755634580666438, "grad_norm": 4.981830443284707e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3620 }, { "epoch": 0.017604843999500467, "grad_norm": 7.332404493354261e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3630 }, { "epoch": 0.01765334219233656, "grad_norm": 4.982913196727168e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3640 }, { "epoch": 0.017701840385172647, "grad_norm": 4.848973730986472e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3650 }, { "epoch": 0.017750338578008736, "grad_norm": 4.935544438922079e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3660 }, { "epoch": 0.017798836770844827, "grad_norm": 4.610640644386876e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3670 }, { "epoch": 0.017847334963680916, "grad_norm": 4.471769898373168e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3680 }, { "epoch": 0.017895833156517004, "grad_norm": 5.6214221331174485e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3690 }, { "epoch": 0.017944331349353096, "grad_norm": 3.928593287128024e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3700 }, { "epoch": 0.017992829542189184, "grad_norm": 5.109195626573637e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3710 }, { "epoch": 0.018041327735025273, "grad_norm": 4.352950327302096e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3720 }, { "epoch": 0.018089825927861364, "grad_norm": 5.096158020023722e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3730 }, { "epoch": 0.018138324120697453, "grad_norm": 4.2065034904226195e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3740 }, { "epoch": 0.01818682231353354, "grad_norm": 4.007813458883902e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3750 }, { "epoch": 0.018235320506369633, "grad_norm": 5.790697287011426e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3760 }, { "epoch": 0.01828381869920572, "grad_norm": 5.204662556934636e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3770 }, { "epoch": 0.01833231689204181, "grad_norm": 4.336858182796277e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3780 }, { "epoch": 0.018380815084877898, "grad_norm": 4.627388534572674e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3790 }, { "epoch": 0.01842931327771399, "grad_norm": 3.4933414099214133e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3800 }, { "epoch": 0.01847781147055008, "grad_norm": 4.357022589829285e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3810 }, { "epoch": 0.018526309663386167, "grad_norm": 4.3372529034968466e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3820 }, { "epoch": 0.01857480785622226, "grad_norm": 5.116212832945166e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3830 }, { "epoch": 0.018623306049058347, "grad_norm": 3.6247556636226363e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3840 }, { "epoch": 0.018671804241894435, "grad_norm": 3.6871861084364355e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3850 }, { "epoch": 0.018720302434730527, "grad_norm": 4.280244866095018e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 3860 }, { "epoch": 0.018768800627566615, "grad_norm": 0.000147980113979429, "learning_rate": 0.0002, "loss": 0.0002, "step": 3870 }, { "epoch": 0.018817298820402704, "grad_norm": 0.005175785161554813, "learning_rate": 0.0002, "loss": 0.0002, "step": 3880 }, { "epoch": 0.018865797013238796, "grad_norm": 0.0010898062027990818, "learning_rate": 0.0002, "loss": 0.0001, "step": 3890 }, { "epoch": 0.018914295206074884, "grad_norm": 0.000751074287109077, "learning_rate": 0.0002, "loss": 0.0, "step": 3900 }, { "epoch": 0.018962793398910972, "grad_norm": 0.000293889781460166, "learning_rate": 0.0002, "loss": 0.0, "step": 3910 }, { "epoch": 0.019011291591747064, "grad_norm": 0.06451054662466049, "learning_rate": 0.0002, "loss": 0.0003, "step": 3920 }, { "epoch": 0.019059789784583153, "grad_norm": 0.00029681934393011034, "learning_rate": 0.0002, "loss": 0.0002, "step": 3930 }, { "epoch": 0.01910828797741924, "grad_norm": 0.002394045004621148, "learning_rate": 0.0002, "loss": 0.0001, "step": 3940 }, { "epoch": 0.01915678617025533, "grad_norm": 0.000615072320215404, "learning_rate": 0.0002, "loss": 0.0, "step": 3950 }, { "epoch": 0.01920528436309142, "grad_norm": 0.00019204133423045278, "learning_rate": 0.0002, "loss": 0.0001, "step": 3960 }, { "epoch": 0.01925378255592751, "grad_norm": 0.00017246766947209835, "learning_rate": 0.0002, "loss": 0.0, "step": 3970 }, { "epoch": 0.019302280748763598, "grad_norm": 5.972445069346577e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3980 }, { "epoch": 0.01935077894159969, "grad_norm": 8.652180258650333e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 3990 }, { "epoch": 0.019399277134435778, "grad_norm": 5.771993528469466e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4000 }, { "epoch": 0.019447775327271866, "grad_norm": 6.803420546930283e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4010 }, { "epoch": 0.019496273520107958, "grad_norm": 4.5632696128450334e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4020 }, { "epoch": 0.019544771712944047, "grad_norm": 3.5567369195632637e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4030 }, { "epoch": 0.019593269905780135, "grad_norm": 4.564813571050763e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4040 }, { "epoch": 0.019641768098616227, "grad_norm": 2.9909733711974695e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4050 }, { "epoch": 0.019690266291452315, "grad_norm": 3.533604831318371e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4060 }, { "epoch": 0.019738764484288403, "grad_norm": 2.3603570298291743e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4070 }, { "epoch": 0.019787262677124495, "grad_norm": 2.0972131096641533e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4080 }, { "epoch": 0.019835760869960584, "grad_norm": 2.308067269041203e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4090 }, { "epoch": 0.019884259062796672, "grad_norm": 1.7689066226012073e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4100 }, { "epoch": 0.01993275725563276, "grad_norm": 2.2692727725370787e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4110 }, { "epoch": 0.019981255448468852, "grad_norm": 1.6669189790263772e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4120 }, { "epoch": 0.02002975364130494, "grad_norm": 1.6372929167118855e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4130 }, { "epoch": 0.02007825183414103, "grad_norm": 2.0123196009080857e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4140 }, { "epoch": 0.02012675002697712, "grad_norm": 1.3425964425550774e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4150 }, { "epoch": 0.02017524821981321, "grad_norm": 1.52843877003761e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4160 }, { "epoch": 0.020223746412649297, "grad_norm": 1.2465356121538207e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4170 }, { "epoch": 0.02027224460548539, "grad_norm": 1.4900879250490107e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4180 }, { "epoch": 0.020320742798321478, "grad_norm": 1.3102378034091089e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4190 }, { "epoch": 0.020369240991157566, "grad_norm": 1.237213746207999e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4200 }, { "epoch": 0.020417739183993658, "grad_norm": 3.085875869146548e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4210 }, { "epoch": 0.020466237376829746, "grad_norm": 1.363729370496003e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4220 }, { "epoch": 0.020514735569665835, "grad_norm": 1.1033542250515893e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4230 }, { "epoch": 0.020563233762501926, "grad_norm": 1.0127472705789842e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4240 }, { "epoch": 0.020611731955338015, "grad_norm": 2.4914486857596785e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4250 }, { "epoch": 0.020660230148174103, "grad_norm": 1.1018728400813416e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4260 }, { "epoch": 0.02070872834101019, "grad_norm": 1.050081027642591e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4270 }, { "epoch": 0.020757226533846283, "grad_norm": 9.662238880991936e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4280 }, { "epoch": 0.02080572472668237, "grad_norm": 6.67058520775754e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4290 }, { "epoch": 0.02085422291951846, "grad_norm": 6.718300937791355e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4300 }, { "epoch": 0.020902721112354552, "grad_norm": 8.937502570915967e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4310 }, { "epoch": 0.02095121930519064, "grad_norm": 8.615298611402977e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4320 }, { "epoch": 0.02099971749802673, "grad_norm": 9.202142791764345e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4330 }, { "epoch": 0.02104821569086282, "grad_norm": 9.496915481577162e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4340 }, { "epoch": 0.02109671388369891, "grad_norm": 7.537736109952675e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4350 }, { "epoch": 0.021145212076534997, "grad_norm": 1.0372224096499849e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4360 }, { "epoch": 0.02119371026937109, "grad_norm": 7.578591976198368e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4370 }, { "epoch": 0.021242208462207177, "grad_norm": 8.458661795884836e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4380 }, { "epoch": 0.021290706655043266, "grad_norm": 6.466019385698019e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4390 }, { "epoch": 0.021339204847879358, "grad_norm": 6.0869924709550105e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4400 }, { "epoch": 0.021387703040715446, "grad_norm": 1.0014898180088494e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 4410 }, { "epoch": 0.021436201233551534, "grad_norm": 7.19835952622816e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4420 }, { "epoch": 0.021484699426387623, "grad_norm": 7.454506430804031e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4430 }, { "epoch": 0.021533197619223714, "grad_norm": 5.866335868631722e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4440 }, { "epoch": 0.021581695812059803, "grad_norm": 5.057648650108604e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4450 }, { "epoch": 0.02163019400489589, "grad_norm": 7.253324838529807e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4460 }, { "epoch": 0.021678692197731983, "grad_norm": 6.581742582056904e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4470 }, { "epoch": 0.02172719039056807, "grad_norm": 5.926709945924813e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4480 }, { "epoch": 0.02177568858340416, "grad_norm": 7.6030778473068494e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4490 }, { "epoch": 0.02182418677624025, "grad_norm": 9.102960575546604e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4500 }, { "epoch": 0.02187268496907634, "grad_norm": 6.020535238349112e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4510 }, { "epoch": 0.021921183161912428, "grad_norm": 6.535823558806442e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4520 }, { "epoch": 0.02196968135474852, "grad_norm": 5.935777153354138e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4530 }, { "epoch": 0.02201817954758461, "grad_norm": 4.272212208888959e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4540 }, { "epoch": 0.022066677740420697, "grad_norm": 6.957144705665996e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4550 }, { "epoch": 0.02211517593325679, "grad_norm": 6.685822881991044e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4560 }, { "epoch": 0.022163674126092877, "grad_norm": 5.528954716282897e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4570 }, { "epoch": 0.022212172318928965, "grad_norm": 5.255358701106161e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4580 }, { "epoch": 0.022260670511765054, "grad_norm": 4.053223165101372e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4590 }, { "epoch": 0.022309168704601146, "grad_norm": 3.8770899664086755e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4600 }, { "epoch": 0.022357666897437234, "grad_norm": 5.152530320629012e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4610 }, { "epoch": 0.022406165090273322, "grad_norm": 5.280640380078694e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4620 }, { "epoch": 0.022454663283109414, "grad_norm": 4.7365474529215135e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4630 }, { "epoch": 0.022503161475945502, "grad_norm": 4.591072865878232e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4640 }, { "epoch": 0.02255165966878159, "grad_norm": 3.327942522446392e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4650 }, { "epoch": 0.022600157861617683, "grad_norm": 4.487148544285446e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4660 }, { "epoch": 0.02264865605445377, "grad_norm": 7.578940312669147e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4670 }, { "epoch": 0.02269715424728986, "grad_norm": 4.973917384631932e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4680 }, { "epoch": 0.02274565244012595, "grad_norm": 3.4794293242157437e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4690 }, { "epoch": 0.02279415063296204, "grad_norm": 3.752985094251926e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4700 }, { "epoch": 0.022842648825798128, "grad_norm": 4.659466867451556e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4710 }, { "epoch": 0.02289114701863422, "grad_norm": 4.405026174936211e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4720 }, { "epoch": 0.022939645211470308, "grad_norm": 4.49555318482453e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4730 }, { "epoch": 0.022988143404306396, "grad_norm": 6.463425052061211e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4740 }, { "epoch": 0.023036641597142485, "grad_norm": 5.992133537802147e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4750 }, { "epoch": 0.023085139789978577, "grad_norm": 4.599171461450169e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4760 }, { "epoch": 0.023133637982814665, "grad_norm": 4.515142791206017e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4770 }, { "epoch": 0.023182136175650753, "grad_norm": 4.438672476680949e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4780 }, { "epoch": 0.023230634368486845, "grad_norm": 3.747899882000638e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4790 }, { "epoch": 0.023279132561322934, "grad_norm": 2.7550192953640362e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4800 }, { "epoch": 0.023327630754159022, "grad_norm": 3.6824023936787853e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4810 }, { "epoch": 0.023376128946995114, "grad_norm": 3.5385846786084585e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4820 }, { "epoch": 0.023424627139831202, "grad_norm": 3.852031568385428e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4830 }, { "epoch": 0.02347312533266729, "grad_norm": 3.915621618943987e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4840 }, { "epoch": 0.023521623525503382, "grad_norm": 2.721561941143591e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4850 }, { "epoch": 0.02357012171833947, "grad_norm": 3.8135217437229585e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4860 }, { "epoch": 0.02361861991117556, "grad_norm": 3.7928032270428957e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4870 }, { "epoch": 0.02366711810401165, "grad_norm": 3.428408490435686e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4880 }, { "epoch": 0.02371561629684774, "grad_norm": 3.1753231723996578e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4890 }, { "epoch": 0.023764114489683828, "grad_norm": 4.267127678758698e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4900 }, { "epoch": 0.023812612682519916, "grad_norm": 4.504745447775349e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4910 }, { "epoch": 0.023861110875356008, "grad_norm": 3.2750826903793495e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4920 }, { "epoch": 0.023909609068192096, "grad_norm": 3.1820620733924443e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4930 }, { "epoch": 0.023958107261028184, "grad_norm": 2.6675136268750066e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4940 }, { "epoch": 0.024006605453864276, "grad_norm": 2.58473164649331e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4950 }, { "epoch": 0.024055103646700365, "grad_norm": 5.350205356080551e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4960 }, { "epoch": 0.024103601839536453, "grad_norm": 4.113187060283963e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4970 }, { "epoch": 0.024152100032372545, "grad_norm": 3.677889480968588e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4980 }, { "epoch": 0.024200598225208633, "grad_norm": 3.0999124192021554e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 4990 }, { "epoch": 0.02424909641804472, "grad_norm": 7.150189503590809e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5000 }, { "epoch": 0.024297594610880813, "grad_norm": 3.3025883112713927e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5010 }, { "epoch": 0.024346092803716902, "grad_norm": 3.072261051784153e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5020 }, { "epoch": 0.02439459099655299, "grad_norm": 3.4884751585195772e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5030 }, { "epoch": 0.024443089189389082, "grad_norm": 3.689929371830658e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5040 }, { "epoch": 0.02449158738222517, "grad_norm": 2.274314510941622e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5050 }, { "epoch": 0.02454008557506126, "grad_norm": 2.847176801878959e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5060 }, { "epoch": 0.024588583767897347, "grad_norm": 6.622024102398427e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5070 }, { "epoch": 0.02463708196073344, "grad_norm": 3.037089982171892e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5080 }, { "epoch": 0.024685580153569527, "grad_norm": 2.118448264809558e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5090 }, { "epoch": 0.024734078346405616, "grad_norm": 2.7511309781402815e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5100 }, { "epoch": 0.024782576539241707, "grad_norm": 2.9641255423484836e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5110 }, { "epoch": 0.024831074732077796, "grad_norm": 3.823731731245061e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5120 }, { "epoch": 0.024879572924913884, "grad_norm": 2.6338213956478285e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5130 }, { "epoch": 0.024928071117749976, "grad_norm": 1.78948653228872e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5140 }, { "epoch": 0.024976569310586064, "grad_norm": 3.1887946079223184e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5150 }, { "epoch": 0.025025067503422153, "grad_norm": 4.198217993689468e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5160 }, { "epoch": 0.025073565696258245, "grad_norm": 2.6893685571849346e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5170 }, { "epoch": 0.025122063889094333, "grad_norm": 3.478677854218404e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5180 }, { "epoch": 0.02517056208193042, "grad_norm": 2.3415725536324317e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5190 }, { "epoch": 0.025219060274766513, "grad_norm": 2.59586090578523e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5200 }, { "epoch": 0.0252675584676026, "grad_norm": 2.6800321393238846e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5210 }, { "epoch": 0.02531605666043869, "grad_norm": 3.2251898574031657e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5220 }, { "epoch": 0.025364554853274778, "grad_norm": 2.7524524739419576e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5230 }, { "epoch": 0.02541305304611087, "grad_norm": 2.131657083737082e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5240 }, { "epoch": 0.02546155123894696, "grad_norm": 3.324904128021444e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5250 }, { "epoch": 0.025510049431783047, "grad_norm": 3.788725507547497e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5260 }, { "epoch": 0.02555854762461914, "grad_norm": 2.4323589968844317e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5270 }, { "epoch": 0.025607045817455227, "grad_norm": 2.717413508435129e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5280 }, { "epoch": 0.025655544010291315, "grad_norm": 1.5863333828747272e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5290 }, { "epoch": 0.025704042203127407, "grad_norm": 1.784949745342601e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5300 }, { "epoch": 0.025752540395963495, "grad_norm": 2.4657088033563923e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5310 }, { "epoch": 0.025801038588799584, "grad_norm": 2.1449973246490117e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5320 }, { "epoch": 0.025849536781635676, "grad_norm": 2.2178849121701205e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5330 }, { "epoch": 0.025898034974471764, "grad_norm": 1.7641636986809317e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5340 }, { "epoch": 0.025946533167307852, "grad_norm": 1.5724574495834531e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5350 }, { "epoch": 0.025995031360143944, "grad_norm": 2.458776179992128e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5360 }, { "epoch": 0.026043529552980033, "grad_norm": 1.046300803864142e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 5370 }, { "epoch": 0.02609202774581612, "grad_norm": 2.304275767528452e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5380 }, { "epoch": 0.02614052593865221, "grad_norm": 1.760232521519356e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5390 }, { "epoch": 0.0261890241314883, "grad_norm": 2.8884824132546782e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5400 }, { "epoch": 0.02623752232432439, "grad_norm": 2.2657613953924738e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5410 }, { "epoch": 0.026286020517160478, "grad_norm": 3.362335291967611e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5420 }, { "epoch": 0.02633451870999657, "grad_norm": 3.7523834635067033e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5430 }, { "epoch": 0.026383016902832658, "grad_norm": 1.6964039559752564e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5440 }, { "epoch": 0.026431515095668746, "grad_norm": 1.7424506495444803e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5450 }, { "epoch": 0.026480013288504838, "grad_norm": 1.981734158107429e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5460 }, { "epoch": 0.026528511481340927, "grad_norm": 2.2660519789496902e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5470 }, { "epoch": 0.026577009674177015, "grad_norm": 1.8804029195962357e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5480 }, { "epoch": 0.026625507867013107, "grad_norm": 1.597534946995438e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5490 }, { "epoch": 0.026674006059849195, "grad_norm": 1.8392929632682353e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5500 }, { "epoch": 0.026722504252685284, "grad_norm": 2.003287590923719e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5510 }, { "epoch": 0.026771002445521375, "grad_norm": 1.9878004877682542e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5520 }, { "epoch": 0.026819500638357464, "grad_norm": 2.1153778106963728e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5530 }, { "epoch": 0.026867998831193552, "grad_norm": 1.3602144690594287e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5540 }, { "epoch": 0.02691649702402964, "grad_norm": 1.456858626625035e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5550 }, { "epoch": 0.026964995216865732, "grad_norm": 2.006691147471429e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5560 }, { "epoch": 0.02701349340970182, "grad_norm": 1.7461542256569373e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5570 }, { "epoch": 0.02706199160253791, "grad_norm": 2.09904783332604e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5580 }, { "epoch": 0.027110489795374, "grad_norm": 1.873731889645569e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5590 }, { "epoch": 0.02715898798821009, "grad_norm": 1.4515887869492872e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5600 }, { "epoch": 0.027207486181046178, "grad_norm": 8.938342944020405e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5610 }, { "epoch": 0.02725598437388227, "grad_norm": 1.7140040426966152e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5620 }, { "epoch": 0.027304482566718358, "grad_norm": 1.7889514083435643e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5630 }, { "epoch": 0.027352980759554446, "grad_norm": 1.352867570858507e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5640 }, { "epoch": 0.027401478952390538, "grad_norm": 1.4753139794265735e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5650 }, { "epoch": 0.027449977145226626, "grad_norm": 3.1111608223000076e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5660 }, { "epoch": 0.027498475338062715, "grad_norm": 1.831096028581669e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5670 }, { "epoch": 0.027546973530898806, "grad_norm": 1.679124238762597e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5680 }, { "epoch": 0.027595471723734895, "grad_norm": 1.1117846270281007e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5690 }, { "epoch": 0.027643969916570983, "grad_norm": 1.332894839833898e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5700 }, { "epoch": 0.02769246810940707, "grad_norm": 1.863415377556521e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5710 }, { "epoch": 0.027740966302243163, "grad_norm": 2.1917510366620263e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5720 }, { "epoch": 0.02778946449507925, "grad_norm": 2.743742697930429e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5730 }, { "epoch": 0.02783796268791534, "grad_norm": 1.8529188992033596e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5740 }, { "epoch": 0.027886460880751432, "grad_norm": 1.8327979205423617e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5750 }, { "epoch": 0.02793495907358752, "grad_norm": 1.6647087477394962e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5760 }, { "epoch": 0.02798345726642361, "grad_norm": 3.528240540617844e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5770 }, { "epoch": 0.0280319554592597, "grad_norm": 1.6372666777897393e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5780 }, { "epoch": 0.02808045365209579, "grad_norm": 1.2784186083081295e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5790 }, { "epoch": 0.028128951844931877, "grad_norm": 6.4804671637830324e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5800 }, { "epoch": 0.02817745003776797, "grad_norm": 1.5408691069751512e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5810 }, { "epoch": 0.028225948230604057, "grad_norm": 1.5693647128500743e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5820 }, { "epoch": 0.028274446423440146, "grad_norm": 1.3767813697995734e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5830 }, { "epoch": 0.028322944616276238, "grad_norm": 1.5003261069068685e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5840 }, { "epoch": 0.028371442809112326, "grad_norm": 1.6482561022712616e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5850 }, { "epoch": 0.028419941001948414, "grad_norm": 1.8921342643807293e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5860 }, { "epoch": 0.028468439194784503, "grad_norm": 1.4811494111199863e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5870 }, { "epoch": 0.028516937387620594, "grad_norm": 1.6056187632784713e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5880 }, { "epoch": 0.028565435580456683, "grad_norm": 1.1609864714046125e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5890 }, { "epoch": 0.02861393377329277, "grad_norm": 1.0936063290500897e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5900 }, { "epoch": 0.028662431966128863, "grad_norm": 1.923538093251409e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5910 }, { "epoch": 0.02871093015896495, "grad_norm": 1.6176709323190153e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5920 }, { "epoch": 0.02875942835180104, "grad_norm": 1.4984035487941583e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5930 }, { "epoch": 0.02880792654463713, "grad_norm": 1.9110475477646105e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5940 }, { "epoch": 0.02885642473747322, "grad_norm": 1.1714619176927954e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5950 }, { "epoch": 0.02890492293030931, "grad_norm": 1.5909421335891238e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5960 }, { "epoch": 0.0289534211231454, "grad_norm": 1.7021901612679358e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5970 }, { "epoch": 0.02900191931598149, "grad_norm": 1.6353166074623005e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5980 }, { "epoch": 0.029050417508817577, "grad_norm": 1.140045696956804e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 5990 }, { "epoch": 0.02909891570165367, "grad_norm": 1.4807765182922594e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6000 }, { "epoch": 0.029147413894489757, "grad_norm": 1.6151669797181967e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6010 }, { "epoch": 0.029195912087325845, "grad_norm": 1.3339608813112136e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6020 }, { "epoch": 0.029244410280161934, "grad_norm": 1.475320686949999e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6030 }, { "epoch": 0.029292908472998026, "grad_norm": 9.851374898062204e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6040 }, { "epoch": 0.029341406665834114, "grad_norm": 1.0595335879770573e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6050 }, { "epoch": 0.029389904858670202, "grad_norm": 1.8264207710672054e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6060 }, { "epoch": 0.029438403051506294, "grad_norm": 1.3883766314393142e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6070 }, { "epoch": 0.029486901244342383, "grad_norm": 1.2820651136280503e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6080 }, { "epoch": 0.02953539943717847, "grad_norm": 8.203413131013804e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6090 }, { "epoch": 0.029583897630014563, "grad_norm": 1.0509206731512677e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6100 }, { "epoch": 0.02963239582285065, "grad_norm": 1.3165561085770605e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6110 }, { "epoch": 0.02968089401568674, "grad_norm": 1.371127495986002e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6120 }, { "epoch": 0.02972939220852283, "grad_norm": 1.3088398418403813e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6130 }, { "epoch": 0.02977789040135892, "grad_norm": 8.020974746614229e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6140 }, { "epoch": 0.029826388594195008, "grad_norm": 8.256639603132498e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6150 }, { "epoch": 0.0298748867870311, "grad_norm": 1.6114488516905112e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6160 }, { "epoch": 0.029923384979867188, "grad_norm": 1.1279647651463165e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6170 }, { "epoch": 0.029971883172703277, "grad_norm": 1.4041905842532287e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6180 }, { "epoch": 0.030020381365539365, "grad_norm": 9.05775664250541e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6190 }, { "epoch": 0.030068879558375457, "grad_norm": 8.512641898050788e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6200 }, { "epoch": 0.030117377751211545, "grad_norm": 1.1049038448618376e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6210 }, { "epoch": 0.030165875944047633, "grad_norm": 1.2509404996308149e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6220 }, { "epoch": 0.030214374136883725, "grad_norm": 1.1497409104777034e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6230 }, { "epoch": 0.030262872329719814, "grad_norm": 9.805697800402413e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6240 }, { "epoch": 0.030311370522555902, "grad_norm": 1.8075800198857905e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6250 }, { "epoch": 0.030359868715391994, "grad_norm": 1.2540725720100454e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6260 }, { "epoch": 0.030408366908228082, "grad_norm": 1.2826269539800705e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6270 }, { "epoch": 0.03045686510106417, "grad_norm": 1.2466510952435783e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6280 }, { "epoch": 0.030505363293900262, "grad_norm": 8.158988293871516e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6290 }, { "epoch": 0.03055386148673635, "grad_norm": 8.310245789289183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6300 }, { "epoch": 0.03060235967957244, "grad_norm": 1.0808730621647555e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6310 }, { "epoch": 0.03065085787240853, "grad_norm": 1.0902017493208405e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6320 }, { "epoch": 0.03069935606524462, "grad_norm": 1.2667898090512608e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6330 }, { "epoch": 0.030747854258080708, "grad_norm": 8.880857080839633e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6340 }, { "epoch": 0.030796352450916796, "grad_norm": 7.395253760478226e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6350 }, { "epoch": 0.030844850643752888, "grad_norm": 1.128622443502536e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6360 }, { "epoch": 0.030893348836588976, "grad_norm": 1.08216170247033e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6370 }, { "epoch": 0.030941847029425065, "grad_norm": 1.072320173989283e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6380 }, { "epoch": 0.030990345222261156, "grad_norm": 8.766087375988718e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6390 }, { "epoch": 0.031038843415097245, "grad_norm": 7.576267080366961e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6400 }, { "epoch": 0.031087341607933333, "grad_norm": 1.0021351499744924e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6410 }, { "epoch": 0.031135839800769425, "grad_norm": 1.1709004184012883e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6420 }, { "epoch": 0.031184337993605513, "grad_norm": 1.559836050546437e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6430 }, { "epoch": 0.0312328361864416, "grad_norm": 1.2029696563331527e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6440 }, { "epoch": 0.031281334379277694, "grad_norm": 9.419988487024966e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6450 }, { "epoch": 0.03132983257211378, "grad_norm": 1.206288402499922e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6460 }, { "epoch": 0.03137833076494987, "grad_norm": 1.1634209613475832e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6470 }, { "epoch": 0.03142682895778596, "grad_norm": 9.549381729812012e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6480 }, { "epoch": 0.03147532715062205, "grad_norm": 1.4586838688046555e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6490 }, { "epoch": 0.03152382534345814, "grad_norm": 7.601746006002941e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6500 }, { "epoch": 0.03157232353629423, "grad_norm": 1.1197259937034687e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6510 }, { "epoch": 0.03162082172913032, "grad_norm": 9.980121831176803e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6520 }, { "epoch": 0.03166931992196641, "grad_norm": 9.971429335564608e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6530 }, { "epoch": 0.031717818114802496, "grad_norm": 8.570155500819965e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6540 }, { "epoch": 0.031766316307638584, "grad_norm": 7.794938028382603e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6550 }, { "epoch": 0.03181481450047468, "grad_norm": 1.0422185141578666e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6560 }, { "epoch": 0.03186331269331077, "grad_norm": 1.103413865166658e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6570 }, { "epoch": 0.031911810886146856, "grad_norm": 9.470844020142977e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6580 }, { "epoch": 0.031960309078982944, "grad_norm": 1.091200033442874e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6590 }, { "epoch": 0.03200880727181903, "grad_norm": 9.823380651141633e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6600 }, { "epoch": 0.03205730546465512, "grad_norm": 1.8910671997218742e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6610 }, { "epoch": 0.03210580365749121, "grad_norm": 1.0724853609644924e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6620 }, { "epoch": 0.032154301850327305, "grad_norm": 1.0504863894311711e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6630 }, { "epoch": 0.03220280004316339, "grad_norm": 6.74482748763694e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6640 }, { "epoch": 0.03225129823599948, "grad_norm": 7.49486048334802e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6650 }, { "epoch": 0.03229979642883557, "grad_norm": 1.0481577419341193e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6660 }, { "epoch": 0.03234829462167166, "grad_norm": 1.0818408782142797e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6670 }, { "epoch": 0.03239679281450775, "grad_norm": 9.401964007338393e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6680 }, { "epoch": 0.03244529100734384, "grad_norm": 7.998722821866977e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6690 }, { "epoch": 0.03249378920017993, "grad_norm": 6.959142524465278e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6700 }, { "epoch": 0.03254228739301602, "grad_norm": 9.302750072492927e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6710 }, { "epoch": 0.03259078558585211, "grad_norm": 1.097988160836394e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6720 }, { "epoch": 0.032639283778688195, "grad_norm": 1.0671021755115362e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6730 }, { "epoch": 0.032687781971524284, "grad_norm": 6.495812954199209e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6740 }, { "epoch": 0.03273628016436037, "grad_norm": 6.774899361516873e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6750 }, { "epoch": 0.03278477835719647, "grad_norm": 9.678135484136874e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6760 }, { "epoch": 0.032833276550032556, "grad_norm": 1.3723405345444917e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6770 }, { "epoch": 0.032881774742868644, "grad_norm": 8.860332627591561e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6780 }, { "epoch": 0.03293027293570473, "grad_norm": 5.868838002243137e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6790 }, { "epoch": 0.03297877112854082, "grad_norm": 8.487099876219872e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6800 }, { "epoch": 0.03302726932137691, "grad_norm": 8.51440518090385e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6810 }, { "epoch": 0.033075767514213004, "grad_norm": 8.678387644067698e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6820 }, { "epoch": 0.03312426570704909, "grad_norm": 8.714200703252573e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6830 }, { "epoch": 0.03317276389988518, "grad_norm": 6.84522376559471e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6840 }, { "epoch": 0.03322126209272127, "grad_norm": 9.642675422583125e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6850 }, { "epoch": 0.03326976028555736, "grad_norm": 8.291227118206734e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6860 }, { "epoch": 0.033318258478393446, "grad_norm": 7.980240752658574e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6870 }, { "epoch": 0.03336675667122954, "grad_norm": 7.942321076370717e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6880 }, { "epoch": 0.03341525486406563, "grad_norm": 7.118452458598767e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6890 }, { "epoch": 0.03346375305690172, "grad_norm": 7.434635449499183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6900 }, { "epoch": 0.03351225124973781, "grad_norm": 7.97359405169118e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6910 }, { "epoch": 0.033560749442573895, "grad_norm": 7.983838372638274e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6920 }, { "epoch": 0.03360924763540998, "grad_norm": 8.643933711027785e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6930 }, { "epoch": 0.03365774582824607, "grad_norm": 1.1884338846357423e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 6940 }, { "epoch": 0.03370624402108217, "grad_norm": 9.361456818623992e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6950 }, { "epoch": 0.033754742213918255, "grad_norm": 8.003628408914665e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6960 }, { "epoch": 0.033803240406754344, "grad_norm": 9.414268333785003e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6970 }, { "epoch": 0.03385173859959043, "grad_norm": 8.157892921190069e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6980 }, { "epoch": 0.03390023679242652, "grad_norm": 6.370673872879706e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 6990 }, { "epoch": 0.03394873498526261, "grad_norm": 5.599525252364401e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7000 }, { "epoch": 0.033997233178098704, "grad_norm": 7.631907692484674e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7010 }, { "epoch": 0.03404573137093479, "grad_norm": 7.817619689376443e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7020 }, { "epoch": 0.03409422956377088, "grad_norm": 7.983555860846536e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7030 }, { "epoch": 0.03414272775660697, "grad_norm": 6.396456342372403e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7040 }, { "epoch": 0.03419122594944306, "grad_norm": 5.164787921785319e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7050 }, { "epoch": 0.034239724142279146, "grad_norm": 1.0428847190269153e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7060 }, { "epoch": 0.034288222335115234, "grad_norm": 8.104654511953413e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7070 }, { "epoch": 0.03433672052795133, "grad_norm": 7.261737096087018e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7080 }, { "epoch": 0.03438521872078742, "grad_norm": 5.642561973218108e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7090 }, { "epoch": 0.034433716913623506, "grad_norm": 7.073576853144914e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7100 }, { "epoch": 0.034482215106459595, "grad_norm": 1.015169232232438e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7110 }, { "epoch": 0.03453071329929568, "grad_norm": 1.3893140931031667e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7120 }, { "epoch": 0.03457921149213177, "grad_norm": 7.896301781329385e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7130 }, { "epoch": 0.03462770968496787, "grad_norm": 8.494426992911031e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7140 }, { "epoch": 0.034676207877803955, "grad_norm": 1.2362729648884851e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7150 }, { "epoch": 0.03472470607064004, "grad_norm": 7.205815677480132e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7160 }, { "epoch": 0.03477320426347613, "grad_norm": 1.0394353466836037e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7170 }, { "epoch": 0.03482170245631222, "grad_norm": 7.907627264103212e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7180 }, { "epoch": 0.03487020064914831, "grad_norm": 5.34492016868171e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7190 }, { "epoch": 0.034918698841984404, "grad_norm": 6.402156600415765e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7200 }, { "epoch": 0.03496719703482049, "grad_norm": 7.026828257039597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7210 }, { "epoch": 0.03501569522765658, "grad_norm": 6.985297318351513e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7220 }, { "epoch": 0.03506419342049267, "grad_norm": 7.680914677621331e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7230 }, { "epoch": 0.03511269161332876, "grad_norm": 6.360843372021918e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7240 }, { "epoch": 0.035161189806164846, "grad_norm": 5.803499334433582e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7250 }, { "epoch": 0.035209687999000934, "grad_norm": 6.745006544406351e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7260 }, { "epoch": 0.03525818619183703, "grad_norm": 8.165670806192793e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7270 }, { "epoch": 0.03530668438467312, "grad_norm": 6.444406608352438e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7280 }, { "epoch": 0.035355182577509206, "grad_norm": 5.073684405942913e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7290 }, { "epoch": 0.035403680770345294, "grad_norm": 6.176195483931224e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7300 }, { "epoch": 0.03545217896318138, "grad_norm": 2.04472735276795e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7310 }, { "epoch": 0.03550067715601747, "grad_norm": 6.790695579184103e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7320 }, { "epoch": 0.035549175348853566, "grad_norm": 7.038543117232621e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7330 }, { "epoch": 0.035597673541689655, "grad_norm": 6.160259999887785e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7340 }, { "epoch": 0.03564617173452574, "grad_norm": 5.123088158143219e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7350 }, { "epoch": 0.03569466992736183, "grad_norm": 6.473468374679214e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7360 }, { "epoch": 0.03574316812019792, "grad_norm": 9.152551001534448e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7370 }, { "epoch": 0.03579166631303401, "grad_norm": 6.788147857150761e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7380 }, { "epoch": 0.0358401645058701, "grad_norm": 4.875589638686506e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7390 }, { "epoch": 0.03588866269870619, "grad_norm": 5.757606800216308e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7400 }, { "epoch": 0.03593716089154228, "grad_norm": 6.48851482765167e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7410 }, { "epoch": 0.03598565908437837, "grad_norm": 6.584435254808341e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7420 }, { "epoch": 0.03603415727721446, "grad_norm": 1.234022420248948e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7430 }, { "epoch": 0.036082655470050545, "grad_norm": 6.042373001946544e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7440 }, { "epoch": 0.036131153662886634, "grad_norm": 6.047339979886601e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7450 }, { "epoch": 0.03617965185572273, "grad_norm": 1.0778143177958555e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7460 }, { "epoch": 0.03622815004855882, "grad_norm": 6.020958380759112e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7470 }, { "epoch": 0.036276648241394906, "grad_norm": 6.792161002522334e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7480 }, { "epoch": 0.036325146434230994, "grad_norm": 6.618384986722958e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7490 }, { "epoch": 0.03637364462706708, "grad_norm": 6.073414624552242e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7500 }, { "epoch": 0.03642214281990317, "grad_norm": 6.522837452394015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7510 }, { "epoch": 0.036470641012739266, "grad_norm": 7.305418421310605e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7520 }, { "epoch": 0.036519139205575354, "grad_norm": 7.006030955380993e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7530 }, { "epoch": 0.03656763739841144, "grad_norm": 6.681711397504841e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7540 }, { "epoch": 0.03661613559124753, "grad_norm": 4.743242243421264e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7550 }, { "epoch": 0.03666463378408362, "grad_norm": 6.2939659528638e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7560 }, { "epoch": 0.03671313197691971, "grad_norm": 5.732297267968534e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7570 }, { "epoch": 0.036761630169755796, "grad_norm": 5.827391191814968e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7580 }, { "epoch": 0.03681012836259189, "grad_norm": 4.680445044868975e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7590 }, { "epoch": 0.03685862655542798, "grad_norm": 4.6225366645558097e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7600 }, { "epoch": 0.03690712474826407, "grad_norm": 6.138201911198848e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7610 }, { "epoch": 0.03695562294110016, "grad_norm": 7.542025173279399e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7620 }, { "epoch": 0.037004121133936245, "grad_norm": 6.334535669338948e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7630 }, { "epoch": 0.03705261932677233, "grad_norm": 5.103269131723209e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7640 }, { "epoch": 0.03710111751960843, "grad_norm": 7.306296652132005e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7650 }, { "epoch": 0.03714961571244452, "grad_norm": 6.181127787385776e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7660 }, { "epoch": 0.037198113905280605, "grad_norm": 5.709754873350903e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7670 }, { "epoch": 0.037246612098116694, "grad_norm": 8.841549288263195e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7680 }, { "epoch": 0.03729511029095278, "grad_norm": 5.19513832841767e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7690 }, { "epoch": 0.03734360848378887, "grad_norm": 6.452839329540438e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7700 }, { "epoch": 0.03739210667662496, "grad_norm": 7.550966074632015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7710 }, { "epoch": 0.037440604869461054, "grad_norm": 5.433660703602072e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7720 }, { "epoch": 0.03748910306229714, "grad_norm": 7.591722237521026e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7730 }, { "epoch": 0.03753760125513323, "grad_norm": 4.188176774277963e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7740 }, { "epoch": 0.03758609944796932, "grad_norm": 5.383336088016222e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7750 }, { "epoch": 0.03763459764080541, "grad_norm": 5.872768156223174e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7760 }, { "epoch": 0.037683095833641496, "grad_norm": 5.60473665700556e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7770 }, { "epoch": 0.03773159402647759, "grad_norm": 6.036083277649595e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7780 }, { "epoch": 0.03778009221931368, "grad_norm": 4.321168205478898e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7790 }, { "epoch": 0.03782859041214977, "grad_norm": 4.3130378912792366e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7800 }, { "epoch": 0.037877088604985856, "grad_norm": 5.408683136920445e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7810 }, { "epoch": 0.037925586797821945, "grad_norm": 5.400399913924048e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7820 }, { "epoch": 0.03797408499065803, "grad_norm": 8.067245858001115e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7830 }, { "epoch": 0.03802258318349413, "grad_norm": 5.015812689634913e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7840 }, { "epoch": 0.03807108137633022, "grad_norm": 6.149574346636655e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 7850 }, { "epoch": 0.038119579569166305, "grad_norm": 5.355835810405551e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7860 }, { "epoch": 0.03816807776200239, "grad_norm": 6.066790092518204e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7870 }, { "epoch": 0.03821657595483848, "grad_norm": 5.298075507198519e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7880 }, { "epoch": 0.03826507414767457, "grad_norm": 6.718771601299522e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7890 }, { "epoch": 0.03831357234051066, "grad_norm": 4.283078567368648e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7900 }, { "epoch": 0.038362070533346754, "grad_norm": 4.996046527594444e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7910 }, { "epoch": 0.03841056872618284, "grad_norm": 4.998582312509825e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7920 }, { "epoch": 0.03845906691901893, "grad_norm": 5.108827849653608e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7930 }, { "epoch": 0.03850756511185502, "grad_norm": 4.1524378957547015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7940 }, { "epoch": 0.03855606330469111, "grad_norm": 4.31933386835226e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7950 }, { "epoch": 0.038604561497527196, "grad_norm": 5.009148367207672e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7960 }, { "epoch": 0.03865305969036329, "grad_norm": 5.24012136793317e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7970 }, { "epoch": 0.03870155788319938, "grad_norm": 5.609752520285838e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7980 }, { "epoch": 0.03875005607603547, "grad_norm": 5.304485739543452e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 7990 }, { "epoch": 0.038798554268871556, "grad_norm": 4.6127621544655995e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8000 }, { "epoch": 0.038847052461707644, "grad_norm": 5.227798283158336e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8010 }, { "epoch": 0.03889555065454373, "grad_norm": 7.799648642503598e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8020 }, { "epoch": 0.03894404884737983, "grad_norm": 5.012563519812829e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8030 }, { "epoch": 0.038992547040215916, "grad_norm": 3.9399373008564e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8040 }, { "epoch": 0.039041045233052005, "grad_norm": 3.9998613488023693e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8050 }, { "epoch": 0.03908954342588809, "grad_norm": 4.97131679821905e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8060 }, { "epoch": 0.03913804161872418, "grad_norm": 6.478445016000478e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8070 }, { "epoch": 0.03918653981156027, "grad_norm": 4.5900370082563313e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8080 }, { "epoch": 0.03923503800439636, "grad_norm": 3.7699422250625503e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8090 }, { "epoch": 0.03928353619723245, "grad_norm": 8.519071457158134e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8100 }, { "epoch": 0.03933203439006854, "grad_norm": 5.320661102814483e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8110 }, { "epoch": 0.03938053258290463, "grad_norm": 4.878108939010417e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8120 }, { "epoch": 0.03942903077574072, "grad_norm": 5.0534913498268e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8130 }, { "epoch": 0.03947752896857681, "grad_norm": 6.659232099082146e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8140 }, { "epoch": 0.039526027161412895, "grad_norm": 3.71293424450414e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8150 }, { "epoch": 0.03957452535424899, "grad_norm": 5.656905841533444e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8160 }, { "epoch": 0.03962302354708508, "grad_norm": 5.048476054980711e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8170 }, { "epoch": 0.03967152173992117, "grad_norm": 5.021475999456015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8180 }, { "epoch": 0.039720019932757256, "grad_norm": 4.048346511353884e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8190 }, { "epoch": 0.039768518125593344, "grad_norm": 4.0420462710244465e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8200 }, { "epoch": 0.03981701631842943, "grad_norm": 4.993646598450141e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8210 }, { "epoch": 0.03986551451126552, "grad_norm": 4.796136749973812e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8220 }, { "epoch": 0.039914012704101616, "grad_norm": 8.992248012873461e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8230 }, { "epoch": 0.039962510896937704, "grad_norm": 4.721522373074549e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8240 }, { "epoch": 0.04001100908977379, "grad_norm": 3.993347377218015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8250 }, { "epoch": 0.04005950728260988, "grad_norm": 4.356319323051139e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8260 }, { "epoch": 0.04010800547544597, "grad_norm": 7.000215305197344e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8270 }, { "epoch": 0.04015650366828206, "grad_norm": 4.5022227368463064e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8280 }, { "epoch": 0.04020500186111815, "grad_norm": 3.921499569514708e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8290 }, { "epoch": 0.04025350005395424, "grad_norm": 4.1981601839324867e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8300 }, { "epoch": 0.04030199824679033, "grad_norm": 1.6515614333911799e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 8310 }, { "epoch": 0.04035049643962642, "grad_norm": 7.034395821392536e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 8320 }, { "epoch": 0.04039899463246251, "grad_norm": 4.2169222069787793e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8330 }, { "epoch": 0.040447492825298595, "grad_norm": 3.4939384363497084e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8340 }, { "epoch": 0.04049599101813469, "grad_norm": 4.55593777815011e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8350 }, { "epoch": 0.04054448921097078, "grad_norm": 4.523839720604883e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8360 }, { "epoch": 0.04059298740380687, "grad_norm": 4.401984767810063e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8370 }, { "epoch": 0.040641485596642955, "grad_norm": 4.6896937533347227e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8380 }, { "epoch": 0.040689983789479044, "grad_norm": 3.908307348865492e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8390 }, { "epoch": 0.04073848198231513, "grad_norm": 3.6054268548468826e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8400 }, { "epoch": 0.04078698017515122, "grad_norm": 4.186235003089678e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8410 }, { "epoch": 0.040835478367987316, "grad_norm": 4.3413379557932785e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8420 }, { "epoch": 0.040883976560823404, "grad_norm": 4.7489993448834866e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8430 }, { "epoch": 0.04093247475365949, "grad_norm": 3.8657213963233517e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8440 }, { "epoch": 0.04098097294649558, "grad_norm": 8.2509012599985e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8450 }, { "epoch": 0.04102947113933167, "grad_norm": 4.044908905598277e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8460 }, { "epoch": 0.04107796933216776, "grad_norm": 4.3782713987639e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8470 }, { "epoch": 0.04112646752500385, "grad_norm": 7.181362775554589e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8480 }, { "epoch": 0.04117496571783994, "grad_norm": 3.5100816830890835e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8490 }, { "epoch": 0.04122346391067603, "grad_norm": 3.8231729604376596e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8500 }, { "epoch": 0.04127196210351212, "grad_norm": 4.497243537571194e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8510 }, { "epoch": 0.041320460296348206, "grad_norm": 5.833948080180562e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8520 }, { "epoch": 0.041368958489184295, "grad_norm": 4.3376752500989824e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8530 }, { "epoch": 0.04141745668202038, "grad_norm": 4.332780179083784e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8540 }, { "epoch": 0.04146595487485648, "grad_norm": 3.8972996208030963e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8550 }, { "epoch": 0.04151445306769257, "grad_norm": 4.4725439352077956e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8560 }, { "epoch": 0.041562951260528655, "grad_norm": 1.1057395568059292e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 8570 }, { "epoch": 0.04161144945336474, "grad_norm": 4.2113285303457815e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8580 }, { "epoch": 0.04165994764620083, "grad_norm": 3.578455221031618e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8590 }, { "epoch": 0.04170844583903692, "grad_norm": 3.385379443443526e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8600 }, { "epoch": 0.041756944031873015, "grad_norm": 4.10367562153624e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8610 }, { "epoch": 0.041805442224709104, "grad_norm": 4.162179152444878e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8620 }, { "epoch": 0.04185394041754519, "grad_norm": 4.04438480927638e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8630 }, { "epoch": 0.04190243861038128, "grad_norm": 3.668726265004807e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8640 }, { "epoch": 0.04195093680321737, "grad_norm": 3.179738996550441e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8650 }, { "epoch": 0.04199943499605346, "grad_norm": 3.813426019405597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8660 }, { "epoch": 0.04204793318888955, "grad_norm": 4.788302589986415e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8670 }, { "epoch": 0.04209643138172564, "grad_norm": 4.272194757959369e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8680 }, { "epoch": 0.04214492957456173, "grad_norm": 3.2110554570863314e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8690 }, { "epoch": 0.04219342776739782, "grad_norm": 3.613016872350272e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8700 }, { "epoch": 0.042241925960233906, "grad_norm": 5.202090278544347e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8710 }, { "epoch": 0.042290424153069994, "grad_norm": 3.953390432798187e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8720 }, { "epoch": 0.04233892234590608, "grad_norm": 3.796628789132228e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8730 }, { "epoch": 0.04238742053874218, "grad_norm": 3.2960309681584476e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8740 }, { "epoch": 0.042435918731578266, "grad_norm": 3.4798839010363736e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8750 }, { "epoch": 0.042484416924414355, "grad_norm": 3.919400910490367e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8760 }, { "epoch": 0.04253291511725044, "grad_norm": 4.103507933450601e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8770 }, { "epoch": 0.04258141331008653, "grad_norm": 4.154849193582777e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8780 }, { "epoch": 0.04262991150292262, "grad_norm": 3.3287113865299034e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8790 }, { "epoch": 0.042678409695758715, "grad_norm": 3.3010329048011045e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8800 }, { "epoch": 0.0427269078885948, "grad_norm": 4.2432475311215967e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8810 }, { "epoch": 0.04277540608143089, "grad_norm": 3.7796701235492947e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8820 }, { "epoch": 0.04282390427426698, "grad_norm": 3.59220450718567e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8830 }, { "epoch": 0.04287240246710307, "grad_norm": 3.7502070426853606e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8840 }, { "epoch": 0.04292090065993916, "grad_norm": 1.1267715080975904e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 8850 }, { "epoch": 0.042969398852775245, "grad_norm": 3.643346531134739e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8860 }, { "epoch": 0.04301789704561134, "grad_norm": 3.566180453162815e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8870 }, { "epoch": 0.04306639523844743, "grad_norm": 1.162246121566568e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 8880 }, { "epoch": 0.04311489343128352, "grad_norm": 3.6712489759338496e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8890 }, { "epoch": 0.043163391624119606, "grad_norm": 3.0351276336659794e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8900 }, { "epoch": 0.043211889816955694, "grad_norm": 4.3384383729971887e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8910 }, { "epoch": 0.04326038800979178, "grad_norm": 4.730307523459487e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8920 }, { "epoch": 0.04330888620262788, "grad_norm": 3.6654429891314066e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8930 }, { "epoch": 0.043357384395463966, "grad_norm": 4.812827683053911e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8940 }, { "epoch": 0.043405882588300054, "grad_norm": 3.6323277186056657e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8950 }, { "epoch": 0.04345438078113614, "grad_norm": 3.5985345903100097e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8960 }, { "epoch": 0.04350287897397223, "grad_norm": 3.6599789154934115e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8970 }, { "epoch": 0.04355137716680832, "grad_norm": 3.521062126310426e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8980 }, { "epoch": 0.043599875359644415, "grad_norm": 3.428517914016993e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 8990 }, { "epoch": 0.0436483735524805, "grad_norm": 2.778284908799833e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9000 }, { "epoch": 0.04369687174531659, "grad_norm": 1.6908201132537215e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 9010 }, { "epoch": 0.04374536993815268, "grad_norm": 3.5599609304881596e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9020 }, { "epoch": 0.04379386813098877, "grad_norm": 3.8089589793344203e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9030 }, { "epoch": 0.043842366323824856, "grad_norm": 3.445194920459471e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9040 }, { "epoch": 0.043890864516660945, "grad_norm": 3.025182877536281e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9050 }, { "epoch": 0.04393936270949704, "grad_norm": 3.318648964523163e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9060 }, { "epoch": 0.04398786090233313, "grad_norm": 3.5269297882223327e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9070 }, { "epoch": 0.04403635909516922, "grad_norm": 3.3202908866769576e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9080 }, { "epoch": 0.044084857288005305, "grad_norm": 3.2286934015246516e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9090 }, { "epoch": 0.044133355480841394, "grad_norm": 3.7075025716148957e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9100 }, { "epoch": 0.04418185367367748, "grad_norm": 3.686309071326832e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9110 }, { "epoch": 0.04423035186651358, "grad_norm": 3.420352356897638e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9120 }, { "epoch": 0.044278850059349666, "grad_norm": 3.460581581293809e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9130 }, { "epoch": 0.044327348252185754, "grad_norm": 3.487014055281179e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9140 }, { "epoch": 0.04437584644502184, "grad_norm": 3.189046253737615e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9150 }, { "epoch": 0.04442434463785793, "grad_norm": 3.2825894891175267e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9160 }, { "epoch": 0.04447284283069402, "grad_norm": 3.3830235679488396e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9170 }, { "epoch": 0.04452134102353011, "grad_norm": 3.5176898904865084e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9180 }, { "epoch": 0.0445698392163662, "grad_norm": 3.295745329978672e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9190 }, { "epoch": 0.04461833740920229, "grad_norm": 5.336094091035193e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9200 }, { "epoch": 0.04466683560203838, "grad_norm": 3.1343805062533647e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9210 }, { "epoch": 0.04471533379487447, "grad_norm": 3.225960369945824e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9220 }, { "epoch": 0.044763831987710556, "grad_norm": 3.555639693786361e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9230 }, { "epoch": 0.044812330180546645, "grad_norm": 3.2639772484799323e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9240 }, { "epoch": 0.04486082837338274, "grad_norm": 5.073067086414085e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9250 }, { "epoch": 0.04490932656621883, "grad_norm": 3.708474309860321e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9260 }, { "epoch": 0.04495782475905492, "grad_norm": 3.1465046390621865e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9270 }, { "epoch": 0.045006322951891005, "grad_norm": 3.091458609105757e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9280 }, { "epoch": 0.04505482114472709, "grad_norm": 2.89860963675892e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9290 }, { "epoch": 0.04510331933756318, "grad_norm": 4.4806188270740677e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9300 }, { "epoch": 0.04515181753039928, "grad_norm": 3.407543545108638e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9310 }, { "epoch": 0.045200315723235365, "grad_norm": 2.113773916789796e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 9320 }, { "epoch": 0.045248813916071454, "grad_norm": 3.0525532679348544e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9330 }, { "epoch": 0.04529731210890754, "grad_norm": 2.899535047617974e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9340 }, { "epoch": 0.04534581030174363, "grad_norm": 2.804708856274374e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9350 }, { "epoch": 0.04539430849457972, "grad_norm": 3.040734100068221e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9360 }, { "epoch": 0.04544280668741581, "grad_norm": 2.783171169085108e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9370 }, { "epoch": 0.0454913048802519, "grad_norm": 5.924884476371517e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9380 }, { "epoch": 0.04553980307308799, "grad_norm": 2.6447096956871974e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9390 }, { "epoch": 0.04558830126592408, "grad_norm": 3.013361151715799e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9400 }, { "epoch": 0.04563679945876017, "grad_norm": 3.017968310814467e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9410 }, { "epoch": 0.045685297651596256, "grad_norm": 3.10202636910617e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9420 }, { "epoch": 0.045733795844432344, "grad_norm": 1.9047204204980517e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 9430 }, { "epoch": 0.04578229403726844, "grad_norm": 2.9343880214582896e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9440 }, { "epoch": 0.04583079223010453, "grad_norm": 3.4486373579056817e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9450 }, { "epoch": 0.045879290422940616, "grad_norm": 3.136311192974972e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9460 }, { "epoch": 0.045927788615776705, "grad_norm": 3.190152142451552e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9470 }, { "epoch": 0.04597628680861279, "grad_norm": 2.862318240204331e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9480 }, { "epoch": 0.04602478500144888, "grad_norm": 2.5649458734733344e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9490 }, { "epoch": 0.04607328319428497, "grad_norm": 2.7821405979011615e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9500 }, { "epoch": 0.046121781387121065, "grad_norm": 3.137430155675247e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9510 }, { "epoch": 0.04617027957995715, "grad_norm": 3.4749237443065795e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9520 }, { "epoch": 0.04621877777279324, "grad_norm": 3.144482434436213e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9530 }, { "epoch": 0.04626727596562933, "grad_norm": 4.984272550245805e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9540 }, { "epoch": 0.04631577415846542, "grad_norm": 2.307631802977994e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 9550 }, { "epoch": 0.04636427235130151, "grad_norm": 3.189324786490033e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9560 }, { "epoch": 0.0464127705441376, "grad_norm": 2.900400772887224e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9570 }, { "epoch": 0.04646126873697369, "grad_norm": 4.337754262451199e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9580 }, { "epoch": 0.04650976692980978, "grad_norm": 4.4763194750885305e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9590 }, { "epoch": 0.04655826512264587, "grad_norm": 2.5215908294740075e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9600 }, { "epoch": 0.046606763315481956, "grad_norm": 2.95420051088513e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9610 }, { "epoch": 0.046655261508318044, "grad_norm": 2.713608182602911e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9620 }, { "epoch": 0.04670375970115414, "grad_norm": 3.09839379042387e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9630 }, { "epoch": 0.04675225789399023, "grad_norm": 2.983123579269886e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9640 }, { "epoch": 0.046800756086826316, "grad_norm": 2.796552109884942e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9650 }, { "epoch": 0.046849254279662404, "grad_norm": 3.087539539592399e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9660 }, { "epoch": 0.04689775247249849, "grad_norm": 3.101143875028356e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9670 }, { "epoch": 0.04694625066533458, "grad_norm": 2.837877843830938e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9680 }, { "epoch": 0.04699474885817067, "grad_norm": 6.008360173836991e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9690 }, { "epoch": 0.047043247051006765, "grad_norm": 2.460497228184977e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9700 }, { "epoch": 0.04709174524384285, "grad_norm": 2.595882619971235e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9710 }, { "epoch": 0.04714024343667894, "grad_norm": 2.7429152282820723e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9720 }, { "epoch": 0.04718874162951503, "grad_norm": 2.881259888454224e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9730 }, { "epoch": 0.04723723982235112, "grad_norm": 2.627700723678572e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9740 }, { "epoch": 0.047285738015187206, "grad_norm": 3.491051359105768e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9750 }, { "epoch": 0.0473342362080233, "grad_norm": 2.629074060678249e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9760 }, { "epoch": 0.04738273440085939, "grad_norm": 2.793360636133002e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9770 }, { "epoch": 0.04743123259369548, "grad_norm": 2.5638081524448353e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9780 }, { "epoch": 0.04747973078653157, "grad_norm": 2.5892535404636874e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9790 }, { "epoch": 0.047528228979367655, "grad_norm": 2.7623818255051447e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9800 }, { "epoch": 0.047576727172203744, "grad_norm": 4.906503932033957e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9810 }, { "epoch": 0.04762522536503983, "grad_norm": 2.468717070769344e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9820 }, { "epoch": 0.04767372355787593, "grad_norm": 2.512308583391132e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9830 }, { "epoch": 0.047722221750712016, "grad_norm": 2.402814516244689e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9840 }, { "epoch": 0.047770719943548104, "grad_norm": 3.8224212062232255e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9850 }, { "epoch": 0.04781921813638419, "grad_norm": 2.8552122444125416e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9860 }, { "epoch": 0.04786771632922028, "grad_norm": 4.580899997108645e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9870 }, { "epoch": 0.04791621452205637, "grad_norm": 2.44193500975598e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9880 }, { "epoch": 0.047964712714892464, "grad_norm": 2.4921837393776514e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9890 }, { "epoch": 0.04801321090772855, "grad_norm": 2.621483474740671e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9900 }, { "epoch": 0.04806170910056464, "grad_norm": 3.394978875803645e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9910 }, { "epoch": 0.04811020729340073, "grad_norm": 2.547069186675799e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9920 }, { "epoch": 0.04815870548623682, "grad_norm": 2.6152858367822773e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9930 }, { "epoch": 0.048207203679072906, "grad_norm": 2.698396883715759e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9940 }, { "epoch": 0.048255701871909, "grad_norm": 2.415486903828423e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9950 }, { "epoch": 0.04830420006474509, "grad_norm": 2.637122520354751e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9960 }, { "epoch": 0.04835269825758118, "grad_norm": 4.1173333897859266e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9970 }, { "epoch": 0.048401196450417266, "grad_norm": 2.3820312833322532e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9980 }, { "epoch": 0.048449694643253355, "grad_norm": 2.5276406745433633e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 9990 }, { "epoch": 0.04849819283608944, "grad_norm": 2.599767014999088e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10000 }, { "epoch": 0.04854669102892553, "grad_norm": 2.430252834528801e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10010 }, { "epoch": 0.04859518922176163, "grad_norm": 2.579058957508096e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10020 }, { "epoch": 0.048643687414597715, "grad_norm": 2.5355566890539194e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10030 }, { "epoch": 0.048692185607433804, "grad_norm": 2.364104432217573e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10040 }, { "epoch": 0.04874068380026989, "grad_norm": 2.0856076332620432e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10050 }, { "epoch": 0.04878918199310598, "grad_norm": 2.684835749278136e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10060 }, { "epoch": 0.04883768018594207, "grad_norm": 2.419271538656176e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10070 }, { "epoch": 0.048886178378778164, "grad_norm": 6.215381631591299e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10080 }, { "epoch": 0.04893467657161425, "grad_norm": 2.3560370721043e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10090 }, { "epoch": 0.04898317476445034, "grad_norm": 2.0652255727782176e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10100 }, { "epoch": 0.04903167295728643, "grad_norm": 3.389664016140159e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10110 }, { "epoch": 0.04908017115012252, "grad_norm": 2.4648429075568856e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10120 }, { "epoch": 0.049128669342958606, "grad_norm": 4.527557564415474e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10130 }, { "epoch": 0.049177167535794694, "grad_norm": 2.702424808376236e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10140 }, { "epoch": 0.04922566572863079, "grad_norm": 2.927654065842944e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10150 }, { "epoch": 0.04927416392146688, "grad_norm": 2.4386656605202006e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10160 }, { "epoch": 0.049322662114302966, "grad_norm": 4.057161788750818e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10170 }, { "epoch": 0.049371160307139055, "grad_norm": 3.7518586282203614e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10180 }, { "epoch": 0.04941965849997514, "grad_norm": 2.360881126151071e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10190 }, { "epoch": 0.04946815669281123, "grad_norm": 2.6424589805174037e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10200 }, { "epoch": 0.04951665488564733, "grad_norm": 2.568623926890723e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10210 }, { "epoch": 0.049565153078483415, "grad_norm": 2.306534696572271e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10220 }, { "epoch": 0.0496136512713195, "grad_norm": 2.823268516749522e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10230 }, { "epoch": 0.04966214946415559, "grad_norm": 2.3629090151189303e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10240 }, { "epoch": 0.04971064765699168, "grad_norm": 2.1334344069146027e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10250 }, { "epoch": 0.04975914584982777, "grad_norm": 2.569789785411558e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10260 }, { "epoch": 0.049807644042663864, "grad_norm": 4.785859459843778e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10270 }, { "epoch": 0.04985614223549995, "grad_norm": 2.832293262144958e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10280 }, { "epoch": 0.04990464042833604, "grad_norm": 4.6631973305011343e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10290 }, { "epoch": 0.04995313862117213, "grad_norm": 1.1924909131266759e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 10300 }, { "epoch": 0.05000163681400822, "grad_norm": 2.2525922815930244e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10310 }, { "epoch": 0.050050135006844305, "grad_norm": 2.2171019509187317e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10320 }, { "epoch": 0.050098633199680394, "grad_norm": 2.576471729298646e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10330 }, { "epoch": 0.05014713139251649, "grad_norm": 2.157564580329563e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10340 }, { "epoch": 0.05019562958535258, "grad_norm": 2.251244950457476e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10350 }, { "epoch": 0.050244127778188666, "grad_norm": 2.655171726928529e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10360 }, { "epoch": 0.050292625971024754, "grad_norm": 2.0884839102564e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10370 }, { "epoch": 0.05034112416386084, "grad_norm": 4.0478187202097615e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10380 }, { "epoch": 0.05038962235669693, "grad_norm": 2.215132468563752e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10390 }, { "epoch": 0.050438120549533026, "grad_norm": 2.0373121856209764e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10400 }, { "epoch": 0.050486618742369115, "grad_norm": 3.5122289432365505e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10410 }, { "epoch": 0.0505351169352052, "grad_norm": 8.751738391765684e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10420 }, { "epoch": 0.05058361512804129, "grad_norm": 2.3239806523633888e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10430 }, { "epoch": 0.05063211332087738, "grad_norm": 2.0474811890380806e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10440 }, { "epoch": 0.05068061151371347, "grad_norm": 2.036125579252257e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10450 }, { "epoch": 0.050729109706549556, "grad_norm": 2.3500406598486734e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10460 }, { "epoch": 0.05077760789938565, "grad_norm": 2.0780743170689675e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10470 }, { "epoch": 0.05082610609222174, "grad_norm": 2.0936521138992248e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10480 }, { "epoch": 0.05087460428505783, "grad_norm": 3.7227226812319714e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10490 }, { "epoch": 0.05092310247789392, "grad_norm": 2.657001800798753e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10500 }, { "epoch": 0.050971600670730005, "grad_norm": 2.0629566677143885e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10510 }, { "epoch": 0.051020098863566093, "grad_norm": 2.4140877030731644e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10520 }, { "epoch": 0.05106859705640219, "grad_norm": 2.078863730048397e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10530 }, { "epoch": 0.05111709524923828, "grad_norm": 2.755589036951278e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10540 }, { "epoch": 0.051165593442074365, "grad_norm": 1.949199202044838e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10550 }, { "epoch": 0.051214091634910454, "grad_norm": 2.0696106162176875e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10560 }, { "epoch": 0.05126258982774654, "grad_norm": 3.5485896887621493e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10570 }, { "epoch": 0.05131108802058263, "grad_norm": 2.3783212554917554e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10580 }, { "epoch": 0.051359586213418726, "grad_norm": 2.443300957111205e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10590 }, { "epoch": 0.051408084406254814, "grad_norm": 2.2989401315953728e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10600 }, { "epoch": 0.0514565825990909, "grad_norm": 2.4241339247055294e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10610 }, { "epoch": 0.05150508079192699, "grad_norm": 2.0721820703784033e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10620 }, { "epoch": 0.05155357898476308, "grad_norm": 1.9464853551198757e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10630 }, { "epoch": 0.05160207717759917, "grad_norm": 2.0241820664068655e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10640 }, { "epoch": 0.051650575370435256, "grad_norm": 7.218935138553206e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10650 }, { "epoch": 0.05169907356327135, "grad_norm": 2.596629542495066e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10660 }, { "epoch": 0.05174757175610744, "grad_norm": 2.483372156802943e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10670 }, { "epoch": 0.05179606994894353, "grad_norm": 3.0746016932425846e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10680 }, { "epoch": 0.051844568141779616, "grad_norm": 2.0028022618134855e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10690 }, { "epoch": 0.051893066334615705, "grad_norm": 1.9200319911760744e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10700 }, { "epoch": 0.05194156452745179, "grad_norm": 1.8507445531668054e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10710 }, { "epoch": 0.05199006272028789, "grad_norm": 2.0206501005759492e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10720 }, { "epoch": 0.05203856091312398, "grad_norm": 2.0237351350260724e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10730 }, { "epoch": 0.052087059105960065, "grad_norm": 2.2718261050158617e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10740 }, { "epoch": 0.052135557298796154, "grad_norm": 4.58447743767465e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10750 }, { "epoch": 0.05218405549163224, "grad_norm": 2.438910087221302e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10760 }, { "epoch": 0.05223255368446833, "grad_norm": 2.1740403610692738e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10770 }, { "epoch": 0.05228105187730442, "grad_norm": 1.8560172065917868e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10780 }, { "epoch": 0.052329550070140514, "grad_norm": 2.0144535994859325e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10790 }, { "epoch": 0.0523780482629766, "grad_norm": 2.3627978862350574e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10800 }, { "epoch": 0.05242654645581269, "grad_norm": 2.091567807838146e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10810 }, { "epoch": 0.05247504464864878, "grad_norm": 2.2223149187539093e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10820 }, { "epoch": 0.05252354284148487, "grad_norm": 2.817416486777802e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10830 }, { "epoch": 0.052572041034320956, "grad_norm": 3.8401927326958685e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10840 }, { "epoch": 0.05262053922715705, "grad_norm": 1.7728457635257655e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10850 }, { "epoch": 0.05266903741999314, "grad_norm": 3.2540906431677286e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 10860 }, { "epoch": 0.05271753561282923, "grad_norm": 2.1224089152838133e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10870 }, { "epoch": 0.052766033805665316, "grad_norm": 2.1905961489210313e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10880 }, { "epoch": 0.052814531998501404, "grad_norm": 2.928287017311959e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10890 }, { "epoch": 0.05286303019133749, "grad_norm": 2.1560313712143397e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10900 }, { "epoch": 0.05291152838417359, "grad_norm": 1.9066766299147275e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10910 }, { "epoch": 0.052960026577009676, "grad_norm": 1.9479028878777171e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10920 }, { "epoch": 0.053008524769845765, "grad_norm": 1.9227361747198302e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10930 }, { "epoch": 0.05305702296268185, "grad_norm": 1.706535215362237e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10940 }, { "epoch": 0.05310552115551794, "grad_norm": 2.4404425857937895e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10950 }, { "epoch": 0.05315401934835403, "grad_norm": 1.8107259336375137e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10960 }, { "epoch": 0.05320251754119012, "grad_norm": 1.9428702557888755e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10970 }, { "epoch": 0.053251015734026214, "grad_norm": 1.927888746422468e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10980 }, { "epoch": 0.0532995139268623, "grad_norm": 1.7632152093938203e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 10990 }, { "epoch": 0.05334801211969839, "grad_norm": 1.834013971802051e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11000 }, { "epoch": 0.05339651031253448, "grad_norm": 4.0486935404260294e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11010 }, { "epoch": 0.05344500850537057, "grad_norm": 4.240386601850332e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11020 }, { "epoch": 0.053493506698206655, "grad_norm": 3.080599526583683e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11030 }, { "epoch": 0.05354200489104275, "grad_norm": 1.9585509392072709e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11040 }, { "epoch": 0.05359050308387884, "grad_norm": 1.598853600626171e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11050 }, { "epoch": 0.05363900127671493, "grad_norm": 2.0775341624812427e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11060 }, { "epoch": 0.053687499469551016, "grad_norm": 2.2011221290085814e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11070 }, { "epoch": 0.053735997662387104, "grad_norm": 1.8903222098742845e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11080 }, { "epoch": 0.05378449585522319, "grad_norm": 1.746670790225835e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11090 }, { "epoch": 0.05383299404805928, "grad_norm": 1.75501497778896e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11100 }, { "epoch": 0.053881492240895376, "grad_norm": 1.695653963906807e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11110 }, { "epoch": 0.053929990433731465, "grad_norm": 1.6941469027642597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11120 }, { "epoch": 0.05397848862656755, "grad_norm": 2.1677038830603124e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11130 }, { "epoch": 0.05402698681940364, "grad_norm": 1.608661506224962e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11140 }, { "epoch": 0.05407548501223973, "grad_norm": 1.8060370621242328e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11150 }, { "epoch": 0.05412398320507582, "grad_norm": 1.8626630549078982e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11160 }, { "epoch": 0.05417248139791191, "grad_norm": 1.8984061966875743e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11170 }, { "epoch": 0.054220979590748, "grad_norm": 1.640679414549595e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11180 }, { "epoch": 0.05426947778358409, "grad_norm": 1.6872260744094092e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11190 }, { "epoch": 0.05431797597642018, "grad_norm": 1.5609788306392147e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11200 }, { "epoch": 0.05436647416925627, "grad_norm": 1.721937934462403e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11210 }, { "epoch": 0.054414972362092355, "grad_norm": 1.6119579981932475e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11220 }, { "epoch": 0.05446347055492845, "grad_norm": 2.231286799769805e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11230 }, { "epoch": 0.05451196874776454, "grad_norm": 1.5403516329115519e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11240 }, { "epoch": 0.05456046694060063, "grad_norm": 2.1657481852344063e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11250 }, { "epoch": 0.054608965133436715, "grad_norm": 1.8087796149757196e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11260 }, { "epoch": 0.054657463326272804, "grad_norm": 1.8566397841368598e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11270 }, { "epoch": 0.05470596151910889, "grad_norm": 1.9922045169096236e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11280 }, { "epoch": 0.05475445971194498, "grad_norm": 1.5332615532770433e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11290 }, { "epoch": 0.054802957904781076, "grad_norm": 1.5274680720267497e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11300 }, { "epoch": 0.054851456097617164, "grad_norm": 1.6430348637186398e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11310 }, { "epoch": 0.05489995429045325, "grad_norm": 1.6945354275321733e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11320 }, { "epoch": 0.05494845248328934, "grad_norm": 1.6635607380521833e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11330 }, { "epoch": 0.05499695067612543, "grad_norm": 1.6045096629113687e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11340 }, { "epoch": 0.05504544886896152, "grad_norm": 2.035901331964851e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11350 }, { "epoch": 0.05509394706179761, "grad_norm": 1.8627596887199616e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11360 }, { "epoch": 0.0551424452546337, "grad_norm": 1.8926611744518596e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11370 }, { "epoch": 0.05519094344746979, "grad_norm": 1.7566533472290757e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11380 }, { "epoch": 0.05523944164030588, "grad_norm": 1.63390950547182e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11390 }, { "epoch": 0.055287939833141966, "grad_norm": 1.6547456027637963e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11400 }, { "epoch": 0.055336438025978055, "grad_norm": 1.846593562504495e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11410 }, { "epoch": 0.05538493621881414, "grad_norm": 1.6132118219047697e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11420 }, { "epoch": 0.05543343441165024, "grad_norm": 1.9026960274004523e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11430 }, { "epoch": 0.05548193260448633, "grad_norm": 1.573097847540339e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11440 }, { "epoch": 0.055530430797322415, "grad_norm": 1.742394175607842e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11450 }, { "epoch": 0.0555789289901585, "grad_norm": 2.566425223449187e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11460 }, { "epoch": 0.05562742718299459, "grad_norm": 1.9543981011338474e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11470 }, { "epoch": 0.05567592537583068, "grad_norm": 1.6550822579119995e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11480 }, { "epoch": 0.055724423568666775, "grad_norm": 1.805968139478864e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11490 }, { "epoch": 0.055772921761502864, "grad_norm": 1.5172422251907847e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11500 }, { "epoch": 0.05582141995433895, "grad_norm": 1.624089378537974e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11510 }, { "epoch": 0.05586991814717504, "grad_norm": 1.7884634928577725e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11520 }, { "epoch": 0.05591841634001113, "grad_norm": 1.6463066287997208e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11530 }, { "epoch": 0.05596691453284722, "grad_norm": 1.706565058157139e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11540 }, { "epoch": 0.05601541272568331, "grad_norm": 1.5283875143268233e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11550 }, { "epoch": 0.0560639109185194, "grad_norm": 1.676246625947897e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11560 }, { "epoch": 0.05611240911135549, "grad_norm": 2.8768221227437607e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11570 }, { "epoch": 0.05616090730419158, "grad_norm": 6.027379413353628e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11580 }, { "epoch": 0.056209405497027666, "grad_norm": 1.840355707827257e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11590 }, { "epoch": 0.056257903689863754, "grad_norm": 2.4421095190518827e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11600 }, { "epoch": 0.05630640188269984, "grad_norm": 1.489560332856854e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11610 }, { "epoch": 0.05635490007553594, "grad_norm": 1.520514416597507e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11620 }, { "epoch": 0.056403398268372026, "grad_norm": 1.622946967927419e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11630 }, { "epoch": 0.056451896461208115, "grad_norm": 1.5022777688500355e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11640 }, { "epoch": 0.0565003946540442, "grad_norm": 1.376663476548856e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11650 }, { "epoch": 0.05654889284688029, "grad_norm": 1.5301817768431647e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11660 }, { "epoch": 0.05659739103971638, "grad_norm": 3.904280561073392e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11670 }, { "epoch": 0.056645889232552475, "grad_norm": 1.5540284437065566e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11680 }, { "epoch": 0.056694387425388564, "grad_norm": 1.481822096138785e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11690 }, { "epoch": 0.05674288561822465, "grad_norm": 1.9984823040886113e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11700 }, { "epoch": 0.05679138381106074, "grad_norm": 2.1120740711921826e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11710 }, { "epoch": 0.05683988200389683, "grad_norm": 1.5964214128416643e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11720 }, { "epoch": 0.05688838019673292, "grad_norm": 1.4147352089821652e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11730 }, { "epoch": 0.056936878389569005, "grad_norm": 2.3705288754172216e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11740 }, { "epoch": 0.0569853765824051, "grad_norm": 1.458254814679094e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11750 }, { "epoch": 0.05703387477524119, "grad_norm": 2.0722306715015293e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11760 }, { "epoch": 0.05708237296807728, "grad_norm": 2.0009137813303823e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11770 }, { "epoch": 0.057130871160913366, "grad_norm": 1.5318155988097715e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11780 }, { "epoch": 0.057179369353749454, "grad_norm": 1.5678757847581437e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11790 }, { "epoch": 0.05722786754658554, "grad_norm": 1.352235159401971e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11800 }, { "epoch": 0.05727636573942164, "grad_norm": 4.067521217621106e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11810 }, { "epoch": 0.057324863932257726, "grad_norm": 1.4707664774959994e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11820 }, { "epoch": 0.057373362125093814, "grad_norm": 5.866911578777945e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11830 }, { "epoch": 0.0574218603179299, "grad_norm": 1.602438430836628e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11840 }, { "epoch": 0.05747035851076599, "grad_norm": 1.6795772239674989e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11850 }, { "epoch": 0.05751885670360208, "grad_norm": 1.6107094324979698e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11860 }, { "epoch": 0.057567354896438175, "grad_norm": 1.3854267422175326e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11870 }, { "epoch": 0.05761585308927426, "grad_norm": 1.8840938764697057e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11880 }, { "epoch": 0.05766435128211035, "grad_norm": 1.2900729018383572e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11890 }, { "epoch": 0.05771284947494644, "grad_norm": 1.3272938303998671e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11900 }, { "epoch": 0.05776134766778253, "grad_norm": 3.7796792184963124e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11910 }, { "epoch": 0.05780984586061862, "grad_norm": 1.5161150201947748e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11920 }, { "epoch": 0.057858344053454705, "grad_norm": 1.4005055959387391e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11930 }, { "epoch": 0.0579068422462908, "grad_norm": 1.9367922732271836e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11940 }, { "epoch": 0.05795534043912689, "grad_norm": 1.4656748703600897e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11950 }, { "epoch": 0.05800383863196298, "grad_norm": 2.0049570537139516e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11960 }, { "epoch": 0.058052336824799065, "grad_norm": 1.9846473264806264e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11970 }, { "epoch": 0.058100835017635154, "grad_norm": 1.3952815436368837e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11980 }, { "epoch": 0.05814933321047124, "grad_norm": 1.39196700388311e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 11990 }, { "epoch": 0.05819783140330734, "grad_norm": 1.4046248963950347e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12000 }, { "epoch": 0.058246329596143426, "grad_norm": 1.8839274389392813e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12010 }, { "epoch": 0.058294827788979514, "grad_norm": 1.364215478361075e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12020 }, { "epoch": 0.0583433259818156, "grad_norm": 1.7661351137121528e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12030 }, { "epoch": 0.05839182417465169, "grad_norm": 1.2347508970833587e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12040 }, { "epoch": 0.05844032236748778, "grad_norm": 1.3252110875328071e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12050 }, { "epoch": 0.05848882056032387, "grad_norm": 1.6208500142056437e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12060 }, { "epoch": 0.05853731875315996, "grad_norm": 1.7921298933742946e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12070 }, { "epoch": 0.05858581694599605, "grad_norm": 1.6676413849836536e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12080 }, { "epoch": 0.05863431513883214, "grad_norm": 1.8433156867558864e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12090 }, { "epoch": 0.05868281333166823, "grad_norm": 1.422474298351517e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12100 }, { "epoch": 0.058731311524504316, "grad_norm": 1.2804419213807705e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12110 }, { "epoch": 0.058779809717340405, "grad_norm": 1.405069269821979e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12120 }, { "epoch": 0.0588283079101765, "grad_norm": 1.3836759649166197e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12130 }, { "epoch": 0.05887680610301259, "grad_norm": 1.1936083410546416e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12140 }, { "epoch": 0.05892530429584868, "grad_norm": 1.2034429630602972e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12150 }, { "epoch": 0.058973802488684765, "grad_norm": 2.973823995944258e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12160 }, { "epoch": 0.05902230068152085, "grad_norm": 1.0036828825832345e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12170 }, { "epoch": 0.05907079887435694, "grad_norm": 1.2833537255119154e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12180 }, { "epoch": 0.05911929706719304, "grad_norm": 1.3001893250930152e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12190 }, { "epoch": 0.059167795260029125, "grad_norm": 1.794468147409134e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12200 }, { "epoch": 0.059216293452865214, "grad_norm": 1.2812436978038022e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12210 }, { "epoch": 0.0592647916457013, "grad_norm": 2.7634473553916905e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12220 }, { "epoch": 0.05931328983853739, "grad_norm": 1.4476023579845787e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12230 }, { "epoch": 0.05936178803137348, "grad_norm": 1.3254660302663979e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12240 }, { "epoch": 0.05941028622420957, "grad_norm": 1.2000390370303649e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12250 }, { "epoch": 0.05945878441704566, "grad_norm": 1.806847194529837e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12260 }, { "epoch": 0.05950728260988175, "grad_norm": 4.1465600588708185e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12270 }, { "epoch": 0.05955578080271784, "grad_norm": 4.288770014682086e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12280 }, { "epoch": 0.05960427899555393, "grad_norm": 2.4465557544317562e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12290 }, { "epoch": 0.059652777188390016, "grad_norm": 2.2078288566262927e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12300 }, { "epoch": 0.059701275381226104, "grad_norm": 1.23626773529395e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12310 }, { "epoch": 0.0597497735740622, "grad_norm": 9.489020840192097e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12320 }, { "epoch": 0.05979827176689829, "grad_norm": 6.394117235686281e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12330 }, { "epoch": 0.059846769959734376, "grad_norm": 5.427386327028216e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12340 }, { "epoch": 0.059895268152570465, "grad_norm": 8.132565199048258e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12350 }, { "epoch": 0.05994376634540655, "grad_norm": 5.876786985936633e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12360 }, { "epoch": 0.05999226453824264, "grad_norm": 4.875068384535552e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12370 }, { "epoch": 0.06004076273107873, "grad_norm": 4.7439633021895133e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12380 }, { "epoch": 0.060089260923914825, "grad_norm": 3.9223087355821917e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12390 }, { "epoch": 0.06013775911675091, "grad_norm": 3.312187288884161e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12400 }, { "epoch": 0.060186257309587, "grad_norm": 4.491065510592307e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12410 }, { "epoch": 0.06023475550242309, "grad_norm": 4.2556428070383845e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12420 }, { "epoch": 0.06028325369525918, "grad_norm": 3.83829103611788e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12430 }, { "epoch": 0.06033175188809527, "grad_norm": 2.8048685862813727e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12440 }, { "epoch": 0.06038025008093136, "grad_norm": 3.216497361790971e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12450 }, { "epoch": 0.06042874827376745, "grad_norm": 3.592546704567212e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12460 }, { "epoch": 0.06047724646660354, "grad_norm": 3.032028530469688e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12470 }, { "epoch": 0.06052574465943963, "grad_norm": 2.810862724800245e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12480 }, { "epoch": 0.060574242852275716, "grad_norm": 2.2883044437094213e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12490 }, { "epoch": 0.060622741045111804, "grad_norm": 3.6237514677850413e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12500 }, { "epoch": 0.0606712392379479, "grad_norm": 2.774621634671348e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12510 }, { "epoch": 0.06071973743078399, "grad_norm": 2.4381225216529856e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12520 }, { "epoch": 0.060768235623620076, "grad_norm": 4.103011690403946e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12530 }, { "epoch": 0.060816733816456164, "grad_norm": 3.71287137568288e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12540 }, { "epoch": 0.06086523200929225, "grad_norm": 3.239187890358153e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 12550 }, { "epoch": 0.06091373020212834, "grad_norm": 5.170616077521117e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12560 }, { "epoch": 0.06096222839496443, "grad_norm": 4.0289981484420423e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12570 }, { "epoch": 0.061010726587800525, "grad_norm": 2.722357521633967e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12580 }, { "epoch": 0.06105922478063661, "grad_norm": 2.895866373364697e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12590 }, { "epoch": 0.0611077229734727, "grad_norm": 2.7819294246000936e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12600 }, { "epoch": 0.06115622116630879, "grad_norm": 2.099733222848954e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12610 }, { "epoch": 0.06120471935914488, "grad_norm": 2.0681646617504157e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12620 }, { "epoch": 0.06125321755198097, "grad_norm": 2.1514932768695871e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12630 }, { "epoch": 0.06130171574481706, "grad_norm": 3.9023905173962703e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12640 }, { "epoch": 0.06135021393765315, "grad_norm": 2.3264765047770197e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12650 }, { "epoch": 0.06139871213048924, "grad_norm": 1.8483206076780334e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12660 }, { "epoch": 0.06144721032332533, "grad_norm": 2.449215230626578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12670 }, { "epoch": 0.061495708516161415, "grad_norm": 2.1466831867655856e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12680 }, { "epoch": 0.061544206708997504, "grad_norm": 5.0024107167701e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12690 }, { "epoch": 0.06159270490183359, "grad_norm": 5.135466381034348e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12700 }, { "epoch": 0.06164120309466969, "grad_norm": 1.9090660430265416e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12710 }, { "epoch": 0.061689701287505776, "grad_norm": 2.017050064750947e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12720 }, { "epoch": 0.061738199480341864, "grad_norm": 1.971289549373978e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12730 }, { "epoch": 0.06178669767317795, "grad_norm": 2.665134957169357e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12740 }, { "epoch": 0.06183519586601404, "grad_norm": 1.6806335167984798e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12750 }, { "epoch": 0.06188369405885013, "grad_norm": 1.6718108497570938e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12760 }, { "epoch": 0.061932192251686224, "grad_norm": 1.6894614418561105e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12770 }, { "epoch": 0.06198069044452231, "grad_norm": 2.0074091366950597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12780 }, { "epoch": 0.0620291886373584, "grad_norm": 1.5056691893278185e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12790 }, { "epoch": 0.06207768683019449, "grad_norm": 2.6512955741964106e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12800 }, { "epoch": 0.06212618502303058, "grad_norm": 1.4769548783988284e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12810 }, { "epoch": 0.062174683215866666, "grad_norm": 1.5125165475637914e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12820 }, { "epoch": 0.06222318140870276, "grad_norm": 1.7088640902329644e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12830 }, { "epoch": 0.06227167960153885, "grad_norm": 1.8734586149093957e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12840 }, { "epoch": 0.06232017779437494, "grad_norm": 1.4026632300101483e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12850 }, { "epoch": 0.06236867598721103, "grad_norm": 1.4165590300763142e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12860 }, { "epoch": 0.062417174180047115, "grad_norm": 2.227826314538106e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12870 }, { "epoch": 0.0624656723728832, "grad_norm": 1.7096650140047132e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12880 }, { "epoch": 0.0625141705657193, "grad_norm": 1.3205514903802396e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12890 }, { "epoch": 0.06256266875855539, "grad_norm": 1.4980923879193142e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12900 }, { "epoch": 0.06261116695139148, "grad_norm": 1.4310097640191088e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12910 }, { "epoch": 0.06265966514422756, "grad_norm": 4.307483720822347e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12920 }, { "epoch": 0.06270816333706365, "grad_norm": 1.4520303182052885e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12930 }, { "epoch": 0.06275666152989974, "grad_norm": 1.5075622172844305e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12940 }, { "epoch": 0.06280515972273583, "grad_norm": 1.20027280559043e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12950 }, { "epoch": 0.06285365791557192, "grad_norm": 1.3822275946040463e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12960 }, { "epoch": 0.062902156108408, "grad_norm": 1.5486092763694614e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12970 }, { "epoch": 0.0629506543012441, "grad_norm": 1.3237556117928762e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12980 }, { "epoch": 0.0629991524940802, "grad_norm": 1.2335111421180045e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 12990 }, { "epoch": 0.06304765068691628, "grad_norm": 1.2887475975276175e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13000 }, { "epoch": 0.06309614887975237, "grad_norm": 1.1856147352773405e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13010 }, { "epoch": 0.06314464707258846, "grad_norm": 1.1818755751846766e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13020 }, { "epoch": 0.06319314526542455, "grad_norm": 1.4217305022157234e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13030 }, { "epoch": 0.06324164345826064, "grad_norm": 1.3369466955737153e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13040 }, { "epoch": 0.06329014165109673, "grad_norm": 1.3357802686186915e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13050 }, { "epoch": 0.06333863984393281, "grad_norm": 1.5231441352625552e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13060 }, { "epoch": 0.0633871380367689, "grad_norm": 1.1447088610339051e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13070 }, { "epoch": 0.06343563622960499, "grad_norm": 1.1974418612226145e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13080 }, { "epoch": 0.06348413442244108, "grad_norm": 1.6025526861085382e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13090 }, { "epoch": 0.06353263261527717, "grad_norm": 1.2529896764590376e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13100 }, { "epoch": 0.06358113080811326, "grad_norm": 1.1263367838409977e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13110 }, { "epoch": 0.06362962900094936, "grad_norm": 1.3917714625222288e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13120 }, { "epoch": 0.06367812719378545, "grad_norm": 1.125366750898138e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13130 }, { "epoch": 0.06372662538662154, "grad_norm": 1.156731741502881e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13140 }, { "epoch": 0.06377512357945762, "grad_norm": 1.1373037267503605e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13150 }, { "epoch": 0.06382362177229371, "grad_norm": 1.070632720256981e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13160 }, { "epoch": 0.0638721199651298, "grad_norm": 2.546984205764602e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13170 }, { "epoch": 0.06392061815796589, "grad_norm": 1.0610010292566585e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13180 }, { "epoch": 0.06396911635080198, "grad_norm": 1.1339106009700117e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13190 }, { "epoch": 0.06401761454363807, "grad_norm": 1.0153282659075558e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13200 }, { "epoch": 0.06406611273647415, "grad_norm": 1.1896559470869761e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13210 }, { "epoch": 0.06411461092931024, "grad_norm": 1.1686329060012213e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13220 }, { "epoch": 0.06416310912214633, "grad_norm": 1.1472139505031009e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13230 }, { "epoch": 0.06421160731498242, "grad_norm": 1.0042127485121455e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13240 }, { "epoch": 0.06426010550781852, "grad_norm": 1.2290482231946953e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13250 }, { "epoch": 0.06430860370065461, "grad_norm": 1.0838508757160525e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13260 }, { "epoch": 0.0643571018934907, "grad_norm": 1.0277313577944369e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13270 }, { "epoch": 0.06440560008632679, "grad_norm": 1.0845156594996297e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13280 }, { "epoch": 0.06445409827916287, "grad_norm": 1.1000148703033119e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13290 }, { "epoch": 0.06450259647199896, "grad_norm": 1.1231417573753788e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13300 }, { "epoch": 0.06455109466483505, "grad_norm": 1.1486007167604839e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13310 }, { "epoch": 0.06459959285767114, "grad_norm": 1.0544451356508944e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13320 }, { "epoch": 0.06464809105050723, "grad_norm": 1.319243096986611e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13330 }, { "epoch": 0.06469658924334332, "grad_norm": 2.0789980226254556e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13340 }, { "epoch": 0.0647450874361794, "grad_norm": 1.5184843960014405e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13350 }, { "epoch": 0.0647935856290155, "grad_norm": 1.0856112453438982e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13360 }, { "epoch": 0.06484208382185158, "grad_norm": 1.0442575160141132e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13370 }, { "epoch": 0.06489058201468768, "grad_norm": 3.2770751090538397e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13380 }, { "epoch": 0.06493908020752377, "grad_norm": 1.0561042529388942e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13390 }, { "epoch": 0.06498757840035986, "grad_norm": 9.465068728786719e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13400 }, { "epoch": 0.06503607659319595, "grad_norm": 1.036883148231027e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13410 }, { "epoch": 0.06508457478603204, "grad_norm": 1.0965318608668895e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13420 }, { "epoch": 0.06513307297886813, "grad_norm": 9.122967270513982e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13430 }, { "epoch": 0.06518157117170421, "grad_norm": 1.100208919524448e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13440 }, { "epoch": 0.0652300693645403, "grad_norm": 1.4618295551827032e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13450 }, { "epoch": 0.06527856755737639, "grad_norm": 1.012498884733759e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13460 }, { "epoch": 0.06532706575021248, "grad_norm": 1.2279259919978358e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13470 }, { "epoch": 0.06537556394304857, "grad_norm": 1.5814515563761233e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13480 }, { "epoch": 0.06542406213588466, "grad_norm": 1.0365518221533421e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13490 }, { "epoch": 0.06547256032872074, "grad_norm": 1.4408708182145347e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13500 }, { "epoch": 0.06552105852155685, "grad_norm": 1.1994956139460555e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13510 }, { "epoch": 0.06556955671439293, "grad_norm": 9.175217030588101e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13520 }, { "epoch": 0.06561805490722902, "grad_norm": 8.869474044104209e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13530 }, { "epoch": 0.06566655310006511, "grad_norm": 9.783255450201978e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13540 }, { "epoch": 0.0657150512929012, "grad_norm": 9.774170450782549e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13550 }, { "epoch": 0.06576354948573729, "grad_norm": 9.734538508610058e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13560 }, { "epoch": 0.06581204767857338, "grad_norm": 1.0830221697233355e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13570 }, { "epoch": 0.06586054587140946, "grad_norm": 1.1251313480897807e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13580 }, { "epoch": 0.06590904406424555, "grad_norm": 8.92394567131305e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13590 }, { "epoch": 0.06595754225708164, "grad_norm": 8.951297303383399e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13600 }, { "epoch": 0.06600604044991773, "grad_norm": 1.1316183901044496e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13610 }, { "epoch": 0.06605453864275382, "grad_norm": 8.853297828181894e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 13620 }, { "epoch": 0.06610303683558992, "grad_norm": 1.1072063898609485e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 13630 }, { "epoch": 0.06615153502842601, "grad_norm": 2.120400677085854e-05, "learning_rate": 0.0002, "loss": 0.0001, "step": 13640 }, { "epoch": 0.0662000332212621, "grad_norm": 7.659533730475232e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 13650 }, { "epoch": 0.06624853141409819, "grad_norm": 2.475585824868176e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 13660 }, { "epoch": 0.06629702960693427, "grad_norm": 9.927425708156079e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13670 }, { "epoch": 0.06634552779977036, "grad_norm": 5.700075689674122e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13680 }, { "epoch": 0.06639402599260645, "grad_norm": 2.642633262439631e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13690 }, { "epoch": 0.06644252418544254, "grad_norm": 1.9008115259566694e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13700 }, { "epoch": 0.06649102237827863, "grad_norm": 3.663543111542822e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13710 }, { "epoch": 0.06653952057111472, "grad_norm": 2.093479452014435e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 13720 }, { "epoch": 0.0665880187639508, "grad_norm": 2.6756592887977604e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13730 }, { "epoch": 0.06663651695678689, "grad_norm": 1.3146595847501885e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13740 }, { "epoch": 0.06668501514962298, "grad_norm": 9.663082209954155e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13750 }, { "epoch": 0.06673351334245908, "grad_norm": 1.857948404904164e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13760 }, { "epoch": 0.06678201153529517, "grad_norm": 1.5948513691910193e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13770 }, { "epoch": 0.06683050972813126, "grad_norm": 1.3461800563163706e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13780 }, { "epoch": 0.06687900792096735, "grad_norm": 5.416817202785751e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13790 }, { "epoch": 0.06692750611380344, "grad_norm": 1.0194620472248062e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13800 }, { "epoch": 0.06697600430663952, "grad_norm": 1.1876308008140768e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13810 }, { "epoch": 0.06702450249947561, "grad_norm": 9.960523357221973e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13820 }, { "epoch": 0.0670730006923117, "grad_norm": 1.1821230145869777e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13830 }, { "epoch": 0.06712149888514779, "grad_norm": 6.1611524415639e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13840 }, { "epoch": 0.06716999707798388, "grad_norm": 5.326583618625591e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13850 }, { "epoch": 0.06721849527081997, "grad_norm": 1.0787086921482114e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13860 }, { "epoch": 0.06726699346365606, "grad_norm": 1.0415471933811205e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13870 }, { "epoch": 0.06731549165649214, "grad_norm": 1.027241410156421e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 13880 }, { "epoch": 0.06736398984932825, "grad_norm": 5.821320883114822e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13890 }, { "epoch": 0.06741248804216433, "grad_norm": 4.7377716327901e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13900 }, { "epoch": 0.06746098623500042, "grad_norm": 9.986919167204178e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13910 }, { "epoch": 0.06750948442783651, "grad_norm": 7.794018301865435e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13920 }, { "epoch": 0.0675579826206726, "grad_norm": 9.423710594091972e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13930 }, { "epoch": 0.06760648081350869, "grad_norm": 5.194033860789204e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13940 }, { "epoch": 0.06765497900634478, "grad_norm": 6.886729124744306e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13950 }, { "epoch": 0.06770347719918086, "grad_norm": 7.429349011545128e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13960 }, { "epoch": 0.06775197539201695, "grad_norm": 1.2762084224959835e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 13970 }, { "epoch": 0.06780047358485304, "grad_norm": 6.854502885289548e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13980 }, { "epoch": 0.06784897177768913, "grad_norm": 4.692470270128979e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 13990 }, { "epoch": 0.06789746997052522, "grad_norm": 4.523027143932268e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14000 }, { "epoch": 0.0679459681633613, "grad_norm": 6.811654316152271e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14010 }, { "epoch": 0.06799446635619741, "grad_norm": 6.202778308761481e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14020 }, { "epoch": 0.0680429645490335, "grad_norm": 8.874109198586666e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14030 }, { "epoch": 0.06809146274186959, "grad_norm": 3.571450122308306e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14040 }, { "epoch": 0.06813996093470567, "grad_norm": 3.3191511761287984e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14050 }, { "epoch": 0.06818845912754176, "grad_norm": 5.538560685636185e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14060 }, { "epoch": 0.06823695732037785, "grad_norm": 4.74625778679183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14070 }, { "epoch": 0.06828545551321394, "grad_norm": 5.278673143038759e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14080 }, { "epoch": 0.06833395370605003, "grad_norm": 3.338001306474325e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14090 }, { "epoch": 0.06838245189888612, "grad_norm": 3.366894816281274e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14100 }, { "epoch": 0.0684309500917222, "grad_norm": 6.05489162808226e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14110 }, { "epoch": 0.06847944828455829, "grad_norm": 4.7188032681333425e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14120 }, { "epoch": 0.06852794647739438, "grad_norm": 5.926149810875359e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14130 }, { "epoch": 0.06857644467023047, "grad_norm": 2.7852433959196787e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 14140 }, { "epoch": 0.06862494286306657, "grad_norm": 3.0274063078650215e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14150 }, { "epoch": 0.06867344105590266, "grad_norm": 5.01824786169891e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14160 }, { "epoch": 0.06872193924873875, "grad_norm": 4.759345983984531e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14170 }, { "epoch": 0.06877043744157484, "grad_norm": 4.582150552323583e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14180 }, { "epoch": 0.06881893563441092, "grad_norm": 2.878994962429715e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14190 }, { "epoch": 0.06886743382724701, "grad_norm": 4.0361177866543585e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14200 }, { "epoch": 0.0689159320200831, "grad_norm": 3.721835355463554e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14210 }, { "epoch": 0.06896443021291919, "grad_norm": 3.673491733025003e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14220 }, { "epoch": 0.06901292840575528, "grad_norm": 3.5943887155553966e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14230 }, { "epoch": 0.06906142659859137, "grad_norm": 2.823342413194041e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14240 }, { "epoch": 0.06910992479142745, "grad_norm": 2.8407208674252615e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14250 }, { "epoch": 0.06915842298426354, "grad_norm": 4.431254296832776e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14260 }, { "epoch": 0.06920692117709965, "grad_norm": 3.6063681818632176e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14270 }, { "epoch": 0.06925541936993573, "grad_norm": 3.9728428191665444e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14280 }, { "epoch": 0.06930391756277182, "grad_norm": 2.484695471594023e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14290 }, { "epoch": 0.06935241575560791, "grad_norm": 2.436624413348909e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14300 }, { "epoch": 0.069400913948444, "grad_norm": 4.010669272247469e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14310 }, { "epoch": 0.06944941214128009, "grad_norm": 5.384424071053218e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14320 }, { "epoch": 0.06949791033411618, "grad_norm": 3.703332538407267e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14330 }, { "epoch": 0.06954640852695226, "grad_norm": 2.4178979174394044e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14340 }, { "epoch": 0.06959490671978835, "grad_norm": 2.298286005952832e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14350 }, { "epoch": 0.06964340491262444, "grad_norm": 3.4497011824896617e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14360 }, { "epoch": 0.06969190310546053, "grad_norm": 3.688319907269033e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14370 }, { "epoch": 0.06974040129829662, "grad_norm": 3.2581141340415343e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14380 }, { "epoch": 0.0697888994911327, "grad_norm": 2.46499837430747e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14390 }, { "epoch": 0.06983739768396881, "grad_norm": 2.2764432117128308e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14400 }, { "epoch": 0.0698858958768049, "grad_norm": 3.6462094499256636e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14410 }, { "epoch": 0.06993439406964098, "grad_norm": 6.465395472332602e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14420 }, { "epoch": 0.06998289226247707, "grad_norm": 3.3169720836667693e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14430 }, { "epoch": 0.07003139045531316, "grad_norm": 2.421293743282149e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14440 }, { "epoch": 0.07007988864814925, "grad_norm": 2.5758839683476253e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14450 }, { "epoch": 0.07012838684098534, "grad_norm": 3.146257370190142e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14460 }, { "epoch": 0.07017688503382143, "grad_norm": 2.804474377171573e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14470 }, { "epoch": 0.07022538322665751, "grad_norm": 3.20026856570621e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14480 }, { "epoch": 0.0702738814194936, "grad_norm": 2.1250833981412143e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14490 }, { "epoch": 0.07032237961232969, "grad_norm": 2.1179339171339961e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14500 }, { "epoch": 0.07037087780516578, "grad_norm": 2.8960255349375075e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14510 }, { "epoch": 0.07041937599800187, "grad_norm": 7.362595511040126e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14520 }, { "epoch": 0.07046787419083797, "grad_norm": 2.8418781994332676e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14530 }, { "epoch": 0.07051637238367406, "grad_norm": 2.100841953733834e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14540 }, { "epoch": 0.07056487057651015, "grad_norm": 2.0331459893441206e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14550 }, { "epoch": 0.07061336876934624, "grad_norm": 3.6070429132450954e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14560 }, { "epoch": 0.07066186696218232, "grad_norm": 3.0654891247650085e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14570 }, { "epoch": 0.07071036515501841, "grad_norm": 2.8248825856280746e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14580 }, { "epoch": 0.0707588633478545, "grad_norm": 2.1741941225172923e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14590 }, { "epoch": 0.07080736154069059, "grad_norm": 2.2865322080178885e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14600 }, { "epoch": 0.07085585973352668, "grad_norm": 2.6128921604140487e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14610 }, { "epoch": 0.07090435792636277, "grad_norm": 3.2220785328718193e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14620 }, { "epoch": 0.07095285611919885, "grad_norm": 3.012238778410392e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14630 }, { "epoch": 0.07100135431203494, "grad_norm": 3.6915986356689245e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14640 }, { "epoch": 0.07104985250487103, "grad_norm": 1.9580501486871071e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14650 }, { "epoch": 0.07109835069770713, "grad_norm": 2.560047391853004e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14660 }, { "epoch": 0.07114684889054322, "grad_norm": 2.8379949412737915e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14670 }, { "epoch": 0.07119534708337931, "grad_norm": 2.7257709689365583e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14680 }, { "epoch": 0.0712438452762154, "grad_norm": 1.9753846913772577e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14690 }, { "epoch": 0.07129234346905149, "grad_norm": 1.8719103422881744e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14700 }, { "epoch": 0.07134084166188757, "grad_norm": 2.458009760175628e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14710 }, { "epoch": 0.07138933985472366, "grad_norm": 2.8798262974305544e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14720 }, { "epoch": 0.07143783804755975, "grad_norm": 2.506060639007046e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14730 }, { "epoch": 0.07148633624039584, "grad_norm": 1.7982544875394524e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14740 }, { "epoch": 0.07153483443323193, "grad_norm": 1.8696583481414564e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14750 }, { "epoch": 0.07158333262606802, "grad_norm": 2.467685931151209e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14760 }, { "epoch": 0.0716318308189041, "grad_norm": 2.467983506448945e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14770 }, { "epoch": 0.0716803290117402, "grad_norm": 2.584867786481482e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14780 }, { "epoch": 0.0717288272045763, "grad_norm": 1.8059995454677846e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14790 }, { "epoch": 0.07177732539741238, "grad_norm": 1.6827885929160402e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14800 }, { "epoch": 0.07182582359024847, "grad_norm": 2.7996259177598404e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14810 }, { "epoch": 0.07187432178308456, "grad_norm": 2.514729828817508e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14820 }, { "epoch": 0.07192281997592065, "grad_norm": 2.4215697180807183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14830 }, { "epoch": 0.07197131816875674, "grad_norm": 1.7334026836124394e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14840 }, { "epoch": 0.07201981636159283, "grad_norm": 1.8299282089628832e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14850 }, { "epoch": 0.07206831455442891, "grad_norm": 2.30445650117872e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14860 }, { "epoch": 0.072116812747265, "grad_norm": 2.488721122517745e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14870 }, { "epoch": 0.07216531094010109, "grad_norm": 2.3228614054460195e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14880 }, { "epoch": 0.07221380913293718, "grad_norm": 2.2144223521536333e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14890 }, { "epoch": 0.07226230732577327, "grad_norm": 3.163161181873875e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14900 }, { "epoch": 0.07231080551860937, "grad_norm": 3.6894738286719075e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14910 }, { "epoch": 0.07235930371144546, "grad_norm": 3.230965432976518e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14920 }, { "epoch": 0.07240780190428155, "grad_norm": 3.011604974290094e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14930 }, { "epoch": 0.07245630009711763, "grad_norm": 2.3487750411277375e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14940 }, { "epoch": 0.07250479828995372, "grad_norm": 1.684240089616651e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14950 }, { "epoch": 0.07255329648278981, "grad_norm": 2.238062251080919e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14960 }, { "epoch": 0.0726017946756259, "grad_norm": 2.3043787678034278e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14970 }, { "epoch": 0.07265029286846199, "grad_norm": 2.1673713490599766e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14980 }, { "epoch": 0.07269879106129808, "grad_norm": 3.1426748137164395e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 14990 }, { "epoch": 0.07274728925413416, "grad_norm": 6.378872399182001e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15000 }, { "epoch": 0.07279578744697025, "grad_norm": 2.0370364950395015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15010 }, { "epoch": 0.07284428563980634, "grad_norm": 2.41753838281511e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15020 }, { "epoch": 0.07289278383264243, "grad_norm": 2.2050052450595103e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15030 }, { "epoch": 0.07294128202547853, "grad_norm": 1.6407089731274027e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15040 }, { "epoch": 0.07298978021831462, "grad_norm": 1.6902916399885726e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15050 }, { "epoch": 0.07303827841115071, "grad_norm": 2.2012578426711116e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15060 }, { "epoch": 0.0730867766039868, "grad_norm": 2.062542989733629e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15070 }, { "epoch": 0.07313527479682289, "grad_norm": 2.147784812223108e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15080 }, { "epoch": 0.07318377298965897, "grad_norm": 1.7485213277268485e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15090 }, { "epoch": 0.07323227118249506, "grad_norm": 1.671069753683696e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15100 }, { "epoch": 0.07328076937533115, "grad_norm": 4.955608119416866e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15110 }, { "epoch": 0.07332926756816724, "grad_norm": 2.1553965723342117e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15120 }, { "epoch": 0.07337776576100333, "grad_norm": 2.3646560975976172e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15130 }, { "epoch": 0.07342626395383942, "grad_norm": 1.913799394515081e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15140 }, { "epoch": 0.0734747621466755, "grad_norm": 1.5310203593799088e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15150 }, { "epoch": 0.07352326033951159, "grad_norm": 1.945236789424598e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15160 }, { "epoch": 0.0735717585323477, "grad_norm": 2.013914581766585e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15170 }, { "epoch": 0.07362025672518378, "grad_norm": 1.9751864499539806e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15180 }, { "epoch": 0.07366875491801987, "grad_norm": 1.4703095985169057e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15190 }, { "epoch": 0.07371725311085596, "grad_norm": 1.4849622687052033e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15200 }, { "epoch": 0.07376575130369205, "grad_norm": 2.090163633283737e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15210 }, { "epoch": 0.07381424949652814, "grad_norm": 3.1179121151581057e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15220 }, { "epoch": 0.07386274768936422, "grad_norm": 2.0635575026517472e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15230 }, { "epoch": 0.07391124588220031, "grad_norm": 1.6025211380110704e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15240 }, { "epoch": 0.0739597440750364, "grad_norm": 1.6174368511201465e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15250 }, { "epoch": 0.07400824226787249, "grad_norm": 1.9940023321396438e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15260 }, { "epoch": 0.07405674046070858, "grad_norm": 2.02061812615284e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15270 }, { "epoch": 0.07410523865354467, "grad_norm": 2.049963399031185e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15280 }, { "epoch": 0.07415373684638076, "grad_norm": 1.418526522911634e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15290 }, { "epoch": 0.07420223503921686, "grad_norm": 1.6407955172326183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15300 }, { "epoch": 0.07425073323205295, "grad_norm": 2.0752082718900056e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15310 }, { "epoch": 0.07429923142488903, "grad_norm": 1.8945755186905444e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15320 }, { "epoch": 0.07434772961772512, "grad_norm": 1.9421729291480005e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15330 }, { "epoch": 0.07439622781056121, "grad_norm": 2.5472297693340806e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15340 }, { "epoch": 0.0744447260033973, "grad_norm": 1.3342751969958044e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15350 }, { "epoch": 0.07449322419623339, "grad_norm": 1.950814549900315e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15360 }, { "epoch": 0.07454172238906948, "grad_norm": 1.938779945476199e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15370 }, { "epoch": 0.07459022058190556, "grad_norm": 1.8412070801332447e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15380 }, { "epoch": 0.07463871877474165, "grad_norm": 1.7802248919451813e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15390 }, { "epoch": 0.07468721696757774, "grad_norm": 1.4524175639962777e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15400 }, { "epoch": 0.07473571516041383, "grad_norm": 2.337784081873906e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15410 }, { "epoch": 0.07478421335324992, "grad_norm": 1.8390971945336787e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15420 }, { "epoch": 0.07483271154608602, "grad_norm": 1.7269464080982289e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15430 }, { "epoch": 0.07488120973892211, "grad_norm": 1.9534483897132304e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15440 }, { "epoch": 0.0749297079317582, "grad_norm": 1.6589552842560806e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15450 }, { "epoch": 0.07497820612459428, "grad_norm": 1.94692674426733e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15460 }, { "epoch": 0.07502670431743037, "grad_norm": 1.6861446283655823e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15470 }, { "epoch": 0.07507520251026646, "grad_norm": 1.7868269708287698e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15480 }, { "epoch": 0.07512370070310255, "grad_norm": 1.3140173393821897e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15490 }, { "epoch": 0.07517219889593864, "grad_norm": 1.7665020379808993e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15500 }, { "epoch": 0.07522069708877473, "grad_norm": 1.7910005567500775e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15510 }, { "epoch": 0.07526919528161082, "grad_norm": 1.7578655331362825e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15520 }, { "epoch": 0.0753176934744469, "grad_norm": 1.7408066810276068e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15530 }, { "epoch": 0.07536619166728299, "grad_norm": 1.500512496477313e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15540 }, { "epoch": 0.0754146898601191, "grad_norm": 1.200523485067606e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15550 }, { "epoch": 0.07546318805295518, "grad_norm": 2.0358328356451239e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15560 }, { "epoch": 0.07551168624579127, "grad_norm": 1.6551597070701973e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15570 }, { "epoch": 0.07556018443862736, "grad_norm": 1.7994383938457759e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15580 }, { "epoch": 0.07560868263146345, "grad_norm": 1.3909988183513633e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15590 }, { "epoch": 0.07565718082429954, "grad_norm": 1.3391289144237817e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15600 }, { "epoch": 0.07570567901713562, "grad_norm": 1.583080688760674e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15610 }, { "epoch": 0.07575417720997171, "grad_norm": 1.6966282601060811e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15620 }, { "epoch": 0.0758026754028078, "grad_norm": 1.781927778665704e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15630 }, { "epoch": 0.07585117359564389, "grad_norm": 1.4613939924856822e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15640 }, { "epoch": 0.07589967178847998, "grad_norm": 2.3642999735784542e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15650 }, { "epoch": 0.07594816998131607, "grad_norm": 1.5343219672558916e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15660 }, { "epoch": 0.07599666817415215, "grad_norm": 1.611418127822617e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15670 }, { "epoch": 0.07604516636698826, "grad_norm": 1.521597852160994e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15680 }, { "epoch": 0.07609366455982434, "grad_norm": 1.1977640212990082e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15690 }, { "epoch": 0.07614216275266043, "grad_norm": 1.3131223397522263e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15700 }, { "epoch": 0.07619066094549652, "grad_norm": 1.643072380375088e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15710 }, { "epoch": 0.07623915913833261, "grad_norm": 1.607112380952458e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15720 }, { "epoch": 0.0762876573311687, "grad_norm": 3.0670000228383287e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15730 }, { "epoch": 0.07633615552400479, "grad_norm": 1.28364789020452e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15740 }, { "epoch": 0.07638465371684088, "grad_norm": 1.5702526923178084e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15750 }, { "epoch": 0.07643315190967696, "grad_norm": 1.6187073015316855e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15760 }, { "epoch": 0.07648165010251305, "grad_norm": 2.3774161661549442e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15770 }, { "epoch": 0.07653014829534914, "grad_norm": 1.4619982380281726e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15780 }, { "epoch": 0.07657864648818523, "grad_norm": 1.2897625367713772e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15790 }, { "epoch": 0.07662714468102132, "grad_norm": 1.20727349894878e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15800 }, { "epoch": 0.07667564287385742, "grad_norm": 2.3551933736598585e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15810 }, { "epoch": 0.07672414106669351, "grad_norm": 1.5777314388287778e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15820 }, { "epoch": 0.0767726392595296, "grad_norm": 1.502024815636105e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15830 }, { "epoch": 0.07682113745236568, "grad_norm": 1.4119255808964226e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15840 }, { "epoch": 0.07686963564520177, "grad_norm": 1.327944403328729e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15850 }, { "epoch": 0.07691813383803786, "grad_norm": 1.5031810107757337e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15860 }, { "epoch": 0.07696663203087395, "grad_norm": 1.8486365149783524e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15870 }, { "epoch": 0.07701513022371004, "grad_norm": 1.7183420197852683e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15880 }, { "epoch": 0.07706362841654613, "grad_norm": 1.3507539620150055e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15890 }, { "epoch": 0.07711212660938221, "grad_norm": 1.215817775346295e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15900 }, { "epoch": 0.0771606248022183, "grad_norm": 2.992765928411245e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15910 }, { "epoch": 0.07720912299505439, "grad_norm": 1.5324117441650742e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15920 }, { "epoch": 0.07725762118789048, "grad_norm": 1.584285627131976e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15930 }, { "epoch": 0.07730611938072658, "grad_norm": 1.2508220947893278e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15940 }, { "epoch": 0.07735461757356267, "grad_norm": 1.2261683934866596e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15950 }, { "epoch": 0.07740311576639876, "grad_norm": 1.6398227842273627e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15960 }, { "epoch": 0.07745161395923485, "grad_norm": 1.4750102650396002e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15970 }, { "epoch": 0.07750011215207094, "grad_norm": 1.6159832227913284e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15980 }, { "epoch": 0.07754861034490702, "grad_norm": 1.1861759929843174e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 15990 }, { "epoch": 0.07759710853774311, "grad_norm": 1.4260736236337834e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16000 }, { "epoch": 0.0776456067305792, "grad_norm": 1.7578300059994945e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16010 }, { "epoch": 0.07769410492341529, "grad_norm": 1.9737476009140664e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16020 }, { "epoch": 0.07774260311625138, "grad_norm": 1.4661554814665578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16030 }, { "epoch": 0.07779110130908747, "grad_norm": 1.0857472432235227e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16040 }, { "epoch": 0.07783959950192355, "grad_norm": 1.282310932992914e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16050 }, { "epoch": 0.07788809769475966, "grad_norm": 1.5039194067867356e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16060 }, { "epoch": 0.07793659588759574, "grad_norm": 1.5812956632998976e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16070 }, { "epoch": 0.07798509408043183, "grad_norm": 1.4998980191194278e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16080 }, { "epoch": 0.07803359227326792, "grad_norm": 1.102429081356604e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16090 }, { "epoch": 0.07808209046610401, "grad_norm": 1.2004900895590254e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16100 }, { "epoch": 0.0781305886589401, "grad_norm": 3.827751982043992e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16110 }, { "epoch": 0.07817908685177619, "grad_norm": 1.489514573904671e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16120 }, { "epoch": 0.07822758504461227, "grad_norm": 1.3511238705632422e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16130 }, { "epoch": 0.07827608323744836, "grad_norm": 1.2251399539309205e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16140 }, { "epoch": 0.07832458143028445, "grad_norm": 1.1041667136169053e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16150 }, { "epoch": 0.07837307962312054, "grad_norm": 1.6007915348836832e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16160 }, { "epoch": 0.07842157781595663, "grad_norm": 1.4250531421566848e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16170 }, { "epoch": 0.07847007600879272, "grad_norm": 1.5663620445138804e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16180 }, { "epoch": 0.07851857420162882, "grad_norm": 1.628862520419716e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16190 }, { "epoch": 0.0785670723944649, "grad_norm": 1.1479292538751906e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16200 }, { "epoch": 0.078615570587301, "grad_norm": 1.3859016689821146e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16210 }, { "epoch": 0.07866406878013708, "grad_norm": 3.15792163974038e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16220 }, { "epoch": 0.07871256697297317, "grad_norm": 2.5279717874582275e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16230 }, { "epoch": 0.07876106516580926, "grad_norm": 1.3178015478843008e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16240 }, { "epoch": 0.07880956335864535, "grad_norm": 1.1248205566971592e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16250 }, { "epoch": 0.07885806155148144, "grad_norm": 1.446631330281889e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16260 }, { "epoch": 0.07890655974431753, "grad_norm": 1.3807910193008865e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16270 }, { "epoch": 0.07895505793715361, "grad_norm": 1.3793140851703356e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16280 }, { "epoch": 0.0790035561299897, "grad_norm": 1.1096292951151554e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16290 }, { "epoch": 0.07905205432282579, "grad_norm": 1.214948213146272e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16300 }, { "epoch": 0.07910055251566188, "grad_norm": 1.3794419828627724e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16310 }, { "epoch": 0.07914905070849798, "grad_norm": 1.5185159440989082e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16320 }, { "epoch": 0.07919754890133407, "grad_norm": 1.974925964987051e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16330 }, { "epoch": 0.07924604709417016, "grad_norm": 1.1852587533667247e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16340 }, { "epoch": 0.07929454528700625, "grad_norm": 1.1166988400646005e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16350 }, { "epoch": 0.07934304347984233, "grad_norm": 1.4287942917690089e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16360 }, { "epoch": 0.07939154167267842, "grad_norm": 1.7459690582199983e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16370 }, { "epoch": 0.07944003986551451, "grad_norm": 1.434999603588949e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16380 }, { "epoch": 0.0794885380583506, "grad_norm": 1.2174288599453575e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16390 }, { "epoch": 0.07953703625118669, "grad_norm": 1.176514956569008e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16400 }, { "epoch": 0.07958553444402278, "grad_norm": 1.4077704690862447e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16410 }, { "epoch": 0.07963403263685886, "grad_norm": 1.4372190548783692e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16420 }, { "epoch": 0.07968253082969495, "grad_norm": 1.303178578382358e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16430 }, { "epoch": 0.07973102902253104, "grad_norm": 1.1027951529740676e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16440 }, { "epoch": 0.07977952721536714, "grad_norm": 1.1814326228432037e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16450 }, { "epoch": 0.07982802540820323, "grad_norm": 1.4846729357032018e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16460 }, { "epoch": 0.07987652360103932, "grad_norm": 1.2090521295249346e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16470 }, { "epoch": 0.07992502179387541, "grad_norm": 1.3329196235645213e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16480 }, { "epoch": 0.0799735199867115, "grad_norm": 1.0170835906819775e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16490 }, { "epoch": 0.08002201817954759, "grad_norm": 1.0046204579339246e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16500 }, { "epoch": 0.08007051637238367, "grad_norm": 1.3120538255861902e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16510 }, { "epoch": 0.08011901456521976, "grad_norm": 1.3846882040979835e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16520 }, { "epoch": 0.08016751275805585, "grad_norm": 1.372491453821567e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16530 }, { "epoch": 0.08021601095089194, "grad_norm": 9.699181902078635e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 16540 }, { "epoch": 0.08026450914372803, "grad_norm": 1.0273450357090042e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16550 }, { "epoch": 0.08031300733656412, "grad_norm": 1.2983879571493162e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16560 }, { "epoch": 0.0803615055294002, "grad_norm": 1.460146705767329e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16570 }, { "epoch": 0.0804100037222363, "grad_norm": 1.212320626109431e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16580 }, { "epoch": 0.0804585019150724, "grad_norm": 1.0703432451464323e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16590 }, { "epoch": 0.08050700010790848, "grad_norm": 1.3093448103518313e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16600 }, { "epoch": 0.08055549830074457, "grad_norm": 1.260900432953349e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16610 }, { "epoch": 0.08060399649358066, "grad_norm": 1.7249007555619755e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16620 }, { "epoch": 0.08065249468641675, "grad_norm": 1.5773227346471685e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16630 }, { "epoch": 0.08070099287925284, "grad_norm": 1.005919187946347e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16640 }, { "epoch": 0.08074949107208892, "grad_norm": 9.55920924639031e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 16650 }, { "epoch": 0.08079798926492501, "grad_norm": 1.839831327288266e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16660 }, { "epoch": 0.0808464874577611, "grad_norm": 1.2546313143957377e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16670 }, { "epoch": 0.08089498565059719, "grad_norm": 8.519617722413386e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16680 }, { "epoch": 0.08094348384343328, "grad_norm": 1.9748425472698727e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16690 }, { "epoch": 0.08099198203626938, "grad_norm": 1.0445297959904565e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16700 }, { "epoch": 0.08104048022910547, "grad_norm": 1.2471146249026788e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16710 }, { "epoch": 0.08108897842194156, "grad_norm": 1.755845033812875e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16720 }, { "epoch": 0.08113747661477765, "grad_norm": 1.4257865643685363e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16730 }, { "epoch": 0.08118597480761373, "grad_norm": 1.1500128493935335e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16740 }, { "epoch": 0.08123447300044982, "grad_norm": 1.3220711991834833e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16750 }, { "epoch": 0.08128297119328591, "grad_norm": 1.327090899394534e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16760 }, { "epoch": 0.081331469386122, "grad_norm": 1.2283437911264627e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16770 }, { "epoch": 0.08137996757895809, "grad_norm": 1.319239402164385e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16780 }, { "epoch": 0.08142846577179418, "grad_norm": 1.23328433687675e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16790 }, { "epoch": 0.08147696396463026, "grad_norm": 9.174256376809353e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 16800 }, { "epoch": 0.08152546215746635, "grad_norm": 2.756387118552084e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16810 }, { "epoch": 0.08157396035030244, "grad_norm": 1.2868615328898159e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16820 }, { "epoch": 0.08162245854313854, "grad_norm": 1.204850690328385e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16830 }, { "epoch": 0.08167095673597463, "grad_norm": 9.470620909723948e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 16840 }, { "epoch": 0.08171945492881072, "grad_norm": 1.039461778873374e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16850 }, { "epoch": 0.08176795312164681, "grad_norm": 1.2421197936873796e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16860 }, { "epoch": 0.0818164513144829, "grad_norm": 1.296684644103152e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16870 }, { "epoch": 0.08186494950731898, "grad_norm": 1.3468559245666256e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16880 }, { "epoch": 0.08191344770015507, "grad_norm": 3.017659366832959e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16890 }, { "epoch": 0.08196194589299116, "grad_norm": 8.621864111546529e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 16900 }, { "epoch": 0.08201044408582725, "grad_norm": 1.9731989198135125e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16910 }, { "epoch": 0.08205894227866334, "grad_norm": 1.3959812861230603e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16920 }, { "epoch": 0.08210744047149943, "grad_norm": 1.083636291809853e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16930 }, { "epoch": 0.08215593866433551, "grad_norm": 9.29214607481299e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 16940 }, { "epoch": 0.0822044368571716, "grad_norm": 2.601520634470944e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16950 }, { "epoch": 0.0822529350500077, "grad_norm": 1.1925648379929044e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16960 }, { "epoch": 0.0823014332428438, "grad_norm": 1.3966408118903928e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16970 }, { "epoch": 0.08234993143567988, "grad_norm": 1.1897641627456323e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16980 }, { "epoch": 0.08239842962851597, "grad_norm": 1.2857469755545026e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 16990 }, { "epoch": 0.08244692782135206, "grad_norm": 9.638425524371996e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17000 }, { "epoch": 0.08249542601418815, "grad_norm": 1.5035824674214382e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17010 }, { "epoch": 0.08254392420702424, "grad_norm": 1.6779314648829313e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17020 }, { "epoch": 0.08259242239986032, "grad_norm": 1.831001128493881e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17030 }, { "epoch": 0.08264092059269641, "grad_norm": 9.526317512609239e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17040 }, { "epoch": 0.0826894187855325, "grad_norm": 9.286927138418832e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17050 }, { "epoch": 0.08273791697836859, "grad_norm": 1.3465941606227716e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17060 }, { "epoch": 0.08278641517120468, "grad_norm": 1.2581347164086765e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17070 }, { "epoch": 0.08283491336404077, "grad_norm": 1.1585584758222467e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17080 }, { "epoch": 0.08288341155687687, "grad_norm": 1.0456028576300014e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17090 }, { "epoch": 0.08293190974971296, "grad_norm": 1.2673227445247903e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17100 }, { "epoch": 0.08298040794254904, "grad_norm": 1.612757785096619e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17110 }, { "epoch": 0.08302890613538513, "grad_norm": 1.1494296359160217e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17120 }, { "epoch": 0.08307740432822122, "grad_norm": 1.1721703430112029e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17130 }, { "epoch": 0.08312590252105731, "grad_norm": 8.38146689829955e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17140 }, { "epoch": 0.0831744007138934, "grad_norm": 1.2110993452552066e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17150 }, { "epoch": 0.08322289890672949, "grad_norm": 1.1228497243109814e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17160 }, { "epoch": 0.08327139709956558, "grad_norm": 1.0834553876293285e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17170 }, { "epoch": 0.08331989529240166, "grad_norm": 1.294254161621211e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17180 }, { "epoch": 0.08336839348523775, "grad_norm": 8.462424716526584e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17190 }, { "epoch": 0.08341689167807384, "grad_norm": 2.2104879349171824e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17200 }, { "epoch": 0.08346538987090993, "grad_norm": 1.1058391180540639e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17210 }, { "epoch": 0.08351388806374603, "grad_norm": 1.5218788007587136e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17220 }, { "epoch": 0.08356238625658212, "grad_norm": 1.17071820682213e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17230 }, { "epoch": 0.08361088444941821, "grad_norm": 8.513544003108109e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17240 }, { "epoch": 0.0836593826422543, "grad_norm": 1.0507751824206935e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17250 }, { "epoch": 0.08370788083509038, "grad_norm": 1.0478305512151564e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17260 }, { "epoch": 0.08375637902792647, "grad_norm": 1.1001382205222399e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17270 }, { "epoch": 0.08380487722076256, "grad_norm": 1.1004332378661275e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17280 }, { "epoch": 0.08385337541359865, "grad_norm": 8.850554422679124e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17290 }, { "epoch": 0.08390187360643474, "grad_norm": 8.867005618640178e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17300 }, { "epoch": 0.08395037179927083, "grad_norm": 1.0338271039245228e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17310 }, { "epoch": 0.08399886999210691, "grad_norm": 1.4094854350332753e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17320 }, { "epoch": 0.084047368184943, "grad_norm": 1.2656713010983367e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17330 }, { "epoch": 0.0840958663777791, "grad_norm": 1.1949852307679976e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17340 }, { "epoch": 0.0841443645706152, "grad_norm": 8.359801029200753e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17350 }, { "epoch": 0.08419286276345128, "grad_norm": 1.0792888360811048e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17360 }, { "epoch": 0.08424136095628737, "grad_norm": 1.1221199258670822e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17370 }, { "epoch": 0.08428985914912346, "grad_norm": 1.11102920641315e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17380 }, { "epoch": 0.08433835734195955, "grad_norm": 8.429647380125971e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17390 }, { "epoch": 0.08438685553479564, "grad_norm": 8.119526029304325e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17400 }, { "epoch": 0.08443535372763172, "grad_norm": 1.6899949173421192e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17410 }, { "epoch": 0.08448385192046781, "grad_norm": 9.88350095099122e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17420 }, { "epoch": 0.0845323501133039, "grad_norm": 1.0175251929922524e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17430 }, { "epoch": 0.08458084830613999, "grad_norm": 8.726727429575476e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17440 }, { "epoch": 0.08462934649897608, "grad_norm": 8.21290342400971e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17450 }, { "epoch": 0.08467784469181217, "grad_norm": 1.6180769080165192e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17460 }, { "epoch": 0.08472634288464827, "grad_norm": 1.2762210133132612e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17470 }, { "epoch": 0.08477484107748436, "grad_norm": 1.0823438145735054e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17480 }, { "epoch": 0.08482333927032044, "grad_norm": 8.929947625802015e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17490 }, { "epoch": 0.08487183746315653, "grad_norm": 7.91035930092221e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17500 }, { "epoch": 0.08492033565599262, "grad_norm": 1.2229962464971322e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17510 }, { "epoch": 0.08496883384882871, "grad_norm": 1.1342216765797275e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17520 }, { "epoch": 0.0850173320416648, "grad_norm": 1.1770914909448038e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17530 }, { "epoch": 0.08506583023450089, "grad_norm": 8.30457906886295e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17540 }, { "epoch": 0.08511432842733697, "grad_norm": 1.3424214273527468e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17550 }, { "epoch": 0.08516282662017306, "grad_norm": 9.918216647974987e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17560 }, { "epoch": 0.08521132481300915, "grad_norm": 2.8972306154173566e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17570 }, { "epoch": 0.08525982300584524, "grad_norm": 1.2288269601867796e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17580 }, { "epoch": 0.08530832119868133, "grad_norm": 8.634107473426411e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17590 }, { "epoch": 0.08535681939151743, "grad_norm": 8.305351428816721e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17600 }, { "epoch": 0.08540531758435352, "grad_norm": 9.80157537355808e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17610 }, { "epoch": 0.0854538157771896, "grad_norm": 1.0026050034639411e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17620 }, { "epoch": 0.0855023139700257, "grad_norm": 1.0997111132837745e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17630 }, { "epoch": 0.08555081216286178, "grad_norm": 8.07060374086177e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17640 }, { "epoch": 0.08559931035569787, "grad_norm": 9.44257863011444e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17650 }, { "epoch": 0.08564780854853396, "grad_norm": 1.5569123945624597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17660 }, { "epoch": 0.08569630674137005, "grad_norm": 1.6239951605712122e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17670 }, { "epoch": 0.08574480493420614, "grad_norm": 1.3685721000911144e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17680 }, { "epoch": 0.08579330312704223, "grad_norm": 9.233740883018982e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17690 }, { "epoch": 0.08584180131987831, "grad_norm": 2.664287990228331e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17700 }, { "epoch": 0.0858902995127144, "grad_norm": 1.2892679990272882e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17710 }, { "epoch": 0.08593879770555049, "grad_norm": 9.977896553436949e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17720 }, { "epoch": 0.08598729589838659, "grad_norm": 9.213977136823814e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17730 }, { "epoch": 0.08603579409122268, "grad_norm": 1.1611009398393435e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17740 }, { "epoch": 0.08608429228405877, "grad_norm": 8.395718964493426e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17750 }, { "epoch": 0.08613279047689486, "grad_norm": 1.005670995368746e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17760 }, { "epoch": 0.08618128866973095, "grad_norm": 1.173690336031541e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17770 }, { "epoch": 0.08622978686256703, "grad_norm": 1.5402089559302112e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17780 }, { "epoch": 0.08627828505540312, "grad_norm": 8.84766606645826e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17790 }, { "epoch": 0.08632678324823921, "grad_norm": 7.788617750748017e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17800 }, { "epoch": 0.0863752814410753, "grad_norm": 1.3030854972839734e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17810 }, { "epoch": 0.08642377963391139, "grad_norm": 1.281483719139942e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17820 }, { "epoch": 0.08647227782674748, "grad_norm": 9.38688842211377e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17830 }, { "epoch": 0.08652077601958356, "grad_norm": 7.94445256246945e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17840 }, { "epoch": 0.08656927421241965, "grad_norm": 1.0739258016201347e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17850 }, { "epoch": 0.08661777240525576, "grad_norm": 8.746970706852153e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 17860 }, { "epoch": 0.08666627059809184, "grad_norm": 4.293668212085322e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17870 }, { "epoch": 0.08671476879092793, "grad_norm": 2.2197099269760656e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17880 }, { "epoch": 0.08676326698376402, "grad_norm": 8.262706074901871e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17890 }, { "epoch": 0.08681176517660011, "grad_norm": 1.0685030815693608e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17900 }, { "epoch": 0.0868602633694362, "grad_norm": 1.3397185227859154e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17910 }, { "epoch": 0.08690876156227229, "grad_norm": 1.2946004801506206e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17920 }, { "epoch": 0.08695725975510837, "grad_norm": 3.207028953511326e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17930 }, { "epoch": 0.08700575794794446, "grad_norm": 6.45229434326211e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17940 }, { "epoch": 0.08705425614078055, "grad_norm": 6.453949197293696e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17950 }, { "epoch": 0.08710275433361664, "grad_norm": 9.384315546867583e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17960 }, { "epoch": 0.08715125252645273, "grad_norm": 8.740699541931463e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17970 }, { "epoch": 0.08719975071928883, "grad_norm": 1.0068146139019518e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 17980 }, { "epoch": 0.08724824891212492, "grad_norm": 6.393555196382295e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 17990 }, { "epoch": 0.087296747104961, "grad_norm": 6.636799554371464e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18000 }, { "epoch": 0.0873452452977971, "grad_norm": 8.860917688480185e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18010 }, { "epoch": 0.08739374349063318, "grad_norm": 8.77438282032017e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18020 }, { "epoch": 0.08744224168346927, "grad_norm": 1.0762479263348723e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18030 }, { "epoch": 0.08749073987630536, "grad_norm": 6.93660098249893e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18040 }, { "epoch": 0.08753923806914145, "grad_norm": 7.292773318567924e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18050 }, { "epoch": 0.08758773626197754, "grad_norm": 8.062463763280903e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18060 }, { "epoch": 0.08763623445481362, "grad_norm": 8.411993235313275e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18070 }, { "epoch": 0.08768473264764971, "grad_norm": 1.120715609204126e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18080 }, { "epoch": 0.0877332308404858, "grad_norm": 5.9571917176981515e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18090 }, { "epoch": 0.08778172903332189, "grad_norm": 6.085829085122896e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18100 }, { "epoch": 0.08783022722615799, "grad_norm": 8.08964983889382e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18110 }, { "epoch": 0.08787872541899408, "grad_norm": 1.3899540363127016e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18120 }, { "epoch": 0.08792722361183017, "grad_norm": 1.0096228209022229e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18130 }, { "epoch": 0.08797572180466626, "grad_norm": 3.4222082945234433e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18140 }, { "epoch": 0.08802421999750235, "grad_norm": 6.835112031922108e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18150 }, { "epoch": 0.08807271819033843, "grad_norm": 7.168518578737348e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18160 }, { "epoch": 0.08812121638317452, "grad_norm": 7.854507089177787e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18170 }, { "epoch": 0.08816971457601061, "grad_norm": 7.787181743879046e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18180 }, { "epoch": 0.0882182127688467, "grad_norm": 6.317604572814162e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18190 }, { "epoch": 0.08826671096168279, "grad_norm": 5.859958562837164e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18200 }, { "epoch": 0.08831520915451888, "grad_norm": 7.9146410314479e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18210 }, { "epoch": 0.08836370734735496, "grad_norm": 7.579436811511187e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18220 }, { "epoch": 0.08841220554019105, "grad_norm": 1.563522715741783e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18230 }, { "epoch": 0.08846070373302715, "grad_norm": 5.487552456884259e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18240 }, { "epoch": 0.08850920192586324, "grad_norm": 7.76172157657129e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18250 }, { "epoch": 0.08855770011869933, "grad_norm": 7.137168722692877e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18260 }, { "epoch": 0.08860619831153542, "grad_norm": 7.499882315187278e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18270 }, { "epoch": 0.08865469650437151, "grad_norm": 7.402153556768098e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18280 }, { "epoch": 0.0887031946972076, "grad_norm": 5.455219209693496e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18290 }, { "epoch": 0.08875169289004368, "grad_norm": 5.480192299955888e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18300 }, { "epoch": 0.08880019108287977, "grad_norm": 7.300378257468765e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18310 }, { "epoch": 0.08884868927571586, "grad_norm": 8.105704552008319e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18320 }, { "epoch": 0.08889718746855195, "grad_norm": 1.0595341848329554e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18330 }, { "epoch": 0.08894568566138804, "grad_norm": 5.5996359549226327e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18340 }, { "epoch": 0.08899418385422413, "grad_norm": 4.774691220177374e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18350 }, { "epoch": 0.08904268204706021, "grad_norm": 7.501508747509433e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18360 }, { "epoch": 0.08909118023989632, "grad_norm": 7.213566277641803e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18370 }, { "epoch": 0.0891396784327324, "grad_norm": 7.492886311410984e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18380 }, { "epoch": 0.0891881766255685, "grad_norm": 5.559051174941487e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18390 }, { "epoch": 0.08923667481840458, "grad_norm": 5.366542410456532e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18400 }, { "epoch": 0.08928517301124067, "grad_norm": 7.088587494763487e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18410 }, { "epoch": 0.08933367120407676, "grad_norm": 9.268030964904028e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18420 }, { "epoch": 0.08938216939691285, "grad_norm": 6.708349076234299e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18430 }, { "epoch": 0.08943066758974894, "grad_norm": 5.7966040856172185e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18440 }, { "epoch": 0.08947916578258502, "grad_norm": 1.1210649830672992e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18450 }, { "epoch": 0.08952766397542111, "grad_norm": 6.841144539748711e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18460 }, { "epoch": 0.0895761621682572, "grad_norm": 6.71244464456322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18470 }, { "epoch": 0.08962466036109329, "grad_norm": 7.024937076494098e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18480 }, { "epoch": 0.08967315855392938, "grad_norm": 7.541165558677676e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18490 }, { "epoch": 0.08972165674676548, "grad_norm": 5.194473828851187e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18500 }, { "epoch": 0.08977015493960157, "grad_norm": 7.320841888258656e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18510 }, { "epoch": 0.08981865313243766, "grad_norm": 6.9235042587934e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18520 }, { "epoch": 0.08986715132527374, "grad_norm": 8.306872700813983e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18530 }, { "epoch": 0.08991564951810983, "grad_norm": 5.657402368797193e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18540 }, { "epoch": 0.08996414771094592, "grad_norm": 5.829413041169573e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18550 }, { "epoch": 0.09001264590378201, "grad_norm": 8.650867044934785e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18560 }, { "epoch": 0.0900611440966181, "grad_norm": 6.115689643593214e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18570 }, { "epoch": 0.09010964228945419, "grad_norm": 7.551197711563873e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18580 }, { "epoch": 0.09015814048229027, "grad_norm": 9.808900358621031e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18590 }, { "epoch": 0.09020663867512636, "grad_norm": 4.896087446581987e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18600 }, { "epoch": 0.09025513686796245, "grad_norm": 6.533828411647846e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18610 }, { "epoch": 0.09030363506079855, "grad_norm": 8.43379197590366e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18620 }, { "epoch": 0.09035213325363464, "grad_norm": 8.692627062600877e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18630 }, { "epoch": 0.09040063144647073, "grad_norm": 1.1095544039108063e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18640 }, { "epoch": 0.09044912963930682, "grad_norm": 5.8713990114256376e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18650 }, { "epoch": 0.09049762783214291, "grad_norm": 6.441592148576092e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18660 }, { "epoch": 0.090546126024979, "grad_norm": 6.832412680068956e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18670 }, { "epoch": 0.09059462421781508, "grad_norm": 7.510554667078395e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18680 }, { "epoch": 0.09064312241065117, "grad_norm": 5.263183666670557e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18690 }, { "epoch": 0.09069162060348726, "grad_norm": 5.128428526290918e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18700 }, { "epoch": 0.09074011879632335, "grad_norm": 6.888988934861118e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18710 }, { "epoch": 0.09078861698915944, "grad_norm": 7.152046777036958e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18720 }, { "epoch": 0.09083711518199553, "grad_norm": 6.184831136124558e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18730 }, { "epoch": 0.09088561337483161, "grad_norm": 5.476170628071486e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18740 }, { "epoch": 0.09093411156766772, "grad_norm": 5.1326654926242554e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18750 }, { "epoch": 0.0909826097605038, "grad_norm": 5.848624340387687e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18760 }, { "epoch": 0.0910311079533399, "grad_norm": 6.15918196444909e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18770 }, { "epoch": 0.09107960614617598, "grad_norm": 7.224552689422126e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18780 }, { "epoch": 0.09112810433901207, "grad_norm": 5.667001801157312e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18790 }, { "epoch": 0.09117660253184816, "grad_norm": 6.471226043913703e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18800 }, { "epoch": 0.09122510072468425, "grad_norm": 6.866634549851369e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18810 }, { "epoch": 0.09127359891752033, "grad_norm": 7.759497577808361e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18820 }, { "epoch": 0.09132209711035642, "grad_norm": 6.909017002953988e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18830 }, { "epoch": 0.09137059530319251, "grad_norm": 5.09794446656997e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18840 }, { "epoch": 0.0914190934960286, "grad_norm": 5.4518999093033926e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18850 }, { "epoch": 0.09146759168886469, "grad_norm": 6.751653103265198e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18860 }, { "epoch": 0.09151608988170078, "grad_norm": 6.531141139021202e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18870 }, { "epoch": 0.09156458807453688, "grad_norm": 6.385541695408392e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18880 }, { "epoch": 0.09161308626737297, "grad_norm": 4.9104357913165586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18890 }, { "epoch": 0.09166158446020906, "grad_norm": 4.9874699925567256e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18900 }, { "epoch": 0.09171008265304514, "grad_norm": 9.374268472583935e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18910 }, { "epoch": 0.09175858084588123, "grad_norm": 6.438450128598561e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18920 }, { "epoch": 0.09180707903871732, "grad_norm": 6.530215301836506e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18930 }, { "epoch": 0.09185557723155341, "grad_norm": 1.0600238908864412e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18940 }, { "epoch": 0.0919040754243895, "grad_norm": 5.1526605204799125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18950 }, { "epoch": 0.09195257361722559, "grad_norm": 2.5716346385706856e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 18960 }, { "epoch": 0.09200107181006167, "grad_norm": 8.34128499604958e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18970 }, { "epoch": 0.09204957000289776, "grad_norm": 7.207502505934826e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18980 }, { "epoch": 0.09209806819573385, "grad_norm": 5.305272665623306e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 18990 }, { "epoch": 0.09214656638856994, "grad_norm": 6.00338836420633e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19000 }, { "epoch": 0.09219506458140604, "grad_norm": 6.467256241649011e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19010 }, { "epoch": 0.09224356277424213, "grad_norm": 6.333212354547868e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19020 }, { "epoch": 0.09229206096707822, "grad_norm": 7.065295903885271e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19030 }, { "epoch": 0.0923405591599143, "grad_norm": 5.226698363003379e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19040 }, { "epoch": 0.0923890573527504, "grad_norm": 4.469752568070362e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19050 }, { "epoch": 0.09243755554558648, "grad_norm": 9.165828629420503e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19060 }, { "epoch": 0.09248605373842257, "grad_norm": 7.444754857033331e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19070 }, { "epoch": 0.09253455193125866, "grad_norm": 6.277016950662073e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19080 }, { "epoch": 0.09258305012409475, "grad_norm": 5.827073579212083e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19090 }, { "epoch": 0.09263154831693084, "grad_norm": 5.8868597108130416e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19100 }, { "epoch": 0.09268004650976693, "grad_norm": 6.316316358834229e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19110 }, { "epoch": 0.09272854470260301, "grad_norm": 6.827423248978448e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19120 }, { "epoch": 0.0927770428954391, "grad_norm": 5.986498763377313e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19130 }, { "epoch": 0.0928255410882752, "grad_norm": 6.126634133352127e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19140 }, { "epoch": 0.09287403928111129, "grad_norm": 7.297936832628693e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19150 }, { "epoch": 0.09292253747394738, "grad_norm": 6.163556776073165e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19160 }, { "epoch": 0.09297103566678347, "grad_norm": 6.185958056903473e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19170 }, { "epoch": 0.09301953385961956, "grad_norm": 6.306417077439619e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19180 }, { "epoch": 0.09306803205245565, "grad_norm": 5.2817668461102585e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19190 }, { "epoch": 0.09311653024529173, "grad_norm": 4.9366423837682305e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19200 }, { "epoch": 0.09316502843812782, "grad_norm": 6.005807762221593e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19210 }, { "epoch": 0.09321352663096391, "grad_norm": 6.423889686857365e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19220 }, { "epoch": 0.0932620248238, "grad_norm": 5.858276352910252e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19230 }, { "epoch": 0.09331052301663609, "grad_norm": 6.4752853745631e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19240 }, { "epoch": 0.09335902120947218, "grad_norm": 4.750003412823389e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19250 }, { "epoch": 0.09340751940230828, "grad_norm": 9.532158884439923e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19260 }, { "epoch": 0.09345601759514437, "grad_norm": 1.3429091438865726e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 19270 }, { "epoch": 0.09350451578798046, "grad_norm": 7.782681876733477e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19280 }, { "epoch": 0.09355301398081654, "grad_norm": 4.9384905054239425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19290 }, { "epoch": 0.09360151217365263, "grad_norm": 7.052211969948985e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19300 }, { "epoch": 0.09365001036648872, "grad_norm": 6.429085175341243e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19310 }, { "epoch": 0.09369850855932481, "grad_norm": 6.262569485215863e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19320 }, { "epoch": 0.0937470067521609, "grad_norm": 6.095700655350811e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19330 }, { "epoch": 0.09379550494499699, "grad_norm": 4.8347200021225945e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19340 }, { "epoch": 0.09384400313783307, "grad_norm": 5.1163155490030476e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19350 }, { "epoch": 0.09389250133066916, "grad_norm": 5.614378650875551e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19360 }, { "epoch": 0.09394099952350525, "grad_norm": 6.583361766843154e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19370 }, { "epoch": 0.09398949771634134, "grad_norm": 5.917682699418947e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19380 }, { "epoch": 0.09403799590917744, "grad_norm": 5.4493416001832884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19390 }, { "epoch": 0.09408649410201353, "grad_norm": 4.826513944067301e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19400 }, { "epoch": 0.09413499229484962, "grad_norm": 6.434139265820704e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19410 }, { "epoch": 0.0941834904876857, "grad_norm": 6.407261565755107e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19420 }, { "epoch": 0.0942319886805218, "grad_norm": 6.55625740364485e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19430 }, { "epoch": 0.09428048687335788, "grad_norm": 8.534326667586356e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19440 }, { "epoch": 0.09432898506619397, "grad_norm": 5.857385332319609e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19450 }, { "epoch": 0.09437748325903006, "grad_norm": 6.143171304984207e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19460 }, { "epoch": 0.09442598145186615, "grad_norm": 6.535119467798722e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19470 }, { "epoch": 0.09447447964470224, "grad_norm": 6.120576045987036e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19480 }, { "epoch": 0.09452297783753832, "grad_norm": 5.721298634853156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19490 }, { "epoch": 0.09457147603037441, "grad_norm": 5.4829140339052174e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19500 }, { "epoch": 0.0946199742232105, "grad_norm": 6.414128961296228e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19510 }, { "epoch": 0.0946684724160466, "grad_norm": 6.3270633177126e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19520 }, { "epoch": 0.09471697060888269, "grad_norm": 6.474276403878321e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19530 }, { "epoch": 0.09476546880171878, "grad_norm": 5.195924757117609e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19540 }, { "epoch": 0.09481396699455487, "grad_norm": 5.143322567846553e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19550 }, { "epoch": 0.09486246518739096, "grad_norm": 2.999189803176705e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 19560 }, { "epoch": 0.09491096338022705, "grad_norm": 8.062096412686515e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19570 }, { "epoch": 0.09495946157306313, "grad_norm": 6.06565109251278e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19580 }, { "epoch": 0.09500795976589922, "grad_norm": 5.091679966540141e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19590 }, { "epoch": 0.09505645795873531, "grad_norm": 5.458583274275952e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19600 }, { "epoch": 0.0951049561515714, "grad_norm": 6.745762703985747e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19610 }, { "epoch": 0.09515345434440749, "grad_norm": 6.486271786343423e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19620 }, { "epoch": 0.09520195253724358, "grad_norm": 6.311949363180247e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19630 }, { "epoch": 0.09525045073007966, "grad_norm": 4.8101043148562894e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19640 }, { "epoch": 0.09529894892291577, "grad_norm": 4.7999243690810545e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19650 }, { "epoch": 0.09534744711575185, "grad_norm": 7.109392896609279e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19660 }, { "epoch": 0.09539594530858794, "grad_norm": 7.648912259128338e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19670 }, { "epoch": 0.09544444350142403, "grad_norm": 6.260258089696435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19680 }, { "epoch": 0.09549294169426012, "grad_norm": 5.53226904287385e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19690 }, { "epoch": 0.09554143988709621, "grad_norm": 5.522467461105407e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19700 }, { "epoch": 0.0955899380799323, "grad_norm": 6.353828041483212e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19710 }, { "epoch": 0.09563843627276838, "grad_norm": 6.941991159692407e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19720 }, { "epoch": 0.09568693446560447, "grad_norm": 5.931378055379355e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19730 }, { "epoch": 0.09573543265844056, "grad_norm": 5.2669339112298985e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19740 }, { "epoch": 0.09578393085127665, "grad_norm": 5.207850506394607e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19750 }, { "epoch": 0.09583242904411274, "grad_norm": 7.23596613738664e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19760 }, { "epoch": 0.09588092723694883, "grad_norm": 6.389800688566538e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19770 }, { "epoch": 0.09592942542978493, "grad_norm": 1.1578054426308881e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 19780 }, { "epoch": 0.09597792362262102, "grad_norm": 5.1432390790751015e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19790 }, { "epoch": 0.0960264218154571, "grad_norm": 5.1320579785851805e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19800 }, { "epoch": 0.0960749200082932, "grad_norm": 6.460664536689364e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19810 }, { "epoch": 0.09612341820112928, "grad_norm": 6.491762860605377e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19820 }, { "epoch": 0.09617191639396537, "grad_norm": 5.717768303270532e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19830 }, { "epoch": 0.09622041458680146, "grad_norm": 6.867506385788147e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19840 }, { "epoch": 0.09626891277963755, "grad_norm": 5.397945557206185e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19850 }, { "epoch": 0.09631741097247364, "grad_norm": 6.61308021676632e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19860 }, { "epoch": 0.09636590916530972, "grad_norm": 6.83920404753735e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19870 }, { "epoch": 0.09641440735814581, "grad_norm": 9.16263829253694e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19880 }, { "epoch": 0.0964629055509819, "grad_norm": 5.150987902879933e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19890 }, { "epoch": 0.096511403743818, "grad_norm": 4.589173130398194e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19900 }, { "epoch": 0.09655990193665409, "grad_norm": 8.628710190805577e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19910 }, { "epoch": 0.09660840012949018, "grad_norm": 6.605226587907964e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19920 }, { "epoch": 0.09665689832232627, "grad_norm": 6.966160270849286e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19930 }, { "epoch": 0.09670539651516236, "grad_norm": 5.2595172661540346e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19940 }, { "epoch": 0.09675389470799844, "grad_norm": 5.059279217789481e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19950 }, { "epoch": 0.09680239290083453, "grad_norm": 6.711663047553884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19960 }, { "epoch": 0.09685089109367062, "grad_norm": 6.04783849667001e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19970 }, { "epoch": 0.09689938928650671, "grad_norm": 7.635195231614489e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19980 }, { "epoch": 0.0969478874793428, "grad_norm": 4.980662993148144e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 19990 }, { "epoch": 0.09699638567217889, "grad_norm": 5.121515656014708e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20000 }, { "epoch": 0.09704488386501497, "grad_norm": 6.373976191298425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20010 }, { "epoch": 0.09709338205785106, "grad_norm": 6.317939238442705e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20020 }, { "epoch": 0.09714188025068717, "grad_norm": 7.074112318150583e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20030 }, { "epoch": 0.09719037844352325, "grad_norm": 5.1866713590698055e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20040 }, { "epoch": 0.09723887663635934, "grad_norm": 5.677921421920473e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20050 }, { "epoch": 0.09728737482919543, "grad_norm": 6.06311232331791e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20060 }, { "epoch": 0.09733587302203152, "grad_norm": 7.384205247262798e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20070 }, { "epoch": 0.09738437121486761, "grad_norm": 6.931086460326696e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20080 }, { "epoch": 0.0974328694077037, "grad_norm": 5.973826944227767e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20090 }, { "epoch": 0.09748136760053978, "grad_norm": 5.087345655852005e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20100 }, { "epoch": 0.09752986579337587, "grad_norm": 5.959399373978158e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20110 }, { "epoch": 0.09757836398621196, "grad_norm": 6.976015498594279e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20120 }, { "epoch": 0.09762686217904805, "grad_norm": 5.743675046687713e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20130 }, { "epoch": 0.09767536037188414, "grad_norm": 5.50636372054214e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20140 }, { "epoch": 0.09772385856472023, "grad_norm": 5.110740275426906e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20150 }, { "epoch": 0.09777235675755633, "grad_norm": 5.863685714757594e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20160 }, { "epoch": 0.09782085495039242, "grad_norm": 6.752503622919903e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20170 }, { "epoch": 0.0978693531432285, "grad_norm": 7.707284055413766e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20180 }, { "epoch": 0.09791785133606459, "grad_norm": 5.15951015245264e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20190 }, { "epoch": 0.09796634952890068, "grad_norm": 5.3071648409286354e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20200 }, { "epoch": 0.09801484772173677, "grad_norm": 7.297278159512643e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20210 }, { "epoch": 0.09806334591457286, "grad_norm": 6.574722277719047e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20220 }, { "epoch": 0.09811184410740895, "grad_norm": 6.49164846322492e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20230 }, { "epoch": 0.09816034230024503, "grad_norm": 5.889416954119042e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20240 }, { "epoch": 0.09820884049308112, "grad_norm": 1.0945365858106015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20250 }, { "epoch": 0.09825733868591721, "grad_norm": 7.913379818091926e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20260 }, { "epoch": 0.0983058368787533, "grad_norm": 8.237519466547383e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20270 }, { "epoch": 0.09835433507158939, "grad_norm": 6.117638662317404e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20280 }, { "epoch": 0.09840283326442549, "grad_norm": 5.426666405128344e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20290 }, { "epoch": 0.09845133145726158, "grad_norm": 6.173600297643134e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20300 }, { "epoch": 0.09849982965009767, "grad_norm": 6.353911174983295e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20310 }, { "epoch": 0.09854832784293376, "grad_norm": 6.017027232019245e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20320 }, { "epoch": 0.09859682603576984, "grad_norm": 6.340851399500025e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20330 }, { "epoch": 0.09864532422860593, "grad_norm": 8.650327032455607e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20340 }, { "epoch": 0.09869382242144202, "grad_norm": 8.310267674005445e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20350 }, { "epoch": 0.09874232061427811, "grad_norm": 6.504512839455856e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20360 }, { "epoch": 0.0987908188071142, "grad_norm": 7.442072558205837e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20370 }, { "epoch": 0.09883931699995029, "grad_norm": 8.050694333405772e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20380 }, { "epoch": 0.09888781519278637, "grad_norm": 6.299749344407246e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20390 }, { "epoch": 0.09893631338562246, "grad_norm": 5.729990348868341e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20400 }, { "epoch": 0.09898481157845855, "grad_norm": 8.584679989098731e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20410 }, { "epoch": 0.09903330977129465, "grad_norm": 9.335021644574226e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20420 }, { "epoch": 0.09908180796413074, "grad_norm": 9.868181649608232e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20430 }, { "epoch": 0.09913030615696683, "grad_norm": 5.8228636135027045e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20440 }, { "epoch": 0.09917880434980292, "grad_norm": 6.060006541019902e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20450 }, { "epoch": 0.099227302542639, "grad_norm": 6.268179220114689e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20460 }, { "epoch": 0.0992758007354751, "grad_norm": 6.946486763581561e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20470 }, { "epoch": 0.09932429892831118, "grad_norm": 1.262784934397132e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20480 }, { "epoch": 0.09937279712114727, "grad_norm": 5.669035729738425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20490 }, { "epoch": 0.09942129531398336, "grad_norm": 6.08435186677525e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20500 }, { "epoch": 0.09946979350681945, "grad_norm": 8.721931266109095e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20510 }, { "epoch": 0.09951829169965554, "grad_norm": 6.86923868897793e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20520 }, { "epoch": 0.09956678989249163, "grad_norm": 6.535574215149609e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20530 }, { "epoch": 0.09961528808532773, "grad_norm": 1.0323956445290605e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20540 }, { "epoch": 0.09966378627816382, "grad_norm": 8.247894811574952e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20550 }, { "epoch": 0.0997122844709999, "grad_norm": 6.844913968961919e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20560 }, { "epoch": 0.09976078266383599, "grad_norm": 6.76729356996475e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20570 }, { "epoch": 0.09980928085667208, "grad_norm": 7.60832108426257e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20580 }, { "epoch": 0.09985777904950817, "grad_norm": 5.9141530783790586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20590 }, { "epoch": 0.09990627724234426, "grad_norm": 7.216623032491043e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20600 }, { "epoch": 0.09995477543518035, "grad_norm": 7.038796923097834e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20610 }, { "epoch": 0.10000327362801643, "grad_norm": 7.330967122243237e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20620 }, { "epoch": 0.10005177182085252, "grad_norm": 7.026759618611322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20630 }, { "epoch": 0.10010027001368861, "grad_norm": 9.133450618037386e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20640 }, { "epoch": 0.1001487682065247, "grad_norm": 8.191783962274712e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20650 }, { "epoch": 0.10019726639936079, "grad_norm": 7.055940898226254e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20660 }, { "epoch": 0.10024576459219689, "grad_norm": 7.418208269882598e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20670 }, { "epoch": 0.10029426278503298, "grad_norm": 7.011237102005907e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20680 }, { "epoch": 0.10034276097786907, "grad_norm": 6.696117793580925e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20690 }, { "epoch": 0.10039125917070515, "grad_norm": 9.788283961142952e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20700 }, { "epoch": 0.10043975736354124, "grad_norm": 7.252987188621773e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20710 }, { "epoch": 0.10048825555637733, "grad_norm": 7.546425706550508e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20720 }, { "epoch": 0.10053675374921342, "grad_norm": 7.998162487865557e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20730 }, { "epoch": 0.10058525194204951, "grad_norm": 1.498323030091342e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20740 }, { "epoch": 0.1006337501348856, "grad_norm": 6.50128271217909e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20750 }, { "epoch": 0.10068224832772169, "grad_norm": 7.554407943644037e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20760 }, { "epoch": 0.10073074652055777, "grad_norm": 8.728101619226436e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20770 }, { "epoch": 0.10077924471339386, "grad_norm": 7.884514729994407e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20780 }, { "epoch": 0.10082774290622995, "grad_norm": 6.840873112423651e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20790 }, { "epoch": 0.10087624109906605, "grad_norm": 6.776261329832778e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20800 }, { "epoch": 0.10092473929190214, "grad_norm": 8.534457407449736e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20810 }, { "epoch": 0.10097323748473823, "grad_norm": 8.436224163688166e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20820 }, { "epoch": 0.10102173567757432, "grad_norm": 8.258979278252809e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20830 }, { "epoch": 0.1010702338704104, "grad_norm": 7.199206208952091e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20840 }, { "epoch": 0.1011187320632465, "grad_norm": 7.869788021253044e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20850 }, { "epoch": 0.10116723025608258, "grad_norm": 7.591802955175808e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20860 }, { "epoch": 0.10121572844891867, "grad_norm": 2.228965740869171e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20870 }, { "epoch": 0.10126422664175476, "grad_norm": 1.2998594911550754e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20880 }, { "epoch": 0.10131272483459085, "grad_norm": 8.058213296635586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20890 }, { "epoch": 0.10136122302742694, "grad_norm": 7.54891402721114e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20900 }, { "epoch": 0.10140972122026302, "grad_norm": 8.623215563829945e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20910 }, { "epoch": 0.10145821941309911, "grad_norm": 8.051708277889702e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20920 }, { "epoch": 0.10150671760593521, "grad_norm": 8.461705647277995e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20930 }, { "epoch": 0.1015552157987713, "grad_norm": 7.267654211773333e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20940 }, { "epoch": 0.10160371399160739, "grad_norm": 7.822571035376313e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20950 }, { "epoch": 0.10165221218444348, "grad_norm": 8.169877219188493e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20960 }, { "epoch": 0.10170071037727957, "grad_norm": 1.0063829591899776e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20970 }, { "epoch": 0.10174920857011566, "grad_norm": 2.7635951482807286e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 20980 }, { "epoch": 0.10179770676295175, "grad_norm": 8.776491711159906e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 20990 }, { "epoch": 0.10184620495578783, "grad_norm": 9.928854183272051e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21000 }, { "epoch": 0.10189470314862392, "grad_norm": 8.925528760528323e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21010 }, { "epoch": 0.10194320134146001, "grad_norm": 8.290369635233219e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21020 }, { "epoch": 0.1019916995342961, "grad_norm": 8.010609064967866e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21030 }, { "epoch": 0.10204019772713219, "grad_norm": 1.0369086567152408e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21040 }, { "epoch": 0.10208869591996828, "grad_norm": 1.1654906018065958e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21050 }, { "epoch": 0.10213719411280438, "grad_norm": 1.0006755246649846e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21060 }, { "epoch": 0.10218569230564047, "grad_norm": 7.651156863630604e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21070 }, { "epoch": 0.10223419049847655, "grad_norm": 9.44048039741574e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21080 }, { "epoch": 0.10228268869131264, "grad_norm": 7.951969394071057e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21090 }, { "epoch": 0.10233118688414873, "grad_norm": 7.473200014374015e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21100 }, { "epoch": 0.10237968507698482, "grad_norm": 8.422117048212385e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21110 }, { "epoch": 0.10242818326982091, "grad_norm": 7.091589537822074e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21120 }, { "epoch": 0.102476681462657, "grad_norm": 1.7115660000399657e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21130 }, { "epoch": 0.10252517965549308, "grad_norm": 9.135413847616292e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21140 }, { "epoch": 0.10257367784832917, "grad_norm": 8.704832055173028e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21150 }, { "epoch": 0.10262217604116526, "grad_norm": 1.1432311453063448e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21160 }, { "epoch": 0.10267067423400135, "grad_norm": 8.77651444852745e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21170 }, { "epoch": 0.10271917242683745, "grad_norm": 9.717028603972722e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21180 }, { "epoch": 0.10276767061967354, "grad_norm": 8.702385656533806e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21190 }, { "epoch": 0.10281616881250963, "grad_norm": 8.391792505335616e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21200 }, { "epoch": 0.10286466700534572, "grad_norm": 8.961468012103069e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21210 }, { "epoch": 0.1029131651981818, "grad_norm": 9.520967125808966e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21220 }, { "epoch": 0.1029616633910179, "grad_norm": 9.309817272651344e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21230 }, { "epoch": 0.10301016158385398, "grad_norm": 1.012347397022495e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21240 }, { "epoch": 0.10305865977669007, "grad_norm": 1.17611342886903e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21250 }, { "epoch": 0.10310715796952616, "grad_norm": 1.110861447273237e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21260 }, { "epoch": 0.10315565616236225, "grad_norm": 1.1320069148723633e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21270 }, { "epoch": 0.10320415435519834, "grad_norm": 9.737380679553098e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21280 }, { "epoch": 0.10325265254803442, "grad_norm": 9.695789060515381e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21290 }, { "epoch": 0.10330115074087051, "grad_norm": 9.27002830053425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21300 }, { "epoch": 0.10334964893370661, "grad_norm": 9.245611920505326e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21310 }, { "epoch": 0.1033981471265427, "grad_norm": 9.060970995733442e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21320 }, { "epoch": 0.10344664531937879, "grad_norm": 1.0177398479527255e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21330 }, { "epoch": 0.10349514351221488, "grad_norm": 9.39400592869788e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21340 }, { "epoch": 0.10354364170505097, "grad_norm": 1.0237640424293204e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21350 }, { "epoch": 0.10359213989788706, "grad_norm": 9.513537690963858e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21360 }, { "epoch": 0.10364063809072314, "grad_norm": 8.949295704496762e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21370 }, { "epoch": 0.10368913628355923, "grad_norm": 1.3026405554228404e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21380 }, { "epoch": 0.10373763447639532, "grad_norm": 1.3177836422073597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21390 }, { "epoch": 0.10378613266923141, "grad_norm": 8.584742516859478e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21400 }, { "epoch": 0.1038346308620675, "grad_norm": 1.0201123501474285e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21410 }, { "epoch": 0.10388312905490359, "grad_norm": 1.0015524765094597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21420 }, { "epoch": 0.10393162724773967, "grad_norm": 9.45244593708594e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21430 }, { "epoch": 0.10398012544057578, "grad_norm": 9.469711415022175e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21440 }, { "epoch": 0.10402862363341187, "grad_norm": 1.0521421245357487e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21450 }, { "epoch": 0.10407712182624795, "grad_norm": 1.056581737657325e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21460 }, { "epoch": 0.10412562001908404, "grad_norm": 1.1286275025668147e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21470 }, { "epoch": 0.10417411821192013, "grad_norm": 1.0304725606147258e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21480 }, { "epoch": 0.10422261640475622, "grad_norm": 9.042962290095602e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21490 }, { "epoch": 0.10427111459759231, "grad_norm": 9.723414251538998e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21500 }, { "epoch": 0.1043196127904284, "grad_norm": 1.3571592205607885e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21510 }, { "epoch": 0.10436811098326448, "grad_norm": 9.276681822711907e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21520 }, { "epoch": 0.10441660917610057, "grad_norm": 1.0264891869837811e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21530 }, { "epoch": 0.10446510736893666, "grad_norm": 1.0037337716539696e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21540 }, { "epoch": 0.10451360556177275, "grad_norm": 1.0109538095548487e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21550 }, { "epoch": 0.10456210375460884, "grad_norm": 9.382412002878482e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21560 }, { "epoch": 0.10461060194744494, "grad_norm": 9.650525356619255e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 21570 }, { "epoch": 0.10465910014028103, "grad_norm": 1.0895946189748429e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21580 }, { "epoch": 0.10470759833311712, "grad_norm": 1.0789425175516953e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21590 }, { "epoch": 0.1047560965259532, "grad_norm": 1.09008176707448e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21600 }, { "epoch": 0.10480459471878929, "grad_norm": 1.0772671288350466e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21610 }, { "epoch": 0.10485309291162538, "grad_norm": 1.0575691078429372e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21620 }, { "epoch": 0.10490159110446147, "grad_norm": 1.1179011494277802e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21630 }, { "epoch": 0.10495008929729756, "grad_norm": 1.0292455243643417e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21640 }, { "epoch": 0.10499858749013365, "grad_norm": 1.0558422758322195e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21650 }, { "epoch": 0.10504708568296973, "grad_norm": 1.2466463772398129e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21660 }, { "epoch": 0.10509558387580582, "grad_norm": 1.0221340573934867e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21670 }, { "epoch": 0.10514408206864191, "grad_norm": 1.2922720316055347e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21680 }, { "epoch": 0.105192580261478, "grad_norm": 1.1596268478797356e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21690 }, { "epoch": 0.1052410784543141, "grad_norm": 1.120397357112779e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21700 }, { "epoch": 0.10528957664715019, "grad_norm": 1.2335497956428298e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21710 }, { "epoch": 0.10533807483998628, "grad_norm": 1.472513133649045e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21720 }, { "epoch": 0.10538657303282237, "grad_norm": 1.1936380417409964e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21730 }, { "epoch": 0.10543507122565846, "grad_norm": 1.351982632513682e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21740 }, { "epoch": 0.10548356941849454, "grad_norm": 1.561385829518258e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21750 }, { "epoch": 0.10553206761133063, "grad_norm": 1.257431279100274e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21760 }, { "epoch": 0.10558056580416672, "grad_norm": 8.754814189160243e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21770 }, { "epoch": 0.10562906399700281, "grad_norm": 1.4905472767168249e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21780 }, { "epoch": 0.1056775621898389, "grad_norm": 1.2288116124636872e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21790 }, { "epoch": 0.10572606038267499, "grad_norm": 1.3733033199514466e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21800 }, { "epoch": 0.10577455857551107, "grad_norm": 1.9449831256679317e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21810 }, { "epoch": 0.10582305676834718, "grad_norm": 1.4954488847251923e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21820 }, { "epoch": 0.10587155496118326, "grad_norm": 1.4010456084179168e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21830 }, { "epoch": 0.10592005315401935, "grad_norm": 1.3467548853896005e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21840 }, { "epoch": 0.10596855134685544, "grad_norm": 1.4417614124795364e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21850 }, { "epoch": 0.10601704953969153, "grad_norm": 2.0744896289670578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21860 }, { "epoch": 0.10606554773252762, "grad_norm": 1.25650686300105e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21870 }, { "epoch": 0.1061140459253637, "grad_norm": 1.1957853018884634e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21880 }, { "epoch": 0.1061625441181998, "grad_norm": 1.2182766795376665e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21890 }, { "epoch": 0.10621104231103588, "grad_norm": 1.338700172937024e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21900 }, { "epoch": 0.10625954050387197, "grad_norm": 2.1165915597975982e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21910 }, { "epoch": 0.10630803869670806, "grad_norm": 1.3492591222075134e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21920 }, { "epoch": 0.10635653688954415, "grad_norm": 1.59622203455001e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21930 }, { "epoch": 0.10640503508238024, "grad_norm": 1.4556719918346062e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21940 }, { "epoch": 0.10645353327521634, "grad_norm": 1.7224675730176386e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 21950 }, { "epoch": 0.10650203146805243, "grad_norm": 1.352865126591496e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21960 }, { "epoch": 0.10655052966088852, "grad_norm": 1.3259220565942087e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21970 }, { "epoch": 0.1065990278537246, "grad_norm": 1.174255928049206e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21980 }, { "epoch": 0.10664752604656069, "grad_norm": 1.2235190638421045e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 21990 }, { "epoch": 0.10669602423939678, "grad_norm": 1.210410829344255e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22000 }, { "epoch": 0.10674452243223287, "grad_norm": 1.3875020954401407e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22010 }, { "epoch": 0.10679302062506896, "grad_norm": 1.8254843325848924e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22020 }, { "epoch": 0.10684151881790505, "grad_norm": 1.5640951289697114e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22030 }, { "epoch": 0.10689001701074113, "grad_norm": 1.2976974517187045e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22040 }, { "epoch": 0.10693851520357722, "grad_norm": 1.4538477444148157e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22050 }, { "epoch": 0.10698701339641331, "grad_norm": 1.3775900242762873e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22060 }, { "epoch": 0.1070355115892494, "grad_norm": 1.4079844845582556e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22070 }, { "epoch": 0.1070840097820855, "grad_norm": 1.2175951269455254e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22080 }, { "epoch": 0.10713250797492159, "grad_norm": 1.3589716729711654e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22090 }, { "epoch": 0.10718100616775768, "grad_norm": 1.4409386039915262e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22100 }, { "epoch": 0.10722950436059377, "grad_norm": 1.2041455477174168e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22110 }, { "epoch": 0.10727800255342985, "grad_norm": 1.1864852922371938e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22120 }, { "epoch": 0.10732650074626594, "grad_norm": 1.1590370974090547e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22130 }, { "epoch": 0.10737499893910203, "grad_norm": 1.2480998634600837e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22140 }, { "epoch": 0.10742349713193812, "grad_norm": 1.1339734840021265e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22150 }, { "epoch": 0.10747199532477421, "grad_norm": 1.5193722902040463e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22160 }, { "epoch": 0.1075204935176103, "grad_norm": 1.2498534829319397e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22170 }, { "epoch": 0.10756899171044638, "grad_norm": 1.2010846717203094e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22180 }, { "epoch": 0.10761748990328247, "grad_norm": 1.1047458770008234e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22190 }, { "epoch": 0.10766598809611856, "grad_norm": 1.1696584323317438e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22200 }, { "epoch": 0.10771448628895466, "grad_norm": 1.1387555076680655e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22210 }, { "epoch": 0.10776298448179075, "grad_norm": 1.003240512886805e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22220 }, { "epoch": 0.10781148267462684, "grad_norm": 1.1787764719883853e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22230 }, { "epoch": 0.10785998086746293, "grad_norm": 1.364476389653646e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22240 }, { "epoch": 0.10790847906029902, "grad_norm": 1.2458973230877746e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22250 }, { "epoch": 0.1079569772531351, "grad_norm": 1.8815774183167377e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22260 }, { "epoch": 0.1080054754459712, "grad_norm": 1.249517254109378e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22270 }, { "epoch": 0.10805397363880728, "grad_norm": 1.4015490990004764e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22280 }, { "epoch": 0.10810247183164337, "grad_norm": 1.3029418255428027e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22290 }, { "epoch": 0.10815097002447946, "grad_norm": 1.1224245355379026e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22300 }, { "epoch": 0.10819946821731555, "grad_norm": 1.1660230114785008e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22310 }, { "epoch": 0.10824796641015164, "grad_norm": 1.5195246305665933e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22320 }, { "epoch": 0.10829646460298772, "grad_norm": 1.2278816541311244e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22330 }, { "epoch": 0.10834496279582383, "grad_norm": 1.3692043410173937e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22340 }, { "epoch": 0.10839346098865991, "grad_norm": 1.0373408798614037e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22350 }, { "epoch": 0.108441959181496, "grad_norm": 1.533416593701986e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22360 }, { "epoch": 0.10849045737433209, "grad_norm": 1.3497442807874904e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22370 }, { "epoch": 0.10853895556716818, "grad_norm": 1.03509016469161e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22380 }, { "epoch": 0.10858745376000427, "grad_norm": 1.122924544461057e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22390 }, { "epoch": 0.10863595195284036, "grad_norm": 9.718839066863438e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22400 }, { "epoch": 0.10868445014567645, "grad_norm": 1.1142783051809602e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22410 }, { "epoch": 0.10873294833851253, "grad_norm": 1.0410251150005934e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22420 }, { "epoch": 0.10878144653134862, "grad_norm": 1.3446836533148598e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22430 }, { "epoch": 0.10882994472418471, "grad_norm": 1.0179389420272855e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22440 }, { "epoch": 0.1088784429170208, "grad_norm": 1.0265700467471106e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22450 }, { "epoch": 0.1089269411098569, "grad_norm": 1.0957496243690912e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22460 }, { "epoch": 0.10897543930269299, "grad_norm": 1.1769621011126219e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22470 }, { "epoch": 0.10902393749552908, "grad_norm": 1.354679568521533e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22480 }, { "epoch": 0.10907243568836517, "grad_norm": 1.3629296802264435e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22490 }, { "epoch": 0.10912093388120125, "grad_norm": 1.222592231897579e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22500 }, { "epoch": 0.10916943207403734, "grad_norm": 1.0850771303694273e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22510 }, { "epoch": 0.10921793026687343, "grad_norm": 1.0386423809904954e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22520 }, { "epoch": 0.10926642845970952, "grad_norm": 2.4462235614919337e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22530 }, { "epoch": 0.10931492665254561, "grad_norm": 9.786785426513234e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22540 }, { "epoch": 0.1093634248453817, "grad_norm": 9.994587202299954e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22550 }, { "epoch": 0.10941192303821778, "grad_norm": 1.7222788528670208e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22560 }, { "epoch": 0.10946042123105387, "grad_norm": 1.1647713193951859e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22570 }, { "epoch": 0.10950891942388996, "grad_norm": 9.500114117599878e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22580 }, { "epoch": 0.10955741761672606, "grad_norm": 1.0055985200096984e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22590 }, { "epoch": 0.10960591580956215, "grad_norm": 9.580924142937874e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22600 }, { "epoch": 0.10965441400239824, "grad_norm": 1.6007771819204208e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22610 }, { "epoch": 0.10970291219523433, "grad_norm": 1.561609224154381e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22620 }, { "epoch": 0.10975141038807042, "grad_norm": 9.787500232505408e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22630 }, { "epoch": 0.1097999085809065, "grad_norm": 9.310716819754816e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22640 }, { "epoch": 0.1098484067737426, "grad_norm": 1.2511735292264348e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22650 }, { "epoch": 0.10989690496657868, "grad_norm": 4.2735808847282897e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22660 }, { "epoch": 0.10994540315941477, "grad_norm": 1.1458124760110877e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22670 }, { "epoch": 0.10999390135225086, "grad_norm": 1.096128059430157e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22680 }, { "epoch": 0.11004239954508695, "grad_norm": 9.388476485128194e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22690 }, { "epoch": 0.11009089773792304, "grad_norm": 1.7004295216338505e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22700 }, { "epoch": 0.11013939593075912, "grad_norm": 1.054767295727288e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22710 }, { "epoch": 0.11018789412359523, "grad_norm": 9.759524033370326e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22720 }, { "epoch": 0.11023639231643131, "grad_norm": 9.159364111610557e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22730 }, { "epoch": 0.1102848905092674, "grad_norm": 1.1434683955258151e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22740 }, { "epoch": 0.11033338870210349, "grad_norm": 1.2531121740266826e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22750 }, { "epoch": 0.11038188689493958, "grad_norm": 1.6693708460024936e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22760 }, { "epoch": 0.11043038508777567, "grad_norm": 1.339376041187279e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22770 }, { "epoch": 0.11047888328061176, "grad_norm": 1.1250850917576827e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22780 }, { "epoch": 0.11052738147344784, "grad_norm": 9.639403941719138e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22790 }, { "epoch": 0.11057587966628393, "grad_norm": 1.0529836202977094e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22800 }, { "epoch": 0.11062437785912002, "grad_norm": 1.4637069511991285e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22810 }, { "epoch": 0.11067287605195611, "grad_norm": 2.1447557685405627e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22820 }, { "epoch": 0.1107213742447922, "grad_norm": 1.573411054778262e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22830 }, { "epoch": 0.11076987243762829, "grad_norm": 8.853989186263789e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22840 }, { "epoch": 0.11081837063046439, "grad_norm": 1.0298739994141215e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22850 }, { "epoch": 0.11086686882330048, "grad_norm": 1.3959245848127466e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22860 }, { "epoch": 0.11091536701613657, "grad_norm": 1.3803581566662615e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22870 }, { "epoch": 0.11096386520897265, "grad_norm": 9.079655427512989e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22880 }, { "epoch": 0.11101236340180874, "grad_norm": 9.945206613792834e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22890 }, { "epoch": 0.11106086159464483, "grad_norm": 9.496189790070275e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22900 }, { "epoch": 0.11110935978748092, "grad_norm": 1.1864845106401845e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22910 }, { "epoch": 0.111157857980317, "grad_norm": 8.653145044945632e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22920 }, { "epoch": 0.1112063561731531, "grad_norm": 8.337131873759063e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22930 }, { "epoch": 0.11125485436598918, "grad_norm": 1.3318086189428868e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22940 }, { "epoch": 0.11130335255882527, "grad_norm": 9.681903634373157e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22950 }, { "epoch": 0.11135185075166136, "grad_norm": 1.0894476787370877e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22960 }, { "epoch": 0.11140034894449745, "grad_norm": 8.609995205688392e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 22970 }, { "epoch": 0.11144884713733355, "grad_norm": 1.0925889171176095e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22980 }, { "epoch": 0.11149734533016964, "grad_norm": 1.0428219354707835e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 22990 }, { "epoch": 0.11154584352300573, "grad_norm": 9.344486784357287e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23000 }, { "epoch": 0.11159434171584182, "grad_norm": 1.1719206582938568e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23010 }, { "epoch": 0.1116428399086779, "grad_norm": 1.5961339272507757e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23020 }, { "epoch": 0.11169133810151399, "grad_norm": 1.7547161235142994e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23030 }, { "epoch": 0.11173983629435008, "grad_norm": 1.0284558982220915e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23040 }, { "epoch": 0.11178833448718617, "grad_norm": 9.025740865808984e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23050 }, { "epoch": 0.11183683268002226, "grad_norm": 9.188574523477655e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23060 }, { "epoch": 0.11188533087285835, "grad_norm": 1.209500339882652e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23070 }, { "epoch": 0.11193382906569443, "grad_norm": 9.873268425053539e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23080 }, { "epoch": 0.11198232725853052, "grad_norm": 1.0608633971287418e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23090 }, { "epoch": 0.11203082545136663, "grad_norm": 8.461714173790824e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23100 }, { "epoch": 0.11207932364420271, "grad_norm": 1.0458570187665828e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23110 }, { "epoch": 0.1121278218370388, "grad_norm": 1.3903627404943109e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23120 }, { "epoch": 0.11217632002987489, "grad_norm": 1.4763620015401102e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23130 }, { "epoch": 0.11222481822271098, "grad_norm": 9.914423060308764e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23140 }, { "epoch": 0.11227331641554707, "grad_norm": 8.293982034501823e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23150 }, { "epoch": 0.11232181460838316, "grad_norm": 9.703384762360656e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23160 }, { "epoch": 0.11237031280121924, "grad_norm": 1.1550825718131819e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23170 }, { "epoch": 0.11241881099405533, "grad_norm": 8.399933193459219e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23180 }, { "epoch": 0.11246730918689142, "grad_norm": 9.952631074838791e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23190 }, { "epoch": 0.11251580737972751, "grad_norm": 8.105062931917928e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23200 }, { "epoch": 0.1125643055725636, "grad_norm": 1.0073919298747569e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23210 }, { "epoch": 0.11261280376539969, "grad_norm": 1.5798045183146314e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23220 }, { "epoch": 0.11266130195823579, "grad_norm": 1.0511131876000945e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23230 }, { "epoch": 0.11270980015107188, "grad_norm": 1.407804290920467e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23240 }, { "epoch": 0.11275829834390796, "grad_norm": 1.4044393026324542e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23250 }, { "epoch": 0.11280679653674405, "grad_norm": 1.0746015277618426e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23260 }, { "epoch": 0.11285529472958014, "grad_norm": 1.2295650719806872e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23270 }, { "epoch": 0.11290379292241623, "grad_norm": 7.394390166837184e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23280 }, { "epoch": 0.11295229111525232, "grad_norm": 8.391219097347857e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23290 }, { "epoch": 0.1130007893080884, "grad_norm": 1.0048924536931736e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23300 }, { "epoch": 0.1130492875009245, "grad_norm": 1.0076497858335642e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23310 }, { "epoch": 0.11309778569376058, "grad_norm": 7.019175285449819e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23320 }, { "epoch": 0.11314628388659667, "grad_norm": 8.082967184463996e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23330 }, { "epoch": 0.11319478207943276, "grad_norm": 1.0295735108911686e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23340 }, { "epoch": 0.11324328027226885, "grad_norm": 1.4088151090163592e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23350 }, { "epoch": 0.11329177846510495, "grad_norm": 9.484641338985966e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23360 }, { "epoch": 0.11334027665794104, "grad_norm": 1.0169286213113082e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23370 }, { "epoch": 0.11338877485077713, "grad_norm": 1.7040989064298628e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23380 }, { "epoch": 0.11343727304361322, "grad_norm": 8.500983028625342e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23390 }, { "epoch": 0.1134857712364493, "grad_norm": 9.062514294555513e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23400 }, { "epoch": 0.11353426942928539, "grad_norm": 1.3211699467774451e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23410 }, { "epoch": 0.11358276762212148, "grad_norm": 9.413051316187193e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23420 }, { "epoch": 0.11363126581495757, "grad_norm": 1.5320074453484267e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23430 }, { "epoch": 0.11367976400779366, "grad_norm": 1.0655950433147154e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23440 }, { "epoch": 0.11372826220062975, "grad_norm": 7.494475084968144e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23450 }, { "epoch": 0.11377676039346583, "grad_norm": 1.4596928110677254e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23460 }, { "epoch": 0.11382525858630192, "grad_norm": 8.169365628418745e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23470 }, { "epoch": 0.11387375677913801, "grad_norm": 1.162506890750592e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23480 }, { "epoch": 0.11392225497197411, "grad_norm": 9.341318474298532e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 23490 }, { "epoch": 0.1139707531648102, "grad_norm": 1.6246742973180517e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23500 }, { "epoch": 0.11401925135764629, "grad_norm": 1.1644347353012563e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23510 }, { "epoch": 0.11406774955048238, "grad_norm": 0.012669106014072895, "learning_rate": 0.0002, "loss": 0.0005, "step": 23520 }, { "epoch": 0.11411624774331847, "grad_norm": 0.0005228096270002425, "learning_rate": 0.0002, "loss": 0.0005, "step": 23530 }, { "epoch": 0.11416474593615455, "grad_norm": 1.8366828953730874e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 23540 }, { "epoch": 0.11421324412899064, "grad_norm": 0.01173289492726326, "learning_rate": 0.0002, "loss": 0.0, "step": 23550 }, { "epoch": 0.11426174232182673, "grad_norm": 1.9368320863577537e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 23560 }, { "epoch": 0.11431024051466282, "grad_norm": 5.362110823625699e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 23570 }, { "epoch": 0.11435873870749891, "grad_norm": 0.0005084203439764678, "learning_rate": 0.0002, "loss": 0.0, "step": 23580 }, { "epoch": 0.114407236900335, "grad_norm": 0.00021293939789757133, "learning_rate": 0.0002, "loss": 0.0, "step": 23590 }, { "epoch": 0.11445573509317108, "grad_norm": 3.794170424953336e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 23600 }, { "epoch": 0.11450423328600719, "grad_norm": 5.480676918523386e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 23610 }, { "epoch": 0.11455273147884328, "grad_norm": 8.298031275444373e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23620 }, { "epoch": 0.11460122967167936, "grad_norm": 7.750251711513556e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23630 }, { "epoch": 0.11464972786451545, "grad_norm": 9.520416597297299e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23640 }, { "epoch": 0.11469822605735154, "grad_norm": 4.172957233095076e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23650 }, { "epoch": 0.11474672425018763, "grad_norm": 9.290013736062974e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23660 }, { "epoch": 0.11479522244302372, "grad_norm": 5.652597110383795e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23670 }, { "epoch": 0.1148437206358598, "grad_norm": 6.890324470987252e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23680 }, { "epoch": 0.1148922188286959, "grad_norm": 1.1707300018315436e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 23690 }, { "epoch": 0.11494071702153198, "grad_norm": 5.802764349027711e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23700 }, { "epoch": 0.11498921521436807, "grad_norm": 9.614528835300007e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23710 }, { "epoch": 0.11503771340720416, "grad_norm": 6.272785526562075e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23720 }, { "epoch": 0.11508621160004025, "grad_norm": 7.572676281597523e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23730 }, { "epoch": 0.11513470979287635, "grad_norm": 7.442255878231663e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23740 }, { "epoch": 0.11518320798571244, "grad_norm": 3.42186865509575e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23750 }, { "epoch": 0.11523170617854853, "grad_norm": 8.892985761121963e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23760 }, { "epoch": 0.11528020437138461, "grad_norm": 6.768634079890035e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23770 }, { "epoch": 0.1153287025642207, "grad_norm": 7.577955898341315e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23780 }, { "epoch": 0.11537720075705679, "grad_norm": 3.104522079411254e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23790 }, { "epoch": 0.11542569894989288, "grad_norm": 1.129211341321934e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 23800 }, { "epoch": 0.11547419714272897, "grad_norm": 6.970869890210452e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23810 }, { "epoch": 0.11552269533556506, "grad_norm": 6.065100706109661e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23820 }, { "epoch": 0.11557119352840114, "grad_norm": 5.39485711215093e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23830 }, { "epoch": 0.11561969172123723, "grad_norm": 4.3537070837373903e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23840 }, { "epoch": 0.11566818991407332, "grad_norm": 2.776214387267828e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23850 }, { "epoch": 0.11571668810690941, "grad_norm": 4.626666907370236e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23860 }, { "epoch": 0.11576518629974551, "grad_norm": 6.072170322113379e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23870 }, { "epoch": 0.1158136844925816, "grad_norm": 1.127712152992899e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 23880 }, { "epoch": 0.11586218268541769, "grad_norm": 2.7916829026253254e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23890 }, { "epoch": 0.11591068087825378, "grad_norm": 3.0602205924878945e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23900 }, { "epoch": 0.11595917907108987, "grad_norm": 5.141008614373277e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23910 }, { "epoch": 0.11600767726392595, "grad_norm": 5.884603524464183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23920 }, { "epoch": 0.11605617545676204, "grad_norm": 5.15802071276994e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23930 }, { "epoch": 0.11610467364959813, "grad_norm": 5.006122023587523e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23940 }, { "epoch": 0.11615317184243422, "grad_norm": 2.872666300390847e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23950 }, { "epoch": 0.11620167003527031, "grad_norm": 5.288866304908879e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23960 }, { "epoch": 0.1162501682281064, "grad_norm": 5.153736992724589e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23970 }, { "epoch": 0.11629866642094248, "grad_norm": 5.128709972268553e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23980 }, { "epoch": 0.11634716461377857, "grad_norm": 6.444705036301457e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 23990 }, { "epoch": 0.11639566280661467, "grad_norm": 3.054605883789918e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24000 }, { "epoch": 0.11644416099945076, "grad_norm": 4.924916652271349e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24010 }, { "epoch": 0.11649265919228685, "grad_norm": 4.329952503212553e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24020 }, { "epoch": 0.11654115738512294, "grad_norm": 1.8929782754639746e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 24030 }, { "epoch": 0.11658965557795903, "grad_norm": 2.6174444656135165e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24040 }, { "epoch": 0.11663815377079512, "grad_norm": 3.2088794910123397e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24050 }, { "epoch": 0.1166866519636312, "grad_norm": 5.06689843859931e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24060 }, { "epoch": 0.1167351501564673, "grad_norm": 5.121675599184528e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24070 }, { "epoch": 0.11678364834930338, "grad_norm": 4.4482803218670597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24080 }, { "epoch": 0.11683214654213947, "grad_norm": 5.040927248955995e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24090 }, { "epoch": 0.11688064473497556, "grad_norm": 2.964894747492508e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24100 }, { "epoch": 0.11692914292781165, "grad_norm": 3.531890342856059e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 24110 }, { "epoch": 0.11697764112064774, "grad_norm": 4.44659775666878e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24120 }, { "epoch": 0.11702613931348384, "grad_norm": 5.351762411009986e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24130 }, { "epoch": 0.11707463750631993, "grad_norm": 2.5655481294961646e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24140 }, { "epoch": 0.11712313569915601, "grad_norm": 6.820056910328276e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24150 }, { "epoch": 0.1171716338919921, "grad_norm": 7.501580512325745e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24160 }, { "epoch": 0.11722013208482819, "grad_norm": 3.898141471836425e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24170 }, { "epoch": 0.11726863027766428, "grad_norm": 6.666215881523385e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24180 }, { "epoch": 0.11731712847050037, "grad_norm": 2.801828884457791e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24190 }, { "epoch": 0.11736562666333646, "grad_norm": 2.2601261662202887e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24200 }, { "epoch": 0.11741412485617254, "grad_norm": 3.873126104281255e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24210 }, { "epoch": 0.11746262304900863, "grad_norm": 8.652816063658975e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24220 }, { "epoch": 0.11751112124184472, "grad_norm": 3.9606297264072055e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24230 }, { "epoch": 0.11755961943468081, "grad_norm": 4.148214145516249e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24240 }, { "epoch": 0.11760811762751691, "grad_norm": 7.045799179650203e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24250 }, { "epoch": 0.117656615820353, "grad_norm": 3.3267465937569796e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24260 }, { "epoch": 0.11770511401318909, "grad_norm": 3.8845632843731437e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24270 }, { "epoch": 0.11775361220602518, "grad_norm": 4.148099890244339e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24280 }, { "epoch": 0.11780211039886127, "grad_norm": 3.958161585160269e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24290 }, { "epoch": 0.11785060859169735, "grad_norm": 2.519753365959332e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24300 }, { "epoch": 0.11789910678453344, "grad_norm": 3.4267281989741605e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24310 }, { "epoch": 0.11794760497736953, "grad_norm": 3.393707004306634e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24320 }, { "epoch": 0.11799610317020562, "grad_norm": 3.867667146550957e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24330 }, { "epoch": 0.1180446013630417, "grad_norm": 2.1809023564856034e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24340 }, { "epoch": 0.1180930995558778, "grad_norm": 2.08192332706858e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24350 }, { "epoch": 0.11814159774871388, "grad_norm": 3.744276284578518e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24360 }, { "epoch": 0.11819009594154997, "grad_norm": 3.2060850685411424e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24370 }, { "epoch": 0.11823859413438607, "grad_norm": 3.355901583290688e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24380 }, { "epoch": 0.11828709232722216, "grad_norm": 2.069373579161038e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24390 }, { "epoch": 0.11833559052005825, "grad_norm": 2.0949512702372886e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24400 }, { "epoch": 0.11838408871289434, "grad_norm": 3.7229332860988507e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24410 }, { "epoch": 0.11843258690573043, "grad_norm": 3.61648687885463e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24420 }, { "epoch": 0.11848108509856652, "grad_norm": 3.123785745628993e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24430 }, { "epoch": 0.1185295832914026, "grad_norm": 2.4497870754203177e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24440 }, { "epoch": 0.11857808148423869, "grad_norm": 2.498854883015156e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24450 }, { "epoch": 0.11862657967707478, "grad_norm": 3.066585350097739e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24460 }, { "epoch": 0.11867507786991087, "grad_norm": 3.721925736499543e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24470 }, { "epoch": 0.11872357606274696, "grad_norm": 3.30220274236126e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24480 }, { "epoch": 0.11877207425558305, "grad_norm": 3.0865803068991227e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24490 }, { "epoch": 0.11882057244841913, "grad_norm": 2.2472460159406182e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24500 }, { "epoch": 0.11886907064125524, "grad_norm": 1.1150870022902382e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 24510 }, { "epoch": 0.11891756883409133, "grad_norm": 2.9650178134943417e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24520 }, { "epoch": 0.11896606702692741, "grad_norm": 3.114764126621594e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24530 }, { "epoch": 0.1190145652197635, "grad_norm": 3.065784994760179e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24540 }, { "epoch": 0.11906306341259959, "grad_norm": 2.1291154439495585e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24550 }, { "epoch": 0.11911156160543568, "grad_norm": 4.1685808582769823e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24560 }, { "epoch": 0.11916005979827177, "grad_norm": 3.5020414657083165e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24570 }, { "epoch": 0.11920855799110786, "grad_norm": 3.0678057783006807e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24580 }, { "epoch": 0.11925705618394394, "grad_norm": 2.033270618539973e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24590 }, { "epoch": 0.11930555437678003, "grad_norm": 1.9340906476372766e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24600 }, { "epoch": 0.11935405256961612, "grad_norm": 2.9693663350371935e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24610 }, { "epoch": 0.11940255076245221, "grad_norm": 3.0583527177441283e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24620 }, { "epoch": 0.1194510489552883, "grad_norm": 1.065287392520986e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 24630 }, { "epoch": 0.1194995471481244, "grad_norm": 2.1922011228525662e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24640 }, { "epoch": 0.11954804534096049, "grad_norm": 8.089220386864326e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24650 }, { "epoch": 0.11959654353379658, "grad_norm": 2.8774155680366675e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24660 }, { "epoch": 0.11964504172663266, "grad_norm": 3.151637315568223e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24670 }, { "epoch": 0.11969353991946875, "grad_norm": 3.1564817959406355e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24680 }, { "epoch": 0.11974203811230484, "grad_norm": 1.9419039176682418e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24690 }, { "epoch": 0.11979053630514093, "grad_norm": 1.9245912596943526e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24700 }, { "epoch": 0.11983903449797702, "grad_norm": 3.133058044113568e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24710 }, { "epoch": 0.1198875326908131, "grad_norm": 3.8654351897093875e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24720 }, { "epoch": 0.1199360308836492, "grad_norm": 2.8164188847767946e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24730 }, { "epoch": 0.11998452907648528, "grad_norm": 1.8282162272953428e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24740 }, { "epoch": 0.12003302726932137, "grad_norm": 1.9012529151041235e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24750 }, { "epoch": 0.12008152546215746, "grad_norm": 3.1504430353379576e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24760 }, { "epoch": 0.12013002365499356, "grad_norm": 2.634265570122807e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24770 }, { "epoch": 0.12017852184782965, "grad_norm": 2.61761044839659e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24780 }, { "epoch": 0.12022702004066574, "grad_norm": 2.1069222100322804e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24790 }, { "epoch": 0.12027551823350183, "grad_norm": 3.615340062879113e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24800 }, { "epoch": 0.12032401642633792, "grad_norm": 1.0191923820457305e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 24810 }, { "epoch": 0.120372514619174, "grad_norm": 1.1060657243433525e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 24820 }, { "epoch": 0.12042101281201009, "grad_norm": 2.5685261562102824e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24830 }, { "epoch": 0.12046951100484618, "grad_norm": 1.8475765273251454e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24840 }, { "epoch": 0.12051800919768227, "grad_norm": 1.8027351700311556e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24850 }, { "epoch": 0.12056650739051836, "grad_norm": 2.481428396094998e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24860 }, { "epoch": 0.12061500558335445, "grad_norm": 2.320645506870278e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24870 }, { "epoch": 0.12066350377619053, "grad_norm": 2.2641195585038076e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24880 }, { "epoch": 0.12071200196902664, "grad_norm": 2.8319402645138325e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24890 }, { "epoch": 0.12076050016186272, "grad_norm": 2.0001202472030855e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24900 }, { "epoch": 0.12080899835469881, "grad_norm": 2.5245768142667657e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24910 }, { "epoch": 0.1208574965475349, "grad_norm": 3.0459975164376374e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24920 }, { "epoch": 0.12090599474037099, "grad_norm": 2.99599008712903e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24930 }, { "epoch": 0.12095449293320708, "grad_norm": 1.8175744287418638e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24940 }, { "epoch": 0.12100299112604317, "grad_norm": 1.7716419620228407e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24950 }, { "epoch": 0.12105148931887925, "grad_norm": 2.4206050852626504e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24960 }, { "epoch": 0.12109998751171534, "grad_norm": 2.285730715811951e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24970 }, { "epoch": 0.12114848570455143, "grad_norm": 2.2687214595862315e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24980 }, { "epoch": 0.12119698389738752, "grad_norm": 2.546875919051672e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 24990 }, { "epoch": 0.12124548209022361, "grad_norm": 1.9509602111611457e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25000 }, { "epoch": 0.1212939802830597, "grad_norm": 2.2779089192681568e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25010 }, { "epoch": 0.1213424784758958, "grad_norm": 2.3814362748453277e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25020 }, { "epoch": 0.12139097666873189, "grad_norm": 2.421121507723001e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25030 }, { "epoch": 0.12143947486156798, "grad_norm": 1.7756744341568265e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25040 }, { "epoch": 0.12148797305440406, "grad_norm": 1.7705161781123024e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25050 }, { "epoch": 0.12153647124724015, "grad_norm": 2.3887349698270555e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25060 }, { "epoch": 0.12158496944007624, "grad_norm": 2.341890734669505e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25070 }, { "epoch": 0.12163346763291233, "grad_norm": 2.3292164996746578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25080 }, { "epoch": 0.12168196582574842, "grad_norm": 1.7781320593712735e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25090 }, { "epoch": 0.1217304640185845, "grad_norm": 3.1418716162079363e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25100 }, { "epoch": 0.1217789622114206, "grad_norm": 2.170245352317579e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25110 }, { "epoch": 0.12182746040425668, "grad_norm": 2.0251151511274657e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25120 }, { "epoch": 0.12187595859709277, "grad_norm": 2.2134810251372983e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25130 }, { "epoch": 0.12192445678992886, "grad_norm": 1.9208947321658343e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25140 }, { "epoch": 0.12197295498276496, "grad_norm": 1.786797270142415e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25150 }, { "epoch": 0.12202145317560105, "grad_norm": 2.0756614560468734e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25160 }, { "epoch": 0.12206995136843714, "grad_norm": 2.8750122282872326e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25170 }, { "epoch": 0.12211844956127323, "grad_norm": 2.0402748646120017e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25180 }, { "epoch": 0.12216694775410931, "grad_norm": 5.374618581299728e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25190 }, { "epoch": 0.1222154459469454, "grad_norm": 2.386623521033471e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25200 }, { "epoch": 0.12226394413978149, "grad_norm": 2.2191910886704136e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25210 }, { "epoch": 0.12231244233261758, "grad_norm": 2.3216861677610723e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25220 }, { "epoch": 0.12236094052545367, "grad_norm": 2.401797303264175e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25230 }, { "epoch": 0.12240943871828976, "grad_norm": 1.68573009773354e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25240 }, { "epoch": 0.12245793691112584, "grad_norm": 1.6734149710373458e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25250 }, { "epoch": 0.12250643510396193, "grad_norm": 2.1647657888479443e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25260 }, { "epoch": 0.12255493329679802, "grad_norm": 1.9527746530911827e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25270 }, { "epoch": 0.12260343148963412, "grad_norm": 2.013531315014916e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25280 }, { "epoch": 0.12265192968247021, "grad_norm": 1.5824772958694666e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25290 }, { "epoch": 0.1227004278753063, "grad_norm": 2.809432544381707e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25300 }, { "epoch": 0.12274892606814239, "grad_norm": 2.0972396441720775e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25310 }, { "epoch": 0.12279742426097848, "grad_norm": 2.097860800631679e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25320 }, { "epoch": 0.12284592245381457, "grad_norm": 2.0329076733105467e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25330 }, { "epoch": 0.12289442064665065, "grad_norm": 1.8216542230220512e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25340 }, { "epoch": 0.12294291883948674, "grad_norm": 1.687037070041697e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25350 }, { "epoch": 0.12299141703232283, "grad_norm": 5.672489464814134e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25360 }, { "epoch": 0.12303991522515892, "grad_norm": 2.070459004244185e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25370 }, { "epoch": 0.12308841341799501, "grad_norm": 2.007922716984467e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25380 }, { "epoch": 0.1231369116108311, "grad_norm": 3.95072873971003e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25390 }, { "epoch": 0.12318540980366718, "grad_norm": 2.1517323034458968e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25400 }, { "epoch": 0.12323390799650329, "grad_norm": 1.9556955521693453e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25410 }, { "epoch": 0.12328240618933937, "grad_norm": 1.9685911922806554e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25420 }, { "epoch": 0.12333090438217546, "grad_norm": 1.9915853499696823e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25430 }, { "epoch": 0.12337940257501155, "grad_norm": 1.5794852004091808e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25440 }, { "epoch": 0.12342790076784764, "grad_norm": 1.9820139129933523e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25450 }, { "epoch": 0.12347639896068373, "grad_norm": 2.2177364655817655e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25460 }, { "epoch": 0.12352489715351982, "grad_norm": 1.96319149381452e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25470 }, { "epoch": 0.1235733953463559, "grad_norm": 1.926264872054162e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25480 }, { "epoch": 0.123621893539192, "grad_norm": 3.692540531119448e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25490 }, { "epoch": 0.12367039173202808, "grad_norm": 1.6353857290596352e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25500 }, { "epoch": 0.12371888992486417, "grad_norm": 2.0657520849454158e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25510 }, { "epoch": 0.12376738811770026, "grad_norm": 2.017509075358248e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25520 }, { "epoch": 0.12381588631053636, "grad_norm": 2.3569280926949432e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25530 }, { "epoch": 0.12386438450337245, "grad_norm": 1.5491181670768128e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25540 }, { "epoch": 0.12391288269620854, "grad_norm": 7.603844665027282e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25550 }, { "epoch": 0.12396138088904463, "grad_norm": 1.911709972546305e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25560 }, { "epoch": 0.12400987908188071, "grad_norm": 2.231174676126102e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25570 }, { "epoch": 0.1240583772747168, "grad_norm": 1.9779450610712956e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25580 }, { "epoch": 0.12410687546755289, "grad_norm": 1.907190352312682e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25590 }, { "epoch": 0.12415537366038898, "grad_norm": 1.3911642327002482e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25600 }, { "epoch": 0.12420387185322507, "grad_norm": 1.9427788799930568e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25610 }, { "epoch": 0.12425237004606116, "grad_norm": 1.8933846490654105e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25620 }, { "epoch": 0.12430086823889724, "grad_norm": 1.845679520329213e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25630 }, { "epoch": 0.12434936643173333, "grad_norm": 1.5336600256432575e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25640 }, { "epoch": 0.12439786462456942, "grad_norm": 1.6990694007290585e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25650 }, { "epoch": 0.12444636281740552, "grad_norm": 1.9300813391964766e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25660 }, { "epoch": 0.12449486101024161, "grad_norm": 1.0055492793981102e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 25670 }, { "epoch": 0.1245433592030777, "grad_norm": 2.5902343736561306e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25680 }, { "epoch": 0.12459185739591379, "grad_norm": 1.6756423804054066e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25690 }, { "epoch": 0.12464035558874988, "grad_norm": 1.5999212621409242e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25700 }, { "epoch": 0.12468885378158596, "grad_norm": 3.005873452366359e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25710 }, { "epoch": 0.12473735197442205, "grad_norm": 2.007473085541278e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25720 }, { "epoch": 0.12478585016725814, "grad_norm": 2.13228489087669e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25730 }, { "epoch": 0.12483434836009423, "grad_norm": 1.7662674167695513e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25740 }, { "epoch": 0.12488284655293032, "grad_norm": 1.9911450976906053e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25750 }, { "epoch": 0.1249313447457664, "grad_norm": 1.8989992156548396e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25760 }, { "epoch": 0.1249798429386025, "grad_norm": 1.7302851063050184e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25770 }, { "epoch": 0.1250283411314386, "grad_norm": 2.0628144170586893e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25780 }, { "epoch": 0.12507683932427469, "grad_norm": 1.5126499874895671e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25790 }, { "epoch": 0.12512533751711077, "grad_norm": 1.2950567906955257e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25800 }, { "epoch": 0.12517383570994686, "grad_norm": 9.122820301854517e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 25810 }, { "epoch": 0.12522233390278295, "grad_norm": 1.761493138019432e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25820 }, { "epoch": 0.12527083209561904, "grad_norm": 1.81910849050837e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25830 }, { "epoch": 0.12531933028845513, "grad_norm": 1.5223332638925058e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25840 }, { "epoch": 0.12536782848129122, "grad_norm": 1.5611550452376832e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25850 }, { "epoch": 0.1254163266741273, "grad_norm": 1.9769332482155733e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25860 }, { "epoch": 0.1254648248669634, "grad_norm": 1.7597521662082727e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25870 }, { "epoch": 0.12551332305979948, "grad_norm": 1.7583160172307544e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25880 }, { "epoch": 0.12556182125263557, "grad_norm": 1.4077281207391934e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25890 }, { "epoch": 0.12561031944547166, "grad_norm": 1.3925669861691858e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25900 }, { "epoch": 0.12565881763830775, "grad_norm": 1.7488144976596232e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25910 }, { "epoch": 0.12570731583114383, "grad_norm": 1.8300134740911744e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25920 }, { "epoch": 0.12575581402397992, "grad_norm": 1.7064031965219328e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25930 }, { "epoch": 0.125804312216816, "grad_norm": 1.4418263560855848e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25940 }, { "epoch": 0.1258528104096521, "grad_norm": 1.4665833703020326e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25950 }, { "epoch": 0.1259013086024882, "grad_norm": 1.7484298098224826e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25960 }, { "epoch": 0.12594980679532428, "grad_norm": 2.298939216416329e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 25970 }, { "epoch": 0.1259983049881604, "grad_norm": 1.8458834460943763e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25980 }, { "epoch": 0.12604680318099648, "grad_norm": 1.4863496744510485e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 25990 }, { "epoch": 0.12609530137383257, "grad_norm": 1.4611624976623716e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26000 }, { "epoch": 0.12614379956666866, "grad_norm": 1.725219078707596e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26010 }, { "epoch": 0.12619229775950475, "grad_norm": 1.777470686192828e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26020 }, { "epoch": 0.12624079595234083, "grad_norm": 1.7884809722090722e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26030 }, { "epoch": 0.12628929414517692, "grad_norm": 1.3506139850960608e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26040 }, { "epoch": 0.126337792338013, "grad_norm": 3.7925357787571556e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26050 }, { "epoch": 0.1263862905308491, "grad_norm": 1.8330233331198542e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26060 }, { "epoch": 0.1264347887236852, "grad_norm": 1.6670134073137888e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26070 }, { "epoch": 0.12648328691652128, "grad_norm": 1.720784297276623e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26080 }, { "epoch": 0.12653178510935736, "grad_norm": 1.4091372690927528e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26090 }, { "epoch": 0.12658028330219345, "grad_norm": 1.6569667593557824e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26100 }, { "epoch": 0.12662878149502954, "grad_norm": 2.041656728124508e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26110 }, { "epoch": 0.12667727968786563, "grad_norm": 1.6284724324577837e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26120 }, { "epoch": 0.12672577788070172, "grad_norm": 1.9916251403628848e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26130 }, { "epoch": 0.1267742760735378, "grad_norm": 1.412085737229063e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26140 }, { "epoch": 0.1268227742663739, "grad_norm": 1.4038899109891645e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26150 }, { "epoch": 0.12687127245920998, "grad_norm": 1.640296147797926e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26160 }, { "epoch": 0.12691977065204607, "grad_norm": 1.633831914205075e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26170 }, { "epoch": 0.12696826884488216, "grad_norm": 1.778525131612696e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26180 }, { "epoch": 0.12701676703771825, "grad_norm": 2.5921264068529126e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26190 }, { "epoch": 0.12706526523055434, "grad_norm": 2.0335797046300286e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26200 }, { "epoch": 0.12711376342339042, "grad_norm": 1.485429379499692e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26210 }, { "epoch": 0.1271622616162265, "grad_norm": 1.6628848698019283e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26220 }, { "epoch": 0.1272107598090626, "grad_norm": 1.837265273252342e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26230 }, { "epoch": 0.12725925800189872, "grad_norm": 1.3875801130325272e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26240 }, { "epoch": 0.1273077561947348, "grad_norm": 1.3493469452896534e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26250 }, { "epoch": 0.1273562543875709, "grad_norm": 1.5460432223335374e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26260 }, { "epoch": 0.12740475258040698, "grad_norm": 1.712200941028641e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26270 }, { "epoch": 0.12745325077324307, "grad_norm": 2.5507415557513013e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26280 }, { "epoch": 0.12750174896607916, "grad_norm": 1.304285319747578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26290 }, { "epoch": 0.12755024715891525, "grad_norm": 1.2512526836871984e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26300 }, { "epoch": 0.12759874535175134, "grad_norm": 1.5497600713842985e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26310 }, { "epoch": 0.12764724354458742, "grad_norm": 1.5089008797986025e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26320 }, { "epoch": 0.1276957417374235, "grad_norm": 1.6145078518547962e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26330 }, { "epoch": 0.1277442399302596, "grad_norm": 1.6660285950820253e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26340 }, { "epoch": 0.1277927381230957, "grad_norm": 1.3134709320183902e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26350 }, { "epoch": 0.12784123631593178, "grad_norm": 1.4419187266412337e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26360 }, { "epoch": 0.12788973450876787, "grad_norm": 2.2488718798285845e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26370 }, { "epoch": 0.12793823270160395, "grad_norm": 1.6609926944965991e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26380 }, { "epoch": 0.12798673089444004, "grad_norm": 1.349360729818727e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26390 }, { "epoch": 0.12803522908727613, "grad_norm": 1.4887343979808065e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26400 }, { "epoch": 0.12808372728011222, "grad_norm": 1.6083993159554666e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26410 }, { "epoch": 0.1281322254729483, "grad_norm": 1.6249560985670541e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26420 }, { "epoch": 0.1281807236657844, "grad_norm": 1.6066228170075192e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26430 }, { "epoch": 0.12822922185862048, "grad_norm": 1.2759200274103932e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26440 }, { "epoch": 0.12827772005145657, "grad_norm": 1.3052714109562658e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26450 }, { "epoch": 0.12832621824429266, "grad_norm": 1.6049635576109722e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26460 }, { "epoch": 0.12837471643712875, "grad_norm": 1.4892439992308937e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26470 }, { "epoch": 0.12842321462996484, "grad_norm": 1.4791631031130237e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26480 }, { "epoch": 0.12847171282280095, "grad_norm": 4.5462297748599667e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26490 }, { "epoch": 0.12852021101563704, "grad_norm": 1.597027790012362e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26500 }, { "epoch": 0.12856870920847313, "grad_norm": 1.6548513315228774e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26510 }, { "epoch": 0.12861720740130922, "grad_norm": 1.567247949196826e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26520 }, { "epoch": 0.1286657055941453, "grad_norm": 1.5217234761166765e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26530 }, { "epoch": 0.1287142037869814, "grad_norm": 1.2783210934230738e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26540 }, { "epoch": 0.12876270197981748, "grad_norm": 1.291462723429504e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26550 }, { "epoch": 0.12881120017265357, "grad_norm": 1.5217075599593954e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26560 }, { "epoch": 0.12885969836548966, "grad_norm": 1.427731461944859e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26570 }, { "epoch": 0.12890819655832575, "grad_norm": 1.5481347759305208e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26580 }, { "epoch": 0.12895669475116184, "grad_norm": 1.2080882072496024e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26590 }, { "epoch": 0.12900519294399793, "grad_norm": 1.3371084151003743e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26600 }, { "epoch": 0.12905369113683401, "grad_norm": 1.492719832185685e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26610 }, { "epoch": 0.1291021893296701, "grad_norm": 1.481541573866707e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26620 }, { "epoch": 0.1291506875225062, "grad_norm": 1.5681186482652265e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26630 }, { "epoch": 0.12919918571534228, "grad_norm": 1.263949798158137e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26640 }, { "epoch": 0.12924768390817837, "grad_norm": 2.696245076094783e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26650 }, { "epoch": 0.12929618210101446, "grad_norm": 1.4543329029947927e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26660 }, { "epoch": 0.12934468029385054, "grad_norm": 1.5199410086097487e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26670 }, { "epoch": 0.12939317848668663, "grad_norm": 1.4686230542793055e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26680 }, { "epoch": 0.12944167667952272, "grad_norm": 1.2701535467840586e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26690 }, { "epoch": 0.1294901748723588, "grad_norm": 1.300503100765127e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26700 }, { "epoch": 0.1295386730651949, "grad_norm": 1.5161879218794638e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26710 }, { "epoch": 0.129587171258031, "grad_norm": 1.5017371879366692e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26720 }, { "epoch": 0.12963566945086707, "grad_norm": 1.4933081615708943e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26730 }, { "epoch": 0.12968416764370316, "grad_norm": 1.381899181751578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26740 }, { "epoch": 0.12973266583653928, "grad_norm": 1.3879106575132028e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26750 }, { "epoch": 0.12978116402937537, "grad_norm": 1.5374592976513668e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26760 }, { "epoch": 0.12982966222221146, "grad_norm": 1.572796008986188e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26770 }, { "epoch": 0.12987816041504754, "grad_norm": 1.4618588295434165e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26780 }, { "epoch": 0.12992665860788363, "grad_norm": 2.172768205355169e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26790 }, { "epoch": 0.12997515680071972, "grad_norm": 2.2551630252110044e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26800 }, { "epoch": 0.1300236549935558, "grad_norm": 1.477684463679907e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26810 }, { "epoch": 0.1300721531863919, "grad_norm": 1.292452651568965e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26820 }, { "epoch": 0.130120651379228, "grad_norm": 4.1397919403607375e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26830 }, { "epoch": 0.13016914957206407, "grad_norm": 8.303943559440086e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26840 }, { "epoch": 0.13021764776490016, "grad_norm": 1.1666142540889268e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26850 }, { "epoch": 0.13026614595773625, "grad_norm": 1.283734860635377e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26860 }, { "epoch": 0.13031464415057234, "grad_norm": 1.5143368159442616e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26870 }, { "epoch": 0.13036314234340843, "grad_norm": 1.303987176015653e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26880 }, { "epoch": 0.13041164053624452, "grad_norm": 1.402010383344532e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26890 }, { "epoch": 0.1304601387290806, "grad_norm": 1.7651245798333548e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26900 }, { "epoch": 0.1305086369219167, "grad_norm": 1.3134952325799532e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26910 }, { "epoch": 0.13055713511475278, "grad_norm": 4.2161389046668774e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26920 }, { "epoch": 0.13060563330758887, "grad_norm": 1.320122891002029e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26930 }, { "epoch": 0.13065413150042496, "grad_norm": 1.1188518556082272e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26940 }, { "epoch": 0.13070262969326105, "grad_norm": 1.5111456264094159e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26950 }, { "epoch": 0.13075112788609713, "grad_norm": 1.2785940839421528e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26960 }, { "epoch": 0.13079962607893322, "grad_norm": 1.2942929572545836e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26970 }, { "epoch": 0.1308481242717693, "grad_norm": 1.2189995857170288e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26980 }, { "epoch": 0.1308966224646054, "grad_norm": 1.1581656877979185e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 26990 }, { "epoch": 0.1309451206574415, "grad_norm": 1.0497979729962026e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27000 }, { "epoch": 0.1309936188502776, "grad_norm": 1.1570001845484512e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27010 }, { "epoch": 0.1310421170431137, "grad_norm": 1.2156745299307659e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27020 }, { "epoch": 0.13109061523594978, "grad_norm": 1.2198749743674853e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27030 }, { "epoch": 0.13113911342878587, "grad_norm": 1.1195024285370891e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27040 }, { "epoch": 0.13118761162162196, "grad_norm": 1.1888932505144112e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27050 }, { "epoch": 0.13123610981445805, "grad_norm": 1.2450951203391014e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27060 }, { "epoch": 0.13128460800729413, "grad_norm": 1.3441629675980948e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27070 }, { "epoch": 0.13133310620013022, "grad_norm": 1.2958399508988805e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27080 }, { "epoch": 0.1313816043929663, "grad_norm": 1.1276116396174984e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27090 }, { "epoch": 0.1314301025858024, "grad_norm": 1.0599087829632481e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27100 }, { "epoch": 0.1314786007786385, "grad_norm": 1.2696040130322217e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27110 }, { "epoch": 0.13152709897147458, "grad_norm": 1.2813188732252456e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27120 }, { "epoch": 0.13157559716431066, "grad_norm": 1.2394168891205481e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27130 }, { "epoch": 0.13162409535714675, "grad_norm": 1.0169213737754035e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27140 }, { "epoch": 0.13167259354998284, "grad_norm": 1.2694307827132434e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27150 }, { "epoch": 0.13172109174281893, "grad_norm": 1.2909090685297997e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27160 }, { "epoch": 0.13176958993565502, "grad_norm": 1.2782854241777386e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27170 }, { "epoch": 0.1318180881284911, "grad_norm": 1.2487532785598887e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27180 }, { "epoch": 0.1318665863213272, "grad_norm": 1.1319085757577341e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27190 }, { "epoch": 0.13191508451416328, "grad_norm": 1.2139979332914663e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27200 }, { "epoch": 0.13196358270699937, "grad_norm": 1.1906582386700393e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27210 }, { "epoch": 0.13201208089983546, "grad_norm": 1.1890035978012747e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27220 }, { "epoch": 0.13206057909267155, "grad_norm": 1.2545700656119152e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27230 }, { "epoch": 0.13210907728550764, "grad_norm": 1.1615836825740189e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27240 }, { "epoch": 0.13215757547834373, "grad_norm": 1.0532395577911302e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27250 }, { "epoch": 0.13220607367117984, "grad_norm": 1.2489279299643385e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27260 }, { "epoch": 0.13225457186401593, "grad_norm": 1.2697582008058816e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27270 }, { "epoch": 0.13230307005685202, "grad_norm": 1.4542878545853455e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27280 }, { "epoch": 0.1323515682496881, "grad_norm": 1.1637126817731769e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27290 }, { "epoch": 0.1324000664425242, "grad_norm": 1.1418650558425725e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27300 }, { "epoch": 0.13244856463536028, "grad_norm": 1.2991191056244134e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27310 }, { "epoch": 0.13249706282819637, "grad_norm": 1.2676416361045995e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27320 }, { "epoch": 0.13254556102103246, "grad_norm": 1.295927347655379e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27330 }, { "epoch": 0.13259405921386855, "grad_norm": 1.114378704869523e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27340 }, { "epoch": 0.13264255740670464, "grad_norm": 1.2297007856432174e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27350 }, { "epoch": 0.13269105559954072, "grad_norm": 1.2609113753114798e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27360 }, { "epoch": 0.1327395537923768, "grad_norm": 1.3175981905533263e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27370 }, { "epoch": 0.1327880519852129, "grad_norm": 1.2314939112911816e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27380 }, { "epoch": 0.132836550178049, "grad_norm": 1.1348573281111385e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27390 }, { "epoch": 0.13288504837088508, "grad_norm": 1.121900581324553e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27400 }, { "epoch": 0.13293354656372117, "grad_norm": 1.3541884413825755e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27410 }, { "epoch": 0.13298204475655725, "grad_norm": 1.2277287453343888e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27420 }, { "epoch": 0.13303054294939334, "grad_norm": 1.2426845330537617e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27430 }, { "epoch": 0.13307904114222943, "grad_norm": 1.1365165875076855e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27440 }, { "epoch": 0.13312753933506552, "grad_norm": 1.2080005262760096e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27450 }, { "epoch": 0.1331760375279016, "grad_norm": 1.2165426710453175e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27460 }, { "epoch": 0.1332245357207377, "grad_norm": 1.3940567100689805e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27470 }, { "epoch": 0.13327303391357379, "grad_norm": 1.2137952865032275e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27480 }, { "epoch": 0.13332153210640987, "grad_norm": 1.4423447680655954e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27490 }, { "epoch": 0.13337003029924596, "grad_norm": 1.208962885357323e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27500 }, { "epoch": 0.13341852849208205, "grad_norm": 1.2776995106378308e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27510 }, { "epoch": 0.13346702668491817, "grad_norm": 1.255779551456726e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27520 }, { "epoch": 0.13351552487775425, "grad_norm": 1.4541090820330282e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27530 }, { "epoch": 0.13356402307059034, "grad_norm": 1.1011668021865262e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27540 }, { "epoch": 0.13361252126342643, "grad_norm": 1.3246534535937826e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27550 }, { "epoch": 0.13366101945626252, "grad_norm": 1.205643229695852e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27560 }, { "epoch": 0.1337095176490986, "grad_norm": 1.3449808022869547e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27570 }, { "epoch": 0.1337580158419347, "grad_norm": 1.2656406056521519e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27580 }, { "epoch": 0.13380651403477078, "grad_norm": 1.1859074078302001e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27590 }, { "epoch": 0.13385501222760687, "grad_norm": 1.220167291648977e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27600 }, { "epoch": 0.13390351042044296, "grad_norm": 1.321342750770782e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27610 }, { "epoch": 0.13395200861327905, "grad_norm": 1.2233171275966015e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27620 }, { "epoch": 0.13400050680611514, "grad_norm": 1.216322544905779e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27630 }, { "epoch": 0.13404900499895123, "grad_norm": 1.166466248037068e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27640 }, { "epoch": 0.13409750319178732, "grad_norm": 1.0652927073806495e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27650 }, { "epoch": 0.1341460013846234, "grad_norm": 1.2803624827029125e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27660 }, { "epoch": 0.1341944995774595, "grad_norm": 1.2582556507823028e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27670 }, { "epoch": 0.13424299777029558, "grad_norm": 1.2309600094795314e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27680 }, { "epoch": 0.13429149596313167, "grad_norm": 1.044802360183894e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27690 }, { "epoch": 0.13433999415596776, "grad_norm": 1.0815421802590208e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27700 }, { "epoch": 0.13438849234880385, "grad_norm": 1.849942066201038e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27710 }, { "epoch": 0.13443699054163993, "grad_norm": 1.2163810936272057e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27720 }, { "epoch": 0.13448548873447602, "grad_norm": 1.3208088489591319e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27730 }, { "epoch": 0.1345339869273121, "grad_norm": 1.186060103464115e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27740 }, { "epoch": 0.1345824851201482, "grad_norm": 1.1244385689224146e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27750 }, { "epoch": 0.1346309833129843, "grad_norm": 1.29742872445604e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27760 }, { "epoch": 0.1346794815058204, "grad_norm": 1.221467584855418e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27770 }, { "epoch": 0.1347279796986565, "grad_norm": 3.137389796847856e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27780 }, { "epoch": 0.13477647789149258, "grad_norm": 1.1322740078867355e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27790 }, { "epoch": 0.13482497608432867, "grad_norm": 1.1984235470663407e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27800 }, { "epoch": 0.13487347427716476, "grad_norm": 1.3206093285589304e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27810 }, { "epoch": 0.13492197247000084, "grad_norm": 1.265989340026863e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27820 }, { "epoch": 0.13497047066283693, "grad_norm": 1.3287514377680054e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27830 }, { "epoch": 0.13501896885567302, "grad_norm": 1.1841507330245804e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27840 }, { "epoch": 0.1350674670485091, "grad_norm": 1.2921412917421549e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27850 }, { "epoch": 0.1351159652413452, "grad_norm": 1.2518236758296553e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27860 }, { "epoch": 0.1351644634341813, "grad_norm": 1.2383880232391675e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27870 }, { "epoch": 0.13521296162701738, "grad_norm": 1.3128206433066225e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27880 }, { "epoch": 0.13526145981985346, "grad_norm": 1.1377242259413833e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27890 }, { "epoch": 0.13530995801268955, "grad_norm": 1.098835724633318e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27900 }, { "epoch": 0.13535845620552564, "grad_norm": 1.207015571935699e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27910 }, { "epoch": 0.13540695439836173, "grad_norm": 1.3152187250398129e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27920 }, { "epoch": 0.13545545259119782, "grad_norm": 1.2026474394133402e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27930 }, { "epoch": 0.1355039507840339, "grad_norm": 1.4157986072405038e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27940 }, { "epoch": 0.13555244897687, "grad_norm": 1.063812860024882e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27950 }, { "epoch": 0.13560094716970608, "grad_norm": 1.1784127451619497e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27960 }, { "epoch": 0.13564944536254217, "grad_norm": 1.2395013015975564e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27970 }, { "epoch": 0.13569794355537826, "grad_norm": 1.2586112063672772e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27980 }, { "epoch": 0.13574644174821435, "grad_norm": 1.153538917719743e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 27990 }, { "epoch": 0.13579493994105044, "grad_norm": 1.2956341777226044e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28000 }, { "epoch": 0.13584343813388652, "grad_norm": 1.2828267870190757e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28010 }, { "epoch": 0.1358919363267226, "grad_norm": 1.292255404905518e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28020 }, { "epoch": 0.13594043451955873, "grad_norm": 1.542768330864419e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28030 }, { "epoch": 0.13598893271239482, "grad_norm": 1.4695346806092857e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28040 }, { "epoch": 0.1360374309052309, "grad_norm": 1.1406270772340577e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28050 }, { "epoch": 0.136085929098067, "grad_norm": 1.3100053308789938e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28060 }, { "epoch": 0.13613442729090308, "grad_norm": 1.3367017004384252e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28070 }, { "epoch": 0.13618292548373917, "grad_norm": 1.2750155065077706e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28080 }, { "epoch": 0.13623142367657526, "grad_norm": 1.0760572877188679e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28090 }, { "epoch": 0.13627992186941135, "grad_norm": 1.1356353013525222e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28100 }, { "epoch": 0.13632842006224744, "grad_norm": 1.182911333330594e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28110 }, { "epoch": 0.13637691825508352, "grad_norm": 1.4160879402425053e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28120 }, { "epoch": 0.1364254164479196, "grad_norm": 1.3073467641788739e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28130 }, { "epoch": 0.1364739146407557, "grad_norm": 1.7615835190554208e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28140 }, { "epoch": 0.1365224128335918, "grad_norm": 1.1849245140638232e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28150 }, { "epoch": 0.13657091102642788, "grad_norm": 1.1723675186203764e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28160 }, { "epoch": 0.13661940921926397, "grad_norm": 1.298680274430808e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28170 }, { "epoch": 0.13666790741210005, "grad_norm": 1.1841994762562535e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28180 }, { "epoch": 0.13671640560493614, "grad_norm": 1.1375851016737215e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28190 }, { "epoch": 0.13676490379777223, "grad_norm": 1.174931298919546e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28200 }, { "epoch": 0.13681340199060832, "grad_norm": 1.1332187455082021e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28210 }, { "epoch": 0.1368619001834444, "grad_norm": 1.2843156582675874e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28220 }, { "epoch": 0.1369103983762805, "grad_norm": 1.0928869187409873e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 28230 }, { "epoch": 0.13695889656911658, "grad_norm": 1.0731056931945204e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28240 }, { "epoch": 0.13700739476195267, "grad_norm": 1.0558454732745304e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28250 }, { "epoch": 0.13705589295478876, "grad_norm": 1.803318951942856e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28260 }, { "epoch": 0.13710439114762485, "grad_norm": 1.2180086628177378e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28270 }, { "epoch": 0.13715288934046094, "grad_norm": 1.3485116312494938e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28280 }, { "epoch": 0.13720138753329705, "grad_norm": 1.5145590168685885e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 28290 }, { "epoch": 0.13724988572613314, "grad_norm": 1.0985040432842652e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28300 }, { "epoch": 0.13729838391896923, "grad_norm": 1.4649940283106844e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28310 }, { "epoch": 0.13734688211180532, "grad_norm": 1.2967956308784778e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28320 }, { "epoch": 0.1373953803046414, "grad_norm": 1.3327350245617708e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28330 }, { "epoch": 0.1374438784974775, "grad_norm": 1.1704049285299334e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28340 }, { "epoch": 0.13749237669031358, "grad_norm": 1.2342401589648944e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28350 }, { "epoch": 0.13754087488314967, "grad_norm": 1.471814101705604e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28360 }, { "epoch": 0.13758937307598576, "grad_norm": 1.2306158225783292e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28370 }, { "epoch": 0.13763787126882185, "grad_norm": 1.2290331596886972e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28380 }, { "epoch": 0.13768636946165794, "grad_norm": 1.11231557298197e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28390 }, { "epoch": 0.13773486765449403, "grad_norm": 1.1831686208552128e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28400 }, { "epoch": 0.1377833658473301, "grad_norm": 1.458539884424681e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28410 }, { "epoch": 0.1378318640401662, "grad_norm": 1.280343298049047e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28420 }, { "epoch": 0.1378803622330023, "grad_norm": 1.261694961840476e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28430 }, { "epoch": 0.13792886042583838, "grad_norm": 1.1798508126048546e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28440 }, { "epoch": 0.13797735861867447, "grad_norm": 1.1584033643430303e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28450 }, { "epoch": 0.13802585681151056, "grad_norm": 1.1537424882135383e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28460 }, { "epoch": 0.13807435500434664, "grad_norm": 1.237838773704425e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28470 }, { "epoch": 0.13812285319718273, "grad_norm": 1.223279184614512e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28480 }, { "epoch": 0.13817135139001882, "grad_norm": 1.1193323246061482e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28490 }, { "epoch": 0.1382198495828549, "grad_norm": 1.1321147752596517e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28500 }, { "epoch": 0.138268347775691, "grad_norm": 1.2570495755426236e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28510 }, { "epoch": 0.13831684596852709, "grad_norm": 1.3602726767203421e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28520 }, { "epoch": 0.13836534416136317, "grad_norm": 3.391050995560363e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28530 }, { "epoch": 0.1384138423541993, "grad_norm": 1.2223742373862478e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28540 }, { "epoch": 0.13846234054703538, "grad_norm": 1.1262088861485609e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28550 }, { "epoch": 0.13851083873987147, "grad_norm": 1.1396137011843166e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28560 }, { "epoch": 0.13855933693270756, "grad_norm": 1.2179718567040254e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28570 }, { "epoch": 0.13860783512554364, "grad_norm": 4.879200901086733e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28580 }, { "epoch": 0.13865633331837973, "grad_norm": 1.1334479665947583e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28590 }, { "epoch": 0.13870483151121582, "grad_norm": 1.0774380143629969e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28600 }, { "epoch": 0.1387533297040519, "grad_norm": 1.2474369270876196e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28610 }, { "epoch": 0.138801827896888, "grad_norm": 1.226536170406689e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28620 }, { "epoch": 0.13885032608972409, "grad_norm": 1.2379457814404304e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28630 }, { "epoch": 0.13889882428256017, "grad_norm": 1.0558945007232978e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28640 }, { "epoch": 0.13894732247539626, "grad_norm": 1.159421643137648e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28650 }, { "epoch": 0.13899582066823235, "grad_norm": 1.2960137496520474e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28660 }, { "epoch": 0.13904431886106844, "grad_norm": 1.1742058347863349e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28670 }, { "epoch": 0.13909281705390453, "grad_norm": 1.1972717572916736e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28680 }, { "epoch": 0.13914131524674062, "grad_norm": 1.2018952588732645e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28690 }, { "epoch": 0.1391898134395767, "grad_norm": 1.1656134546456087e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28700 }, { "epoch": 0.1392383116324128, "grad_norm": 1.1737418503798835e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28710 }, { "epoch": 0.13928680982524888, "grad_norm": 1.3123741382514709e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28720 }, { "epoch": 0.13933530801808497, "grad_norm": 1.1801677146650036e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28730 }, { "epoch": 0.13938380621092106, "grad_norm": 1.2865604048784007e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28740 }, { "epoch": 0.13943230440375715, "grad_norm": 1.0805717209905197e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28750 }, { "epoch": 0.13948080259659323, "grad_norm": 1.198496875076671e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28760 }, { "epoch": 0.13952930078942932, "grad_norm": 1.4011193627538887e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28770 }, { "epoch": 0.1395777989822654, "grad_norm": 1.2003361860024597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28780 }, { "epoch": 0.1396262971751015, "grad_norm": 1.1278851275164925e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28790 }, { "epoch": 0.13967479536793762, "grad_norm": 1.1094992657945113e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28800 }, { "epoch": 0.1397232935607737, "grad_norm": 3.4204398957626836e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28810 }, { "epoch": 0.1397717917536098, "grad_norm": 1.494432524395961e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28820 }, { "epoch": 0.13982028994644588, "grad_norm": 1.1807075139813605e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28830 }, { "epoch": 0.13986878813928197, "grad_norm": 1.237797562225751e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28840 }, { "epoch": 0.13991728633211806, "grad_norm": 3.1134995879256167e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 28850 }, { "epoch": 0.13996578452495415, "grad_norm": 1.1907182084769374e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28860 }, { "epoch": 0.14001428271779023, "grad_norm": 1.166506962135827e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28870 }, { "epoch": 0.14006278091062632, "grad_norm": 1.2695376483407017e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28880 }, { "epoch": 0.1401112791034624, "grad_norm": 1.0314455778370757e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28890 }, { "epoch": 0.1401597772962985, "grad_norm": 1.0586225585029752e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28900 }, { "epoch": 0.1402082754891346, "grad_norm": 1.4124837832696358e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28910 }, { "epoch": 0.14025677368197068, "grad_norm": 1.213588234350027e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28920 }, { "epoch": 0.14030527187480676, "grad_norm": 1.1671867383711287e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28930 }, { "epoch": 0.14035377006764285, "grad_norm": 1.272974259336479e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28940 }, { "epoch": 0.14040226826047894, "grad_norm": 1.0960908980450768e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28950 }, { "epoch": 0.14045076645331503, "grad_norm": 1.1103573172022152e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28960 }, { "epoch": 0.14049926464615112, "grad_norm": 1.193854473058309e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28970 }, { "epoch": 0.1405477628389872, "grad_norm": 1.316665105832726e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28980 }, { "epoch": 0.1405962610318233, "grad_norm": 1.4450354512973718e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 28990 }, { "epoch": 0.14064475922465938, "grad_norm": 1.0449616638652515e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29000 }, { "epoch": 0.14069325741749547, "grad_norm": 1.203933663873613e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29010 }, { "epoch": 0.14074175561033156, "grad_norm": 1.197271330966032e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29020 }, { "epoch": 0.14079025380316765, "grad_norm": 3.424773069582443e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29030 }, { "epoch": 0.14083875199600374, "grad_norm": 1.5246230589127663e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29040 }, { "epoch": 0.14088725018883985, "grad_norm": 1.0412273354631907e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29050 }, { "epoch": 0.14093574838167594, "grad_norm": 1.1904887742275605e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29060 }, { "epoch": 0.14098424657451203, "grad_norm": 1.285673505435625e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29070 }, { "epoch": 0.14103274476734812, "grad_norm": 1.134488059051364e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29080 }, { "epoch": 0.1410812429601842, "grad_norm": 9.956302449154464e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29090 }, { "epoch": 0.1411297411530203, "grad_norm": 1.0930163085731692e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29100 }, { "epoch": 0.14117823934585638, "grad_norm": 1.1628218032910809e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29110 }, { "epoch": 0.14122673753869247, "grad_norm": 1.1327645665915043e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29120 }, { "epoch": 0.14127523573152856, "grad_norm": 1.1478747552473578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29130 }, { "epoch": 0.14132373392436465, "grad_norm": 1.050926243806316e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29140 }, { "epoch": 0.14137223211720074, "grad_norm": 1.1711222924759568e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29150 }, { "epoch": 0.14142073031003682, "grad_norm": 5.016774480282038e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29160 }, { "epoch": 0.1414692285028729, "grad_norm": 1.455463376487387e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29170 }, { "epoch": 0.141517726695709, "grad_norm": 1.1649719766637645e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29180 }, { "epoch": 0.1415662248885451, "grad_norm": 1.1736344873725102e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29190 }, { "epoch": 0.14161472308138118, "grad_norm": 1.983890740575589e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29200 }, { "epoch": 0.14166322127421727, "grad_norm": 1.1050778425669705e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29210 }, { "epoch": 0.14171171946705335, "grad_norm": 1.4759372390926728e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29220 }, { "epoch": 0.14176021765988944, "grad_norm": 1.0945065298528789e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29230 }, { "epoch": 0.14180871585272553, "grad_norm": 9.890080576724358e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29240 }, { "epoch": 0.14185721404556162, "grad_norm": 1.0362015956388859e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29250 }, { "epoch": 0.1419057122383977, "grad_norm": 1.2068690580235852e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29260 }, { "epoch": 0.1419542104312338, "grad_norm": 1.2815712580049876e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29270 }, { "epoch": 0.14200270862406988, "grad_norm": 1.2996345333249337e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29280 }, { "epoch": 0.14205120681690597, "grad_norm": 1.4979423212935217e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29290 }, { "epoch": 0.14209970500974206, "grad_norm": 1.0759521273939754e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29300 }, { "epoch": 0.14214820320257818, "grad_norm": 4.992352842236869e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29310 }, { "epoch": 0.14219670139541427, "grad_norm": 1.1352604190051352e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29320 }, { "epoch": 0.14224519958825035, "grad_norm": 1.1873576255538865e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29330 }, { "epoch": 0.14229369778108644, "grad_norm": 9.93880888700005e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29340 }, { "epoch": 0.14234219597392253, "grad_norm": 1.0006591111277885e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29350 }, { "epoch": 0.14239069416675862, "grad_norm": 1.2034458052312402e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29360 }, { "epoch": 0.1424391923595947, "grad_norm": 1.0662870408850722e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29370 }, { "epoch": 0.1424876905524308, "grad_norm": 1.049682438747368e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29380 }, { "epoch": 0.14253618874526688, "grad_norm": 1.155413187348131e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29390 }, { "epoch": 0.14258468693810297, "grad_norm": 1.0755637447346089e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29400 }, { "epoch": 0.14263318513093906, "grad_norm": 1.190696323760676e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29410 }, { "epoch": 0.14268168332377515, "grad_norm": 1.0374964176662616e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29420 }, { "epoch": 0.14273018151661124, "grad_norm": 1.1231208674189475e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29430 }, { "epoch": 0.14277867970944733, "grad_norm": 1.037498691403016e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29440 }, { "epoch": 0.14282717790228341, "grad_norm": 9.52221483885296e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29450 }, { "epoch": 0.1428756760951195, "grad_norm": 1.3304507717748493e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29460 }, { "epoch": 0.1429241742879556, "grad_norm": 1.0791689675215821e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29470 }, { "epoch": 0.14297267248079168, "grad_norm": 1.17136139010654e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29480 }, { "epoch": 0.14302117067362777, "grad_norm": 1.0192147215093428e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29490 }, { "epoch": 0.14306966886646386, "grad_norm": 9.511389720273655e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29500 }, { "epoch": 0.14311816705929994, "grad_norm": 1.0594092714200087e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29510 }, { "epoch": 0.14316666525213603, "grad_norm": 1.1487549045341439e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29520 }, { "epoch": 0.14321516344497212, "grad_norm": 1.0723307752869005e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29530 }, { "epoch": 0.1432636616378082, "grad_norm": 9.520299215637351e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29540 }, { "epoch": 0.1433121598306443, "grad_norm": 9.648839238707296e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29550 }, { "epoch": 0.1433606580234804, "grad_norm": 1.157065909751509e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29560 }, { "epoch": 0.1434091562163165, "grad_norm": 1.0797572258525179e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29570 }, { "epoch": 0.1434576544091526, "grad_norm": 1.1091604790181009e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29580 }, { "epoch": 0.14350615260198868, "grad_norm": 9.78213563485042e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29590 }, { "epoch": 0.14355465079482477, "grad_norm": 1.1614948647320489e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29600 }, { "epoch": 0.14360314898766086, "grad_norm": 1.0504072633921169e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29610 }, { "epoch": 0.14365164718049694, "grad_norm": 1.0611022105422308e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29620 }, { "epoch": 0.14370014537333303, "grad_norm": 1.1571159319601065e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29630 }, { "epoch": 0.14374864356616912, "grad_norm": 9.334427630847131e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29640 }, { "epoch": 0.1437971417590052, "grad_norm": 1.0230409230871373e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29650 }, { "epoch": 0.1438456399518413, "grad_norm": 1.0741068479092064e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29660 }, { "epoch": 0.14389413814467739, "grad_norm": 1.1705554214813674e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29670 }, { "epoch": 0.14394263633751347, "grad_norm": 1.320819222883074e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29680 }, { "epoch": 0.14399113453034956, "grad_norm": 1.059022096683293e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29690 }, { "epoch": 0.14403963272318565, "grad_norm": 9.475976980866108e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29700 }, { "epoch": 0.14408813091602174, "grad_norm": 1.0464808042343066e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29710 }, { "epoch": 0.14413662910885783, "grad_norm": 1.056656486753127e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29720 }, { "epoch": 0.14418512730169392, "grad_norm": 9.945975421032927e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29730 }, { "epoch": 0.14423362549453, "grad_norm": 1.0144952966584242e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29740 }, { "epoch": 0.1442821236873661, "grad_norm": 1.1731142279813866e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29750 }, { "epoch": 0.14433062188020218, "grad_norm": 1.0175692466418695e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29760 }, { "epoch": 0.14437912007303827, "grad_norm": 1.1025015567156515e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29770 }, { "epoch": 0.14442761826587436, "grad_norm": 1.0851982068516008e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29780 }, { "epoch": 0.14447611645871045, "grad_norm": 8.921625038738057e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29790 }, { "epoch": 0.14452461465154653, "grad_norm": 9.668418243791166e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29800 }, { "epoch": 0.14457311284438262, "grad_norm": 1.0544170692128318e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29810 }, { "epoch": 0.14462161103721874, "grad_norm": 1.0658693838649924e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29820 }, { "epoch": 0.14467010923005483, "grad_norm": 1.0408197681499587e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29830 }, { "epoch": 0.14471860742289092, "grad_norm": 9.489740193657781e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29840 }, { "epoch": 0.144767105615727, "grad_norm": 1.026352620669968e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29850 }, { "epoch": 0.1448156038085631, "grad_norm": 1.0762251179130544e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29860 }, { "epoch": 0.14486410200139918, "grad_norm": 1.084846417143126e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29870 }, { "epoch": 0.14491260019423527, "grad_norm": 1.186080709203452e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29880 }, { "epoch": 0.14496109838707136, "grad_norm": 9.850041493564277e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29890 }, { "epoch": 0.14500959657990745, "grad_norm": 9.335355599660033e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29900 }, { "epoch": 0.14505809477274353, "grad_norm": 1.0099628156012841e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29910 }, { "epoch": 0.14510659296557962, "grad_norm": 1.0534136407613914e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29920 }, { "epoch": 0.1451550911584157, "grad_norm": 1.359760233299312e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29930 }, { "epoch": 0.1452035893512518, "grad_norm": 9.168604009346382e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29940 }, { "epoch": 0.1452520875440879, "grad_norm": 8.719686661606829e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29950 }, { "epoch": 0.14530058573692398, "grad_norm": 9.92594166859817e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29960 }, { "epoch": 0.14534908392976006, "grad_norm": 1.2152133876952576e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29970 }, { "epoch": 0.14539758212259615, "grad_norm": 1.0592186328040043e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 29980 }, { "epoch": 0.14544608031543224, "grad_norm": 9.37214608143222e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 29990 }, { "epoch": 0.14549457850826833, "grad_norm": 8.862274825105487e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30000 }, { "epoch": 0.14554307670110442, "grad_norm": 1.0411911688379405e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30010 }, { "epoch": 0.1455915748939405, "grad_norm": 1.225210581878855e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30020 }, { "epoch": 0.1456400730867766, "grad_norm": 1.0455563170808091e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30030 }, { "epoch": 0.14568857127961268, "grad_norm": 9.542309697962992e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30040 }, { "epoch": 0.14573706947244877, "grad_norm": 9.197632522273125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30050 }, { "epoch": 0.14578556766528486, "grad_norm": 9.734305450592728e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30060 }, { "epoch": 0.14583406585812095, "grad_norm": 1.1730353577377173e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30070 }, { "epoch": 0.14588256405095706, "grad_norm": 1.2113814307213033e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30080 }, { "epoch": 0.14593106224379315, "grad_norm": 9.485288643418244e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30090 }, { "epoch": 0.14597956043662924, "grad_norm": 8.829540831811755e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30100 }, { "epoch": 0.14602805862946533, "grad_norm": 1.0749908341267655e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30110 }, { "epoch": 0.14607655682230142, "grad_norm": 1.0747782397402261e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30120 }, { "epoch": 0.1461250550151375, "grad_norm": 1.0527896421308469e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30130 }, { "epoch": 0.1461735532079736, "grad_norm": 9.066120298939495e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30140 }, { "epoch": 0.14622205140080968, "grad_norm": 9.056433469822878e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30150 }, { "epoch": 0.14627054959364577, "grad_norm": 9.963838465409935e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30160 }, { "epoch": 0.14631904778648186, "grad_norm": 1.0849899467757496e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30170 }, { "epoch": 0.14636754597931795, "grad_norm": 9.807163792174833e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30180 }, { "epoch": 0.14641604417215404, "grad_norm": 8.650953731148547e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30190 }, { "epoch": 0.14646454236499012, "grad_norm": 1.1778851671806478e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30200 }, { "epoch": 0.1465130405578262, "grad_norm": 1.0607398337469931e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30210 }, { "epoch": 0.1465615387506623, "grad_norm": 1.4204150033947371e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30220 }, { "epoch": 0.1466100369434984, "grad_norm": 1.2317200059897004e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30230 }, { "epoch": 0.14665853513633448, "grad_norm": 1.0032832165052241e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30240 }, { "epoch": 0.14670703332917057, "grad_norm": 8.748611435294151e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30250 }, { "epoch": 0.14675553152200665, "grad_norm": 1.5067649883349077e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30260 }, { "epoch": 0.14680402971484274, "grad_norm": 1.4436632511660719e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30270 }, { "epoch": 0.14685252790767883, "grad_norm": 1.1021771229025035e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30280 }, { "epoch": 0.14690102610051492, "grad_norm": 8.676706642063436e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30290 }, { "epoch": 0.146949524293351, "grad_norm": 8.826947350826231e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30300 }, { "epoch": 0.1469980224861871, "grad_norm": 1.0572287578725081e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30310 }, { "epoch": 0.14704652067902318, "grad_norm": 1.0072572820263304e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30320 }, { "epoch": 0.1470950188718593, "grad_norm": 1.0881568357490323e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30330 }, { "epoch": 0.1471435170646954, "grad_norm": 8.558587438756149e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30340 }, { "epoch": 0.14719201525753148, "grad_norm": 8.130933792926953e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30350 }, { "epoch": 0.14724051345036757, "grad_norm": 1.1836538504894634e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30360 }, { "epoch": 0.14728901164320365, "grad_norm": 9.445471960134455e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30370 }, { "epoch": 0.14733750983603974, "grad_norm": 1.1812194600224757e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30380 }, { "epoch": 0.14738600802887583, "grad_norm": 1.706811758594995e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30390 }, { "epoch": 0.14743450622171192, "grad_norm": 8.93497258402931e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30400 }, { "epoch": 0.147483004414548, "grad_norm": 1.0721415577563675e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30410 }, { "epoch": 0.1475315026073841, "grad_norm": 1.1307622571621323e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30420 }, { "epoch": 0.14758000080022018, "grad_norm": 1.0989369769731638e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30430 }, { "epoch": 0.14762849899305627, "grad_norm": 1.1384636877664889e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30440 }, { "epoch": 0.14767699718589236, "grad_norm": 9.904551490080848e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30450 }, { "epoch": 0.14772549537872845, "grad_norm": 9.420359248224486e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30460 }, { "epoch": 0.14777399357156454, "grad_norm": 1.2775871027770336e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 30470 }, { "epoch": 0.14782249176440063, "grad_norm": 1.2294167106574605e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30480 }, { "epoch": 0.14787098995723671, "grad_norm": 9.016788027338407e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30490 }, { "epoch": 0.1479194881500728, "grad_norm": 8.695599973407298e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30500 }, { "epoch": 0.1479679863429089, "grad_norm": 1.3058226500106684e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30510 }, { "epoch": 0.14801648453574498, "grad_norm": 9.012424584398104e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30520 }, { "epoch": 0.14806498272858107, "grad_norm": 1.2418139760939084e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30530 }, { "epoch": 0.14811348092141716, "grad_norm": 1.7030694721142936e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30540 }, { "epoch": 0.14816197911425324, "grad_norm": 8.814175345150943e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30550 }, { "epoch": 0.14821047730708933, "grad_norm": 9.261660949277939e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30560 }, { "epoch": 0.14825897549992542, "grad_norm": 9.296701364291948e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30570 }, { "epoch": 0.1483074736927615, "grad_norm": 9.134226530704836e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30580 }, { "epoch": 0.14835597188559763, "grad_norm": 8.732975231851015e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30590 }, { "epoch": 0.14840447007843371, "grad_norm": 7.688814207540418e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30600 }, { "epoch": 0.1484529682712698, "grad_norm": 8.33549833600955e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30610 }, { "epoch": 0.1485014664641059, "grad_norm": 9.693803093568931e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30620 }, { "epoch": 0.14854996465694198, "grad_norm": 1.1732488047755396e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30630 }, { "epoch": 0.14859846284977807, "grad_norm": 8.519250371818998e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30640 }, { "epoch": 0.14864696104261416, "grad_norm": 8.637074699890945e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30650 }, { "epoch": 0.14869545923545024, "grad_norm": 1.0229792479776734e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30660 }, { "epoch": 0.14874395742828633, "grad_norm": 1.1072251737687111e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30670 }, { "epoch": 0.14879245562112242, "grad_norm": 2.920261579220096e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30680 }, { "epoch": 0.1488409538139585, "grad_norm": 7.966207249410218e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30690 }, { "epoch": 0.1488894520067946, "grad_norm": 1.1897648732883681e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30700 }, { "epoch": 0.1489379501996307, "grad_norm": 9.442376125434748e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30710 }, { "epoch": 0.14898644839246677, "grad_norm": 1.0411807949139984e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30720 }, { "epoch": 0.14903494658530286, "grad_norm": 1.0470678546425916e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30730 }, { "epoch": 0.14908344477813895, "grad_norm": 8.100982284986458e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30740 }, { "epoch": 0.14913194297097504, "grad_norm": 7.693044778989133e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30750 }, { "epoch": 0.14918044116381113, "grad_norm": 1.0201359401662558e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30760 }, { "epoch": 0.14922893935664722, "grad_norm": 1.0812774320356766e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30770 }, { "epoch": 0.1492774375494833, "grad_norm": 9.446941362512007e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30780 }, { "epoch": 0.1493259357423194, "grad_norm": 8.2548382351888e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30790 }, { "epoch": 0.14937443393515548, "grad_norm": 7.684333525048714e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30800 }, { "epoch": 0.14942293212799157, "grad_norm": 1.0217028290071539e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30810 }, { "epoch": 0.14947143032082766, "grad_norm": 1.0410264650317913e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30820 }, { "epoch": 0.14951992851366375, "grad_norm": 1.1225541385329052e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30830 }, { "epoch": 0.14956842670649984, "grad_norm": 8.414613006380023e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30840 }, { "epoch": 0.14961692489933595, "grad_norm": 7.45209192132279e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30850 }, { "epoch": 0.14966542309217204, "grad_norm": 8.318413335928199e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30860 }, { "epoch": 0.14971392128500813, "grad_norm": 9.569654935148719e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30870 }, { "epoch": 0.14976241947784422, "grad_norm": 1.2599032572779834e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30880 }, { "epoch": 0.1498109176706803, "grad_norm": 9.345030349550143e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30890 }, { "epoch": 0.1498594158635164, "grad_norm": 1.854678828294709e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30900 }, { "epoch": 0.14990791405635248, "grad_norm": 1.4331341446904844e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30910 }, { "epoch": 0.14995641224918857, "grad_norm": 1.1413278144800643e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30920 }, { "epoch": 0.15000491044202466, "grad_norm": 9.514387500075827e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30930 }, { "epoch": 0.15005340863486075, "grad_norm": 1.2531599224985257e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30940 }, { "epoch": 0.15010190682769683, "grad_norm": 7.840129967462417e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30950 }, { "epoch": 0.15015040502053292, "grad_norm": 8.894282643723272e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30960 }, { "epoch": 0.150198903213369, "grad_norm": 1.0101761915848328e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 30970 }, { "epoch": 0.1502474014062051, "grad_norm": 9.521962596181766e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30980 }, { "epoch": 0.1502958995990412, "grad_norm": 7.760503706322197e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 30990 }, { "epoch": 0.15034439779187728, "grad_norm": 6.84224588098914e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31000 }, { "epoch": 0.15039289598471337, "grad_norm": 8.649707439190024e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31010 }, { "epoch": 0.15044139417754945, "grad_norm": 1.0627779545302474e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31020 }, { "epoch": 0.15048989237038554, "grad_norm": 3.255174760852242e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31030 }, { "epoch": 0.15053839056322163, "grad_norm": 8.241685378607144e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31040 }, { "epoch": 0.15058688875605772, "grad_norm": 7.400905843724104e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31050 }, { "epoch": 0.1506353869488938, "grad_norm": 8.356146707910739e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31060 }, { "epoch": 0.1506838851417299, "grad_norm": 9.755152063917194e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31070 }, { "epoch": 0.15073238333456598, "grad_norm": 9.088640240406676e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31080 }, { "epoch": 0.15078088152740207, "grad_norm": 7.633362031356228e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31090 }, { "epoch": 0.1508293797202382, "grad_norm": 7.947412683506627e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31100 }, { "epoch": 0.15087787791307428, "grad_norm": 8.672248696939278e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31110 }, { "epoch": 0.15092637610591036, "grad_norm": 1.3380964958287223e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31120 }, { "epoch": 0.15097487429874645, "grad_norm": 9.169604453518332e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31130 }, { "epoch": 0.15102337249158254, "grad_norm": 1.5551839283034496e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31140 }, { "epoch": 0.15107187068441863, "grad_norm": 7.343143693105958e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31150 }, { "epoch": 0.15112036887725472, "grad_norm": 9.122910427095121e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31160 }, { "epoch": 0.1511688670700908, "grad_norm": 8.460183664737997e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31170 }, { "epoch": 0.1512173652629269, "grad_norm": 8.884434521405637e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31180 }, { "epoch": 0.15126586345576298, "grad_norm": 7.524810996528686e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31190 }, { "epoch": 0.15131436164859907, "grad_norm": 9.459445493575913e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31200 }, { "epoch": 0.15136285984143516, "grad_norm": 1.3880074334338133e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31210 }, { "epoch": 0.15141135803427125, "grad_norm": 1.1334991967260066e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31220 }, { "epoch": 0.15145985622710734, "grad_norm": 9.124345012878621e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31230 }, { "epoch": 0.15150835441994343, "grad_norm": 7.477571273284411e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31240 }, { "epoch": 0.1515568526127795, "grad_norm": 1.6550961845496204e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31250 }, { "epoch": 0.1516053508056156, "grad_norm": 1.0150269957875935e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31260 }, { "epoch": 0.1516538489984517, "grad_norm": 4.4279639155320183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31270 }, { "epoch": 0.15170234719128778, "grad_norm": 8.99091077144476e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31280 }, { "epoch": 0.15175084538412387, "grad_norm": 7.953737934940364e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31290 }, { "epoch": 0.15179934357695996, "grad_norm": 7.321933281900783e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31300 }, { "epoch": 0.15184784176979604, "grad_norm": 3.4923644420814526e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31310 }, { "epoch": 0.15189633996263213, "grad_norm": 9.293735558912886e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31320 }, { "epoch": 0.15194483815546822, "grad_norm": 9.074433648947888e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31330 }, { "epoch": 0.1519933363483043, "grad_norm": 7.101718324520334e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31340 }, { "epoch": 0.1520418345411404, "grad_norm": 8.58021564908995e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31350 }, { "epoch": 0.1520903327339765, "grad_norm": 2.207397926667909e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31360 }, { "epoch": 0.1521388309268126, "grad_norm": 8.025137532285953e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31370 }, { "epoch": 0.1521873291196487, "grad_norm": 1.6083778575648466e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31380 }, { "epoch": 0.15223582731248478, "grad_norm": 7.382065092542689e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31390 }, { "epoch": 0.15228432550532087, "grad_norm": 6.457186429997819e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31400 }, { "epoch": 0.15233282369815696, "grad_norm": 8.564483522377486e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31410 }, { "epoch": 0.15238132189099304, "grad_norm": 1.349034448594466e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31420 }, { "epoch": 0.15242982008382913, "grad_norm": 1.2119875236749067e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31430 }, { "epoch": 0.15247831827666522, "grad_norm": 8.695988640283758e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31440 }, { "epoch": 0.1525268164695013, "grad_norm": 8.43094625224694e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31450 }, { "epoch": 0.1525753146623374, "grad_norm": 1.015440673768353e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31460 }, { "epoch": 0.15262381285517349, "grad_norm": 1.109242901975449e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31470 }, { "epoch": 0.15267231104800957, "grad_norm": 8.516465754837554e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31480 }, { "epoch": 0.15272080924084566, "grad_norm": 8.752046198878816e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31490 }, { "epoch": 0.15276930743368175, "grad_norm": 9.415251867039842e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31500 }, { "epoch": 0.15281780562651784, "grad_norm": 8.35921341035828e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31510 }, { "epoch": 0.15286630381935393, "grad_norm": 2.26641674316852e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31520 }, { "epoch": 0.15291480201219002, "grad_norm": 8.983623445146804e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31530 }, { "epoch": 0.1529633002050261, "grad_norm": 1.4268562154029496e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31540 }, { "epoch": 0.1530117983978622, "grad_norm": 7.381683531093586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31550 }, { "epoch": 0.15306029659069828, "grad_norm": 9.245937349078304e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31560 }, { "epoch": 0.15310879478353437, "grad_norm": 9.039509052399808e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31570 }, { "epoch": 0.15315729297637046, "grad_norm": 8.903095505274905e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31580 }, { "epoch": 0.15320579116920655, "grad_norm": 1.0566232333530934e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31590 }, { "epoch": 0.15325428936204263, "grad_norm": 7.68923271721178e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31600 }, { "epoch": 0.15330278755487875, "grad_norm": 9.342530660205739e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31610 }, { "epoch": 0.15335128574771484, "grad_norm": 7.966855974927967e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31620 }, { "epoch": 0.15339978394055093, "grad_norm": 2.121488904549551e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31630 }, { "epoch": 0.15344828213338702, "grad_norm": 7.465779106041737e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31640 }, { "epoch": 0.1534967803262231, "grad_norm": 7.693157755284119e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31650 }, { "epoch": 0.1535452785190592, "grad_norm": 1.0105230785484309e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31660 }, { "epoch": 0.15359377671189528, "grad_norm": 1.7190552625834243e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31670 }, { "epoch": 0.15364227490473137, "grad_norm": 2.1966727103972516e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31680 }, { "epoch": 0.15369077309756746, "grad_norm": 7.52870832343433e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31690 }, { "epoch": 0.15373927129040355, "grad_norm": 8.890226865787554e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31700 }, { "epoch": 0.15378776948323963, "grad_norm": 9.077301399429416e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31710 }, { "epoch": 0.15383626767607572, "grad_norm": 8.724385480718411e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31720 }, { "epoch": 0.1538847658689118, "grad_norm": 6.947312414240514e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31730 }, { "epoch": 0.1539332640617479, "grad_norm": 8.205984869391614e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31740 }, { "epoch": 0.153981762254584, "grad_norm": 8.8847457391239e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31750 }, { "epoch": 0.15403026044742008, "grad_norm": 7.82430618073704e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31760 }, { "epoch": 0.15407875864025616, "grad_norm": 8.063515366529828e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31770 }, { "epoch": 0.15412725683309225, "grad_norm": 1.0494387936432759e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31780 }, { "epoch": 0.15417575502592834, "grad_norm": 1.5065253933244094e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31790 }, { "epoch": 0.15422425321876443, "grad_norm": 8.78826398320598e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31800 }, { "epoch": 0.15427275141160052, "grad_norm": 8.899343129087356e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31810 }, { "epoch": 0.1543212496044366, "grad_norm": 9.067220929637188e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31820 }, { "epoch": 0.1543697477972727, "grad_norm": 2.030572829880839e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31830 }, { "epoch": 0.15441824599010878, "grad_norm": 1.5242970619055995e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31840 }, { "epoch": 0.15446674418294487, "grad_norm": 9.993686944653746e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31850 }, { "epoch": 0.15451524237578096, "grad_norm": 7.456684869566743e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31860 }, { "epoch": 0.15456374056861708, "grad_norm": 9.912417908708449e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31870 }, { "epoch": 0.15461223876145316, "grad_norm": 1.102264519659002e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31880 }, { "epoch": 0.15466073695428925, "grad_norm": 1.169546948176503e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31890 }, { "epoch": 0.15470923514712534, "grad_norm": 6.329073443112065e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31900 }, { "epoch": 0.15475773333996143, "grad_norm": 1.0603633171513138e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31910 }, { "epoch": 0.15480623153279752, "grad_norm": 2.7379545031180896e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31920 }, { "epoch": 0.1548547297256336, "grad_norm": 7.853130057355884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31930 }, { "epoch": 0.1549032279184697, "grad_norm": 9.685813040505309e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31940 }, { "epoch": 0.15495172611130578, "grad_norm": 6.718354228496537e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31950 }, { "epoch": 0.15500022430414187, "grad_norm": 9.86499273381014e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31960 }, { "epoch": 0.15504872249697796, "grad_norm": 7.185880690485646e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31970 }, { "epoch": 0.15509722068981405, "grad_norm": 1.2269281057797343e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 31980 }, { "epoch": 0.15514571888265014, "grad_norm": 7.552934988552806e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 31990 }, { "epoch": 0.15519421707548622, "grad_norm": 8.970271636599136e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32000 }, { "epoch": 0.1552427152683223, "grad_norm": 1.0872754074853219e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32010 }, { "epoch": 0.1552912134611584, "grad_norm": 7.697607173895449e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32020 }, { "epoch": 0.1553397116539945, "grad_norm": 9.125189137648704e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32030 }, { "epoch": 0.15538820984683058, "grad_norm": 8.133819306976875e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32040 }, { "epoch": 0.15543670803966667, "grad_norm": 7.118820377627344e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32050 }, { "epoch": 0.15548520623250275, "grad_norm": 1.1063487903584246e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32060 }, { "epoch": 0.15553370442533884, "grad_norm": 1.0568434305469054e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32070 }, { "epoch": 0.15558220261817493, "grad_norm": 9.432129388642352e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32080 }, { "epoch": 0.15563070081101102, "grad_norm": 5.8880804232330775e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32090 }, { "epoch": 0.1556791990038471, "grad_norm": 8.253678629444039e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32100 }, { "epoch": 0.1557276971966832, "grad_norm": 3.2745873568273964e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32110 }, { "epoch": 0.1557761953895193, "grad_norm": 1.4202312570432696e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32120 }, { "epoch": 0.1558246935823554, "grad_norm": 3.596272790673538e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32130 }, { "epoch": 0.1558731917751915, "grad_norm": 7.017020919874994e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32140 }, { "epoch": 0.15592168996802758, "grad_norm": 1.0423813989746122e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32150 }, { "epoch": 0.15597018816086367, "grad_norm": 1.431198626278274e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32160 }, { "epoch": 0.15601868635369975, "grad_norm": 1.336126587148101e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32170 }, { "epoch": 0.15606718454653584, "grad_norm": 8.814430429993081e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32180 }, { "epoch": 0.15611568273937193, "grad_norm": 1.2387629055865546e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32190 }, { "epoch": 0.15616418093220802, "grad_norm": 7.262889312187326e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32200 }, { "epoch": 0.1562126791250441, "grad_norm": 8.123235062384992e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32210 }, { "epoch": 0.1562611773178802, "grad_norm": 9.354452146226322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32220 }, { "epoch": 0.15630967551071628, "grad_norm": 1.6231815891387669e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32230 }, { "epoch": 0.15635817370355237, "grad_norm": 8.121884320644313e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32240 }, { "epoch": 0.15640667189638846, "grad_norm": 1.1572408453730532e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32250 }, { "epoch": 0.15645517008922455, "grad_norm": 7.965898873862898e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32260 }, { "epoch": 0.15650366828206064, "grad_norm": 7.701536475224202e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32270 }, { "epoch": 0.15655216647489673, "grad_norm": 2.2874093019709107e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32280 }, { "epoch": 0.15660066466773281, "grad_norm": 9.003865386603138e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32290 }, { "epoch": 0.1566491628605689, "grad_norm": 1.3956345412680093e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32300 }, { "epoch": 0.156697661053405, "grad_norm": 9.689720315009254e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32310 }, { "epoch": 0.15674615924624108, "grad_norm": 7.808533553088637e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32320 }, { "epoch": 0.15679465743907717, "grad_norm": 9.375290233037958e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32330 }, { "epoch": 0.15684315563191326, "grad_norm": 8.565417175532275e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32340 }, { "epoch": 0.15689165382474934, "grad_norm": 7.025845860653135e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32350 }, { "epoch": 0.15694015201758543, "grad_norm": 9.099932185563375e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32360 }, { "epoch": 0.15698865021042152, "grad_norm": 8.03945141569784e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32370 }, { "epoch": 0.15703714840325764, "grad_norm": 7.251326650248302e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32380 }, { "epoch": 0.15708564659609373, "grad_norm": 8.504506610051976e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32390 }, { "epoch": 0.1571341447889298, "grad_norm": 1.1162929069996608e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32400 }, { "epoch": 0.1571826429817659, "grad_norm": 8.312363775075937e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32410 }, { "epoch": 0.157231141174602, "grad_norm": 8.014481522877759e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32420 }, { "epoch": 0.15727963936743808, "grad_norm": 1.557778404048804e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32430 }, { "epoch": 0.15732813756027417, "grad_norm": 9.78228129611125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32440 }, { "epoch": 0.15737663575311026, "grad_norm": 8.229736181419867e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32450 }, { "epoch": 0.15742513394594634, "grad_norm": 8.731912970461053e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32460 }, { "epoch": 0.15747363213878243, "grad_norm": 9.602443640233105e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32470 }, { "epoch": 0.15752213033161852, "grad_norm": 9.396514144555113e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32480 }, { "epoch": 0.1575706285244546, "grad_norm": 6.162315457913792e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32490 }, { "epoch": 0.1576191267172907, "grad_norm": 1.242136988821585e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32500 }, { "epoch": 0.15766762491012679, "grad_norm": 7.820386827006587e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32510 }, { "epoch": 0.15771612310296287, "grad_norm": 1.0306269615512065e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32520 }, { "epoch": 0.15776462129579896, "grad_norm": 3.261360461692675e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32530 }, { "epoch": 0.15781311948863505, "grad_norm": 5.8208556197314465e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32540 }, { "epoch": 0.15786161768147114, "grad_norm": 5.7430572297789695e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32550 }, { "epoch": 0.15791011587430723, "grad_norm": 1.49705272178835e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32560 }, { "epoch": 0.15795861406714332, "grad_norm": 8.456682110136171e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32570 }, { "epoch": 0.1580071122599794, "grad_norm": 8.451602440118222e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32580 }, { "epoch": 0.1580556104528155, "grad_norm": 6.406532548908217e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32590 }, { "epoch": 0.15810410864565158, "grad_norm": 6.208860270362493e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32600 }, { "epoch": 0.15815260683848767, "grad_norm": 1.0505092262746984e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32610 }, { "epoch": 0.15820110503132376, "grad_norm": 9.98682523345451e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32620 }, { "epoch": 0.15824960322415985, "grad_norm": 7.763418352624285e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32630 }, { "epoch": 0.15829810141699596, "grad_norm": 7.389896694576237e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32640 }, { "epoch": 0.15834659960983205, "grad_norm": 6.705490562808336e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32650 }, { "epoch": 0.15839509780266814, "grad_norm": 9.563564162817784e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32660 }, { "epoch": 0.15844359599550423, "grad_norm": 7.909201826805656e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32670 }, { "epoch": 0.15849209418834032, "grad_norm": 7.061272100372662e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32680 }, { "epoch": 0.1585405923811764, "grad_norm": 1.2243928892985423e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32690 }, { "epoch": 0.1585890905740125, "grad_norm": 6.87980659108689e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32700 }, { "epoch": 0.15863758876684858, "grad_norm": 7.869532936410906e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32710 }, { "epoch": 0.15868608695968467, "grad_norm": 7.635847509845917e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32720 }, { "epoch": 0.15873458515252076, "grad_norm": 7.059075102233692e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32730 }, { "epoch": 0.15878308334535685, "grad_norm": 5.774890254883758e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32740 }, { "epoch": 0.15883158153819293, "grad_norm": 1.1163271551595244e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32750 }, { "epoch": 0.15888007973102902, "grad_norm": 7.500021581563487e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32760 }, { "epoch": 0.1589285779238651, "grad_norm": 1.6066026375938236e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32770 }, { "epoch": 0.1589770761167012, "grad_norm": 7.153256120773221e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32780 }, { "epoch": 0.1590255743095373, "grad_norm": 1.1838641711392484e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32790 }, { "epoch": 0.15907407250237338, "grad_norm": 6.657965911927022e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32800 }, { "epoch": 0.15912257069520946, "grad_norm": 8.10291922448414e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32810 }, { "epoch": 0.15917106888804555, "grad_norm": 7.384979738844777e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32820 }, { "epoch": 0.15921956708088164, "grad_norm": 1.1330016747024274e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32830 }, { "epoch": 0.15926806527371773, "grad_norm": 7.814931990424157e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32840 }, { "epoch": 0.15931656346655382, "grad_norm": 1.5104227202300535e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32850 }, { "epoch": 0.1593650616593899, "grad_norm": 7.391380307808504e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32860 }, { "epoch": 0.159413559852226, "grad_norm": 9.731412120572713e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32870 }, { "epoch": 0.15946205804506208, "grad_norm": 6.456021139911172e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32880 }, { "epoch": 0.1595105562378982, "grad_norm": 6.37064871966686e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32890 }, { "epoch": 0.1595590544307343, "grad_norm": 1.0079469348056591e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32900 }, { "epoch": 0.15960755262357038, "grad_norm": 8.87962272599907e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32910 }, { "epoch": 0.15965605081640646, "grad_norm": 7.673590829426757e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32920 }, { "epoch": 0.15970454900924255, "grad_norm": 7.972707294356951e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32930 }, { "epoch": 0.15975304720207864, "grad_norm": 5.7394849761749356e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32940 }, { "epoch": 0.15980154539491473, "grad_norm": 9.010893364802541e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32950 }, { "epoch": 0.15985004358775082, "grad_norm": 1.141164460705113e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32960 }, { "epoch": 0.1598985417805869, "grad_norm": 1.0463645594427362e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32970 }, { "epoch": 0.159947039973423, "grad_norm": 9.316190130448376e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 32980 }, { "epoch": 0.15999553816625908, "grad_norm": 1.730425793766699e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 32990 }, { "epoch": 0.16004403635909517, "grad_norm": 6.91616079961932e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33000 }, { "epoch": 0.16009253455193126, "grad_norm": 7.360515752452557e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33010 }, { "epoch": 0.16014103274476735, "grad_norm": 6.900192062175847e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33020 }, { "epoch": 0.16018953093760344, "grad_norm": 5.8876267416962946e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33030 }, { "epoch": 0.16023802913043952, "grad_norm": 7.48171089526295e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33040 }, { "epoch": 0.1602865273232756, "grad_norm": 6.705982968924218e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33050 }, { "epoch": 0.1603350255161117, "grad_norm": 7.711547311828326e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33060 }, { "epoch": 0.1603835237089478, "grad_norm": 7.462000439772964e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33070 }, { "epoch": 0.16043202190178388, "grad_norm": 7.652663924773151e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33080 }, { "epoch": 0.16048052009461997, "grad_norm": 6.835042398734004e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33090 }, { "epoch": 0.16052901828745605, "grad_norm": 4.9509480959386565e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33100 }, { "epoch": 0.16057751648029214, "grad_norm": 1.0355385171578746e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33110 }, { "epoch": 0.16062601467312823, "grad_norm": 6.286575882086254e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33120 }, { "epoch": 0.16067451286596432, "grad_norm": 7.424109327303086e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33130 }, { "epoch": 0.1607230110588004, "grad_norm": 8.128085937642027e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33140 }, { "epoch": 0.16077150925163652, "grad_norm": 6.303181265820967e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33150 }, { "epoch": 0.1608200074444726, "grad_norm": 6.408780706124162e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33160 }, { "epoch": 0.1608685056373087, "grad_norm": 9.830377223352116e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33170 }, { "epoch": 0.1609170038301448, "grad_norm": 7.639598464947994e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33180 }, { "epoch": 0.16096550202298088, "grad_norm": 1.4548561466654064e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33190 }, { "epoch": 0.16101400021581697, "grad_norm": 6.604045665881131e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33200 }, { "epoch": 0.16106249840865305, "grad_norm": 5.973269878722931e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33210 }, { "epoch": 0.16111099660148914, "grad_norm": 5.8672434022355446e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33220 }, { "epoch": 0.16115949479432523, "grad_norm": 8.535499773643096e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33230 }, { "epoch": 0.16120799298716132, "grad_norm": 1.248466645620283e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33240 }, { "epoch": 0.1612564911799974, "grad_norm": 8.201728718404411e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33250 }, { "epoch": 0.1613049893728335, "grad_norm": 6.056824304323527e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33260 }, { "epoch": 0.16135348756566958, "grad_norm": 7.281886382770608e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33270 }, { "epoch": 0.16140198575850567, "grad_norm": 7.84517197871537e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33280 }, { "epoch": 0.16145048395134176, "grad_norm": 6.862121182393821e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33290 }, { "epoch": 0.16149898214417785, "grad_norm": 7.896322529177269e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33300 }, { "epoch": 0.16154748033701394, "grad_norm": 1.0236308156663654e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33310 }, { "epoch": 0.16159597852985003, "grad_norm": 5.625236454420701e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33320 }, { "epoch": 0.16164447672268611, "grad_norm": 6.37390158431117e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33330 }, { "epoch": 0.1616929749155222, "grad_norm": 5.292703875170446e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33340 }, { "epoch": 0.1617414731083583, "grad_norm": 6.166064991930398e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33350 }, { "epoch": 0.16178997130119438, "grad_norm": 6.315309519777657e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33360 }, { "epoch": 0.16183846949403047, "grad_norm": 9.015553814606392e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33370 }, { "epoch": 0.16188696768686656, "grad_norm": 6.41805186774036e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33380 }, { "epoch": 0.16193546587970264, "grad_norm": 6.172746935817486e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33390 }, { "epoch": 0.16198396407253876, "grad_norm": 5.699650174051385e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33400 }, { "epoch": 0.16203246226537485, "grad_norm": 8.928904549065919e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33410 }, { "epoch": 0.16208096045821094, "grad_norm": 6.688546250188665e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33420 }, { "epoch": 0.16212945865104703, "grad_norm": 9.321927052496903e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33430 }, { "epoch": 0.16217795684388311, "grad_norm": 1.219745655589577e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33440 }, { "epoch": 0.1622264550367192, "grad_norm": 5.982175821372948e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33450 }, { "epoch": 0.1622749532295553, "grad_norm": 1.0702563457698488e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33460 }, { "epoch": 0.16232345142239138, "grad_norm": 1.086462617649886e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33470 }, { "epoch": 0.16237194961522747, "grad_norm": 2.879971816582838e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33480 }, { "epoch": 0.16242044780806356, "grad_norm": 7.17470882705129e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33490 }, { "epoch": 0.16246894600089964, "grad_norm": 1.692062596703181e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33500 }, { "epoch": 0.16251744419373573, "grad_norm": 5.872689357033778e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33510 }, { "epoch": 0.16256594238657182, "grad_norm": 8.07474478392578e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33520 }, { "epoch": 0.1626144405794079, "grad_norm": 6.499504223711483e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33530 }, { "epoch": 0.162662938772244, "grad_norm": 5.013822956811964e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33540 }, { "epoch": 0.1627114369650801, "grad_norm": 6.494876458873478e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33550 }, { "epoch": 0.16275993515791617, "grad_norm": 1.0949921147584973e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33560 }, { "epoch": 0.16280843335075226, "grad_norm": 8.092601433418167e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33570 }, { "epoch": 0.16285693154358835, "grad_norm": 1.4939905668143183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33580 }, { "epoch": 0.16290542973642444, "grad_norm": 8.14929990156088e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33590 }, { "epoch": 0.16295392792926053, "grad_norm": 5.9222386994406406e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33600 }, { "epoch": 0.16300242612209662, "grad_norm": 5.4755563638764215e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33610 }, { "epoch": 0.1630509243149327, "grad_norm": 7.276047142568132e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33620 }, { "epoch": 0.1630994225077688, "grad_norm": 6.078403913534203e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33630 }, { "epoch": 0.16314792070060488, "grad_norm": 7.03377835975516e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33640 }, { "epoch": 0.16319641889344097, "grad_norm": 4.738393855063805e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33650 }, { "epoch": 0.16324491708627709, "grad_norm": 7.165712645473832e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33660 }, { "epoch": 0.16329341527911317, "grad_norm": 1.4154889527162595e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33670 }, { "epoch": 0.16334191347194926, "grad_norm": 6.976682698223158e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33680 }, { "epoch": 0.16339041166478535, "grad_norm": 5.680818304654167e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33690 }, { "epoch": 0.16343890985762144, "grad_norm": 5.60844135577554e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33700 }, { "epoch": 0.16348740805045753, "grad_norm": 5.908195532811078e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33710 }, { "epoch": 0.16353590624329362, "grad_norm": 7.230617171671838e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33720 }, { "epoch": 0.1635844044361297, "grad_norm": 3.876841105920903e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33730 }, { "epoch": 0.1636329026289658, "grad_norm": 7.206921281976975e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33740 }, { "epoch": 0.16368140082180188, "grad_norm": 5.436245587020494e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33750 }, { "epoch": 0.16372989901463797, "grad_norm": 7.673258295426422e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33760 }, { "epoch": 0.16377839720747406, "grad_norm": 8.28029769195382e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33770 }, { "epoch": 0.16382689540031015, "grad_norm": 6.366790472611683e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33780 }, { "epoch": 0.16387539359314623, "grad_norm": 6.405372232620721e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33790 }, { "epoch": 0.16392389178598232, "grad_norm": 1.1223718132669092e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33800 }, { "epoch": 0.1639723899788184, "grad_norm": 6.663383089744457e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33810 }, { "epoch": 0.1640208881716545, "grad_norm": 9.289151847724497e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33820 }, { "epoch": 0.1640693863644906, "grad_norm": 8.234302839582597e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33830 }, { "epoch": 0.16411788455732668, "grad_norm": 7.902406196080847e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33840 }, { "epoch": 0.16416638275016276, "grad_norm": 4.5469811027487594e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33850 }, { "epoch": 0.16421488094299885, "grad_norm": 3.7885803294557263e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33860 }, { "epoch": 0.16426337913583494, "grad_norm": 1.6891387133455282e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33870 }, { "epoch": 0.16431187732867103, "grad_norm": 8.002118789818269e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33880 }, { "epoch": 0.16436037552150712, "grad_norm": 5.919199352888427e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33890 }, { "epoch": 0.1644088737143432, "grad_norm": 4.6183288304746384e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33900 }, { "epoch": 0.1644573719071793, "grad_norm": 7.542506352820055e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33910 }, { "epoch": 0.1645058701000154, "grad_norm": 5.5033265056181335e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33920 }, { "epoch": 0.1645543682928515, "grad_norm": 5.4259281512258895e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33930 }, { "epoch": 0.1646028664856876, "grad_norm": 1.6853785211878858e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33940 }, { "epoch": 0.16465136467852368, "grad_norm": 2.0421590818386903e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 33950 }, { "epoch": 0.16469986287135976, "grad_norm": 7.565876813941941e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33960 }, { "epoch": 0.16474836106419585, "grad_norm": 6.146807862705828e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33970 }, { "epoch": 0.16479685925703194, "grad_norm": 6.312011180398258e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33980 }, { "epoch": 0.16484535744986803, "grad_norm": 5.530262114916695e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 33990 }, { "epoch": 0.16489385564270412, "grad_norm": 8.740072132695786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34000 }, { "epoch": 0.1649423538355402, "grad_norm": 7.669763135709218e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34010 }, { "epoch": 0.1649908520283763, "grad_norm": 8.688036245985131e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34020 }, { "epoch": 0.16503935022121238, "grad_norm": 8.188333566749861e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34030 }, { "epoch": 0.16508784841404847, "grad_norm": 7.636025145529857e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34040 }, { "epoch": 0.16513634660688456, "grad_norm": 8.489794112165328e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34050 }, { "epoch": 0.16518484479972065, "grad_norm": 7.881505581508463e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34060 }, { "epoch": 0.16523334299255674, "grad_norm": 8.002534457318689e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34070 }, { "epoch": 0.16528184118539282, "grad_norm": 5.664551849804411e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34080 }, { "epoch": 0.1653303393782289, "grad_norm": 6.763734461401327e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34090 }, { "epoch": 0.165378837571065, "grad_norm": 7.107709620868263e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34100 }, { "epoch": 0.1654273357639011, "grad_norm": 5.976555428333086e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34110 }, { "epoch": 0.16547583395673718, "grad_norm": 6.159922349979752e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34120 }, { "epoch": 0.16552433214957327, "grad_norm": 6.55786109859946e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34130 }, { "epoch": 0.16557283034240936, "grad_norm": 6.375925920565351e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34140 }, { "epoch": 0.16562132853524544, "grad_norm": 7.753763497930777e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34150 }, { "epoch": 0.16566982672808153, "grad_norm": 7.736226592669482e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34160 }, { "epoch": 0.16571832492091765, "grad_norm": 6.851289668929894e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34170 }, { "epoch": 0.16576682311375374, "grad_norm": 5.2607234124479874e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34180 }, { "epoch": 0.16581532130658982, "grad_norm": 8.028777642721252e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34190 }, { "epoch": 0.1658638194994259, "grad_norm": 6.870394742009012e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34200 }, { "epoch": 0.165912317692262, "grad_norm": 5.617974352389865e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34210 }, { "epoch": 0.1659608158850981, "grad_norm": 9.473673401316773e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34220 }, { "epoch": 0.16600931407793418, "grad_norm": 1.941351541745462e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34230 }, { "epoch": 0.16605781227077027, "grad_norm": 4.696235578194319e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34240 }, { "epoch": 0.16610631046360635, "grad_norm": 4.802917175084076e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34250 }, { "epoch": 0.16615480865644244, "grad_norm": 1.1942161393108108e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34260 }, { "epoch": 0.16620330684927853, "grad_norm": 1.016307251688886e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34270 }, { "epoch": 0.16625180504211462, "grad_norm": 7.053411366086948e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34280 }, { "epoch": 0.1663003032349507, "grad_norm": 1.2818256323043897e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34290 }, { "epoch": 0.1663488014277868, "grad_norm": 5.737602393196539e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34300 }, { "epoch": 0.16639729962062288, "grad_norm": 9.920852761524657e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34310 }, { "epoch": 0.16644579781345897, "grad_norm": 6.344394165580525e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34320 }, { "epoch": 0.16649429600629506, "grad_norm": 6.731433188633673e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34330 }, { "epoch": 0.16654279419913115, "grad_norm": 5.664128721605266e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34340 }, { "epoch": 0.16659129239196724, "grad_norm": 8.666359008202562e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34350 }, { "epoch": 0.16663979058480333, "grad_norm": 1.485187368643892e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34360 }, { "epoch": 0.16668828877763942, "grad_norm": 6.364481919263199e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34370 }, { "epoch": 0.1667367869704755, "grad_norm": 5.687511972496395e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34380 }, { "epoch": 0.1667852851633116, "grad_norm": 6.896530635458475e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34390 }, { "epoch": 0.16683378335614768, "grad_norm": 6.187232060028691e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34400 }, { "epoch": 0.16688228154898377, "grad_norm": 2.653731030477502e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34410 }, { "epoch": 0.16693077974181986, "grad_norm": 7.284922531880511e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34420 }, { "epoch": 0.16697927793465597, "grad_norm": 5.497101795981507e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34430 }, { "epoch": 0.16702777612749206, "grad_norm": 1.9682744323290535e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34440 }, { "epoch": 0.16707627432032815, "grad_norm": 7.157252923661872e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34450 }, { "epoch": 0.16712477251316424, "grad_norm": 6.116415107726425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34460 }, { "epoch": 0.16717327070600033, "grad_norm": 1.0152207323699258e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 34470 }, { "epoch": 0.16722176889883641, "grad_norm": 5.08113906505514e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34480 }, { "epoch": 0.1672702670916725, "grad_norm": 5.49894814128038e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34490 }, { "epoch": 0.1673187652845086, "grad_norm": 7.569557425313178e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34500 }, { "epoch": 0.16736726347734468, "grad_norm": 6.186944290220708e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34510 }, { "epoch": 0.16741576167018077, "grad_norm": 7.533464696507508e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34520 }, { "epoch": 0.16746425986301686, "grad_norm": 6.724346945929938e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34530 }, { "epoch": 0.16751275805585294, "grad_norm": 1.1702145741310233e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34540 }, { "epoch": 0.16756125624868903, "grad_norm": 6.316371070624882e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34550 }, { "epoch": 0.16760975444152512, "grad_norm": 1.956826309879034e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34560 }, { "epoch": 0.1676582526343612, "grad_norm": 6.185101142364147e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34570 }, { "epoch": 0.1677067508271973, "grad_norm": 6.320588852304354e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34580 }, { "epoch": 0.1677552490200334, "grad_norm": 1.2878953725703468e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34590 }, { "epoch": 0.16780374721286948, "grad_norm": 6.397623053544521e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34600 }, { "epoch": 0.16785224540570556, "grad_norm": 1.9766680736665876e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34610 }, { "epoch": 0.16790074359854165, "grad_norm": 1.0221397417353728e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34620 }, { "epoch": 0.16794924179137774, "grad_norm": 5.8687962933845483e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34630 }, { "epoch": 0.16799773998421383, "grad_norm": 5.4567163232377425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34640 }, { "epoch": 0.16804623817704992, "grad_norm": 1.393746771327642e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34650 }, { "epoch": 0.168094736369886, "grad_norm": 1.149413151324552e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34660 }, { "epoch": 0.1681432345627221, "grad_norm": 5.814426984329657e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34670 }, { "epoch": 0.1681917327555582, "grad_norm": 1.4584599739464466e-05, "learning_rate": 0.0002, "loss": 0.0, "step": 34680 }, { "epoch": 0.1682402309483943, "grad_norm": 5.9564122523170226e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34690 }, { "epoch": 0.1682887291412304, "grad_norm": 6.329614876676715e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34700 }, { "epoch": 0.16833722733406647, "grad_norm": 1.216772176348968e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 34710 }, { "epoch": 0.16838572552690256, "grad_norm": 6.262042973048665e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34720 }, { "epoch": 0.16843422371973865, "grad_norm": 7.897993725691776e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34730 }, { "epoch": 0.16848272191257474, "grad_norm": 6.877282032746734e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34740 }, { "epoch": 0.16853122010541083, "grad_norm": 4.2511771880526794e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34750 }, { "epoch": 0.16857971829824692, "grad_norm": 5.681200931917374e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34760 }, { "epoch": 0.168628216491083, "grad_norm": 5.783831369399195e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34770 }, { "epoch": 0.1686767146839191, "grad_norm": 5.7602299818881875e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34780 }, { "epoch": 0.16872521287675518, "grad_norm": 7.32606153519555e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34790 }, { "epoch": 0.16877371106959127, "grad_norm": 5.846951012244972e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34800 }, { "epoch": 0.16882220926242736, "grad_norm": 8.730371092724454e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34810 }, { "epoch": 0.16887070745526345, "grad_norm": 6.630965287968138e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34820 }, { "epoch": 0.16891920564809954, "grad_norm": 8.845182009054042e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34830 }, { "epoch": 0.16896770384093562, "grad_norm": 5.1639418074955756e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34840 }, { "epoch": 0.1690162020337717, "grad_norm": 5.3457931414868654e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34850 }, { "epoch": 0.1690647002266078, "grad_norm": 6.406597208297171e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34860 }, { "epoch": 0.1691131984194439, "grad_norm": 6.900831550638031e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34870 }, { "epoch": 0.16916169661227998, "grad_norm": 9.175593618238054e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34880 }, { "epoch": 0.16921019480511607, "grad_norm": 4.681749032897642e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34890 }, { "epoch": 0.16925869299795215, "grad_norm": 5.459467189439238e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34900 }, { "epoch": 0.16930719119078824, "grad_norm": 6.102273886199328e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34910 }, { "epoch": 0.16935568938362433, "grad_norm": 6.205918623436446e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34920 }, { "epoch": 0.16940418757646042, "grad_norm": 5.7296634992098916e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34930 }, { "epoch": 0.16945268576929653, "grad_norm": 4.961886901355683e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34940 }, { "epoch": 0.16950118396213262, "grad_norm": 6.517095130220696e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34950 }, { "epoch": 0.1695496821549687, "grad_norm": 6.09453110200775e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34960 }, { "epoch": 0.1695981803478048, "grad_norm": 8.030049514218263e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34970 }, { "epoch": 0.1696466785406409, "grad_norm": 7.886194453021744e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34980 }, { "epoch": 0.16969517673347698, "grad_norm": 7.555760817012924e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 34990 }, { "epoch": 0.16974367492631307, "grad_norm": 5.683585868609953e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35000 }, { "epoch": 0.16979217311914915, "grad_norm": 5.715495987601571e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35010 }, { "epoch": 0.16984067131198524, "grad_norm": 7.311841443424782e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35020 }, { "epoch": 0.16988916950482133, "grad_norm": 4.737557546263815e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35030 }, { "epoch": 0.16993766769765742, "grad_norm": 1.162063796300572e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35040 }, { "epoch": 0.1699861658904935, "grad_norm": 5.7937221242809755e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35050 }, { "epoch": 0.1700346640833296, "grad_norm": 4.509008277864268e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35060 }, { "epoch": 0.17008316227616568, "grad_norm": 6.219392645334665e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35070 }, { "epoch": 0.17013166046900177, "grad_norm": 6.997272095077278e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35080 }, { "epoch": 0.17018015866183786, "grad_norm": 4.716265777915396e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35090 }, { "epoch": 0.17022865685467395, "grad_norm": 6.332628288419073e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35100 }, { "epoch": 0.17027715504751004, "grad_norm": 5.910834488531691e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35110 }, { "epoch": 0.17032565324034613, "grad_norm": 6.993519008346993e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35120 }, { "epoch": 0.1703741514331822, "grad_norm": 5.986637319210786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35130 }, { "epoch": 0.1704226496260183, "grad_norm": 7.841681082254581e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35140 }, { "epoch": 0.1704711478188544, "grad_norm": 4.5870994114238783e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35150 }, { "epoch": 0.17051964601169048, "grad_norm": 5.978188255539862e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35160 }, { "epoch": 0.17056814420452657, "grad_norm": 1.4944981785447453e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35170 }, { "epoch": 0.17061664239736266, "grad_norm": 6.317267065014676e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35180 }, { "epoch": 0.17066514059019874, "grad_norm": 6.095456939192445e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35190 }, { "epoch": 0.17071363878303486, "grad_norm": 4.6244679197116056e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35200 }, { "epoch": 0.17076213697587095, "grad_norm": 8.475053192569248e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35210 }, { "epoch": 0.17081063516870704, "grad_norm": 1.45854542665802e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35220 }, { "epoch": 0.17085913336154313, "grad_norm": 6.301025479160671e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35230 }, { "epoch": 0.1709076315543792, "grad_norm": 1.4816389182215062e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35240 }, { "epoch": 0.1709561297472153, "grad_norm": 5.5972169121787374e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35250 }, { "epoch": 0.1710046279400514, "grad_norm": 1.4758157362848578e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35260 }, { "epoch": 0.17105312613288748, "grad_norm": 1.1310726932833859e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35270 }, { "epoch": 0.17110162432572357, "grad_norm": 5.3987832870916463e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35280 }, { "epoch": 0.17115012251855966, "grad_norm": 5.3974261504663446e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35290 }, { "epoch": 0.17119862071139574, "grad_norm": 5.542078440612386e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35300 }, { "epoch": 0.17124711890423183, "grad_norm": 8.839381848702033e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35310 }, { "epoch": 0.17129561709706792, "grad_norm": 9.218715746328598e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35320 }, { "epoch": 0.171344115289904, "grad_norm": 5.683483550456003e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35330 }, { "epoch": 0.1713926134827401, "grad_norm": 7.069221652500346e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35340 }, { "epoch": 0.17144111167557619, "grad_norm": 4.0932789602265984e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35350 }, { "epoch": 0.17148960986841227, "grad_norm": 4.496509831142248e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35360 }, { "epoch": 0.17153810806124836, "grad_norm": 4.476774861927879e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35370 }, { "epoch": 0.17158660625408445, "grad_norm": 4.7668816449686346e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35380 }, { "epoch": 0.17163510444692054, "grad_norm": 5.792207247168335e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35390 }, { "epoch": 0.17168360263975663, "grad_norm": 4.753803395374234e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35400 }, { "epoch": 0.17173210083259272, "grad_norm": 4.4849301161775657e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35410 }, { "epoch": 0.1717805990254288, "grad_norm": 4.7446263806705247e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35420 }, { "epoch": 0.1718290972182649, "grad_norm": 7.781077471236131e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35430 }, { "epoch": 0.17187759541110098, "grad_norm": 5.227019173048575e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35440 }, { "epoch": 0.1719260936039371, "grad_norm": 4.734599912126214e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35450 }, { "epoch": 0.17197459179677319, "grad_norm": 5.115490964158198e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35460 }, { "epoch": 0.17202308998960927, "grad_norm": 6.749828429519766e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35470 }, { "epoch": 0.17207158818244536, "grad_norm": 7.739539853446331e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35480 }, { "epoch": 0.17212008637528145, "grad_norm": 5.924577806126763e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35490 }, { "epoch": 0.17216858456811754, "grad_norm": 1.2649148573018465e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35500 }, { "epoch": 0.17221708276095363, "grad_norm": 7.269447621638392e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35510 }, { "epoch": 0.17226558095378972, "grad_norm": 8.171866170414432e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35520 }, { "epoch": 0.1723140791466258, "grad_norm": 4.903352035512398e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35530 }, { "epoch": 0.1723625773394619, "grad_norm": 4.4911594443419744e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35540 }, { "epoch": 0.17241107553229798, "grad_norm": 7.710781346759177e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35550 }, { "epoch": 0.17245957372513407, "grad_norm": 5.408525183270285e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35560 }, { "epoch": 0.17250807191797016, "grad_norm": 8.225776326753476e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35570 }, { "epoch": 0.17255657011080625, "grad_norm": 4.7597737307114585e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35580 }, { "epoch": 0.17260506830364233, "grad_norm": 8.223904757187483e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35590 }, { "epoch": 0.17265356649647842, "grad_norm": 6.244629702223392e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35600 }, { "epoch": 0.1727020646893145, "grad_norm": 1.167391658896122e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35610 }, { "epoch": 0.1727505628821506, "grad_norm": 6.679175612589461e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35620 }, { "epoch": 0.1727990610749867, "grad_norm": 7.307210125873098e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35630 }, { "epoch": 0.17284755926782278, "grad_norm": 5.090214116876268e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35640 }, { "epoch": 0.17289605746065886, "grad_norm": 5.393870594616601e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35650 }, { "epoch": 0.17294455565349495, "grad_norm": 5.473804662869952e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35660 }, { "epoch": 0.17299305384633104, "grad_norm": 6.678459385511815e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35670 }, { "epoch": 0.17304155203916713, "grad_norm": 8.726628664135205e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35680 }, { "epoch": 0.17309005023200322, "grad_norm": 7.153228409606527e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35690 }, { "epoch": 0.1731385484248393, "grad_norm": 6.618179781980871e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35700 }, { "epoch": 0.17318704661767542, "grad_norm": 6.336207292179097e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35710 }, { "epoch": 0.1732355448105115, "grad_norm": 7.449192906960889e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35720 }, { "epoch": 0.1732840430033476, "grad_norm": 6.788680195768393e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35730 }, { "epoch": 0.1733325411961837, "grad_norm": 8.971223763865055e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35740 }, { "epoch": 0.17338103938901978, "grad_norm": 1.0376422920899131e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35750 }, { "epoch": 0.17342953758185586, "grad_norm": 1.7123139173236268e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35760 }, { "epoch": 0.17347803577469195, "grad_norm": 1.0071652667420494e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35770 }, { "epoch": 0.17352653396752804, "grad_norm": 9.50196223925559e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35780 }, { "epoch": 0.17357503216036413, "grad_norm": 4.5737973408677135e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35790 }, { "epoch": 0.17362353035320022, "grad_norm": 6.551917408614827e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35800 }, { "epoch": 0.1736720285460363, "grad_norm": 1.3780616825442848e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35810 }, { "epoch": 0.1737205267388724, "grad_norm": 8.893702840850892e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35820 }, { "epoch": 0.17376902493170848, "grad_norm": 7.416785763325606e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35830 }, { "epoch": 0.17381752312454457, "grad_norm": 5.40265219228786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35840 }, { "epoch": 0.17386602131738066, "grad_norm": 5.683004999923469e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35850 }, { "epoch": 0.17391451951021675, "grad_norm": 5.409577141790578e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35860 }, { "epoch": 0.17396301770305284, "grad_norm": 5.137319902814852e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35870 }, { "epoch": 0.17401151589588892, "grad_norm": 8.59206394920875e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35880 }, { "epoch": 0.174060014088725, "grad_norm": 4.8780332662090586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35890 }, { "epoch": 0.1741085122815611, "grad_norm": 6.485022652213956e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35900 }, { "epoch": 0.1741570104743972, "grad_norm": 7.534791990337908e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35910 }, { "epoch": 0.17420550866723328, "grad_norm": 3.5815264709526673e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35920 }, { "epoch": 0.17425400686006937, "grad_norm": 4.424356703225385e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35930 }, { "epoch": 0.17430250505290545, "grad_norm": 1.0029801700284224e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35940 }, { "epoch": 0.17435100324574154, "grad_norm": 1.7827126441716246e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 35950 }, { "epoch": 0.17439950143857766, "grad_norm": 4.606338421808687e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35960 }, { "epoch": 0.17444799963141375, "grad_norm": 7.050955730392161e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35970 }, { "epoch": 0.17449649782424984, "grad_norm": 7.814282554363672e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35980 }, { "epoch": 0.17454499601708592, "grad_norm": 4.650699736430397e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 35990 }, { "epoch": 0.174593494209922, "grad_norm": 1.357524155309875e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36000 }, { "epoch": 0.1746419924027581, "grad_norm": 4.2503476294086795e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36010 }, { "epoch": 0.1746904905955942, "grad_norm": 6.915239225691039e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36020 }, { "epoch": 0.17473898878843028, "grad_norm": 4.411312559682301e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36030 }, { "epoch": 0.17478748698126637, "grad_norm": 4.512346052365501e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36040 }, { "epoch": 0.17483598517410245, "grad_norm": 7.554113068408697e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36050 }, { "epoch": 0.17488448336693854, "grad_norm": 1.8149638947306812e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36060 }, { "epoch": 0.17493298155977463, "grad_norm": 5.3189260995623044e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36070 }, { "epoch": 0.17498147975261072, "grad_norm": 7.137568047710374e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36080 }, { "epoch": 0.1750299779454468, "grad_norm": 4.830283728551876e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36090 }, { "epoch": 0.1750784761382829, "grad_norm": 5.0556014485891865e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36100 }, { "epoch": 0.17512697433111898, "grad_norm": 7.246814703876225e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36110 }, { "epoch": 0.17517547252395507, "grad_norm": 1.2537246618649078e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36120 }, { "epoch": 0.17522397071679116, "grad_norm": 6.187659096212883e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36130 }, { "epoch": 0.17527246890962725, "grad_norm": 7.028813087117669e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36140 }, { "epoch": 0.17532096710246334, "grad_norm": 4.9757058917521135e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36150 }, { "epoch": 0.17536946529529943, "grad_norm": 5.626237964406755e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36160 }, { "epoch": 0.17541796348813551, "grad_norm": 9.440857695608429e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36170 }, { "epoch": 0.1754664616809716, "grad_norm": 1.4358712974171794e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36180 }, { "epoch": 0.1755149598738077, "grad_norm": 6.253258533206463e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36190 }, { "epoch": 0.17556345806664378, "grad_norm": 4.712652668104056e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36200 }, { "epoch": 0.17561195625947987, "grad_norm": 8.567149478722058e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36210 }, { "epoch": 0.17566045445231598, "grad_norm": 6.469086599736329e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36220 }, { "epoch": 0.17570895264515207, "grad_norm": 6.43752855467028e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36230 }, { "epoch": 0.17575745083798816, "grad_norm": 3.914622581646654e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36240 }, { "epoch": 0.17580594903082425, "grad_norm": 5.737251385085074e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36250 }, { "epoch": 0.17585444722366034, "grad_norm": 5.3253057075153265e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36260 }, { "epoch": 0.17590294541649643, "grad_norm": 5.94870215309129e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36270 }, { "epoch": 0.17595144360933251, "grad_norm": 5.759753918255228e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36280 }, { "epoch": 0.1759999418021686, "grad_norm": 4.09291338598905e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36290 }, { "epoch": 0.1760484399950047, "grad_norm": 6.995534107545609e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36300 }, { "epoch": 0.17609693818784078, "grad_norm": 8.090134429039608e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36310 }, { "epoch": 0.17614543638067687, "grad_norm": 1.3300247303504875e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36320 }, { "epoch": 0.17619393457351296, "grad_norm": 5.921017987020605e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36330 }, { "epoch": 0.17624243276634904, "grad_norm": 6.212478353972983e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36340 }, { "epoch": 0.17629093095918513, "grad_norm": 4.328522962282477e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36350 }, { "epoch": 0.17633942915202122, "grad_norm": 6.996777557333189e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36360 }, { "epoch": 0.1763879273448573, "grad_norm": 1.425171092250821e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36370 }, { "epoch": 0.1764364255376934, "grad_norm": 7.767929588453626e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36380 }, { "epoch": 0.17648492373052949, "grad_norm": 5.263075664174721e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36390 }, { "epoch": 0.17653342192336557, "grad_norm": 7.77109292471323e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36400 }, { "epoch": 0.17658192011620166, "grad_norm": 9.929708255640435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36410 }, { "epoch": 0.17663041830903775, "grad_norm": 6.763383453289862e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36420 }, { "epoch": 0.17667891650187384, "grad_norm": 6.705606381274265e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36430 }, { "epoch": 0.17672741469470993, "grad_norm": 7.534329427016928e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36440 }, { "epoch": 0.17677591288754602, "grad_norm": 4.9047997663365095e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36450 }, { "epoch": 0.1768244110803821, "grad_norm": 1.8014355873674504e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36460 }, { "epoch": 0.1768729092732182, "grad_norm": 6.233710792002967e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36470 }, { "epoch": 0.1769214074660543, "grad_norm": 9.516883636706552e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36480 }, { "epoch": 0.1769699056588904, "grad_norm": 5.595567742489038e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36490 }, { "epoch": 0.17701840385172649, "grad_norm": 4.246530949103544e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36500 }, { "epoch": 0.17706690204456257, "grad_norm": 5.432858074527758e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36510 }, { "epoch": 0.17711540023739866, "grad_norm": 8.227979009234332e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36520 }, { "epoch": 0.17716389843023475, "grad_norm": 3.797327963184216e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36530 }, { "epoch": 0.17721239662307084, "grad_norm": 7.390467970935788e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36540 }, { "epoch": 0.17726089481590693, "grad_norm": 4.830554090062833e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36550 }, { "epoch": 0.17730939300874302, "grad_norm": 5.0511729909885617e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36560 }, { "epoch": 0.1773578912015791, "grad_norm": 4.9068166418919645e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36570 }, { "epoch": 0.1774063893944152, "grad_norm": 5.015806792130206e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36580 }, { "epoch": 0.17745488758725128, "grad_norm": 4.207409176615329e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36590 }, { "epoch": 0.17750338578008737, "grad_norm": 4.339215564641563e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36600 }, { "epoch": 0.17755188397292346, "grad_norm": 3.984803242929047e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36610 }, { "epoch": 0.17760038216575955, "grad_norm": 1.0172392705953825e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36620 }, { "epoch": 0.17764888035859563, "grad_norm": 8.06577062917313e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36630 }, { "epoch": 0.17769737855143172, "grad_norm": 5.255736468257055e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36640 }, { "epoch": 0.1777458767442678, "grad_norm": 4.6265242303888954e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36650 }, { "epoch": 0.1777943749371039, "grad_norm": 9.108025267323683e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36660 }, { "epoch": 0.17784287312994, "grad_norm": 1.405034595336474e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36670 }, { "epoch": 0.17789137132277608, "grad_norm": 8.237783788445086e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36680 }, { "epoch": 0.17793986951561216, "grad_norm": 4.550523868829259e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36690 }, { "epoch": 0.17798836770844825, "grad_norm": 7.164619120203497e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36700 }, { "epoch": 0.17803686590128434, "grad_norm": 1.3692115885532985e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36710 }, { "epoch": 0.17808536409412043, "grad_norm": 6.916361172670804e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36720 }, { "epoch": 0.17813386228695655, "grad_norm": 6.993145973410719e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36730 }, { "epoch": 0.17818236047979263, "grad_norm": 8.501422854578777e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36740 }, { "epoch": 0.17823085867262872, "grad_norm": 4.1230538982972575e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36750 }, { "epoch": 0.1782793568654648, "grad_norm": 7.407972191231238e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36760 }, { "epoch": 0.1783278550583009, "grad_norm": 7.584951333683421e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36770 }, { "epoch": 0.178376353251137, "grad_norm": 6.975962918431833e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36780 }, { "epoch": 0.17842485144397308, "grad_norm": 7.373714794312036e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36790 }, { "epoch": 0.17847334963680916, "grad_norm": 4.541254838841269e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36800 }, { "epoch": 0.17852184782964525, "grad_norm": 7.435299664848571e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36810 }, { "epoch": 0.17857034602248134, "grad_norm": 1.5442950029864733e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36820 }, { "epoch": 0.17861884421531743, "grad_norm": 9.899835617943609e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36830 }, { "epoch": 0.17866734240815352, "grad_norm": 1.0066347044812574e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36840 }, { "epoch": 0.1787158406009896, "grad_norm": 5.843791228699047e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36850 }, { "epoch": 0.1787643387938257, "grad_norm": 5.5703143431173885e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36860 }, { "epoch": 0.17881283698666178, "grad_norm": 9.445913207173362e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36870 }, { "epoch": 0.17886133517949787, "grad_norm": 1.4961923966438917e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36880 }, { "epoch": 0.17890983337233396, "grad_norm": 7.673286006593116e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36890 }, { "epoch": 0.17895833156517005, "grad_norm": 5.295070693023263e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36900 }, { "epoch": 0.17900682975800614, "grad_norm": 1.6413844150520163e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36910 }, { "epoch": 0.17905532795084222, "grad_norm": 6.658258655534155e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36920 }, { "epoch": 0.1791038261436783, "grad_norm": 6.749356629143222e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36930 }, { "epoch": 0.1791523243365144, "grad_norm": 1.2876881783085992e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36940 }, { "epoch": 0.1792008225293505, "grad_norm": 8.840182630365234e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36950 }, { "epoch": 0.17924932072218658, "grad_norm": 6.76909834851358e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36960 }, { "epoch": 0.17929781891502267, "grad_norm": 6.141537767234695e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36970 }, { "epoch": 0.17934631710785875, "grad_norm": 1.0132166039511503e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 36980 }, { "epoch": 0.17939481530069487, "grad_norm": 4.91533072022321e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 36990 }, { "epoch": 0.17944331349353096, "grad_norm": 5.179398954169301e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37000 }, { "epoch": 0.17949181168636705, "grad_norm": 7.138353907976125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37010 }, { "epoch": 0.17954030987920314, "grad_norm": 4.318507507150571e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37020 }, { "epoch": 0.17958880807203922, "grad_norm": 9.080787322091055e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37030 }, { "epoch": 0.1796373062648753, "grad_norm": 7.36016332325562e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37040 }, { "epoch": 0.1796858044577114, "grad_norm": 5.65787381390237e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37050 }, { "epoch": 0.1797343026505475, "grad_norm": 8.094814063497324e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37060 }, { "epoch": 0.17978280084338358, "grad_norm": 7.012112490656364e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37070 }, { "epoch": 0.17983129903621967, "grad_norm": 5.915252643262647e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37080 }, { "epoch": 0.17987979722905575, "grad_norm": 8.119747718637882e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37090 }, { "epoch": 0.17992829542189184, "grad_norm": 3.552025518160917e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37100 }, { "epoch": 0.17997679361472793, "grad_norm": 1.0551298146310728e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37110 }, { "epoch": 0.18002529180756402, "grad_norm": 5.3185676307521135e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37120 }, { "epoch": 0.1800737900004001, "grad_norm": 6.936188157169454e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37130 }, { "epoch": 0.1801222881932362, "grad_norm": 7.502838883510776e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37140 }, { "epoch": 0.18017078638607228, "grad_norm": 5.224676513648774e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37150 }, { "epoch": 0.18021928457890837, "grad_norm": 6.394059681724684e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37160 }, { "epoch": 0.18026778277174446, "grad_norm": 9.942311152144612e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37170 }, { "epoch": 0.18031628096458055, "grad_norm": 4.3150581774398233e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37180 }, { "epoch": 0.18036477915741664, "grad_norm": 1.0409350892359726e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37190 }, { "epoch": 0.18041327735025273, "grad_norm": 6.801094087904858e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37200 }, { "epoch": 0.18046177554308881, "grad_norm": 1.140254468623425e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37210 }, { "epoch": 0.1805102737359249, "grad_norm": 6.488337334076277e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37220 }, { "epoch": 0.180558771928761, "grad_norm": 6.52765734798777e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37230 }, { "epoch": 0.1806072701215971, "grad_norm": 6.196929547286345e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37240 }, { "epoch": 0.1806557683144332, "grad_norm": 3.432979411854831e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37250 }, { "epoch": 0.18070426650726928, "grad_norm": 6.205300451256335e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37260 }, { "epoch": 0.18075276470010537, "grad_norm": 6.761269588650975e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37270 }, { "epoch": 0.18080126289294146, "grad_norm": 5.256134016917713e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37280 }, { "epoch": 0.18084976108577755, "grad_norm": 6.951769648821937e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37290 }, { "epoch": 0.18089825927861364, "grad_norm": 5.0673893525754465e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37300 }, { "epoch": 0.18094675747144973, "grad_norm": 4.433374556356284e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37310 }, { "epoch": 0.18099525566428581, "grad_norm": 6.02866023768911e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37320 }, { "epoch": 0.1810437538571219, "grad_norm": 3.996132136308006e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37330 }, { "epoch": 0.181092252049958, "grad_norm": 6.32197370009635e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37340 }, { "epoch": 0.18114075024279408, "grad_norm": 5.8717194661994654e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37350 }, { "epoch": 0.18118924843563017, "grad_norm": 1.021922528821051e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37360 }, { "epoch": 0.18123774662846626, "grad_norm": 4.061443448222235e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37370 }, { "epoch": 0.18128624482130234, "grad_norm": 8.94507792281729e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37380 }, { "epoch": 0.18133474301413843, "grad_norm": 5.4514117664439254e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37390 }, { "epoch": 0.18138324120697452, "grad_norm": 4.653084317851608e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37400 }, { "epoch": 0.1814317393998106, "grad_norm": 4.294851052577542e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37410 }, { "epoch": 0.1814802375926467, "grad_norm": 6.900557991684764e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37420 }, { "epoch": 0.1815287357854828, "grad_norm": 4.550652477064432e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37430 }, { "epoch": 0.18157723397831887, "grad_norm": 4.9990099171282054e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37440 }, { "epoch": 0.18162573217115496, "grad_norm": 5.976210815106242e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37450 }, { "epoch": 0.18167423036399105, "grad_norm": 3.9642674920514764e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37460 }, { "epoch": 0.18172272855682714, "grad_norm": 3.452212027355017e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37470 }, { "epoch": 0.18177122674966323, "grad_norm": 4.2943963052266554e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37480 }, { "epoch": 0.18181972494249932, "grad_norm": 6.900118165731328e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37490 }, { "epoch": 0.18186822313533543, "grad_norm": 5.7335704184424685e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37500 }, { "epoch": 0.18191672132817152, "grad_norm": 7.294617176967222e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37510 }, { "epoch": 0.1819652195210076, "grad_norm": 1.3409285770649149e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37520 }, { "epoch": 0.1820137177138437, "grad_norm": 5.882366238552095e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37530 }, { "epoch": 0.1820622159066798, "grad_norm": 4.8623384429902217e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37540 }, { "epoch": 0.18211071409951587, "grad_norm": 4.992618940491411e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37550 }, { "epoch": 0.18215921229235196, "grad_norm": 4.173350021119404e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37560 }, { "epoch": 0.18220771048518805, "grad_norm": 5.784935908081934e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37570 }, { "epoch": 0.18225620867802414, "grad_norm": 4.61996414458099e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37580 }, { "epoch": 0.18230470687086023, "grad_norm": 7.759495446180154e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37590 }, { "epoch": 0.18235320506369632, "grad_norm": 8.882251023578647e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37600 }, { "epoch": 0.1824017032565324, "grad_norm": 4.5777987622841465e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37610 }, { "epoch": 0.1824502014493685, "grad_norm": 3.950682980757847e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37620 }, { "epoch": 0.18249869964220458, "grad_norm": 9.787918742176771e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37630 }, { "epoch": 0.18254719783504067, "grad_norm": 5.49619798562162e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37640 }, { "epoch": 0.18259569602787676, "grad_norm": 5.699069660636269e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37650 }, { "epoch": 0.18264419422071285, "grad_norm": 7.400176826877214e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37660 }, { "epoch": 0.18269269241354893, "grad_norm": 6.125691953684509e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37670 }, { "epoch": 0.18274119060638502, "grad_norm": 5.4693245488124376e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37680 }, { "epoch": 0.1827896887992211, "grad_norm": 7.335468410474277e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37690 }, { "epoch": 0.1828381869920572, "grad_norm": 7.47198924955228e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37700 }, { "epoch": 0.1828866851848933, "grad_norm": 1.1537679966977521e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37710 }, { "epoch": 0.18293518337772938, "grad_norm": 1.0552272300401455e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37720 }, { "epoch": 0.18298368157056547, "grad_norm": 4.9746962105245984e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37730 }, { "epoch": 0.18303217976340155, "grad_norm": 6.15792501434953e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37740 }, { "epoch": 0.18308067795623764, "grad_norm": 3.699338080309644e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37750 }, { "epoch": 0.18312917614907376, "grad_norm": 5.178082673751305e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37760 }, { "epoch": 0.18317767434190985, "grad_norm": 1.0603849176504809e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37770 }, { "epoch": 0.18322617253474593, "grad_norm": 5.63361588490352e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37780 }, { "epoch": 0.18327467072758202, "grad_norm": 3.698406558783063e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37790 }, { "epoch": 0.1833231689204181, "grad_norm": 1.19245228802356e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37800 }, { "epoch": 0.1833716671132542, "grad_norm": 5.4175757213670295e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37810 }, { "epoch": 0.1834201653060903, "grad_norm": 4.121141827795327e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37820 }, { "epoch": 0.18346866349892638, "grad_norm": 8.449460864312641e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37830 }, { "epoch": 0.18351716169176246, "grad_norm": 1.5404991415834957e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37840 }, { "epoch": 0.18356565988459855, "grad_norm": 5.058968000071218e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37850 }, { "epoch": 0.18361415807743464, "grad_norm": 4.6514131213371e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37860 }, { "epoch": 0.18366265627027073, "grad_norm": 1.387815586895158e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37870 }, { "epoch": 0.18371115446310682, "grad_norm": 6.793631257551169e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37880 }, { "epoch": 0.1837596526559429, "grad_norm": 5.267716929324706e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37890 }, { "epoch": 0.183808150848779, "grad_norm": 3.272118576091998e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37900 }, { "epoch": 0.18385664904161508, "grad_norm": 1.0235599035013365e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37910 }, { "epoch": 0.18390514723445117, "grad_norm": 5.7584646384611915e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37920 }, { "epoch": 0.18395364542728726, "grad_norm": 1.7792166318031377e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37930 }, { "epoch": 0.18400214362012335, "grad_norm": 1.2915077718389512e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 37940 }, { "epoch": 0.18405064181295944, "grad_norm": 4.801787767405585e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37950 }, { "epoch": 0.18409914000579553, "grad_norm": 6.04524430514175e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37960 }, { "epoch": 0.1841476381986316, "grad_norm": 6.409726438505459e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37970 }, { "epoch": 0.1841961363914677, "grad_norm": 8.479651825155088e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37980 }, { "epoch": 0.1842446345843038, "grad_norm": 6.206958147458863e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 37990 }, { "epoch": 0.18429313277713988, "grad_norm": 7.796708700880117e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38000 }, { "epoch": 0.184341630969976, "grad_norm": 5.907408606731224e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38010 }, { "epoch": 0.18439012916281208, "grad_norm": 6.87858161541044e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38020 }, { "epoch": 0.18443862735564817, "grad_norm": 3.794168534909659e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38030 }, { "epoch": 0.18448712554848426, "grad_norm": 9.598132066912513e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38040 }, { "epoch": 0.18453562374132035, "grad_norm": 2.1167481634165597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38050 }, { "epoch": 0.18458412193415644, "grad_norm": 6.478111913565954e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38060 }, { "epoch": 0.18463262012699252, "grad_norm": 6.510273209414663e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38070 }, { "epoch": 0.1846811183198286, "grad_norm": 1.9527588790424488e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38080 }, { "epoch": 0.1847296165126647, "grad_norm": 4.002636089239786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38090 }, { "epoch": 0.1847781147055008, "grad_norm": 9.649838972336511e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38100 }, { "epoch": 0.18482661289833688, "grad_norm": 6.958440934567989e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38110 }, { "epoch": 0.18487511109117297, "grad_norm": 4.511725038014447e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38120 }, { "epoch": 0.18492360928400906, "grad_norm": 6.771466587451869e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38130 }, { "epoch": 0.18497210747684514, "grad_norm": 8.881889357326145e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38140 }, { "epoch": 0.18502060566968123, "grad_norm": 5.8988078421862156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38150 }, { "epoch": 0.18506910386251732, "grad_norm": 4.783020557397322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38160 }, { "epoch": 0.1851176020553534, "grad_norm": 4.6298733735739006e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38170 }, { "epoch": 0.1851661002481895, "grad_norm": 7.444411664891959e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38180 }, { "epoch": 0.18521459844102559, "grad_norm": 7.917584099459418e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38190 }, { "epoch": 0.18526309663386167, "grad_norm": 8.202680845670329e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38200 }, { "epoch": 0.18531159482669776, "grad_norm": 3.7830510279945884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38210 }, { "epoch": 0.18536009301953385, "grad_norm": 7.007460567365342e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38220 }, { "epoch": 0.18540859121236994, "grad_norm": 1.0168481168193466e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38230 }, { "epoch": 0.18545708940520603, "grad_norm": 4.5658506309109725e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38240 }, { "epoch": 0.18550558759804212, "grad_norm": 7.185374215623597e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38250 }, { "epoch": 0.1855540857908782, "grad_norm": 9.153175284382087e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38260 }, { "epoch": 0.18560258398371432, "grad_norm": 6.229378612943037e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38270 }, { "epoch": 0.1856510821765504, "grad_norm": 5.939196512372291e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38280 }, { "epoch": 0.1856995803693865, "grad_norm": 7.956395364772106e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38290 }, { "epoch": 0.18574807856222258, "grad_norm": 7.840611004894527e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38300 }, { "epoch": 0.18579657675505867, "grad_norm": 1.6110394085444568e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38310 }, { "epoch": 0.18584507494789476, "grad_norm": 6.195085688887048e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38320 }, { "epoch": 0.18589357314073085, "grad_norm": 5.689020809995782e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38330 }, { "epoch": 0.18594207133356694, "grad_norm": 3.987711139075145e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38340 }, { "epoch": 0.18599056952640303, "grad_norm": 4.073866222142897e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38350 }, { "epoch": 0.18603906771923912, "grad_norm": 7.52304387674485e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38360 }, { "epoch": 0.1860875659120752, "grad_norm": 1.1280749845354876e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38370 }, { "epoch": 0.1861360641049113, "grad_norm": 1.3111279884014948e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38380 }, { "epoch": 0.18618456229774738, "grad_norm": 6.909599647997311e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38390 }, { "epoch": 0.18623306049058347, "grad_norm": 7.402754675922552e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38400 }, { "epoch": 0.18628155868341956, "grad_norm": 1.245528551407915e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38410 }, { "epoch": 0.18633005687625565, "grad_norm": 5.7388277241443575e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38420 }, { "epoch": 0.18637855506909173, "grad_norm": 8.39153031506612e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38430 }, { "epoch": 0.18642705326192782, "grad_norm": 5.159381544217467e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38440 }, { "epoch": 0.1864755514547639, "grad_norm": 4.061552161260806e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38450 }, { "epoch": 0.1865240496476, "grad_norm": 6.351414327809835e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38460 }, { "epoch": 0.1865725478404361, "grad_norm": 1.0087813251402622e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38470 }, { "epoch": 0.18662104603327218, "grad_norm": 3.5966831291034396e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38480 }, { "epoch": 0.18666954422610826, "grad_norm": 5.370022293504917e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38490 }, { "epoch": 0.18671804241894435, "grad_norm": 1.4336221454414044e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38500 }, { "epoch": 0.18676654061178044, "grad_norm": 5.172148576093605e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38510 }, { "epoch": 0.18681503880461656, "grad_norm": 5.4961706297262936e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38520 }, { "epoch": 0.18686353699745265, "grad_norm": 3.674412951681916e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38530 }, { "epoch": 0.18691203519028873, "grad_norm": 4.110088980269211e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38540 }, { "epoch": 0.18696053338312482, "grad_norm": 4.190987112906441e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38550 }, { "epoch": 0.1870090315759609, "grad_norm": 5.4937626003948026e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38560 }, { "epoch": 0.187057529768797, "grad_norm": 1.2073847699412e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38570 }, { "epoch": 0.1871060279616331, "grad_norm": 4.538854625479871e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38580 }, { "epoch": 0.18715452615446918, "grad_norm": 4.254827601357647e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38590 }, { "epoch": 0.18720302434730526, "grad_norm": 9.409803425342034e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38600 }, { "epoch": 0.18725152254014135, "grad_norm": 5.746420228547322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38610 }, { "epoch": 0.18730002073297744, "grad_norm": 4.27030357741387e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38620 }, { "epoch": 0.18734851892581353, "grad_norm": 6.949901631969624e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38630 }, { "epoch": 0.18739701711864962, "grad_norm": 3.63263161773375e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38640 }, { "epoch": 0.1874455153114857, "grad_norm": 8.117901728610377e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38650 }, { "epoch": 0.1874940135043218, "grad_norm": 7.179345828944861e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38660 }, { "epoch": 0.18754251169715788, "grad_norm": 1.1906885788448562e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38670 }, { "epoch": 0.18759100988999397, "grad_norm": 4.644533291298103e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38680 }, { "epoch": 0.18763950808283006, "grad_norm": 6.664151896984549e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38690 }, { "epoch": 0.18768800627566615, "grad_norm": 4.139984710604949e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38700 }, { "epoch": 0.18773650446850224, "grad_norm": 9.160342528957699e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38710 }, { "epoch": 0.18778500266133832, "grad_norm": 3.616378307924606e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38720 }, { "epoch": 0.1878335008541744, "grad_norm": 5.401529534765359e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38730 }, { "epoch": 0.1878819990470105, "grad_norm": 5.653430079632926e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38740 }, { "epoch": 0.1879304972398466, "grad_norm": 5.658026935861926e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38750 }, { "epoch": 0.18797899543268268, "grad_norm": 3.604646892085839e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38760 }, { "epoch": 0.18802749362551877, "grad_norm": 6.693111487265924e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38770 }, { "epoch": 0.18807599181835488, "grad_norm": 1.1829179413780366e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38780 }, { "epoch": 0.18812449001119097, "grad_norm": 4.182749080428039e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38790 }, { "epoch": 0.18817298820402706, "grad_norm": 3.835862116829958e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38800 }, { "epoch": 0.18822148639686315, "grad_norm": 5.4375981051180133e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38810 }, { "epoch": 0.18826998458969924, "grad_norm": 4.5139007198713443e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38820 }, { "epoch": 0.18831848278253532, "grad_norm": 4.1168547681991186e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38830 }, { "epoch": 0.1883669809753714, "grad_norm": 5.39389084508457e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38840 }, { "epoch": 0.1884154791682075, "grad_norm": 5.220746501777285e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38850 }, { "epoch": 0.1884639773610436, "grad_norm": 3.894106370694317e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38860 }, { "epoch": 0.18851247555387968, "grad_norm": 6.128533414084814e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38870 }, { "epoch": 0.18856097374671577, "grad_norm": 1.7065208623989747e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38880 }, { "epoch": 0.18860947193955185, "grad_norm": 3.941584836297807e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38890 }, { "epoch": 0.18865797013238794, "grad_norm": 4.841802336841283e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38900 }, { "epoch": 0.18870646832522403, "grad_norm": 8.383007354950678e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38910 }, { "epoch": 0.18875496651806012, "grad_norm": 1.5465658975699625e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38920 }, { "epoch": 0.1888034647108962, "grad_norm": 1.963076527999874e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 38930 }, { "epoch": 0.1888519629037323, "grad_norm": 7.457963846491111e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38940 }, { "epoch": 0.18890046109656838, "grad_norm": 4.863418467948577e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38950 }, { "epoch": 0.18894895928940447, "grad_norm": 8.324122546810031e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38960 }, { "epoch": 0.18899745748224056, "grad_norm": 3.8909348631932517e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38970 }, { "epoch": 0.18904595567507665, "grad_norm": 4.5953797211950587e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38980 }, { "epoch": 0.18909445386791274, "grad_norm": 3.277373039622944e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 38990 }, { "epoch": 0.18914295206074883, "grad_norm": 6.300679444848356e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39000 }, { "epoch": 0.18919145025358491, "grad_norm": 1.0588584586912475e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 39010 }, { "epoch": 0.189239948446421, "grad_norm": 3.227138023476073e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39020 }, { "epoch": 0.18928844663925712, "grad_norm": 3.384665703265455e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39030 }, { "epoch": 0.1893369448320932, "grad_norm": 4.804552489190428e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39040 }, { "epoch": 0.1893854430249293, "grad_norm": 3.9469640000788786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39050 }, { "epoch": 0.18943394121776538, "grad_norm": 3.940795778589745e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39060 }, { "epoch": 0.18948243941060147, "grad_norm": 4.471742798273226e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39070 }, { "epoch": 0.18953093760343756, "grad_norm": 4.964604372048598e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39080 }, { "epoch": 0.18957943579627365, "grad_norm": 9.222256380780891e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39090 }, { "epoch": 0.18962793398910974, "grad_norm": 6.224197335313875e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39100 }, { "epoch": 0.18967643218194583, "grad_norm": 4.925574970116031e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39110 }, { "epoch": 0.1897249303747819, "grad_norm": 3.884568755552209e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39120 }, { "epoch": 0.189773428567618, "grad_norm": 3.094876888098952e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39130 }, { "epoch": 0.1898219267604541, "grad_norm": 3.141462912026327e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39140 }, { "epoch": 0.18987042495329018, "grad_norm": 1.2072092658854672e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 39150 }, { "epoch": 0.18991892314612627, "grad_norm": 4.3714862840715796e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39160 }, { "epoch": 0.18996742133896236, "grad_norm": 4.1731055944183026e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39170 }, { "epoch": 0.19001591953179844, "grad_norm": 5.745999587247752e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39180 }, { "epoch": 0.19006441772463453, "grad_norm": 4.793866281715964e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39190 }, { "epoch": 0.19011291591747062, "grad_norm": 7.412120339722605e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39200 }, { "epoch": 0.1901614141103067, "grad_norm": 1.0040896825103118e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 39210 }, { "epoch": 0.1902099123031428, "grad_norm": 4.5256292935391684e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39220 }, { "epoch": 0.19025841049597889, "grad_norm": 9.823093449767839e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39230 }, { "epoch": 0.19030690868881497, "grad_norm": 9.55871328756075e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39240 }, { "epoch": 0.19035540688165106, "grad_norm": 6.437294786110215e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39250 }, { "epoch": 0.19040390507448715, "grad_norm": 4.909835737976209e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39260 }, { "epoch": 0.19045240326732324, "grad_norm": 4.231950612165747e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39270 }, { "epoch": 0.19050090146015933, "grad_norm": 7.037335336690376e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39280 }, { "epoch": 0.19054939965299544, "grad_norm": 6.15239983403626e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39290 }, { "epoch": 0.19059789784583153, "grad_norm": 8.272898099903614e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39300 }, { "epoch": 0.19064639603866762, "grad_norm": 5.713147643859884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39310 }, { "epoch": 0.1906948942315037, "grad_norm": 5.12566806776249e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39320 }, { "epoch": 0.1907433924243398, "grad_norm": 8.128850481625705e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39330 }, { "epoch": 0.19079189061717589, "grad_norm": 5.020140747546975e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39340 }, { "epoch": 0.19084038881001197, "grad_norm": 5.1934836875489054e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39350 }, { "epoch": 0.19088888700284806, "grad_norm": 5.170347350258453e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39360 }, { "epoch": 0.19093738519568415, "grad_norm": 4.942712550359829e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39370 }, { "epoch": 0.19098588338852024, "grad_norm": 1.1394741505910133e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 39380 }, { "epoch": 0.19103438158135633, "grad_norm": 7.710712424113808e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39390 }, { "epoch": 0.19108287977419242, "grad_norm": 4.2922692955471575e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39400 }, { "epoch": 0.1911313779670285, "grad_norm": 4.5749079191637065e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39410 }, { "epoch": 0.1911798761598646, "grad_norm": 3.92273271643262e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39420 }, { "epoch": 0.19122837435270068, "grad_norm": 7.837413562583606e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39430 }, { "epoch": 0.19127687254553677, "grad_norm": 3.961733341384388e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39440 }, { "epoch": 0.19132537073837286, "grad_norm": 4.844317658125874e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39450 }, { "epoch": 0.19137386893120895, "grad_norm": 4.4028556800412844e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39460 }, { "epoch": 0.19142236712404503, "grad_norm": 4.147241128293899e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39470 }, { "epoch": 0.19147086531688112, "grad_norm": 5.330818453330721e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39480 }, { "epoch": 0.1915193635097172, "grad_norm": 6.377091921194733e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39490 }, { "epoch": 0.1915678617025533, "grad_norm": 2.9934678735799025e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39500 }, { "epoch": 0.1916163598953894, "grad_norm": 5.6030955875030486e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39510 }, { "epoch": 0.19166485808822548, "grad_norm": 4.172268930346945e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39520 }, { "epoch": 0.19171335628106156, "grad_norm": 3.1635476460678547e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39530 }, { "epoch": 0.19176185447389765, "grad_norm": 4.64500971020243e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39540 }, { "epoch": 0.19181035266673377, "grad_norm": 3.6284760085436574e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39550 }, { "epoch": 0.19185885085956986, "grad_norm": 7.96846748585267e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39560 }, { "epoch": 0.19190734905240595, "grad_norm": 6.098792937336839e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39570 }, { "epoch": 0.19195584724524203, "grad_norm": 3.582272611879489e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39580 }, { "epoch": 0.19200434543807812, "grad_norm": 4.280438048454016e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39590 }, { "epoch": 0.1920528436309142, "grad_norm": 6.599778856752891e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39600 }, { "epoch": 0.1921013418237503, "grad_norm": 6.766296678506478e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39610 }, { "epoch": 0.1921498400165864, "grad_norm": 4.9621441178260284e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39620 }, { "epoch": 0.19219833820942248, "grad_norm": 4.80067754438096e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39630 }, { "epoch": 0.19224683640225856, "grad_norm": 5.251148138540884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39640 }, { "epoch": 0.19229533459509465, "grad_norm": 5.4427129470013824e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39650 }, { "epoch": 0.19234383278793074, "grad_norm": 9.442326387443245e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39660 }, { "epoch": 0.19239233098076683, "grad_norm": 3.65466235052736e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39670 }, { "epoch": 0.19244082917360292, "grad_norm": 3.7679782849409094e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39680 }, { "epoch": 0.192489327366439, "grad_norm": 5.783341805454256e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39690 }, { "epoch": 0.1925378255592751, "grad_norm": 6.047012135468322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39700 }, { "epoch": 0.19258632375211118, "grad_norm": 5.200086050649588e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39710 }, { "epoch": 0.19263482194494727, "grad_norm": 6.020973586373657e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39720 }, { "epoch": 0.19268332013778336, "grad_norm": 4.632264705151101e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39730 }, { "epoch": 0.19273181833061945, "grad_norm": 5.323138552171258e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39740 }, { "epoch": 0.19278031652345554, "grad_norm": 4.143625886854352e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39750 }, { "epoch": 0.19282881471629162, "grad_norm": 1.006347360998916e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 39760 }, { "epoch": 0.1928773129091277, "grad_norm": 6.779992389738254e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39770 }, { "epoch": 0.1929258111019638, "grad_norm": 1.41321308433362e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 39780 }, { "epoch": 0.1929743092947999, "grad_norm": 4.978945611355812e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39790 }, { "epoch": 0.193022807487636, "grad_norm": 3.503368617430169e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39800 }, { "epoch": 0.1930713056804721, "grad_norm": 9.44675306868703e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39810 }, { "epoch": 0.19311980387330818, "grad_norm": 6.094298754533156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39820 }, { "epoch": 0.19316830206614427, "grad_norm": 3.164499418062405e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39830 }, { "epoch": 0.19321680025898036, "grad_norm": 8.997176337288693e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39840 }, { "epoch": 0.19326529845181645, "grad_norm": 6.313099021326707e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39850 }, { "epoch": 0.19331379664465254, "grad_norm": 8.782039628840721e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39860 }, { "epoch": 0.19336229483748862, "grad_norm": 4.259945640683327e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39870 }, { "epoch": 0.1934107930303247, "grad_norm": 6.065808122457383e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39880 }, { "epoch": 0.1934592912231608, "grad_norm": 4.775707296289511e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39890 }, { "epoch": 0.1935077894159969, "grad_norm": 3.530650616312414e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39900 }, { "epoch": 0.19355628760883298, "grad_norm": 4.873746561884218e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39910 }, { "epoch": 0.19360478580166907, "grad_norm": 8.056262856825924e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39920 }, { "epoch": 0.19365328399450515, "grad_norm": 4.40876881668828e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39930 }, { "epoch": 0.19370178218734124, "grad_norm": 5.3096854912837443e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39940 }, { "epoch": 0.19375028038017733, "grad_norm": 1.017693520566354e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 39950 }, { "epoch": 0.19379877857301342, "grad_norm": 5.9481845227082886e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39960 }, { "epoch": 0.1938472767658495, "grad_norm": 3.436031903447656e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39970 }, { "epoch": 0.1938957749586856, "grad_norm": 4.117581653417801e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39980 }, { "epoch": 0.19394427315152168, "grad_norm": 4.502205896983469e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 39990 }, { "epoch": 0.19399277134435777, "grad_norm": 7.476529617633787e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40000 }, { "epoch": 0.19404126953719386, "grad_norm": 5.202569752782438e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40010 }, { "epoch": 0.19408976773002995, "grad_norm": 4.505644923824548e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40020 }, { "epoch": 0.19413826592286604, "grad_norm": 6.333753788112517e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40030 }, { "epoch": 0.19418676411570213, "grad_norm": 4.894064886684646e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40040 }, { "epoch": 0.19423526230853821, "grad_norm": 4.958788579756401e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40050 }, { "epoch": 0.19428376050137433, "grad_norm": 3.935523906761773e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40060 }, { "epoch": 0.19433225869421042, "grad_norm": 7.92676573269091e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40070 }, { "epoch": 0.1943807568870465, "grad_norm": 6.744581071416178e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40080 }, { "epoch": 0.1944292550798826, "grad_norm": 3.769246958995609e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40090 }, { "epoch": 0.19447775327271868, "grad_norm": 4.569985989633096e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40100 }, { "epoch": 0.19452625146555477, "grad_norm": 6.424897946999408e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40110 }, { "epoch": 0.19457474965839086, "grad_norm": 9.654267785208503e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40120 }, { "epoch": 0.19462324785122695, "grad_norm": 3.219484412397833e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40130 }, { "epoch": 0.19467174604406304, "grad_norm": 3.5622012006797377e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40140 }, { "epoch": 0.19472024423689913, "grad_norm": 4.310497558890347e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40150 }, { "epoch": 0.19476874242973521, "grad_norm": 7.070668317510354e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40160 }, { "epoch": 0.1948172406225713, "grad_norm": 6.188378876004208e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40170 }, { "epoch": 0.1948657388154074, "grad_norm": 1.2437236307505373e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40180 }, { "epoch": 0.19491423700824348, "grad_norm": 6.929062834615252e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40190 }, { "epoch": 0.19496273520107957, "grad_norm": 6.722834200445504e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40200 }, { "epoch": 0.19501123339391566, "grad_norm": 9.500461572997665e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40210 }, { "epoch": 0.19505973158675174, "grad_norm": 1.4002257842093968e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40220 }, { "epoch": 0.19510822977958783, "grad_norm": 6.491421089549476e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40230 }, { "epoch": 0.19515672797242392, "grad_norm": 2.7939059066284244e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40240 }, { "epoch": 0.19520522616526, "grad_norm": 8.152245101200606e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40250 }, { "epoch": 0.1952537243580961, "grad_norm": 4.374894757575021e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40260 }, { "epoch": 0.1953022225509322, "grad_norm": 8.03445416863724e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40270 }, { "epoch": 0.19535072074376827, "grad_norm": 3.201585485612668e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40280 }, { "epoch": 0.19539921893660436, "grad_norm": 9.54347640913511e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40290 }, { "epoch": 0.19544771712944045, "grad_norm": 3.7963403087815095e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40300 }, { "epoch": 0.19549621532227657, "grad_norm": 6.026659349345209e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40310 }, { "epoch": 0.19554471351511266, "grad_norm": 4.682731713501198e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40320 }, { "epoch": 0.19559321170794874, "grad_norm": 4.59084965598322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40330 }, { "epoch": 0.19564170990078483, "grad_norm": 4.970319267272316e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40340 }, { "epoch": 0.19569020809362092, "grad_norm": 3.435393125528208e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40350 }, { "epoch": 0.195738706286457, "grad_norm": 4.8937522478809115e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40360 }, { "epoch": 0.1957872044792931, "grad_norm": 5.993159390982328e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40370 }, { "epoch": 0.19583570267212919, "grad_norm": 3.3398627863334696e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40380 }, { "epoch": 0.19588420086496527, "grad_norm": 1.4373668477674073e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40390 }, { "epoch": 0.19593269905780136, "grad_norm": 3.265688519604737e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40400 }, { "epoch": 0.19598119725063745, "grad_norm": 3.595289754798614e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40410 }, { "epoch": 0.19602969544347354, "grad_norm": 4.261728037135981e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40420 }, { "epoch": 0.19607819363630963, "grad_norm": 3.919760871440303e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40430 }, { "epoch": 0.19612669182914572, "grad_norm": 5.254098667251128e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40440 }, { "epoch": 0.1961751900219818, "grad_norm": 6.107630667884223e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40450 }, { "epoch": 0.1962236882148179, "grad_norm": 1.1456321402647518e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40460 }, { "epoch": 0.19627218640765398, "grad_norm": 6.06597794217123e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40470 }, { "epoch": 0.19632068460049007, "grad_norm": 5.911542899639244e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40480 }, { "epoch": 0.19636918279332616, "grad_norm": 5.98467764234556e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40490 }, { "epoch": 0.19641768098616225, "grad_norm": 8.176122889835824e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40500 }, { "epoch": 0.19646617917899833, "grad_norm": 5.11587110452183e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40510 }, { "epoch": 0.19651467737183442, "grad_norm": 3.6719612239721755e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40520 }, { "epoch": 0.1965631755646705, "grad_norm": 7.16678556500483e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40530 }, { "epoch": 0.1966116737575066, "grad_norm": 1.3481900396072888e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40540 }, { "epoch": 0.1966601719503427, "grad_norm": 2.999366799372183e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40550 }, { "epoch": 0.19670867014317878, "grad_norm": 7.971218707325534e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40560 }, { "epoch": 0.1967571683360149, "grad_norm": 7.171173876940884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40570 }, { "epoch": 0.19680566652885098, "grad_norm": 9.762727870565868e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40580 }, { "epoch": 0.19685416472168707, "grad_norm": 3.2072733802124276e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40590 }, { "epoch": 0.19690266291452316, "grad_norm": 4.424705224437275e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40600 }, { "epoch": 0.19695116110735925, "grad_norm": 1.7086911441310804e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40610 }, { "epoch": 0.19699965930019533, "grad_norm": 3.636877821122653e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40620 }, { "epoch": 0.19704815749303142, "grad_norm": 2.7000892188766557e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40630 }, { "epoch": 0.1970966556858675, "grad_norm": 5.012699944018095e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40640 }, { "epoch": 0.1971451538787036, "grad_norm": 3.032735307328949e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40650 }, { "epoch": 0.1971936520715397, "grad_norm": 6.644281569379018e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40660 }, { "epoch": 0.19724215026437578, "grad_norm": 4.9397911539017514e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40670 }, { "epoch": 0.19729064845721186, "grad_norm": 5.0206487856030435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40680 }, { "epoch": 0.19733914665004795, "grad_norm": 3.826182748412066e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40690 }, { "epoch": 0.19738764484288404, "grad_norm": 7.84245557383656e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40700 }, { "epoch": 0.19743614303572013, "grad_norm": 8.040364463113292e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40710 }, { "epoch": 0.19748464122855622, "grad_norm": 9.999691030770919e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40720 }, { "epoch": 0.1975331394213923, "grad_norm": 3.26994964439109e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40730 }, { "epoch": 0.1975816376142284, "grad_norm": 2.887439087828625e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40740 }, { "epoch": 0.19763013580706448, "grad_norm": 3.264663206437035e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40750 }, { "epoch": 0.19767863399990057, "grad_norm": 5.946977310600232e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40760 }, { "epoch": 0.19772713219273666, "grad_norm": 6.398400387297443e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40770 }, { "epoch": 0.19777563038557275, "grad_norm": 5.045450635066118e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40780 }, { "epoch": 0.19782412857840884, "grad_norm": 3.275094329069361e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40790 }, { "epoch": 0.19787262677124492, "grad_norm": 3.803991432960174e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40800 }, { "epoch": 0.197921124964081, "grad_norm": 5.4737885335498504e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40810 }, { "epoch": 0.1979696231569171, "grad_norm": 5.8703331973219974e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40820 }, { "epoch": 0.19801812134975322, "grad_norm": 8.234518844574268e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40830 }, { "epoch": 0.1980666195425893, "grad_norm": 1.0091672919543271e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40840 }, { "epoch": 0.1981151177354254, "grad_norm": 3.5899841321906933e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40850 }, { "epoch": 0.19816361592826148, "grad_norm": 4.289344701646769e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40860 }, { "epoch": 0.19821211412109757, "grad_norm": 1.1039183789307572e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 40870 }, { "epoch": 0.19826061231393366, "grad_norm": 6.003433128398683e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40880 }, { "epoch": 0.19830911050676975, "grad_norm": 6.519476869470964e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40890 }, { "epoch": 0.19835760869960584, "grad_norm": 4.4602167292850936e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40900 }, { "epoch": 0.19840610689244192, "grad_norm": 4.326156499701028e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40910 }, { "epoch": 0.198454605085278, "grad_norm": 5.869535257829739e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40920 }, { "epoch": 0.1985031032781141, "grad_norm": 3.1642379383356456e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40930 }, { "epoch": 0.1985516014709502, "grad_norm": 5.120765322885745e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40940 }, { "epoch": 0.19860009966378628, "grad_norm": 4.9196625440117714e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40950 }, { "epoch": 0.19864859785662237, "grad_norm": 4.65419844886128e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40960 }, { "epoch": 0.19869709604945845, "grad_norm": 5.721583207218828e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40970 }, { "epoch": 0.19874559424229454, "grad_norm": 5.0997417844200754e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40980 }, { "epoch": 0.19879409243513063, "grad_norm": 4.519943175296248e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 40990 }, { "epoch": 0.19884259062796672, "grad_norm": 1.0326027677365346e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 41000 }, { "epoch": 0.1988910888208028, "grad_norm": 6.328906465569162e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41010 }, { "epoch": 0.1989395870136389, "grad_norm": 4.7069313779957156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41020 }, { "epoch": 0.19898808520647498, "grad_norm": 3.358940858788628e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41030 }, { "epoch": 0.19903658339931107, "grad_norm": 3.4216242283946485e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41040 }, { "epoch": 0.19908508159214716, "grad_norm": 3.8037775595967105e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41050 }, { "epoch": 0.19913357978498325, "grad_norm": 5.235515487811426e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41060 }, { "epoch": 0.19918207797781934, "grad_norm": 7.578642424732607e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41070 }, { "epoch": 0.19923057617065545, "grad_norm": 5.440679018420269e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41080 }, { "epoch": 0.19927907436349154, "grad_norm": 3.8471622332281186e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41090 }, { "epoch": 0.19932757255632763, "grad_norm": 8.989871957965079e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41100 }, { "epoch": 0.19937607074916372, "grad_norm": 2.8219179881716627e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41110 }, { "epoch": 0.1994245689419998, "grad_norm": 1.2605616461769387e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 41120 }, { "epoch": 0.1994730671348359, "grad_norm": 4.837940537072427e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41130 }, { "epoch": 0.19952156532767198, "grad_norm": 8.175168630941698e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41140 }, { "epoch": 0.19957006352050807, "grad_norm": 4.5384744851162395e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41150 }, { "epoch": 0.19961856171334416, "grad_norm": 1.5781610329668183e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 41160 }, { "epoch": 0.19966705990618025, "grad_norm": 4.013881493847293e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41170 }, { "epoch": 0.19971555809901634, "grad_norm": 5.610374032016807e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41180 }, { "epoch": 0.19976405629185243, "grad_norm": 9.133618306123026e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41190 }, { "epoch": 0.19981255448468851, "grad_norm": 7.33726608359575e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41200 }, { "epoch": 0.1998610526775246, "grad_norm": 8.058758282913914e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41210 }, { "epoch": 0.1999095508703607, "grad_norm": 5.675745740063576e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41220 }, { "epoch": 0.19995804906319678, "grad_norm": 2.743550275852158e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41230 }, { "epoch": 0.20000654725603287, "grad_norm": 3.03157712266966e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41240 }, { "epoch": 0.20005504544886896, "grad_norm": 5.1337224249436986e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41250 }, { "epoch": 0.20010354364170505, "grad_norm": 6.005092956229419e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41260 }, { "epoch": 0.20015204183454113, "grad_norm": 3.385275704204105e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41270 }, { "epoch": 0.20020054002737722, "grad_norm": 3.542488968832913e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41280 }, { "epoch": 0.2002490382202133, "grad_norm": 3.957181959890477e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41290 }, { "epoch": 0.2002975364130494, "grad_norm": 6.132940910674733e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41300 }, { "epoch": 0.2003460346058855, "grad_norm": 9.870643680187641e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41310 }, { "epoch": 0.20039453279872158, "grad_norm": 7.056854656184441e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41320 }, { "epoch": 0.20044303099155766, "grad_norm": 9.415087731667882e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41330 }, { "epoch": 0.20049152918439378, "grad_norm": 4.5338662602034674e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41340 }, { "epoch": 0.20054002737722987, "grad_norm": 9.24681273772876e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41350 }, { "epoch": 0.20058852557006596, "grad_norm": 3.1462125349435155e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41360 }, { "epoch": 0.20063702376290204, "grad_norm": 5.763691390825443e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41370 }, { "epoch": 0.20068552195573813, "grad_norm": 5.0007486152026104e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41380 }, { "epoch": 0.20073402014857422, "grad_norm": 7.209117569573209e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41390 }, { "epoch": 0.2007825183414103, "grad_norm": 2.7666665403103252e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41400 }, { "epoch": 0.2008310165342464, "grad_norm": 7.770216114977302e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41410 }, { "epoch": 0.2008795147270825, "grad_norm": 4.640952155909872e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41420 }, { "epoch": 0.20092801291991857, "grad_norm": 8.791548822273398e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41430 }, { "epoch": 0.20097651111275466, "grad_norm": 3.1199345329469e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41440 }, { "epoch": 0.20102500930559075, "grad_norm": 9.22334919550849e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41450 }, { "epoch": 0.20107350749842684, "grad_norm": 3.929571448679781e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 41460 }, { "epoch": 0.20112200569126293, "grad_norm": 7.366516996398786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41470 }, { "epoch": 0.20117050388409902, "grad_norm": 5.2393300364883544e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41480 }, { "epoch": 0.2012190020769351, "grad_norm": 8.629363890122477e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41490 }, { "epoch": 0.2012675002697712, "grad_norm": 4.2453212500959125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41500 }, { "epoch": 0.20131599846260728, "grad_norm": 8.006223595202755e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41510 }, { "epoch": 0.20136449665544337, "grad_norm": 2.6940234931771556e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41520 }, { "epoch": 0.20141299484827946, "grad_norm": 4.7194365748737255e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41530 }, { "epoch": 0.20146149304111555, "grad_norm": 3.9821600239520194e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41540 }, { "epoch": 0.20150999123395164, "grad_norm": 3.4057695330602655e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41550 }, { "epoch": 0.20155848942678772, "grad_norm": 3.3074723404524775e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41560 }, { "epoch": 0.2016069876196238, "grad_norm": 5.676695380429919e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41570 }, { "epoch": 0.2016554858124599, "grad_norm": 6.080379932882352e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41580 }, { "epoch": 0.20170398400529602, "grad_norm": 6.494506976650882e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41590 }, { "epoch": 0.2017524821981321, "grad_norm": 3.336982601354066e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41600 }, { "epoch": 0.2018009803909682, "grad_norm": 6.378665773354442e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41610 }, { "epoch": 0.20184947858380428, "grad_norm": 6.39910311406311e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41620 }, { "epoch": 0.20189797677664037, "grad_norm": 3.940460757689834e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41630 }, { "epoch": 0.20194647496947646, "grad_norm": 2.5131352998641887e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41640 }, { "epoch": 0.20199497316231255, "grad_norm": 9.272045531361073e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41650 }, { "epoch": 0.20204347135514863, "grad_norm": 7.965245174545998e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41660 }, { "epoch": 0.20209196954798472, "grad_norm": 7.863336293212342e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41670 }, { "epoch": 0.2021404677408208, "grad_norm": 4.910078743591839e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41680 }, { "epoch": 0.2021889659336569, "grad_norm": 8.117226713011405e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41690 }, { "epoch": 0.202237464126493, "grad_norm": 3.393509473426093e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41700 }, { "epoch": 0.20228596231932908, "grad_norm": 7.344989683133463e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41710 }, { "epoch": 0.20233446051216517, "grad_norm": 3.004787174631929e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41720 }, { "epoch": 0.20238295870500125, "grad_norm": 4.104058959342183e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41730 }, { "epoch": 0.20243145689783734, "grad_norm": 5.595345697884113e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41740 }, { "epoch": 0.20247995509067343, "grad_norm": 4.213536541897156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41750 }, { "epoch": 0.20252845328350952, "grad_norm": 7.615366826030368e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41760 }, { "epoch": 0.2025769514763456, "grad_norm": 4.916603657534324e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41770 }, { "epoch": 0.2026254496691817, "grad_norm": 5.8414546089125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41780 }, { "epoch": 0.20267394786201778, "grad_norm": 2.7815838521405567e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41790 }, { "epoch": 0.20272244605485387, "grad_norm": 3.5275274257173805e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41800 }, { "epoch": 0.20277094424768996, "grad_norm": 3.333347819989285e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41810 }, { "epoch": 0.20281944244052605, "grad_norm": 6.588514622762887e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41820 }, { "epoch": 0.20286794063336214, "grad_norm": 3.403895831866066e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41830 }, { "epoch": 0.20291643882619823, "grad_norm": 8.20374665977397e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41840 }, { "epoch": 0.20296493701903434, "grad_norm": 4.16476702014279e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41850 }, { "epoch": 0.20301343521187043, "grad_norm": 4.4697753054379064e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41860 }, { "epoch": 0.20306193340470652, "grad_norm": 5.493593135952324e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41870 }, { "epoch": 0.2031104315975426, "grad_norm": 1.51573104290037e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 41880 }, { "epoch": 0.2031589297903787, "grad_norm": 3.98736510476283e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41890 }, { "epoch": 0.20320742798321478, "grad_norm": 3.857358521486276e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41900 }, { "epoch": 0.20325592617605087, "grad_norm": 6.78400837728077e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41910 }, { "epoch": 0.20330442436888696, "grad_norm": 5.614305820245136e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41920 }, { "epoch": 0.20335292256172305, "grad_norm": 2.843810875674535e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41930 }, { "epoch": 0.20340142075455914, "grad_norm": 3.756572652946488e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41940 }, { "epoch": 0.20344991894739523, "grad_norm": 7.274363866827116e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41950 }, { "epoch": 0.2034984171402313, "grad_norm": 6.34584296221874e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41960 }, { "epoch": 0.2035469153330674, "grad_norm": 2.6343633052761106e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41970 }, { "epoch": 0.2035954135259035, "grad_norm": 6.314716216593297e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41980 }, { "epoch": 0.20364391171873958, "grad_norm": 3.059364672708398e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 41990 }, { "epoch": 0.20369240991157567, "grad_norm": 3.1349312479278524e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42000 }, { "epoch": 0.20374090810441176, "grad_norm": 6.717372968978452e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42010 }, { "epoch": 0.20378940629724784, "grad_norm": 3.683140192833889e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42020 }, { "epoch": 0.20383790449008393, "grad_norm": 4.3709565744620704e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42030 }, { "epoch": 0.20388640268292002, "grad_norm": 5.269387770567846e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42040 }, { "epoch": 0.2039349008757561, "grad_norm": 3.979928919761733e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42050 }, { "epoch": 0.2039833990685922, "grad_norm": 3.74402020497655e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42060 }, { "epoch": 0.20403189726142829, "grad_norm": 5.207044040389519e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42070 }, { "epoch": 0.20408039545426437, "grad_norm": 7.584008443473067e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42080 }, { "epoch": 0.20412889364710046, "grad_norm": 8.087002356660378e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42090 }, { "epoch": 0.20417739183993655, "grad_norm": 4.1038081377564595e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42100 }, { "epoch": 0.20422589003277267, "grad_norm": 3.662567849005427e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42110 }, { "epoch": 0.20427438822560876, "grad_norm": 8.185877931055074e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42120 }, { "epoch": 0.20432288641844484, "grad_norm": 6.019472209572996e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42130 }, { "epoch": 0.20437138461128093, "grad_norm": 6.080875181169176e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42140 }, { "epoch": 0.20441988280411702, "grad_norm": 5.973452488206021e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42150 }, { "epoch": 0.2044683809969531, "grad_norm": 8.543395324522862e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42160 }, { "epoch": 0.2045168791897892, "grad_norm": 4.327717917362861e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42170 }, { "epoch": 0.20456537738262529, "grad_norm": 3.570969298039017e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42180 }, { "epoch": 0.20461387557546137, "grad_norm": 4.320230218013421e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42190 }, { "epoch": 0.20466237376829746, "grad_norm": 4.308447643097679e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42200 }, { "epoch": 0.20471087196113355, "grad_norm": 3.6614050458183556e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42210 }, { "epoch": 0.20475937015396964, "grad_norm": 5.357997778787649e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42220 }, { "epoch": 0.20480786834680573, "grad_norm": 4.9209177177544916e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42230 }, { "epoch": 0.20485636653964182, "grad_norm": 6.020703580134068e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42240 }, { "epoch": 0.2049048647324779, "grad_norm": 3.062893938476918e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42250 }, { "epoch": 0.204953362925314, "grad_norm": 5.6457874819670906e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42260 }, { "epoch": 0.20500186111815008, "grad_norm": 3.947034699081087e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42270 }, { "epoch": 0.20505035931098617, "grad_norm": 2.7840776795073907e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42280 }, { "epoch": 0.20509885750382226, "grad_norm": 4.538093634209872e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42290 }, { "epoch": 0.20514735569665835, "grad_norm": 7.117786537946813e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42300 }, { "epoch": 0.20519585388949443, "grad_norm": 4.6754863092246524e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42310 }, { "epoch": 0.20524435208233052, "grad_norm": 3.288463901185423e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42320 }, { "epoch": 0.2052928502751666, "grad_norm": 4.6359694749753544e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42330 }, { "epoch": 0.2053413484680027, "grad_norm": 6.899163906837202e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42340 }, { "epoch": 0.2053898466608388, "grad_norm": 7.094765663850922e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42350 }, { "epoch": 0.2054383448536749, "grad_norm": 6.854711642745315e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42360 }, { "epoch": 0.205486843046511, "grad_norm": 2.7912994582379724e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42370 }, { "epoch": 0.20553534123934708, "grad_norm": 1.1077829498162828e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 42380 }, { "epoch": 0.20558383943218317, "grad_norm": 4.7232131095142904e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42390 }, { "epoch": 0.20563233762501926, "grad_norm": 8.879469959310882e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42400 }, { "epoch": 0.20568083581785535, "grad_norm": 7.260176460022194e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42410 }, { "epoch": 0.20572933401069143, "grad_norm": 3.1301560454721766e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42420 }, { "epoch": 0.20577783220352752, "grad_norm": 4.004395748324896e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42430 }, { "epoch": 0.2058263303963636, "grad_norm": 3.598755426992284e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42440 }, { "epoch": 0.2058748285891997, "grad_norm": 4.275609910564526e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42450 }, { "epoch": 0.2059233267820358, "grad_norm": 9.160234526461863e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42460 }, { "epoch": 0.20597182497487188, "grad_norm": 5.5924662234474454e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42470 }, { "epoch": 0.20602032316770796, "grad_norm": 5.030998551092125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42480 }, { "epoch": 0.20606882136054405, "grad_norm": 2.2235719043806057e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42490 }, { "epoch": 0.20611731955338014, "grad_norm": 6.922029172073962e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42500 }, { "epoch": 0.20616581774621623, "grad_norm": 4.259312547105765e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42510 }, { "epoch": 0.20621431593905232, "grad_norm": 5.980904660418673e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42520 }, { "epoch": 0.2062628141318884, "grad_norm": 2.1529923799334938e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 42530 }, { "epoch": 0.2063113123247245, "grad_norm": 4.592328650687705e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42540 }, { "epoch": 0.20635981051756058, "grad_norm": 8.072844082107622e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42550 }, { "epoch": 0.20640830871039667, "grad_norm": 6.763652748986715e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42560 }, { "epoch": 0.20645680690323276, "grad_norm": 4.660047991933425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42570 }, { "epoch": 0.20650530509606885, "grad_norm": 5.795214264026072e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42580 }, { "epoch": 0.20655380328890494, "grad_norm": 2.694552669879613e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42590 }, { "epoch": 0.20660230148174102, "grad_norm": 3.158488581789243e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42600 }, { "epoch": 0.2066507996745771, "grad_norm": 3.9128590145764974e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42610 }, { "epoch": 0.20669929786741323, "grad_norm": 3.651481961242098e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42620 }, { "epoch": 0.20674779606024932, "grad_norm": 2.4614113414145322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42630 }, { "epoch": 0.2067962942530854, "grad_norm": 4.3991622789008034e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42640 }, { "epoch": 0.2068447924459215, "grad_norm": 2.935977327922501e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42650 }, { "epoch": 0.20689329063875758, "grad_norm": 5.408191228184478e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42660 }, { "epoch": 0.20694178883159367, "grad_norm": 4.7424702387388606e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42670 }, { "epoch": 0.20699028702442976, "grad_norm": 4.685077215071942e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42680 }, { "epoch": 0.20703878521726585, "grad_norm": 3.444559126819513e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42690 }, { "epoch": 0.20708728341010194, "grad_norm": 2.5710818363222643e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42700 }, { "epoch": 0.20713578160293802, "grad_norm": 3.598817599481663e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42710 }, { "epoch": 0.2071842797957741, "grad_norm": 4.221917748736814e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42720 }, { "epoch": 0.2072327779886102, "grad_norm": 8.045619637186974e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42730 }, { "epoch": 0.2072812761814463, "grad_norm": 3.104073442727895e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42740 }, { "epoch": 0.20732977437428238, "grad_norm": 3.727680564225011e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42750 }, { "epoch": 0.20737827256711847, "grad_norm": 7.372977961495053e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42760 }, { "epoch": 0.20742677075995455, "grad_norm": 1.0335232758507118e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 42770 }, { "epoch": 0.20747526895279064, "grad_norm": 7.012232572378707e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42780 }, { "epoch": 0.20752376714562673, "grad_norm": 8.561629982750674e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42790 }, { "epoch": 0.20757226533846282, "grad_norm": 2.916002728170497e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42800 }, { "epoch": 0.2076207635312989, "grad_norm": 4.9655440648166405e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42810 }, { "epoch": 0.207669261724135, "grad_norm": 8.588629896166822e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42820 }, { "epoch": 0.20771775991697108, "grad_norm": 4.4149725653142013e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42830 }, { "epoch": 0.20776625810980717, "grad_norm": 4.364983396953903e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42840 }, { "epoch": 0.20781475630264326, "grad_norm": 6.558027365599628e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42850 }, { "epoch": 0.20786325449547935, "grad_norm": 3.724423081052919e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42860 }, { "epoch": 0.20791175268831547, "grad_norm": 4.6070702808265196e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42870 }, { "epoch": 0.20796025088115155, "grad_norm": 3.187036057283876e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42880 }, { "epoch": 0.20800874907398764, "grad_norm": 2.3092129097790348e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42890 }, { "epoch": 0.20805724726682373, "grad_norm": 5.040527639721404e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42900 }, { "epoch": 0.20810574545965982, "grad_norm": 5.5462880510503965e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42910 }, { "epoch": 0.2081542436524959, "grad_norm": 3.8686419401301464e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42920 }, { "epoch": 0.208202741845332, "grad_norm": 7.477228081143039e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42930 }, { "epoch": 0.20825124003816808, "grad_norm": 4.1380939563850916e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42940 }, { "epoch": 0.20829973823100417, "grad_norm": 3.6316418317028365e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42950 }, { "epoch": 0.20834823642384026, "grad_norm": 4.526939179072542e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42960 }, { "epoch": 0.20839673461667635, "grad_norm": 6.025372556450748e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42970 }, { "epoch": 0.20844523280951244, "grad_norm": 5.633642174984743e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42980 }, { "epoch": 0.20849373100234853, "grad_norm": 5.615284948135013e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 42990 }, { "epoch": 0.20854222919518461, "grad_norm": 9.14817732677875e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43000 }, { "epoch": 0.2085907273880207, "grad_norm": 2.8025928244801435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43010 }, { "epoch": 0.2086392255808568, "grad_norm": 5.1107413412410096e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43020 }, { "epoch": 0.20868772377369288, "grad_norm": 4.551608157044029e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43030 }, { "epoch": 0.20873622196652897, "grad_norm": 9.481038887315663e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43040 }, { "epoch": 0.20878472015936506, "grad_norm": 3.8799051083060476e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43050 }, { "epoch": 0.20883321835220114, "grad_norm": 4.969831479684217e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43060 }, { "epoch": 0.20888171654503723, "grad_norm": 3.146681848420485e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43070 }, { "epoch": 0.20893021473787332, "grad_norm": 8.106524518325386e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43080 }, { "epoch": 0.2089787129307094, "grad_norm": 8.384359517776829e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43090 }, { "epoch": 0.2090272111235455, "grad_norm": 3.13977821519984e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43100 }, { "epoch": 0.20907570931638159, "grad_norm": 5.5687738864662606e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43110 }, { "epoch": 0.20912420750921767, "grad_norm": 5.937958036383861e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43120 }, { "epoch": 0.2091727057020538, "grad_norm": 5.014182136164891e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43130 }, { "epoch": 0.20922120389488988, "grad_norm": 5.605300401612112e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43140 }, { "epoch": 0.20926970208772597, "grad_norm": 8.91610767439488e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43150 }, { "epoch": 0.20931820028056206, "grad_norm": 3.809690340972338e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43160 }, { "epoch": 0.20936669847339814, "grad_norm": 5.426338844927159e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43170 }, { "epoch": 0.20941519666623423, "grad_norm": 8.261768158490668e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43180 }, { "epoch": 0.20946369485907032, "grad_norm": 4.025119082484707e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43190 }, { "epoch": 0.2095121930519064, "grad_norm": 3.93943082599435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43200 }, { "epoch": 0.2095606912447425, "grad_norm": 3.925419278516529e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43210 }, { "epoch": 0.20960918943757859, "grad_norm": 5.874206010503258e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43220 }, { "epoch": 0.20965768763041467, "grad_norm": 5.826049687129853e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43230 }, { "epoch": 0.20970618582325076, "grad_norm": 4.7711413486695164e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43240 }, { "epoch": 0.20975468401608685, "grad_norm": 7.846920624388076e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43250 }, { "epoch": 0.20980318220892294, "grad_norm": 4.206319204058673e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43260 }, { "epoch": 0.20985168040175903, "grad_norm": 3.67425805336552e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43270 }, { "epoch": 0.20990017859459512, "grad_norm": 6.949836262037934e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43280 }, { "epoch": 0.2099486767874312, "grad_norm": 5.517991752412854e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43290 }, { "epoch": 0.2099971749802673, "grad_norm": 3.537453707735949e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43300 }, { "epoch": 0.21004567317310338, "grad_norm": 5.053111706843083e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43310 }, { "epoch": 0.21009417136593947, "grad_norm": 3.564632677921509e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43320 }, { "epoch": 0.21014266955877556, "grad_norm": 5.702582583921867e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43330 }, { "epoch": 0.21019116775161165, "grad_norm": 5.0813156349249766e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43340 }, { "epoch": 0.21023966594444773, "grad_norm": 4.5946723759016095e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43350 }, { "epoch": 0.21028816413728382, "grad_norm": 6.221400639105923e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43360 }, { "epoch": 0.2103366623301199, "grad_norm": 9.097288256043612e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43370 }, { "epoch": 0.210385160522956, "grad_norm": 5.545007297769189e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43380 }, { "epoch": 0.21043365871579212, "grad_norm": 3.083290778249648e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43390 }, { "epoch": 0.2104821569086282, "grad_norm": 5.900266586422731e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43400 }, { "epoch": 0.2105306551014643, "grad_norm": 5.129071922738149e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43410 }, { "epoch": 0.21057915329430038, "grad_norm": 4.143602083672704e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43420 }, { "epoch": 0.21062765148713647, "grad_norm": 4.7078763998342765e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43430 }, { "epoch": 0.21067614967997256, "grad_norm": 5.122566903992265e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43440 }, { "epoch": 0.21072464787280865, "grad_norm": 4.475794668223898e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43450 }, { "epoch": 0.21077314606564473, "grad_norm": 5.76274672425825e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43460 }, { "epoch": 0.21082164425848082, "grad_norm": 9.582891635773194e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43470 }, { "epoch": 0.2108701424513169, "grad_norm": 2.651413844034778e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43480 }, { "epoch": 0.210918640644153, "grad_norm": 4.0434223080865195e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43490 }, { "epoch": 0.2109671388369891, "grad_norm": 7.685406444579712e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43500 }, { "epoch": 0.21101563702982518, "grad_norm": 2.7039451566679418e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43510 }, { "epoch": 0.21106413522266126, "grad_norm": 1.0965757724079594e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 43520 }, { "epoch": 0.21111263341549735, "grad_norm": 5.000787339781709e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43530 }, { "epoch": 0.21116113160833344, "grad_norm": 4.2157203949955147e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43540 }, { "epoch": 0.21120962980116953, "grad_norm": 7.235413335138219e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43550 }, { "epoch": 0.21125812799400562, "grad_norm": 1.1012506462293459e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 43560 }, { "epoch": 0.2113066261868417, "grad_norm": 1.590755260849619e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 43570 }, { "epoch": 0.2113551243796778, "grad_norm": 4.087984351031082e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43580 }, { "epoch": 0.21140362257251388, "grad_norm": 3.919808122532231e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43590 }, { "epoch": 0.21145212076534997, "grad_norm": 4.452814650335313e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43600 }, { "epoch": 0.21150061895818606, "grad_norm": 1.360832584396121e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 43610 }, { "epoch": 0.21154911715102215, "grad_norm": 5.363867217056395e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43620 }, { "epoch": 0.21159761534385824, "grad_norm": 3.9722284128629326e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43630 }, { "epoch": 0.21164611353669435, "grad_norm": 4.969347600081164e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43640 }, { "epoch": 0.21169461172953044, "grad_norm": 1.1144494038717312e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 43650 }, { "epoch": 0.21174310992236653, "grad_norm": 4.723192859046321e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43660 }, { "epoch": 0.21179160811520262, "grad_norm": 6.937269603213281e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43670 }, { "epoch": 0.2118401063080387, "grad_norm": 6.853911571624849e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43680 }, { "epoch": 0.2118886045008748, "grad_norm": 9.824716329376315e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43690 }, { "epoch": 0.21193710269371088, "grad_norm": 2.9142986690544603e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43700 }, { "epoch": 0.21198560088654697, "grad_norm": 5.023857596597736e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43710 }, { "epoch": 0.21203409907938306, "grad_norm": 5.6390973668385413e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43720 }, { "epoch": 0.21208259727221915, "grad_norm": 5.0218663005807684e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43730 }, { "epoch": 0.21213109546505524, "grad_norm": 4.2645702080790215e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43740 }, { "epoch": 0.21217959365789132, "grad_norm": 3.4715252894557125e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43750 }, { "epoch": 0.2122280918507274, "grad_norm": 5.320860907431779e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43760 }, { "epoch": 0.2122765900435635, "grad_norm": 1.3288747879869334e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 43770 }, { "epoch": 0.2123250882363996, "grad_norm": 5.331228081217887e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43780 }, { "epoch": 0.21237358642923568, "grad_norm": 5.118850410212872e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43790 }, { "epoch": 0.21242208462207177, "grad_norm": 3.097449763345139e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43800 }, { "epoch": 0.21247058281490785, "grad_norm": 9.202420159226676e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43810 }, { "epoch": 0.21251908100774394, "grad_norm": 3.284398530922772e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43820 }, { "epoch": 0.21256757920058003, "grad_norm": 5.010988246567649e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43830 }, { "epoch": 0.21261607739341612, "grad_norm": 3.661042669023118e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43840 }, { "epoch": 0.2126645755862522, "grad_norm": 3.73671191766789e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43850 }, { "epoch": 0.2127130737790883, "grad_norm": 2.9639480203513813e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43860 }, { "epoch": 0.21276157197192438, "grad_norm": 3.9928693240653956e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43870 }, { "epoch": 0.21281007016476047, "grad_norm": 4.605395531598333e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43880 }, { "epoch": 0.21285856835759656, "grad_norm": 1.7763103699053318e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 43890 }, { "epoch": 0.21290706655043268, "grad_norm": 3.9624488579192985e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43900 }, { "epoch": 0.21295556474326877, "grad_norm": 2.6529635377414706e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43910 }, { "epoch": 0.21300406293610485, "grad_norm": 7.350814001938488e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43920 }, { "epoch": 0.21305256112894094, "grad_norm": 5.2886296231235974e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43930 }, { "epoch": 0.21310105932177703, "grad_norm": 4.087740990144084e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43940 }, { "epoch": 0.21314955751461312, "grad_norm": 7.413361657881978e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43950 }, { "epoch": 0.2131980557074492, "grad_norm": 4.180473922588135e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43960 }, { "epoch": 0.2132465539002853, "grad_norm": 9.002393142054643e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43970 }, { "epoch": 0.21329505209312138, "grad_norm": 4.1911793147164644e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43980 }, { "epoch": 0.21334355028595747, "grad_norm": 2.6386848261950036e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 43990 }, { "epoch": 0.21339204847879356, "grad_norm": 3.939961246146595e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44000 }, { "epoch": 0.21344054667162965, "grad_norm": 5.2266710071080524e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44010 }, { "epoch": 0.21348904486446574, "grad_norm": 8.369316617518052e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44020 }, { "epoch": 0.21353754305730183, "grad_norm": 6.952168263296699e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44030 }, { "epoch": 0.21358604125013791, "grad_norm": 6.805971253243115e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44040 }, { "epoch": 0.213634539442974, "grad_norm": 5.818935733259423e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44050 }, { "epoch": 0.2136830376358101, "grad_norm": 4.8255500928462425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44060 }, { "epoch": 0.21373153582864618, "grad_norm": 4.9828763337700366e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44070 }, { "epoch": 0.21378003402148227, "grad_norm": 4.60775062549601e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44080 }, { "epoch": 0.21382853221431836, "grad_norm": 5.2912540127181273e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44090 }, { "epoch": 0.21387703040715444, "grad_norm": 4.546038212538406e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44100 }, { "epoch": 0.21392552859999053, "grad_norm": 3.562486838859513e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44110 }, { "epoch": 0.21397402679282662, "grad_norm": 3.4556528305529355e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44120 }, { "epoch": 0.2140225249856627, "grad_norm": 3.1833778280088154e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44130 }, { "epoch": 0.2140710231784988, "grad_norm": 4.0723087124661106e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44140 }, { "epoch": 0.21411952137133491, "grad_norm": 4.5110329693898166e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44150 }, { "epoch": 0.214168019564171, "grad_norm": 3.714455942827044e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44160 }, { "epoch": 0.2142165177570071, "grad_norm": 2.989010994269847e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44170 }, { "epoch": 0.21426501594984318, "grad_norm": 7.251846767530878e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44180 }, { "epoch": 0.21431351414267927, "grad_norm": 5.061325225597102e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44190 }, { "epoch": 0.21436201233551536, "grad_norm": 1.45158864484074e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 44200 }, { "epoch": 0.21441051052835144, "grad_norm": 5.4304919672176766e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44210 }, { "epoch": 0.21445900872118753, "grad_norm": 4.488281390990778e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44220 }, { "epoch": 0.21450750691402362, "grad_norm": 3.5228538308729185e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44230 }, { "epoch": 0.2145560051068597, "grad_norm": 5.6617604826669776e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44240 }, { "epoch": 0.2146045032996958, "grad_norm": 5.3286264289909013e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44250 }, { "epoch": 0.2146530014925319, "grad_norm": 7.06758029878074e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44260 }, { "epoch": 0.21470149968536797, "grad_norm": 9.503223452611564e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44270 }, { "epoch": 0.21474999787820406, "grad_norm": 1.0068604439084083e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 44280 }, { "epoch": 0.21479849607104015, "grad_norm": 4.409304210639675e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44290 }, { "epoch": 0.21484699426387624, "grad_norm": 4.9037925720085696e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44300 }, { "epoch": 0.21489549245671233, "grad_norm": 3.6805921865834534e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44310 }, { "epoch": 0.21494399064954842, "grad_norm": 4.290994226607836e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44320 }, { "epoch": 0.2149924888423845, "grad_norm": 1.1727263427019352e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 44330 }, { "epoch": 0.2150409870352206, "grad_norm": 2.7969198512778348e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44340 }, { "epoch": 0.21508948522805668, "grad_norm": 2.7865663554393905e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44350 }, { "epoch": 0.21513798342089277, "grad_norm": 5.284869786237323e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44360 }, { "epoch": 0.21518648161372886, "grad_norm": 2.174096991325314e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44370 }, { "epoch": 0.21523497980656495, "grad_norm": 8.27767152600245e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44380 }, { "epoch": 0.21528347799940104, "grad_norm": 9.447460769251848e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44390 }, { "epoch": 0.21533197619223712, "grad_norm": 5.1691731783876094e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44400 }, { "epoch": 0.21538047438507324, "grad_norm": 3.532489500912561e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44410 }, { "epoch": 0.21542897257790933, "grad_norm": 2.229670599263045e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 44420 }, { "epoch": 0.21547747077074542, "grad_norm": 8.69391527658081e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44430 }, { "epoch": 0.2155259689635815, "grad_norm": 3.047890118068608e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44440 }, { "epoch": 0.2155744671564176, "grad_norm": 3.795802072659171e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44450 }, { "epoch": 0.21562296534925368, "grad_norm": 2.3832093631881435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44460 }, { "epoch": 0.21567146354208977, "grad_norm": 2.7746441588760717e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44470 }, { "epoch": 0.21571996173492586, "grad_norm": 2.1083899071072665e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44480 }, { "epoch": 0.21576845992776195, "grad_norm": 7.233253995764244e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44490 }, { "epoch": 0.21581695812059803, "grad_norm": 2.8396502926852918e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44500 }, { "epoch": 0.21586545631343412, "grad_norm": 6.91181867296109e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44510 }, { "epoch": 0.2159139545062702, "grad_norm": 3.647160085051837e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44520 }, { "epoch": 0.2159624526991063, "grad_norm": 5.72690872502335e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44530 }, { "epoch": 0.2160109508919424, "grad_norm": 6.468430058248487e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44540 }, { "epoch": 0.21605944908477848, "grad_norm": 3.911458890115682e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44550 }, { "epoch": 0.21610794727761456, "grad_norm": 3.1067671102391614e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44560 }, { "epoch": 0.21615644547045065, "grad_norm": 4.0740591344956556e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44570 }, { "epoch": 0.21620494366328674, "grad_norm": 6.07650392225878e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44580 }, { "epoch": 0.21625344185612283, "grad_norm": 5.396856650463633e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44590 }, { "epoch": 0.21630194004895892, "grad_norm": 3.658494307501314e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44600 }, { "epoch": 0.216350438241795, "grad_norm": 6.010133546396901e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44610 }, { "epoch": 0.2163989364346311, "grad_norm": 2.9188148786829515e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44620 }, { "epoch": 0.21644743462746718, "grad_norm": 4.513514184623091e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44630 }, { "epoch": 0.21649593282030327, "grad_norm": 3.7602678304438086e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44640 }, { "epoch": 0.21654443101313936, "grad_norm": 4.1804717909599276e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44650 }, { "epoch": 0.21659292920597545, "grad_norm": 3.2084439993695923e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44660 }, { "epoch": 0.21664142739881156, "grad_norm": 5.644098166612821e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44670 }, { "epoch": 0.21668992559164765, "grad_norm": 5.672013259072628e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44680 }, { "epoch": 0.21673842378448374, "grad_norm": 7.218432074296288e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44690 }, { "epoch": 0.21678692197731983, "grad_norm": 2.6146933507220638e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44700 }, { "epoch": 0.21683542017015592, "grad_norm": 4.167699430013272e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44710 }, { "epoch": 0.216883918362992, "grad_norm": 4.2318774262639636e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44720 }, { "epoch": 0.2169324165558281, "grad_norm": 3.680361615465699e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44730 }, { "epoch": 0.21698091474866418, "grad_norm": 6.172621880295992e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44740 }, { "epoch": 0.21702941294150027, "grad_norm": 2.5938605929809455e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44750 }, { "epoch": 0.21707791113433636, "grad_norm": 5.184951490377898e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44760 }, { "epoch": 0.21712640932717245, "grad_norm": 7.144514313495165e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44770 }, { "epoch": 0.21717490752000854, "grad_norm": 3.540971960092065e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44780 }, { "epoch": 0.21722340571284462, "grad_norm": 3.6135567427209025e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44790 }, { "epoch": 0.2172719039056807, "grad_norm": 6.379750772111947e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44800 }, { "epoch": 0.2173204020985168, "grad_norm": 3.002097415105709e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44810 }, { "epoch": 0.2173689002913529, "grad_norm": 3.212670662833261e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44820 }, { "epoch": 0.21741739848418898, "grad_norm": 7.386107370166428e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44830 }, { "epoch": 0.21746589667702507, "grad_norm": 5.966297322856917e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44840 }, { "epoch": 0.21751439486986116, "grad_norm": 7.124389611590232e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44850 }, { "epoch": 0.21756289306269724, "grad_norm": 4.972990552687406e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44860 }, { "epoch": 0.21761139125553333, "grad_norm": 7.12739804953344e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44870 }, { "epoch": 0.21765988944836942, "grad_norm": 5.188138985090518e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44880 }, { "epoch": 0.2177083876412055, "grad_norm": 7.023035664133204e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44890 }, { "epoch": 0.2177568858340416, "grad_norm": 1.9014986207821494e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 44900 }, { "epoch": 0.21780538402687769, "grad_norm": 4.640822481860596e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44910 }, { "epoch": 0.2178538822197138, "grad_norm": 5.4611113853297866e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44920 }, { "epoch": 0.2179023804125499, "grad_norm": 7.677172675357724e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44930 }, { "epoch": 0.21795087860538598, "grad_norm": 6.951339059924067e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44940 }, { "epoch": 0.21799937679822207, "grad_norm": 6.640806304858415e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44950 }, { "epoch": 0.21804787499105815, "grad_norm": 6.074659353316747e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44960 }, { "epoch": 0.21809637318389424, "grad_norm": 4.999179026299316e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44970 }, { "epoch": 0.21814487137673033, "grad_norm": 3.874137988191251e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 44980 }, { "epoch": 0.21819336956956642, "grad_norm": 1.2993240261494066e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 44990 }, { "epoch": 0.2182418677624025, "grad_norm": 3.346970700590646e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45000 }, { "epoch": 0.2182903659552386, "grad_norm": 3.649100577263198e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45010 }, { "epoch": 0.21833886414807469, "grad_norm": 5.101928124418009e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45020 }, { "epoch": 0.21838736234091077, "grad_norm": 4.528129693426308e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45030 }, { "epoch": 0.21843586053374686, "grad_norm": 6.179811862239148e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45040 }, { "epoch": 0.21848435872658295, "grad_norm": 6.736628677117551e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45050 }, { "epoch": 0.21853285691941904, "grad_norm": 2.5595687347390594e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45060 }, { "epoch": 0.21858135511225513, "grad_norm": 5.1598316730405713e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45070 }, { "epoch": 0.21862985330509122, "grad_norm": 9.168842041162861e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45080 }, { "epoch": 0.2186783514979273, "grad_norm": 2.0167396641568303e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45090 }, { "epoch": 0.2187268496907634, "grad_norm": 7.060102547029601e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45100 }, { "epoch": 0.21877534788359948, "grad_norm": 2.6141997011563944e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45110 }, { "epoch": 0.21882384607643557, "grad_norm": 7.766252707597232e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45120 }, { "epoch": 0.21887234426927166, "grad_norm": 4.040408541072793e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45130 }, { "epoch": 0.21892084246210775, "grad_norm": 6.070031588478741e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45140 }, { "epoch": 0.21896934065494383, "grad_norm": 7.482574204686898e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45150 }, { "epoch": 0.21901783884777992, "grad_norm": 3.5009659171691965e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45160 }, { "epoch": 0.219066337040616, "grad_norm": 9.343875007061797e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45170 }, { "epoch": 0.21911483523345213, "grad_norm": 3.969167394757278e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45180 }, { "epoch": 0.21916333342628821, "grad_norm": 9.667312639294323e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45190 }, { "epoch": 0.2192118316191243, "grad_norm": 1.2276417749035318e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 45200 }, { "epoch": 0.2192603298119604, "grad_norm": 4.838631895154322e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45210 }, { "epoch": 0.21930882800479648, "grad_norm": 6.36333368220221e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45220 }, { "epoch": 0.21935732619763257, "grad_norm": 6.06842647243866e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45230 }, { "epoch": 0.21940582439046866, "grad_norm": 6.33052721354943e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45240 }, { "epoch": 0.21945432258330475, "grad_norm": 6.951050579573348e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45250 }, { "epoch": 0.21950282077614083, "grad_norm": 6.226195381486832e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45260 }, { "epoch": 0.21955131896897692, "grad_norm": 5.546733561345718e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45270 }, { "epoch": 0.219599817161813, "grad_norm": 4.24390051989576e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45280 }, { "epoch": 0.2196483153546491, "grad_norm": 3.200379339318715e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45290 }, { "epoch": 0.2196968135474852, "grad_norm": 5.839258676587633e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45300 }, { "epoch": 0.21974531174032128, "grad_norm": 4.2480227335772724e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45310 }, { "epoch": 0.21979380993315736, "grad_norm": 3.0918421600745205e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45320 }, { "epoch": 0.21984230812599345, "grad_norm": 3.1257339827561736e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45330 }, { "epoch": 0.21989080631882954, "grad_norm": 4.4217369321586375e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45340 }, { "epoch": 0.21993930451166563, "grad_norm": 3.491094702212649e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45350 }, { "epoch": 0.21998780270450172, "grad_norm": 7.9476585312932e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45360 }, { "epoch": 0.2200363008973378, "grad_norm": 1.1288452128610516e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 45370 }, { "epoch": 0.2200847990901739, "grad_norm": 2.9506161070003145e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45380 }, { "epoch": 0.22013329728300998, "grad_norm": 2.372862084598637e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45390 }, { "epoch": 0.22018179547584607, "grad_norm": 6.960606668826586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45400 }, { "epoch": 0.22023029366868216, "grad_norm": 5.823522641890122e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45410 }, { "epoch": 0.22027879186151825, "grad_norm": 3.663651781948829e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45420 }, { "epoch": 0.22032729005435436, "grad_norm": 4.859204238982784e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45430 }, { "epoch": 0.22037578824719045, "grad_norm": 5.5256947462112294e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45440 }, { "epoch": 0.22042428644002654, "grad_norm": 4.730579661327283e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45450 }, { "epoch": 0.22047278463286263, "grad_norm": 3.489475375317852e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45460 }, { "epoch": 0.22052128282569872, "grad_norm": 3.263563996824814e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45470 }, { "epoch": 0.2205697810185348, "grad_norm": 4.710511092298475e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45480 }, { "epoch": 0.2206182792113709, "grad_norm": 5.234000610698786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45490 }, { "epoch": 0.22066677740420698, "grad_norm": 3.076309340599437e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45500 }, { "epoch": 0.22071527559704307, "grad_norm": 1.7232301274816564e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 45510 }, { "epoch": 0.22076377378987916, "grad_norm": 5.014094313082751e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 45520 }, { "epoch": 0.22081227198271525, "grad_norm": 3.297232353816071e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45530 }, { "epoch": 0.22086077017555134, "grad_norm": 4.674912545965526e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45540 }, { "epoch": 0.22090926836838742, "grad_norm": 3.709972418164398e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45550 }, { "epoch": 0.2209577665612235, "grad_norm": 3.39029568863225e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45560 }, { "epoch": 0.2210062647540596, "grad_norm": 7.498014298334965e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45570 }, { "epoch": 0.2210547629468957, "grad_norm": 9.96776918782416e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45580 }, { "epoch": 0.22110326113973178, "grad_norm": 4.4154489842185285e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45590 }, { "epoch": 0.22115175933256787, "grad_norm": 2.3893486300607947e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45600 }, { "epoch": 0.22120025752540395, "grad_norm": 4.5337923637589483e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45610 }, { "epoch": 0.22124875571824004, "grad_norm": 3.884697008516014e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45620 }, { "epoch": 0.22129725391107613, "grad_norm": 5.1344645868312e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45630 }, { "epoch": 0.22134575210391222, "grad_norm": 1.1311435343941412e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 45640 }, { "epoch": 0.2213942502967483, "grad_norm": 3.678748328184156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45650 }, { "epoch": 0.2214427484895844, "grad_norm": 4.9531664103596995e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45660 }, { "epoch": 0.22149124668242048, "grad_norm": 7.276321412064135e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45670 }, { "epoch": 0.22153974487525657, "grad_norm": 2.802612009134009e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45680 }, { "epoch": 0.2215882430680927, "grad_norm": 4.857458435481021e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45690 }, { "epoch": 0.22163674126092878, "grad_norm": 2.497460904749005e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45700 }, { "epoch": 0.22168523945376487, "grad_norm": 4.4364334428337315e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45710 }, { "epoch": 0.22173373764660095, "grad_norm": 7.741382290760157e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45720 }, { "epoch": 0.22178223583943704, "grad_norm": 3.673910953239101e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45730 }, { "epoch": 0.22183073403227313, "grad_norm": 7.746663044372326e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45740 }, { "epoch": 0.22187923222510922, "grad_norm": 3.7481544978845704e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45750 }, { "epoch": 0.2219277304179453, "grad_norm": 2.8836319998504223e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45760 }, { "epoch": 0.2219762286107814, "grad_norm": 6.222050075166408e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45770 }, { "epoch": 0.22202472680361748, "grad_norm": 7.158921278005437e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45780 }, { "epoch": 0.22207322499645357, "grad_norm": 3.18299662183108e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45790 }, { "epoch": 0.22212172318928966, "grad_norm": 3.61585641428519e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45800 }, { "epoch": 0.22217022138212575, "grad_norm": 7.61412621841373e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45810 }, { "epoch": 0.22221871957496184, "grad_norm": 5.3086662177292965e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45820 }, { "epoch": 0.22226721776779793, "grad_norm": 4.3303106878056496e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45830 }, { "epoch": 0.222315715960634, "grad_norm": 4.8370125682595244e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45840 }, { "epoch": 0.2223642141534701, "grad_norm": 2.4453298053117578e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45850 }, { "epoch": 0.2224127123463062, "grad_norm": 5.7792330920847235e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45860 }, { "epoch": 0.22246121053914228, "grad_norm": 1.4978841988977365e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 45870 }, { "epoch": 0.22250970873197837, "grad_norm": 3.44783259720316e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45880 }, { "epoch": 0.22255820692481446, "grad_norm": 5.076992692920612e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45890 }, { "epoch": 0.22260670511765054, "grad_norm": 6.150124676196356e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45900 }, { "epoch": 0.22265520331048663, "grad_norm": 5.513039980087342e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45910 }, { "epoch": 0.22270370150332272, "grad_norm": 5.51981749197239e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45920 }, { "epoch": 0.2227521996961588, "grad_norm": 1.7674416596946685e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 45930 }, { "epoch": 0.2228006978889949, "grad_norm": 7.67817880387156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45940 }, { "epoch": 0.222849196081831, "grad_norm": 5.570446859337608e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45950 }, { "epoch": 0.2228976942746671, "grad_norm": 9.916440291135586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45960 }, { "epoch": 0.2229461924675032, "grad_norm": 4.867565550625841e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45970 }, { "epoch": 0.22299469066033928, "grad_norm": 5.898863264519605e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45980 }, { "epoch": 0.22304318885317537, "grad_norm": 4.150377463929544e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 45990 }, { "epoch": 0.22309168704601146, "grad_norm": 7.638779919716399e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46000 }, { "epoch": 0.22314018523884754, "grad_norm": 5.91743081201912e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46010 }, { "epoch": 0.22318868343168363, "grad_norm": 3.476661802892522e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46020 }, { "epoch": 0.22323718162451972, "grad_norm": 4.6944773401946804e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46030 }, { "epoch": 0.2232856798173558, "grad_norm": 6.891488624205522e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46040 }, { "epoch": 0.2233341780101919, "grad_norm": 4.4207094873627284e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46050 }, { "epoch": 0.22338267620302799, "grad_norm": 2.8026022391713923e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46060 }, { "epoch": 0.22343117439586407, "grad_norm": 4.911986550837355e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46070 }, { "epoch": 0.22347967258870016, "grad_norm": 5.191673224658189e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46080 }, { "epoch": 0.22352817078153625, "grad_norm": 5.6264145342765914e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46090 }, { "epoch": 0.22357666897437234, "grad_norm": 3.525462233255894e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46100 }, { "epoch": 0.22362516716720843, "grad_norm": 7.518174527376686e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46110 }, { "epoch": 0.22367366536004452, "grad_norm": 4.110911078214485e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46120 }, { "epoch": 0.2237221635528806, "grad_norm": 3.423664196589016e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46130 }, { "epoch": 0.2237706617457167, "grad_norm": 7.288897307944353e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46140 }, { "epoch": 0.22381915993855278, "grad_norm": 5.4942368876709224e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46150 }, { "epoch": 0.22386765813138887, "grad_norm": 3.496100831057447e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46160 }, { "epoch": 0.22391615632422496, "grad_norm": 6.443559641411412e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46170 }, { "epoch": 0.22396465451706105, "grad_norm": 2.990000780300761e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46180 }, { "epoch": 0.22401315270989713, "grad_norm": 4.213770665728589e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46190 }, { "epoch": 0.22406165090273325, "grad_norm": 3.264879566700074e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46200 }, { "epoch": 0.22411014909556934, "grad_norm": 4.679008469565815e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46210 }, { "epoch": 0.22415864728840543, "grad_norm": 5.8329810315171926e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46220 }, { "epoch": 0.22420714548124152, "grad_norm": 7.139809099498962e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46230 }, { "epoch": 0.2242556436740776, "grad_norm": 7.06959468743662e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46240 }, { "epoch": 0.2243041418669137, "grad_norm": 4.27450324025358e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46250 }, { "epoch": 0.22435264005974978, "grad_norm": 3.8849439221166904e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46260 }, { "epoch": 0.22440113825258587, "grad_norm": 4.942321396583793e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46270 }, { "epoch": 0.22444963644542196, "grad_norm": 2.851695768413265e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46280 }, { "epoch": 0.22449813463825805, "grad_norm": 3.440451123992716e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46290 }, { "epoch": 0.22454663283109413, "grad_norm": 2.8180208389017025e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46300 }, { "epoch": 0.22459513102393022, "grad_norm": 3.664473169351368e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46310 }, { "epoch": 0.2246436292167663, "grad_norm": 4.0029046743939034e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46320 }, { "epoch": 0.2246921274096024, "grad_norm": 5.664202973321153e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46330 }, { "epoch": 0.2247406256024385, "grad_norm": 3.7867174285111105e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46340 }, { "epoch": 0.22478912379527458, "grad_norm": 5.6713009399800285e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46350 }, { "epoch": 0.22483762198811066, "grad_norm": 2.5129278213853468e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46360 }, { "epoch": 0.22488612018094675, "grad_norm": 3.75120627893466e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46370 }, { "epoch": 0.22493461837378284, "grad_norm": 4.5561062478327585e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46380 }, { "epoch": 0.22498311656661893, "grad_norm": 2.8323889011971914e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46390 }, { "epoch": 0.22503161475945502, "grad_norm": 6.048258427426845e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46400 }, { "epoch": 0.2250801129522911, "grad_norm": 3.355289734940925e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46410 }, { "epoch": 0.2251286111451272, "grad_norm": 7.405275681549028e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46420 }, { "epoch": 0.22517710933796328, "grad_norm": 6.635109883745827e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46430 }, { "epoch": 0.22522560753079937, "grad_norm": 8.016660757448335e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46440 }, { "epoch": 0.22527410572363546, "grad_norm": 5.247289180942971e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46450 }, { "epoch": 0.22532260391647158, "grad_norm": 8.286448860417295e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46460 }, { "epoch": 0.22537110210930766, "grad_norm": 5.8736652874813444e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46470 }, { "epoch": 0.22541960030214375, "grad_norm": 3.67094656894551e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46480 }, { "epoch": 0.22546809849497984, "grad_norm": 4.3695667528709237e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46490 }, { "epoch": 0.22551659668781593, "grad_norm": 1.0152268714591628e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 46500 }, { "epoch": 0.22556509488065202, "grad_norm": 3.913043400416427e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46510 }, { "epoch": 0.2256135930734881, "grad_norm": 6.792397044819154e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46520 }, { "epoch": 0.2256620912663242, "grad_norm": 6.645844052854954e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46530 }, { "epoch": 0.22571058945916028, "grad_norm": 2.5079405219230466e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46540 }, { "epoch": 0.22575908765199637, "grad_norm": 2.081891992133933e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46550 }, { "epoch": 0.22580758584483246, "grad_norm": 8.674769702565754e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46560 }, { "epoch": 0.22585608403766855, "grad_norm": 5.365815880509217e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46570 }, { "epoch": 0.22590458223050464, "grad_norm": 4.762929961543705e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46580 }, { "epoch": 0.22595308042334072, "grad_norm": 2.1539830186156905e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 46590 }, { "epoch": 0.2260015786161768, "grad_norm": 4.873292525076067e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46600 }, { "epoch": 0.2260500768090129, "grad_norm": 3.742830756436888e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46610 }, { "epoch": 0.226098575001849, "grad_norm": 2.6765075489265655e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46620 }, { "epoch": 0.22614707319468508, "grad_norm": 3.0426996033838805e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46630 }, { "epoch": 0.22619557138752117, "grad_norm": 4.759020555411553e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46640 }, { "epoch": 0.22624406958035725, "grad_norm": 4.611736414972256e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46650 }, { "epoch": 0.22629256777319334, "grad_norm": 6.685360887104252e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46660 }, { "epoch": 0.22634106596602943, "grad_norm": 4.9990863004722996e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46670 }, { "epoch": 0.22638956415886552, "grad_norm": 5.355863663680793e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46680 }, { "epoch": 0.2264380623517016, "grad_norm": 4.7291397464732654e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46690 }, { "epoch": 0.2264865605445377, "grad_norm": 3.967933182025263e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46700 }, { "epoch": 0.2265350587373738, "grad_norm": 7.752670683203178e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46710 }, { "epoch": 0.2265835569302099, "grad_norm": 3.517946822739759e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46720 }, { "epoch": 0.226632055123046, "grad_norm": 5.141490788673764e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46730 }, { "epoch": 0.22668055331588208, "grad_norm": 7.047059824571988e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46740 }, { "epoch": 0.22672905150871817, "grad_norm": 9.410100432205581e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46750 }, { "epoch": 0.22677754970155425, "grad_norm": 7.79713715814978e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46760 }, { "epoch": 0.22682604789439034, "grad_norm": 2.7830683535512435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46770 }, { "epoch": 0.22687454608722643, "grad_norm": 4.952258692014766e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46780 }, { "epoch": 0.22692304428006252, "grad_norm": 2.8902036319777835e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46790 }, { "epoch": 0.2269715424728986, "grad_norm": 2.3515733360568447e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46800 }, { "epoch": 0.2270200406657347, "grad_norm": 4.494172856084333e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46810 }, { "epoch": 0.22706853885857078, "grad_norm": 5.511662948265439e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46820 }, { "epoch": 0.22711703705140687, "grad_norm": 7.075811936374521e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46830 }, { "epoch": 0.22716553524424296, "grad_norm": 3.348521104840074e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46840 }, { "epoch": 0.22721403343707905, "grad_norm": 4.113279317152774e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46850 }, { "epoch": 0.22726253162991514, "grad_norm": 5.620917775672751e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46860 }, { "epoch": 0.22731102982275123, "grad_norm": 5.3777419850575825e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46870 }, { "epoch": 0.22735952801558731, "grad_norm": 1.2478714950248104e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 46880 }, { "epoch": 0.2274080262084234, "grad_norm": 3.754988853188479e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46890 }, { "epoch": 0.2274565244012595, "grad_norm": 3.37389245430586e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46900 }, { "epoch": 0.22750502259409558, "grad_norm": 5.326959851004176e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46910 }, { "epoch": 0.22755352078693167, "grad_norm": 4.83757638392035e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46920 }, { "epoch": 0.22760201897976776, "grad_norm": 1.1213064254889105e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 46930 }, { "epoch": 0.22765051717260384, "grad_norm": 4.639538886408445e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46940 }, { "epoch": 0.22769901536543993, "grad_norm": 6.670703811550993e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46950 }, { "epoch": 0.22774751355827602, "grad_norm": 8.431504028294512e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46960 }, { "epoch": 0.22779601175111214, "grad_norm": 3.393855863009776e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46970 }, { "epoch": 0.22784450994394823, "grad_norm": 4.491868210720895e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46980 }, { "epoch": 0.22789300813678431, "grad_norm": 3.8273121560905565e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 46990 }, { "epoch": 0.2279415063296204, "grad_norm": 5.20548866234094e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47000 }, { "epoch": 0.2279900045224565, "grad_norm": 5.1171042514397413e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47010 }, { "epoch": 0.22803850271529258, "grad_norm": 3.285066796365754e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47020 }, { "epoch": 0.22808700090812867, "grad_norm": 8.016301933366776e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47030 }, { "epoch": 0.22813549910096476, "grad_norm": 6.204498248507662e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47040 }, { "epoch": 0.22818399729380084, "grad_norm": 3.164980100223147e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47050 }, { "epoch": 0.22823249548663693, "grad_norm": 1.0246135673241952e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 47060 }, { "epoch": 0.22828099367947302, "grad_norm": 2.6639323635890833e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47070 }, { "epoch": 0.2283294918723091, "grad_norm": 6.565122845358928e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47080 }, { "epoch": 0.2283779900651452, "grad_norm": 6.38548840470321e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47090 }, { "epoch": 0.22842648825798129, "grad_norm": 1.1672964319586754e-06, "learning_rate": 0.0002, "loss": 0.0, "step": 47100 }, { "epoch": 0.22847498645081737, "grad_norm": 6.154201770414147e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47110 }, { "epoch": 0.22852348464365346, "grad_norm": 1.1169323954618449e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 47120 }, { "epoch": 0.22857198283648955, "grad_norm": 4.357779914698767e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47130 }, { "epoch": 0.22862048102932564, "grad_norm": 9.499927244860373e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47140 }, { "epoch": 0.22866897922216173, "grad_norm": 4.050193780358313e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47150 }, { "epoch": 0.22871747741499782, "grad_norm": 2.608691751504466e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47160 }, { "epoch": 0.2287659756078339, "grad_norm": 1.6969659100141143e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 47170 }, { "epoch": 0.22881447380067, "grad_norm": 5.8942973168996105e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47180 }, { "epoch": 0.22886297199350608, "grad_norm": 3.176142726601938e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47190 }, { "epoch": 0.22891147018634217, "grad_norm": 4.585998425454818e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47200 }, { "epoch": 0.22895996837917826, "grad_norm": 1.3124405029429909e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 47210 }, { "epoch": 0.22900846657201437, "grad_norm": 2.0440500847485055e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47220 }, { "epoch": 0.22905696476485046, "grad_norm": 4.234604844555179e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47230 }, { "epoch": 0.22910546295768655, "grad_norm": 5.189104967939784e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47240 }, { "epoch": 0.22915396115052264, "grad_norm": 3.679950566493062e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47250 }, { "epoch": 0.22920245934335873, "grad_norm": 5.837753747073293e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47260 }, { "epoch": 0.22925095753619482, "grad_norm": 5.016406490199188e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47270 }, { "epoch": 0.2292994557290309, "grad_norm": 7.235180987663625e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47280 }, { "epoch": 0.229347953921867, "grad_norm": 4.1356681634852066e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47290 }, { "epoch": 0.22939645211470308, "grad_norm": 3.9048732247692897e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47300 }, { "epoch": 0.22944495030753917, "grad_norm": 2.6003595721135753e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47310 }, { "epoch": 0.22949344850037526, "grad_norm": 6.187531909063182e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47320 }, { "epoch": 0.22954194669321135, "grad_norm": 2.3277973326685242e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47330 }, { "epoch": 0.22959044488604743, "grad_norm": 4.090013661084413e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47340 }, { "epoch": 0.22963894307888352, "grad_norm": 4.000471065523925e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47350 }, { "epoch": 0.2296874412717196, "grad_norm": 6.61596786244445e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47360 }, { "epoch": 0.2297359394645557, "grad_norm": 4.1556070584647387e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47370 }, { "epoch": 0.2297844376573918, "grad_norm": 6.516840045378558e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47380 }, { "epoch": 0.22983293585022788, "grad_norm": 4.376648021775509e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47390 }, { "epoch": 0.22988143404306396, "grad_norm": 7.105275301455549e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47400 }, { "epoch": 0.22992993223590005, "grad_norm": 3.19962438766197e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47410 }, { "epoch": 0.22997843042873614, "grad_norm": 4.4940918542124564e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47420 }, { "epoch": 0.23002692862157223, "grad_norm": 6.308105326979785e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47430 }, { "epoch": 0.23007542681440832, "grad_norm": 2.907016671827023e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47440 }, { "epoch": 0.2301239250072444, "grad_norm": 4.8629878790507064e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47450 }, { "epoch": 0.2301724232000805, "grad_norm": 6.875219327184823e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47460 }, { "epoch": 0.23022092139291658, "grad_norm": 3.3684120381849425e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47470 }, { "epoch": 0.2302694195857527, "grad_norm": 8.497507053562003e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47480 }, { "epoch": 0.2303179177785888, "grad_norm": 5.837059191549088e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47490 }, { "epoch": 0.23036641597142488, "grad_norm": 3.385234137454063e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47500 }, { "epoch": 0.23041491416426096, "grad_norm": 6.468399504910849e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47510 }, { "epoch": 0.23046341235709705, "grad_norm": 9.940973910715911e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47520 }, { "epoch": 0.23051191054993314, "grad_norm": 6.320799172954139e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47530 }, { "epoch": 0.23056040874276923, "grad_norm": 7.018365266731053e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47540 }, { "epoch": 0.23060890693560532, "grad_norm": 7.175537319881187e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47550 }, { "epoch": 0.2306574051284414, "grad_norm": 7.82060070037005e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47560 }, { "epoch": 0.2307059033212775, "grad_norm": 3.168781148588096e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47570 }, { "epoch": 0.23075440151411358, "grad_norm": 3.82721943026354e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47580 }, { "epoch": 0.23080289970694967, "grad_norm": 3.7650636386388214e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47590 }, { "epoch": 0.23085139789978576, "grad_norm": 5.3866514804212784e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47600 }, { "epoch": 0.23089989609262185, "grad_norm": 5.4783672709390885e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47610 }, { "epoch": 0.23094839428545794, "grad_norm": 3.231698286754181e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47620 }, { "epoch": 0.23099689247829402, "grad_norm": 3.321291330848908e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47630 }, { "epoch": 0.2310453906711301, "grad_norm": 2.3072239230259584e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47640 }, { "epoch": 0.2310938888639662, "grad_norm": 5.524537272094676e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47650 }, { "epoch": 0.2311423870568023, "grad_norm": 2.7066171526257676e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47660 }, { "epoch": 0.23119088524963838, "grad_norm": 8.390780692479893e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47670 }, { "epoch": 0.23123938344247447, "grad_norm": 3.2129189975194095e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47680 }, { "epoch": 0.23128788163531055, "grad_norm": 3.463571474071614e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47690 }, { "epoch": 0.23133637982814664, "grad_norm": 2.794744879963673e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47700 }, { "epoch": 0.23138487802098273, "grad_norm": 7.566291770899625e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47710 }, { "epoch": 0.23143337621381882, "grad_norm": 6.528236440317414e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47720 }, { "epoch": 0.2314818744066549, "grad_norm": 3.334621112571767e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47730 }, { "epoch": 0.23153037259949102, "grad_norm": 3.572930751261083e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47740 }, { "epoch": 0.2315788707923271, "grad_norm": 3.8828645188004884e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47750 }, { "epoch": 0.2316273689851632, "grad_norm": 7.22662676366781e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47760 }, { "epoch": 0.2316758671779993, "grad_norm": 1.0790463988996635e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 47770 }, { "epoch": 0.23172436537083538, "grad_norm": 7.494909937122429e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47780 }, { "epoch": 0.23177286356367147, "grad_norm": 4.2301568470293205e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47790 }, { "epoch": 0.23182136175650755, "grad_norm": 2.8359602666228056e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47800 }, { "epoch": 0.23186985994934364, "grad_norm": 2.3523911707457046e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47810 }, { "epoch": 0.23191835814217973, "grad_norm": 7.377628463700603e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47820 }, { "epoch": 0.23196685633501582, "grad_norm": 4.5964792860786474e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47830 }, { "epoch": 0.2320153545278519, "grad_norm": 5.7068078263000643e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47840 }, { "epoch": 0.232063852720688, "grad_norm": 4.1085502999749224e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47850 }, { "epoch": 0.23211235091352408, "grad_norm": 3.611049592677773e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47860 }, { "epoch": 0.23216084910636017, "grad_norm": 5.507853018116293e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47870 }, { "epoch": 0.23220934729919626, "grad_norm": 1.7467249335822999e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 47880 }, { "epoch": 0.23225784549203235, "grad_norm": 8.421142183578922e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47890 }, { "epoch": 0.23230634368486844, "grad_norm": 6.560704690627972e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47900 }, { "epoch": 0.23235484187770453, "grad_norm": 5.152486792781019e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47910 }, { "epoch": 0.23240334007054061, "grad_norm": 2.85923285048284e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47920 }, { "epoch": 0.2324518382633767, "grad_norm": 4.455782587342583e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47930 }, { "epoch": 0.2325003364562128, "grad_norm": 6.872978985938971e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47940 }, { "epoch": 0.23254883464904888, "grad_norm": 2.5560122907108962e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47950 }, { "epoch": 0.23259733284188497, "grad_norm": 4.801314901214937e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47960 }, { "epoch": 0.23264583103472106, "grad_norm": 3.716091612204764e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47970 }, { "epoch": 0.23269432922755715, "grad_norm": 3.154579175657091e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47980 }, { "epoch": 0.23274282742039326, "grad_norm": 3.589757469057986e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 47990 }, { "epoch": 0.23279132561322935, "grad_norm": 2.0511263798539403e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48000 }, { "epoch": 0.23283982380606544, "grad_norm": 5.2570801045703774e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48010 }, { "epoch": 0.23288832199890153, "grad_norm": 8.085778802069399e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48020 }, { "epoch": 0.23293682019173761, "grad_norm": 2.8806510954382247e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48030 }, { "epoch": 0.2329853183845737, "grad_norm": 3.928148828435951e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48040 }, { "epoch": 0.2330338165774098, "grad_norm": 4.964021016462539e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48050 }, { "epoch": 0.23308231477024588, "grad_norm": 8.951036534199375e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48060 }, { "epoch": 0.23313081296308197, "grad_norm": 7.342928398657023e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48070 }, { "epoch": 0.23317931115591806, "grad_norm": 8.862308220614068e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48080 }, { "epoch": 0.23322780934875414, "grad_norm": 9.574334569606435e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48090 }, { "epoch": 0.23327630754159023, "grad_norm": 6.986613243498141e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48100 }, { "epoch": 0.23332480573442632, "grad_norm": 9.458278071861059e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48110 }, { "epoch": 0.2333733039272624, "grad_norm": 5.3269879174422385e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48120 }, { "epoch": 0.2334218021200985, "grad_norm": 6.499514171309784e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48130 }, { "epoch": 0.2334703003129346, "grad_norm": 4.231515404740094e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48140 }, { "epoch": 0.23351879850577067, "grad_norm": 2.4314806168490577e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48150 }, { "epoch": 0.23356729669860676, "grad_norm": 6.771438165742438e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48160 }, { "epoch": 0.23361579489144285, "grad_norm": 5.206680597780178e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48170 }, { "epoch": 0.23366429308427894, "grad_norm": 3.405883930440723e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48180 }, { "epoch": 0.23371279127711503, "grad_norm": 3.9637338744569206e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48190 }, { "epoch": 0.23376128946995112, "grad_norm": 5.222924670533757e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48200 }, { "epoch": 0.2338097876627872, "grad_norm": 4.571275624698501e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48210 }, { "epoch": 0.2338582858556233, "grad_norm": 5.6928687541812906e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48220 }, { "epoch": 0.23390678404845938, "grad_norm": 6.928826934426979e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48230 }, { "epoch": 0.23395528224129547, "grad_norm": 7.681119740254871e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48240 }, { "epoch": 0.2340037804341316, "grad_norm": 2.917711228178632e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48250 }, { "epoch": 0.23405227862696767, "grad_norm": 6.968684118646706e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48260 }, { "epoch": 0.23410077681980376, "grad_norm": 5.565561167486521e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48270 }, { "epoch": 0.23414927501263985, "grad_norm": 4.813896836708409e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48280 }, { "epoch": 0.23419777320547594, "grad_norm": 3.834484729736687e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48290 }, { "epoch": 0.23424627139831203, "grad_norm": 4.087425153898039e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48300 }, { "epoch": 0.23429476959114812, "grad_norm": 5.2298815944595844e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48310 }, { "epoch": 0.2343432677839842, "grad_norm": 8.79334010051025e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48320 }, { "epoch": 0.2343917659768203, "grad_norm": 3.552622018787588e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48330 }, { "epoch": 0.23444026416965638, "grad_norm": 5.440952222102169e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48340 }, { "epoch": 0.23448876236249247, "grad_norm": 2.255046105403835e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48350 }, { "epoch": 0.23453726055532856, "grad_norm": 4.5222790845400596e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48360 }, { "epoch": 0.23458575874816465, "grad_norm": 1.0395063299029061e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 48370 }, { "epoch": 0.23463425694100074, "grad_norm": 4.928548236193819e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48380 }, { "epoch": 0.23468275513383682, "grad_norm": 4.583555224257907e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48390 }, { "epoch": 0.2347312533266729, "grad_norm": 8.221256564411306e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48400 }, { "epoch": 0.234779751519509, "grad_norm": 3.3030502777364745e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48410 }, { "epoch": 0.2348282497123451, "grad_norm": 6.269005581316378e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48420 }, { "epoch": 0.23487674790518118, "grad_norm": 4.673580278335976e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48430 }, { "epoch": 0.23492524609801727, "grad_norm": 2.1585723430916914e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48440 }, { "epoch": 0.23497374429085335, "grad_norm": 3.615569710291311e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48450 }, { "epoch": 0.23502224248368944, "grad_norm": 2.5723601027038967e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48460 }, { "epoch": 0.23507074067652553, "grad_norm": 4.4446913705087354e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48470 }, { "epoch": 0.23511923886936162, "grad_norm": 3.2790005377592024e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48480 }, { "epoch": 0.2351677370621977, "grad_norm": 4.9394973444805146e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48490 }, { "epoch": 0.23521623525503382, "grad_norm": 4.952774546040928e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48500 }, { "epoch": 0.2352647334478699, "grad_norm": 2.8131479368198598e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48510 }, { "epoch": 0.235313231640706, "grad_norm": 3.575993545723577e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48520 }, { "epoch": 0.2353617298335421, "grad_norm": 5.132366709403868e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48530 }, { "epoch": 0.23541022802637818, "grad_norm": 2.3855410091755402e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48540 }, { "epoch": 0.23545872621921426, "grad_norm": 1.0600548705497204e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 48550 }, { "epoch": 0.23550722441205035, "grad_norm": 3.2862981669268265e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48560 }, { "epoch": 0.23555572260488644, "grad_norm": 5.143429504528285e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48570 }, { "epoch": 0.23560422079772253, "grad_norm": 3.733207165623753e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48580 }, { "epoch": 0.23565271899055862, "grad_norm": 6.661593943135813e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48590 }, { "epoch": 0.2357012171833947, "grad_norm": 1.806808747062405e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48600 }, { "epoch": 0.2357497153762308, "grad_norm": 4.381443119427786e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48610 }, { "epoch": 0.23579821356906688, "grad_norm": 2.484961569848565e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48620 }, { "epoch": 0.23584671176190297, "grad_norm": 6.277851127833856e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48630 }, { "epoch": 0.23589520995473906, "grad_norm": 8.672125773045991e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48640 }, { "epoch": 0.23594370814757515, "grad_norm": 2.7591420703743097e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48650 }, { "epoch": 0.23599220634041124, "grad_norm": 2.848342361971845e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48660 }, { "epoch": 0.23604070453324733, "grad_norm": 5.4867932419710996e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48670 }, { "epoch": 0.2360892027260834, "grad_norm": 1.6072345943030086e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 48680 }, { "epoch": 0.2361377009189195, "grad_norm": 7.572748472739477e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48690 }, { "epoch": 0.2361861991117556, "grad_norm": 2.0491706109737606e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48700 }, { "epoch": 0.23623469730459168, "grad_norm": 3.0980238818756334e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48710 }, { "epoch": 0.23628319549742777, "grad_norm": 5.820056969696452e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48720 }, { "epoch": 0.23633169369026386, "grad_norm": 9.904616860012538e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48730 }, { "epoch": 0.23638019188309994, "grad_norm": 2.1527579718849665e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48740 }, { "epoch": 0.23642869007593603, "grad_norm": 1.364437167694632e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 48750 }, { "epoch": 0.23647718826877215, "grad_norm": 2.3520325242998297e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48760 }, { "epoch": 0.23652568646160824, "grad_norm": 2.8013603881049676e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48770 }, { "epoch": 0.23657418465444432, "grad_norm": 2.9879679175337515e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48780 }, { "epoch": 0.2366226828472804, "grad_norm": 5.642928968541128e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48790 }, { "epoch": 0.2366711810401165, "grad_norm": 4.7772516609256854e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48800 }, { "epoch": 0.2367196792329526, "grad_norm": 8.567828757577445e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48810 }, { "epoch": 0.23676817742578868, "grad_norm": 9.817075152795951e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48820 }, { "epoch": 0.23681667561862477, "grad_norm": 6.593118939690612e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48830 }, { "epoch": 0.23686517381146086, "grad_norm": 5.316104889629969e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48840 }, { "epoch": 0.23691367200429694, "grad_norm": 3.976068896349716e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48850 }, { "epoch": 0.23696217019713303, "grad_norm": 4.4693006628904186e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48860 }, { "epoch": 0.23701066838996912, "grad_norm": 3.223463806989457e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48870 }, { "epoch": 0.2370591665828052, "grad_norm": 4.826161514870364e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48880 }, { "epoch": 0.2371076647756413, "grad_norm": 1.290672884124433e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 48890 }, { "epoch": 0.23715616296847739, "grad_norm": 3.251020785910441e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48900 }, { "epoch": 0.23720466116131347, "grad_norm": 4.2325531524056714e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48910 }, { "epoch": 0.23725315935414956, "grad_norm": 4.2044096204563175e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48920 }, { "epoch": 0.23730165754698565, "grad_norm": 8.573807974698866e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48930 }, { "epoch": 0.23735015573982174, "grad_norm": 2.198348525439542e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48940 }, { "epoch": 0.23739865393265783, "grad_norm": 1.1003148614463498e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 48950 }, { "epoch": 0.23744715212549392, "grad_norm": 5.26410985912662e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48960 }, { "epoch": 0.23749565031833, "grad_norm": 6.751059800080839e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48970 }, { "epoch": 0.2375441485111661, "grad_norm": 1.6503364008713106e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 48980 }, { "epoch": 0.23759264670400218, "grad_norm": 4.224922633966344e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 48990 }, { "epoch": 0.23764114489683827, "grad_norm": 6.416202324999176e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49000 }, { "epoch": 0.23768964308967436, "grad_norm": 3.4487008804262587e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49010 }, { "epoch": 0.23773814128251047, "grad_norm": 2.8080721747869575e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49020 }, { "epoch": 0.23778663947534656, "grad_norm": 2.910985763548979e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49030 }, { "epoch": 0.23783513766818265, "grad_norm": 5.5153602573909666e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49040 }, { "epoch": 0.23788363586101874, "grad_norm": 4.7222705745753046e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49050 }, { "epoch": 0.23793213405385483, "grad_norm": 1.0828792795791742e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49060 }, { "epoch": 0.23798063224669092, "grad_norm": 5.5952913413648275e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49070 }, { "epoch": 0.238029130439527, "grad_norm": 4.189690727685047e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49080 }, { "epoch": 0.2380776286323631, "grad_norm": 3.1511319775745505e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49090 }, { "epoch": 0.23812612682519918, "grad_norm": 5.7602122183197935e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49100 }, { "epoch": 0.23817462501803527, "grad_norm": 4.39087237680269e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49110 }, { "epoch": 0.23822312321087136, "grad_norm": 4.5577031926313794e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49120 }, { "epoch": 0.23827162140370745, "grad_norm": 6.85465053607004e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49130 }, { "epoch": 0.23832011959654353, "grad_norm": 6.481963765736509e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49140 }, { "epoch": 0.23836861778937962, "grad_norm": 8.405928753063563e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49150 }, { "epoch": 0.2384171159822157, "grad_norm": 1.1583387049540761e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49160 }, { "epoch": 0.2384656141750518, "grad_norm": 6.304031785475672e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49170 }, { "epoch": 0.2385141123678879, "grad_norm": 9.47997023104108e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49180 }, { "epoch": 0.23856261056072398, "grad_norm": 7.072812735486878e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49190 }, { "epoch": 0.23861110875356006, "grad_norm": 2.8126605045031283e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49200 }, { "epoch": 0.23865960694639615, "grad_norm": 2.3943815818938674e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49210 }, { "epoch": 0.23870810513923224, "grad_norm": 4.2190261950736385e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49220 }, { "epoch": 0.23875660333206833, "grad_norm": 5.1919645471798503e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49230 }, { "epoch": 0.23880510152490442, "grad_norm": 6.426314769214514e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49240 }, { "epoch": 0.2388535997177405, "grad_norm": 1.0144080420104729e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49250 }, { "epoch": 0.2389020979105766, "grad_norm": 4.292315125553614e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49260 }, { "epoch": 0.2389505961034127, "grad_norm": 5.020591942184183e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49270 }, { "epoch": 0.2389990942962488, "grad_norm": 7.08824359207938e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49280 }, { "epoch": 0.2390475924890849, "grad_norm": 2.3963302453466895e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49290 }, { "epoch": 0.23909609068192098, "grad_norm": 4.150465215957411e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49300 }, { "epoch": 0.23914458887475706, "grad_norm": 4.773299622229388e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49310 }, { "epoch": 0.23919308706759315, "grad_norm": 3.0745951562494156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49320 }, { "epoch": 0.23924158526042924, "grad_norm": 3.247840396625179e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49330 }, { "epoch": 0.23929008345326533, "grad_norm": 2.15100026679238e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49340 }, { "epoch": 0.23933858164610142, "grad_norm": 8.7704094653418e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49350 }, { "epoch": 0.2393870798389375, "grad_norm": 1.1997479987257975e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49360 }, { "epoch": 0.2394355780317736, "grad_norm": 6.18839024468798e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49370 }, { "epoch": 0.23948407622460968, "grad_norm": 4.627348815233745e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49380 }, { "epoch": 0.23953257441744577, "grad_norm": 5.4289195361434395e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49390 }, { "epoch": 0.23958107261028186, "grad_norm": 2.150274802659169e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49400 }, { "epoch": 0.23962957080311795, "grad_norm": 5.922792212231798e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49410 }, { "epoch": 0.23967806899595404, "grad_norm": 4.085964278033316e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49420 }, { "epoch": 0.23972656718879012, "grad_norm": 7.670776369650412e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49430 }, { "epoch": 0.2397750653816262, "grad_norm": 3.6714634887857756e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49440 }, { "epoch": 0.2398235635744623, "grad_norm": 2.18159623699421e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49450 }, { "epoch": 0.2398720617672984, "grad_norm": 6.527448448423456e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49460 }, { "epoch": 0.23992055996013448, "grad_norm": 2.191615600111163e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49470 }, { "epoch": 0.23996905815297057, "grad_norm": 4.8091330029365054e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49480 }, { "epoch": 0.24001755634580665, "grad_norm": 4.099188544159915e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49490 }, { "epoch": 0.24006605453864274, "grad_norm": 3.40075629878811e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49500 }, { "epoch": 0.24011455273147883, "grad_norm": 2.61283297220416e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49510 }, { "epoch": 0.24016305092431492, "grad_norm": 4.06755447102114e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49520 }, { "epoch": 0.24021154911715104, "grad_norm": 2.8164608423253412e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49530 }, { "epoch": 0.24026004730998712, "grad_norm": 3.651700453133344e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49540 }, { "epoch": 0.2403085455028232, "grad_norm": 3.8000667501592034e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49550 }, { "epoch": 0.2403570436956593, "grad_norm": 5.265098224072062e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49560 }, { "epoch": 0.2404055418884954, "grad_norm": 1.0250088422480985e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49570 }, { "epoch": 0.24045404008133148, "grad_norm": 3.8347639730318406e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49580 }, { "epoch": 0.24050253827416757, "grad_norm": 8.873947621168554e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49590 }, { "epoch": 0.24055103646700365, "grad_norm": 1.3074614457764255e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49600 }, { "epoch": 0.24059953465983974, "grad_norm": 6.669188934438353e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49610 }, { "epoch": 0.24064803285267583, "grad_norm": 5.4237034419202246e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49620 }, { "epoch": 0.24069653104551192, "grad_norm": 7.80933575583731e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49630 }, { "epoch": 0.240745029238348, "grad_norm": 9.244219967285972e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49640 }, { "epoch": 0.2407935274311841, "grad_norm": 6.7690749006033e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49650 }, { "epoch": 0.24084202562402018, "grad_norm": 2.3132651350010747e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49660 }, { "epoch": 0.24089052381685627, "grad_norm": 8.077298474518102e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49670 }, { "epoch": 0.24093902200969236, "grad_norm": 6.328725987714279e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49680 }, { "epoch": 0.24098752020252845, "grad_norm": 3.9305589893956494e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49690 }, { "epoch": 0.24103601839536454, "grad_norm": 9.661638245006543e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49700 }, { "epoch": 0.24108451658820063, "grad_norm": 8.31793371958156e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49710 }, { "epoch": 0.24113301478103671, "grad_norm": 5.320055507240795e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49720 }, { "epoch": 0.2411815129738728, "grad_norm": 6.660032880745348e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49730 }, { "epoch": 0.2412300111667089, "grad_norm": 3.1056771376825054e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49740 }, { "epoch": 0.24127850935954498, "grad_norm": 1.92997688941432e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49750 }, { "epoch": 0.24132700755238107, "grad_norm": 4.7422364701787956e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49760 }, { "epoch": 0.24137550574521716, "grad_norm": 9.816474033641498e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49770 }, { "epoch": 0.24142400393805327, "grad_norm": 4.545104204112249e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49780 }, { "epoch": 0.24147250213088936, "grad_norm": 1.0299321218099067e-07, "learning_rate": 0.0002, "loss": 0.0, "step": 49790 }, { "epoch": 0.24152100032372545, "grad_norm": 5.2743860834425504e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49800 }, { "epoch": 0.24156949851656154, "grad_norm": 6.434081711859108e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49810 }, { "epoch": 0.24161799670939763, "grad_norm": 7.825585157661408e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49820 }, { "epoch": 0.24166649490223371, "grad_norm": 2.8906860904953646e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49830 }, { "epoch": 0.2417149930950698, "grad_norm": 3.100122114574333e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49840 }, { "epoch": 0.2417634912879059, "grad_norm": 3.473737208992134e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49850 }, { "epoch": 0.24181198948074198, "grad_norm": 9.920023558152025e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49860 }, { "epoch": 0.24186048767357807, "grad_norm": 4.699575839595127e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49870 }, { "epoch": 0.24190898586641416, "grad_norm": 7.540262458860525e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49880 }, { "epoch": 0.24195748405925024, "grad_norm": 4.180150980914732e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49890 }, { "epoch": 0.24200598225208633, "grad_norm": 9.805444278754294e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49900 }, { "epoch": 0.24205448044492242, "grad_norm": 6.242672156986373e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49910 }, { "epoch": 0.2421029786377585, "grad_norm": 2.8502393334406406e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49920 }, { "epoch": 0.2421514768305946, "grad_norm": 4.7779330714092794e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49930 }, { "epoch": 0.24219997502343069, "grad_norm": 4.3472702770941396e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49940 }, { "epoch": 0.24224847321626677, "grad_norm": 3.289814642926103e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49950 }, { "epoch": 0.24229697140910286, "grad_norm": 6.220294324066344e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49960 }, { "epoch": 0.24234546960193895, "grad_norm": 7.991709338739383e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49970 }, { "epoch": 0.24239396779477504, "grad_norm": 4.327486280431003e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49980 }, { "epoch": 0.24244246598761113, "grad_norm": 3.8003214797299734e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 49990 }, { "epoch": 0.24249096418044722, "grad_norm": 2.952933542132996e-08, "learning_rate": 0.0002, "loss": 0.0, "step": 50000 } ], "logging_steps": 10, "max_steps": 1000000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.684865874748809e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }