| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.1689600865075643, |
| "eval_steps": 500, |
| "global_step": 82500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00010240005242882685, |
| "grad_norm": 1.5689221620559692, |
| "learning_rate": 1.47e-05, |
| "loss": 10.437386474609376, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0002048001048576537, |
| "grad_norm": 1.4314604997634888, |
| "learning_rate": 2.97e-05, |
| "loss": 8.872786865234374, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00030720015728648054, |
| "grad_norm": 2.1900956630706787, |
| "learning_rate": 4.4699999999999996e-05, |
| "loss": 6.67523681640625, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0004096002097153074, |
| "grad_norm": 0.7161903381347656, |
| "learning_rate": 5.97e-05, |
| "loss": 4.3745730590820315, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0005120002621441342, |
| "grad_norm": 1.2864420413970947, |
| "learning_rate": 7.47e-05, |
| "loss": 1.9937155151367187, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0006144003145729611, |
| "grad_norm": 1.5255779027938843, |
| "learning_rate": 8.969999999999998e-05, |
| "loss": 6.841383056640625, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0007168003670017879, |
| "grad_norm": 1.0778907537460327, |
| "learning_rate": 0.00010469999999999998, |
| "loss": 6.193285522460937, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0008192004194306148, |
| "grad_norm": 0.8099711537361145, |
| "learning_rate": 0.0001197, |
| "loss": 6.307914428710937, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0009216004718594416, |
| "grad_norm": 1.3735090494155884, |
| "learning_rate": 0.0001347, |
| "loss": 5.728865966796875, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0010240005242882684, |
| "grad_norm": 1.2599254846572876, |
| "learning_rate": 0.00014969999999999998, |
| "loss": 5.8577117919921875, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0011264005767170954, |
| "grad_norm": 1.0690525770187378, |
| "learning_rate": 0.0001647, |
| "loss": 5.578800048828125, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0012288006291459222, |
| "grad_norm": 0.9692347049713135, |
| "learning_rate": 0.00017969999999999998, |
| "loss": 5.012375183105469, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.001331200681574749, |
| "grad_norm": 1.241825819015503, |
| "learning_rate": 0.0001947, |
| "loss": 6.2367095947265625, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0014336007340035757, |
| "grad_norm": 1.1092249155044556, |
| "learning_rate": 0.00020969999999999997, |
| "loss": 5.6980218505859375, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0015360007864324027, |
| "grad_norm": 0.8965554237365723, |
| "learning_rate": 0.0002247, |
| "loss": 5.374319458007813, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.0016384008388612295, |
| "grad_norm": 1.4790899753570557, |
| "learning_rate": 0.0002397, |
| "loss": 4.950992126464843, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0017408008912900563, |
| "grad_norm": 0.9521295428276062, |
| "learning_rate": 0.00025469999999999996, |
| "loss": 5.40803955078125, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0018432009437188831, |
| "grad_norm": 0.836391806602478, |
| "learning_rate": 0.0002697, |
| "loss": 5.781373291015625, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0019456009961477101, |
| "grad_norm": 0.9251846075057983, |
| "learning_rate": 0.0002847, |
| "loss": 5.027839660644531, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.0020480010485765367, |
| "grad_norm": 0.8701666593551636, |
| "learning_rate": 0.00029969999999999997, |
| "loss": 5.40314208984375, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.002150401101005364, |
| "grad_norm": 1.102386474609375, |
| "learning_rate": 0.0002999999925149585, |
| "loss": 5.350908203125, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.0022528011534341907, |
| "grad_norm": 1.2463473081588745, |
| "learning_rate": 0.0002999999694456937, |
| "loss": 5.666819458007812, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0023552012058630175, |
| "grad_norm": 1.5089119672775269, |
| "learning_rate": 0.00029999993078909046, |
| "loss": 5.699287109375, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0024576012582918443, |
| "grad_norm": 0.8943452835083008, |
| "learning_rate": 0.0002999998765451527, |
| "loss": 5.515684814453125, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.002560001310720671, |
| "grad_norm": 0.9431995749473572, |
| "learning_rate": 0.0002999998067138862, |
| "loss": 5.668785400390625, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.002662401363149498, |
| "grad_norm": 0.7088674902915955, |
| "learning_rate": 0.00029999972129529813, |
| "loss": 5.059076843261718, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.0027648014155783247, |
| "grad_norm": 0.7139531970024109, |
| "learning_rate": 0.00029999962028939744, |
| "loss": 5.189839477539063, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.0028672014680071515, |
| "grad_norm": 0.6557895541191101, |
| "learning_rate": 0.0002999995036961946, |
| "loss": 5.100037841796875, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.0029696015204359783, |
| "grad_norm": 2.214290142059326, |
| "learning_rate": 0.0002999993715157016, |
| "loss": 3.933666076660156, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.0030720015728648055, |
| "grad_norm": 1.1693249940872192, |
| "learning_rate": 0.0002999992237479324, |
| "loss": 5.641339721679688, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.0031744016252936323, |
| "grad_norm": 0.812566876411438, |
| "learning_rate": 0.0002999990603929022, |
| "loss": 4.826256408691406, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.003276801677722459, |
| "grad_norm": 0.8744778037071228, |
| "learning_rate": 0.00029999888145062803, |
| "loss": 5.060762329101562, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.003379201730151286, |
| "grad_norm": 1.4869335889816284, |
| "learning_rate": 0.0002999986869211285, |
| "loss": 5.231287231445313, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.0034816017825801127, |
| "grad_norm": 2.121548652648926, |
| "learning_rate": 0.0002999984768044237, |
| "loss": 5.097483520507812, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.0035840018350089395, |
| "grad_norm": 0.8741556406021118, |
| "learning_rate": 0.00029999825110053565, |
| "loss": 4.697709045410156, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.0036864018874377662, |
| "grad_norm": 0.6771953105926514, |
| "learning_rate": 0.00029999800980948764, |
| "loss": 5.405962524414062, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.003788801939866593, |
| "grad_norm": 0.7090007066726685, |
| "learning_rate": 0.00029999775293130485, |
| "loss": 5.24799560546875, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.0038912019922954203, |
| "grad_norm": 0.561838686466217, |
| "learning_rate": 0.00029999748046601396, |
| "loss": 5.034546813964844, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.003993602044724247, |
| "grad_norm": 1.955099105834961, |
| "learning_rate": 0.0002999971924136432, |
| "loss": 4.816056823730468, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.004096002097153073, |
| "grad_norm": 1.5861859321594238, |
| "learning_rate": 0.00029999688877422264, |
| "loss": 4.836883544921875, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.0041984021495819, |
| "grad_norm": 0.599829375743866, |
| "learning_rate": 0.00029999656954778374, |
| "loss": 4.677350463867188, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.004300802202010728, |
| "grad_norm": 0.7785560488700867, |
| "learning_rate": 0.0002999962347343597, |
| "loss": 4.665549621582032, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.004403202254439555, |
| "grad_norm": 0.7040075659751892, |
| "learning_rate": 0.00029999588433398533, |
| "loss": 4.816753540039063, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.0045056023068683814, |
| "grad_norm": 0.9000102877616882, |
| "learning_rate": 0.00029999551834669695, |
| "loss": 4.776250915527344, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.004608002359297208, |
| "grad_norm": 0.8187811374664307, |
| "learning_rate": 0.0002999951367725327, |
| "loss": 5.544743041992188, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.004710402411726035, |
| "grad_norm": 0.684819757938385, |
| "learning_rate": 0.0002999947396115322, |
| "loss": 5.165157470703125, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.004812802464154862, |
| "grad_norm": 1.125178337097168, |
| "learning_rate": 0.0002999943268637367, |
| "loss": 4.768605651855469, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.004915202516583689, |
| "grad_norm": 0.8499088287353516, |
| "learning_rate": 0.0002999938985291891, |
| "loss": 4.563653869628906, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.005017602569012515, |
| "grad_norm": 0.8239416480064392, |
| "learning_rate": 0.0002999934546079339, |
| "loss": 4.3343331909179685, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.005120002621441342, |
| "grad_norm": 0.9708461761474609, |
| "learning_rate": 0.00029999299510001726, |
| "loss": 4.572106018066406, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.005222402673870169, |
| "grad_norm": 0.5595722794532776, |
| "learning_rate": 0.0002999925200054869, |
| "loss": 3.886677551269531, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.005324802726298996, |
| "grad_norm": 0.843467116355896, |
| "learning_rate": 0.0002999920293243922, |
| "loss": 4.781981506347656, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.005427202778727823, |
| "grad_norm": 0.7127471566200256, |
| "learning_rate": 0.0002999915230567842, |
| "loss": 4.583160400390625, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.005529602831156649, |
| "grad_norm": 1.2107303142547607, |
| "learning_rate": 0.00029999100120271544, |
| "loss": 4.792764587402344, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.005632002883585476, |
| "grad_norm": 0.46370163559913635, |
| "learning_rate": 0.0002999904637622402, |
| "loss": 4.452548522949218, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.005734402936014303, |
| "grad_norm": 0.8558986186981201, |
| "learning_rate": 0.00029998991073541424, |
| "loss": 4.687911376953125, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.00583680298844313, |
| "grad_norm": 0.716712236404419, |
| "learning_rate": 0.0002999893421222951, |
| "loss": 5.1007318115234375, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.0059392030408719565, |
| "grad_norm": 0.6236938238143921, |
| "learning_rate": 0.00029998875792294186, |
| "loss": 4.0649325561523435, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.006041603093300784, |
| "grad_norm": 0.7991392612457275, |
| "learning_rate": 0.0002999881581374152, |
| "loss": 5.119035339355468, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.006144003145729611, |
| "grad_norm": 0.8357495665550232, |
| "learning_rate": 0.00029998754276577757, |
| "loss": 4.757432556152343, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.006246403198158438, |
| "grad_norm": 0.6117859482765198, |
| "learning_rate": 0.0002999869118080927, |
| "loss": 4.448386840820312, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.0063488032505872646, |
| "grad_norm": 0.49256569147109985, |
| "learning_rate": 0.0002999862652644263, |
| "loss": 3.11305419921875, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.006451203303016091, |
| "grad_norm": 0.6232755184173584, |
| "learning_rate": 0.00029998560313484557, |
| "loss": 4.7346923828125, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.006553603355444918, |
| "grad_norm": 0.9806835055351257, |
| "learning_rate": 0.00029998492541941926, |
| "loss": 5.011588745117187, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.006656003407873745, |
| "grad_norm": 0.5504988431930542, |
| "learning_rate": 0.00029998423211821776, |
| "loss": 4.568263549804687, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.006758403460302572, |
| "grad_norm": 1.2172794342041016, |
| "learning_rate": 0.0002999835232313133, |
| "loss": 4.617164306640625, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.0068608035127313985, |
| "grad_norm": 0.8813052773475647, |
| "learning_rate": 0.0002999827987587793, |
| "loss": 4.053099975585938, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.006963203565160225, |
| "grad_norm": 0.9132696986198425, |
| "learning_rate": 0.0002999820587006912, |
| "loss": 3.3842108154296877, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.007065603617589052, |
| "grad_norm": 0.6898446679115295, |
| "learning_rate": 0.0002999813030571258, |
| "loss": 4.640269470214844, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.007168003670017879, |
| "grad_norm": 0.8895163536071777, |
| "learning_rate": 0.0002999805318281617, |
| "loss": 4.337832641601563, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.007270403722446706, |
| "grad_norm": 0.8650217056274414, |
| "learning_rate": 0.000299979745013879, |
| "loss": 4.312217102050782, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.0073728037748755325, |
| "grad_norm": 0.8591002821922302, |
| "learning_rate": 0.0002999789426143595, |
| "loss": 4.517200622558594, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.007475203827304359, |
| "grad_norm": 1.0993435382843018, |
| "learning_rate": 0.0002999781246296866, |
| "loss": 5.017222900390625, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.007577603879733186, |
| "grad_norm": 0.826409101486206, |
| "learning_rate": 0.00029997729105994523, |
| "loss": 5.4449609375, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.007680003932162013, |
| "grad_norm": 0.7336626052856445, |
| "learning_rate": 0.0002999764419052221, |
| "loss": 5.442882080078125, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.0077824039845908405, |
| "grad_norm": 0.8554229140281677, |
| "learning_rate": 0.00029997557716560536, |
| "loss": 5.044765625, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.007884804037019667, |
| "grad_norm": 1.2047715187072754, |
| "learning_rate": 0.0002999746968411849, |
| "loss": 5.347750244140625, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.007987204089448493, |
| "grad_norm": 0.6852602362632751, |
| "learning_rate": 0.00029997380093205227, |
| "loss": 5.431246948242188, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.008089604141877321, |
| "grad_norm": 0.599185585975647, |
| "learning_rate": 0.00029997288943830043, |
| "loss": 5.4587548828125, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.008192004194306147, |
| "grad_norm": 0.6573649644851685, |
| "learning_rate": 0.0002999719623600242, |
| "loss": 5.388607177734375, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.008294404246734974, |
| "grad_norm": 0.8899281024932861, |
| "learning_rate": 0.00029997101969731995, |
| "loss": 5.013424072265625, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.0083968042991638, |
| "grad_norm": 0.7623964548110962, |
| "learning_rate": 0.0002999700614502855, |
| "loss": 5.455863037109375, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.008499204351592628, |
| "grad_norm": 0.6434335112571716, |
| "learning_rate": 0.0002999690876190205, |
| "loss": 4.965211791992187, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.008601604404021456, |
| "grad_norm": 1.0846576690673828, |
| "learning_rate": 0.0002999680982036263, |
| "loss": 5.367398071289062, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.008704004456450282, |
| "grad_norm": 0.687623143196106, |
| "learning_rate": 0.0002999670932042054, |
| "loss": 5.260775146484375, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.00880640450887911, |
| "grad_norm": 0.7438795566558838, |
| "learning_rate": 0.0002999660726208625, |
| "loss": 4.861600341796875, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.008908804561307935, |
| "grad_norm": 0.653516948223114, |
| "learning_rate": 0.0002999650364537035, |
| "loss": 5.213981323242187, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.009011204613736763, |
| "grad_norm": 0.6365879774093628, |
| "learning_rate": 0.0002999639847028362, |
| "loss": 5.282333984375, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.009113604666165589, |
| "grad_norm": 1.073702335357666, |
| "learning_rate": 0.00029996291736836977, |
| "loss": 4.728897705078125, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.009216004718594416, |
| "grad_norm": 0.5726307034492493, |
| "learning_rate": 0.00029996183445041524, |
| "loss": 4.985563354492188, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.009318404771023242, |
| "grad_norm": 0.8428155779838562, |
| "learning_rate": 0.00029996073594908503, |
| "loss": 5.237740478515625, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.00942080482345207, |
| "grad_norm": 0.7983867526054382, |
| "learning_rate": 0.0002999596218644934, |
| "loss": 5.2612847900390625, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.009523204875880896, |
| "grad_norm": 1.4800513982772827, |
| "learning_rate": 0.000299958492196756, |
| "loss": 5.220035400390625, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.009625604928309724, |
| "grad_norm": 0.7891004085540771, |
| "learning_rate": 0.00029995734694599033, |
| "loss": 4.930169677734375, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.00972800498073855, |
| "grad_norm": 0.6847373247146606, |
| "learning_rate": 0.0002999561861123153, |
| "loss": 4.984630126953125, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.009830405033167377, |
| "grad_norm": 0.6594445705413818, |
| "learning_rate": 0.0002999550096958517, |
| "loss": 5.030910034179687, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.009932805085596203, |
| "grad_norm": 0.6435703635215759, |
| "learning_rate": 0.0002999538176967216, |
| "loss": 5.204117431640625, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.01003520513802503, |
| "grad_norm": 0.43691107630729675, |
| "learning_rate": 0.0002999526101150489, |
| "loss": 4.9494412231445315, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.010137605190453857, |
| "grad_norm": 0.839853823184967, |
| "learning_rate": 0.00029995138695095914, |
| "loss": 3.1014248657226564, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.010240005242882684, |
| "grad_norm": 0.8040403723716736, |
| "learning_rate": 0.00029995014820457947, |
| "loss": 5.11622314453125, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.010342405295311512, |
| "grad_norm": 0.6953795552253723, |
| "learning_rate": 0.0002999488938760385, |
| "loss": 5.122266235351563, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.010444805347740338, |
| "grad_norm": 0.5960660576820374, |
| "learning_rate": 0.00029994762396546665, |
| "loss": 4.512597961425781, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.010547205400169166, |
| "grad_norm": 0.7795936465263367, |
| "learning_rate": 0.0002999463384729958, |
| "loss": 4.1439907836914065, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.010649605452597992, |
| "grad_norm": 0.5827996730804443, |
| "learning_rate": 0.0002999450373987597, |
| "loss": 5.13221435546875, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.01075200550502682, |
| "grad_norm": 0.5559226870536804, |
| "learning_rate": 0.0002999437207428934, |
| "loss": 5.330996704101563, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.010854405557455645, |
| "grad_norm": 0.7576444745063782, |
| "learning_rate": 0.0002999423885055338, |
| "loss": 5.0482110595703125, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.010956805609884473, |
| "grad_norm": 0.6038886308670044, |
| "learning_rate": 0.0002999410406868193, |
| "loss": 5.026975708007813, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.011059205662313299, |
| "grad_norm": 0.8441299200057983, |
| "learning_rate": 0.00029993967728688997, |
| "loss": 5.212452392578125, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.011161605714742126, |
| "grad_norm": 0.6785016655921936, |
| "learning_rate": 0.00029993829830588745, |
| "loss": 5.052464599609375, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.011264005767170952, |
| "grad_norm": 0.7248463034629822, |
| "learning_rate": 0.0002999369037439551, |
| "loss": 4.948311157226563, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.01136640581959978, |
| "grad_norm": 1.8698147535324097, |
| "learning_rate": 0.00029993549360123777, |
| "loss": 4.748592529296875, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.011468805872028606, |
| "grad_norm": 0.5474430918693542, |
| "learning_rate": 0.0002999340678778821, |
| "loss": 4.849425659179688, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.011571205924457434, |
| "grad_norm": 0.6169009804725647, |
| "learning_rate": 0.00029993262657403613, |
| "loss": 4.795867919921875, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.01167360597688626, |
| "grad_norm": 0.773813009262085, |
| "learning_rate": 0.0002999311696898497, |
| "loss": 4.561126098632813, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.011776006029315087, |
| "grad_norm": 0.841324508190155, |
| "learning_rate": 0.00029992969722547424, |
| "loss": 4.801204223632812, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.011878406081743913, |
| "grad_norm": 0.6325180530548096, |
| "learning_rate": 0.0002999282091810627, |
| "loss": 5.0141598510742185, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.01198080613417274, |
| "grad_norm": 0.6073687672615051, |
| "learning_rate": 0.00029992670555676964, |
| "loss": 4.727720642089844, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.012083206186601568, |
| "grad_norm": 0.7254152297973633, |
| "learning_rate": 0.00029992518635275147, |
| "loss": 5.180827026367187, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.012185606239030394, |
| "grad_norm": 0.6669420599937439, |
| "learning_rate": 0.000299923651569166, |
| "loss": 5.232777099609375, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.012288006291459222, |
| "grad_norm": 0.9034198522567749, |
| "learning_rate": 0.0002999221012061726, |
| "loss": 4.571735229492187, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.012390406343888048, |
| "grad_norm": 0.9541974663734436, |
| "learning_rate": 0.0002999205352639326, |
| "loss": 4.678871459960938, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.012492806396316876, |
| "grad_norm": 1.4738138914108276, |
| "learning_rate": 0.0002999189537426085, |
| "loss": 4.96472412109375, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.012595206448745701, |
| "grad_norm": 0.7434485554695129, |
| "learning_rate": 0.0002999173566423648, |
| "loss": 5.090062255859375, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.012697606501174529, |
| "grad_norm": 0.5921583771705627, |
| "learning_rate": 0.0002999157439633674, |
| "loss": 4.839577026367188, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.012800006553603355, |
| "grad_norm": 0.5730924606323242, |
| "learning_rate": 0.00029991411570578385, |
| "loss": 4.44057373046875, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.012902406606032183, |
| "grad_norm": 0.6314680576324463, |
| "learning_rate": 0.0002999124718697834, |
| "loss": 4.906407165527344, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.013004806658461009, |
| "grad_norm": 0.5586856603622437, |
| "learning_rate": 0.00029991081245553695, |
| "loss": 4.8386752319335935, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.013107206710889836, |
| "grad_norm": 0.4960859417915344, |
| "learning_rate": 0.0002999091374632168, |
| "loss": 4.7797067260742185, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.013209606763318662, |
| "grad_norm": 0.7504858374595642, |
| "learning_rate": 0.0002999074468929971, |
| "loss": 4.906391906738281, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.01331200681574749, |
| "grad_norm": 0.5791200995445251, |
| "learning_rate": 0.0002999057407450534, |
| "loss": 4.6073193359375, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.013414406868176316, |
| "grad_norm": 1.04066002368927, |
| "learning_rate": 0.00029990401901956314, |
| "loss": 4.697982177734375, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.013516806920605143, |
| "grad_norm": 0.5570167899131775, |
| "learning_rate": 0.0002999022817167052, |
| "loss": 5.063222351074219, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.01361920697303397, |
| "grad_norm": 0.6061655879020691, |
| "learning_rate": 0.00029990052883666004, |
| "loss": 4.329053955078125, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.013721607025462797, |
| "grad_norm": 0.6637709736824036, |
| "learning_rate": 0.0002998987603796099, |
| "loss": 4.776343688964844, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.013824007077891625, |
| "grad_norm": 0.6519717574119568, |
| "learning_rate": 0.0002998969763457385, |
| "loss": 4.839088439941406, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.01392640713032045, |
| "grad_norm": 0.643963098526001, |
| "learning_rate": 0.00029989517673523127, |
| "loss": 4.581628112792969, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.014028807182749278, |
| "grad_norm": 1.4058446884155273, |
| "learning_rate": 0.0002998933615482751, |
| "loss": 4.007187194824219, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.014131207235178104, |
| "grad_norm": 0.7021802067756653, |
| "learning_rate": 0.00029989153078505886, |
| "loss": 4.761097106933594, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.014233607287606932, |
| "grad_norm": 0.6105393171310425, |
| "learning_rate": 0.0002998896844457725, |
| "loss": 5.0122119140625, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.014336007340035758, |
| "grad_norm": 0.7652610540390015, |
| "learning_rate": 0.00029988782253060806, |
| "loss": 4.946090393066406, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.014438407392464585, |
| "grad_norm": 0.7618656754493713, |
| "learning_rate": 0.000299885945039759, |
| "loss": 3.561051025390625, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.014540807444893411, |
| "grad_norm": 0.6516929864883423, |
| "learning_rate": 0.0002998840519734204, |
| "loss": 4.529894409179687, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.014643207497322239, |
| "grad_norm": 1.0100959539413452, |
| "learning_rate": 0.000299882143331789, |
| "loss": 4.72200927734375, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.014745607549751065, |
| "grad_norm": 0.9135130047798157, |
| "learning_rate": 0.0002998802191150631, |
| "loss": 4.017086791992187, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.014848007602179893, |
| "grad_norm": 1.0336369276046753, |
| "learning_rate": 0.0002998782793234427, |
| "loss": 4.969613952636719, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.014950407654608719, |
| "grad_norm": 0.6827586889266968, |
| "learning_rate": 0.0002998763239571293, |
| "loss": 4.958232421875, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.015052807707037546, |
| "grad_norm": 0.8095134496688843, |
| "learning_rate": 0.00029987435301632624, |
| "loss": 4.539352722167969, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.015155207759466372, |
| "grad_norm": 0.811736524105072, |
| "learning_rate": 0.0002998723665012382, |
| "loss": 4.618602905273438, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.0152576078118952, |
| "grad_norm": 0.6750462651252747, |
| "learning_rate": 0.00029987036441207163, |
| "loss": 4.390194702148437, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.015360007864324026, |
| "grad_norm": 0.6136668920516968, |
| "learning_rate": 0.0002998683467490346, |
| "loss": 4.691050109863281, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.015462407916752853, |
| "grad_norm": 0.608397364616394, |
| "learning_rate": 0.0002998663135123368, |
| "loss": 5.00837646484375, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.015564807969181681, |
| "grad_norm": 0.6426307559013367, |
| "learning_rate": 0.0002998642647021895, |
| "loss": 4.924872741699219, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.015667208021610507, |
| "grad_norm": 0.8153278827667236, |
| "learning_rate": 0.00029986220031880557, |
| "loss": 4.830538635253906, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.015769608074039335, |
| "grad_norm": 0.6194471120834351, |
| "learning_rate": 0.0002998601203623995, |
| "loss": 4.807819213867187, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.015872008126468162, |
| "grad_norm": 1.5707075595855713, |
| "learning_rate": 0.00029985802483318755, |
| "loss": 4.509772644042969, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.015974408178896986, |
| "grad_norm": 0.8517248630523682, |
| "learning_rate": 0.0002998559137313874, |
| "loss": 4.2860891723632815, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.016076808231325814, |
| "grad_norm": 1.0736734867095947, |
| "learning_rate": 0.00029985378705721843, |
| "loss": 4.5593634033203125, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.016179208283754642, |
| "grad_norm": 0.6145778894424438, |
| "learning_rate": 0.0002998516448109016, |
| "loss": 4.50625, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.01628160833618347, |
| "grad_norm": 0.7230775356292725, |
| "learning_rate": 0.00029984948699265967, |
| "loss": 4.884090270996094, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.016384008388612294, |
| "grad_norm": 0.7744879722595215, |
| "learning_rate": 0.0002998473136027167, |
| "loss": 4.186481018066406, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.01648640844104112, |
| "grad_norm": 0.7375713586807251, |
| "learning_rate": 0.00029984512464129856, |
| "loss": 4.879469299316407, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.01658880849346995, |
| "grad_norm": 1.0072307586669922, |
| "learning_rate": 0.0002998429201086329, |
| "loss": 4.755104064941406, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.016691208545898777, |
| "grad_norm": 0.9491130113601685, |
| "learning_rate": 0.00029984070000494854, |
| "loss": 4.182529907226563, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.0167936085983276, |
| "grad_norm": 0.9159969687461853, |
| "learning_rate": 0.00029983846433047633, |
| "loss": 4.361718444824219, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.01689600865075643, |
| "grad_norm": 0.9138163328170776, |
| "learning_rate": 0.00029983621308544864, |
| "loss": 4.748040466308594, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.016998408703185256, |
| "grad_norm": 0.7999444603919983, |
| "learning_rate": 0.0002998339462700993, |
| "loss": 4.52157470703125, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.017100808755614084, |
| "grad_norm": 0.732362687587738, |
| "learning_rate": 0.0002998316638846639, |
| "loss": 4.664584045410156, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.01720320880804291, |
| "grad_norm": 0.9679093956947327, |
| "learning_rate": 0.00029982936592937967, |
| "loss": 4.6484066772460935, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.017305608860471736, |
| "grad_norm": 0.7307636141777039, |
| "learning_rate": 0.0002998270524044853, |
| "loss": 4.694376220703125, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.017408008912900563, |
| "grad_norm": 0.7069781422615051, |
| "learning_rate": 0.00029982472331022126, |
| "loss": 4.551060180664063, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.01751040896532939, |
| "grad_norm": 0.764034628868103, |
| "learning_rate": 0.00029982237864682965, |
| "loss": 4.622559814453125, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.01761280901775822, |
| "grad_norm": 0.7239750623703003, |
| "learning_rate": 0.000299820018414554, |
| "loss": 4.617013549804687, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.017715209070187043, |
| "grad_norm": 0.6056758165359497, |
| "learning_rate": 0.0002998176426136396, |
| "loss": 4.456921997070313, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.01781760912261587, |
| "grad_norm": 0.8634012341499329, |
| "learning_rate": 0.0002998152512443334, |
| "loss": 4.55794677734375, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.017920009175044698, |
| "grad_norm": 0.7804837226867676, |
| "learning_rate": 0.00029981284430688384, |
| "loss": 4.680322570800781, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.018022409227473526, |
| "grad_norm": 0.773954451084137, |
| "learning_rate": 0.00029981042180154103, |
| "loss": 4.5744256591796875, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.01812480927990235, |
| "grad_norm": 0.691335916519165, |
| "learning_rate": 0.0002998079837285568, |
| "loss": 4.607868347167969, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.018227209332331178, |
| "grad_norm": 0.4418846368789673, |
| "learning_rate": 0.0002998055300881844, |
| "loss": 4.455259094238281, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.018329609384760005, |
| "grad_norm": 1.0125758647918701, |
| "learning_rate": 0.00029980306088067877, |
| "loss": 3.1990432739257812, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.018432009437188833, |
| "grad_norm": 0.7495264410972595, |
| "learning_rate": 0.00029980057610629664, |
| "loss": 4.650667419433594, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.018534409489617657, |
| "grad_norm": 0.8682289123535156, |
| "learning_rate": 0.0002997980757652961, |
| "loss": 3.851683349609375, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.018636809542046485, |
| "grad_norm": 0.9349716305732727, |
| "learning_rate": 0.000299795559857937, |
| "loss": 4.859715576171875, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.018739209594475312, |
| "grad_norm": 0.7786422967910767, |
| "learning_rate": 0.0002997930283844809, |
| "loss": 4.666428833007813, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.01884160964690414, |
| "grad_norm": 0.7877052426338196, |
| "learning_rate": 0.0002997904813451907, |
| "loss": 4.6610784912109375, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.018944009699332968, |
| "grad_norm": 0.9601690173149109, |
| "learning_rate": 0.00029978791874033114, |
| "loss": 4.808619384765625, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.019046409751761792, |
| "grad_norm": 0.5345655083656311, |
| "learning_rate": 0.0002997853405701684, |
| "loss": 4.262407836914062, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.01914880980419062, |
| "grad_norm": 0.8365965485572815, |
| "learning_rate": 0.00029978274683497067, |
| "loss": 3.8195550537109373, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.019251209856619447, |
| "grad_norm": 0.8324418663978577, |
| "learning_rate": 0.00029978013753500723, |
| "loss": 4.371593933105469, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.019353609909048275, |
| "grad_norm": 0.7757883071899414, |
| "learning_rate": 0.00029977751267054934, |
| "loss": 4.406093444824219, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.0194560099614771, |
| "grad_norm": 0.8704003095626831, |
| "learning_rate": 0.0002997748722418697, |
| "loss": 4.736319885253907, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.019558410013905927, |
| "grad_norm": 0.8212069869041443, |
| "learning_rate": 0.0002997722162492427, |
| "loss": 4.341388549804687, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.019660810066334754, |
| "grad_norm": 0.5836915373802185, |
| "learning_rate": 0.0002997695446929444, |
| "loss": 4.658592529296875, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.019763210118763582, |
| "grad_norm": 0.8792363405227661, |
| "learning_rate": 0.0002997668575732524, |
| "loss": 4.1852349853515625, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.019865610171192406, |
| "grad_norm": 0.6817139387130737, |
| "learning_rate": 0.00029976415489044585, |
| "loss": 4.120821838378906, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.019968010223621234, |
| "grad_norm": 0.9270561337471008, |
| "learning_rate": 0.0002997614366448057, |
| "loss": 4.595604553222656, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.02007041027605006, |
| "grad_norm": 0.7752207517623901, |
| "learning_rate": 0.0002997587028366144, |
| "loss": 4.643276977539062, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.02017281032847889, |
| "grad_norm": 0.6949714422225952, |
| "learning_rate": 0.000299755953466156, |
| "loss": 4.598296203613281, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.020275210380907713, |
| "grad_norm": 0.6971185207366943, |
| "learning_rate": 0.00029975318853371624, |
| "loss": 3.976045837402344, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.02037761043333654, |
| "grad_norm": 0.6620817184448242, |
| "learning_rate": 0.00029975040803958237, |
| "loss": 4.670194396972656, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.02048001048576537, |
| "grad_norm": 0.7390024065971375, |
| "learning_rate": 0.0002997476119840434, |
| "loss": 4.440447998046875, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.020582410538194196, |
| "grad_norm": 1.074389934539795, |
| "learning_rate": 0.0002997448003673899, |
| "loss": 4.406011352539062, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.020684810590623024, |
| "grad_norm": 0.7580602765083313, |
| "learning_rate": 0.000299741973189914, |
| "loss": 4.489655456542969, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.02078721064305185, |
| "grad_norm": 0.8966153860092163, |
| "learning_rate": 0.0002997391304519094, |
| "loss": 4.419082946777344, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.020889610695480676, |
| "grad_norm": 0.8477383255958557, |
| "learning_rate": 0.00029973627215367166, |
| "loss": 4.569579467773438, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.020992010747909504, |
| "grad_norm": 1.0875380039215088, |
| "learning_rate": 0.00029973339829549776, |
| "loss": 4.634755859375, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.02109441080033833, |
| "grad_norm": 1.043662190437317, |
| "learning_rate": 0.00029973050887768625, |
| "loss": 4.522914123535156, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.021196810852767155, |
| "grad_norm": 0.7864259481430054, |
| "learning_rate": 0.0002997276039005375, |
| "loss": 4.525141296386718, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.021299210905195983, |
| "grad_norm": 0.7917724251747131, |
| "learning_rate": 0.00029972468336435335, |
| "loss": 4.140654602050781, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.02140161095762481, |
| "grad_norm": 0.8878633975982666, |
| "learning_rate": 0.0002997217472694372, |
| "loss": 4.351778564453125, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.02150401101005364, |
| "grad_norm": 0.8213766813278198, |
| "learning_rate": 0.0002997187956160943, |
| "loss": 4.426820068359375, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.021606411062482463, |
| "grad_norm": 0.8385847210884094, |
| "learning_rate": 0.0002997158284046313, |
| "loss": 4.410148315429687, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.02170881111491129, |
| "grad_norm": 1.029899001121521, |
| "learning_rate": 0.0002997128456353565, |
| "loss": 4.456363830566406, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.021811211167340118, |
| "grad_norm": 0.8777541518211365, |
| "learning_rate": 0.0002997098473085799, |
| "loss": 4.56017578125, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.021913611219768946, |
| "grad_norm": 0.7988455891609192, |
| "learning_rate": 0.0002997068334246131, |
| "loss": 4.490418701171875, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.02201601127219777, |
| "grad_norm": 1.208889126777649, |
| "learning_rate": 0.00029970380398376917, |
| "loss": 4.553769836425781, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.022118411324626597, |
| "grad_norm": 1.0305062532424927, |
| "learning_rate": 0.0002997007589863631, |
| "loss": 4.295799865722656, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.022220811377055425, |
| "grad_norm": 0.8051795363426208, |
| "learning_rate": 0.00029969769843271116, |
| "loss": 4.561275329589844, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.022323211429484253, |
| "grad_norm": 0.8639199733734131, |
| "learning_rate": 0.00029969462232313154, |
| "loss": 4.470157165527343, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.02242561148191308, |
| "grad_norm": 0.7574597597122192, |
| "learning_rate": 0.00029969153065794374, |
| "loss": 4.476951599121094, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.022528011534341905, |
| "grad_norm": 0.787169337272644, |
| "learning_rate": 0.00029968842343746906, |
| "loss": 4.459609375, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.022630411586770732, |
| "grad_norm": 0.7591275572776794, |
| "learning_rate": 0.0002996853006620305, |
| "loss": 4.777853393554688, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.02273281163919956, |
| "grad_norm": 0.9186727404594421, |
| "learning_rate": 0.0002996821623319524, |
| "loss": 4.658177185058594, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.022835211691628388, |
| "grad_norm": 0.7670950293540955, |
| "learning_rate": 0.0002996790084475611, |
| "loss": 4.718930053710937, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.022937611744057212, |
| "grad_norm": 0.9697529077529907, |
| "learning_rate": 0.00029967583900918413, |
| "loss": 4.4181521606445315, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.02304001179648604, |
| "grad_norm": 1.807626724243164, |
| "learning_rate": 0.00029967265401715083, |
| "loss": 4.645519104003906, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.023142411848914867, |
| "grad_norm": 0.8117107152938843, |
| "learning_rate": 0.00029966945347179236, |
| "loss": 3.835715637207031, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.023244811901343695, |
| "grad_norm": 1.2005183696746826, |
| "learning_rate": 0.00029966623737344124, |
| "loss": 4.443558959960938, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.02334721195377252, |
| "grad_norm": 0.8746537566184998, |
| "learning_rate": 0.0002996630057224316, |
| "loss": 4.395650024414063, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.023449612006201347, |
| "grad_norm": 0.7293654680252075, |
| "learning_rate": 0.00029965975851909934, |
| "loss": 4.513606262207031, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.023552012058630174, |
| "grad_norm": 0.7779085636138916, |
| "learning_rate": 0.00029965649576378184, |
| "loss": 4.524747009277344, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.023654412111059002, |
| "grad_norm": 1.0146737098693848, |
| "learning_rate": 0.00029965321745681816, |
| "loss": 4.670032348632812, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.023756812163487826, |
| "grad_norm": 0.9226559400558472, |
| "learning_rate": 0.00029964992359854896, |
| "loss": 4.319842529296875, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.023859212215916654, |
| "grad_norm": 0.729659378528595, |
| "learning_rate": 0.0002996466141893166, |
| "loss": 4.3390591430664065, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.02396161226834548, |
| "grad_norm": 0.8851988315582275, |
| "learning_rate": 0.00029964328922946486, |
| "loss": 4.193225708007812, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.02406401232077431, |
| "grad_norm": 1.142880916595459, |
| "learning_rate": 0.0002996399487193393, |
| "loss": 4.7212896728515625, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.024166412373203137, |
| "grad_norm": 0.6688424944877625, |
| "learning_rate": 0.0002996365926592871, |
| "loss": 4.5033807373046875, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.02426881242563196, |
| "grad_norm": 0.89569491147995, |
| "learning_rate": 0.00029963322104965693, |
| "loss": 4.241100463867188, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.02437121247806079, |
| "grad_norm": 0.8132964372634888, |
| "learning_rate": 0.0002996298338907992, |
| "loss": 4.217136535644531, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.024473612530489616, |
| "grad_norm": 1.4552931785583496, |
| "learning_rate": 0.00029962643118306597, |
| "loss": 4.451352844238281, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.024576012582918444, |
| "grad_norm": 0.7032333612442017, |
| "learning_rate": 0.00029962301292681066, |
| "loss": 3.709466857910156, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.024678412635347268, |
| "grad_norm": 0.7736782431602478, |
| "learning_rate": 0.0002996195791223886, |
| "loss": 3.931116027832031, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.024780812687776096, |
| "grad_norm": 1.0214853286743164, |
| "learning_rate": 0.0002996161297701566, |
| "loss": 4.091096496582031, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.024883212740204923, |
| "grad_norm": 0.7319433093070984, |
| "learning_rate": 0.00029961266487047307, |
| "loss": 4.754253234863281, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.02498561279263375, |
| "grad_norm": 0.7848948240280151, |
| "learning_rate": 0.00029960918442369804, |
| "loss": 4.210378723144531, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.025088012845062575, |
| "grad_norm": 0.8420546650886536, |
| "learning_rate": 0.00029960568843019327, |
| "loss": 4.331927185058594, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.025190412897491403, |
| "grad_norm": 0.7843689322471619, |
| "learning_rate": 0.00029960217689032205, |
| "loss": 4.491570129394531, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.02529281294992023, |
| "grad_norm": 1.0013247728347778, |
| "learning_rate": 0.0002995986498044491, |
| "loss": 4.356235961914063, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.025395213002349058, |
| "grad_norm": 0.8285472989082336, |
| "learning_rate": 0.0002995951071729412, |
| "loss": 4.19695556640625, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.025497613054777882, |
| "grad_norm": 0.8935615420341492, |
| "learning_rate": 0.0002995915489961663, |
| "loss": 4.556292724609375, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.02560001310720671, |
| "grad_norm": 1.1061961650848389, |
| "learning_rate": 0.0002995879752744942, |
| "loss": 4.260919799804688, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.025702413159635538, |
| "grad_norm": 0.7796922922134399, |
| "learning_rate": 0.00029958438600829633, |
| "loss": 3.7681890869140626, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.025804813212064365, |
| "grad_norm": 0.9937464594841003, |
| "learning_rate": 0.0002995807811979456, |
| "loss": 4.396112670898438, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.025907213264493193, |
| "grad_norm": 0.9796547889709473, |
| "learning_rate": 0.0002995771608438166, |
| "loss": 4.378516540527344, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.026009613316922017, |
| "grad_norm": 0.9051157236099243, |
| "learning_rate": 0.00029957352494628563, |
| "loss": 4.480902404785156, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.026112013369350845, |
| "grad_norm": 0.706322968006134, |
| "learning_rate": 0.0002995698735057304, |
| "loss": 4.157791442871094, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.026214413421779673, |
| "grad_norm": 1.033637285232544, |
| "learning_rate": 0.0002995662065225304, |
| "loss": 4.6359164428710935, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.0263168134742085, |
| "grad_norm": 0.9319335222244263, |
| "learning_rate": 0.00029956252399706673, |
| "loss": 4.510284423828125, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.026419213526637324, |
| "grad_norm": 0.887332022190094, |
| "learning_rate": 0.000299558825929722, |
| "loss": 4.224294738769531, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.026521613579066152, |
| "grad_norm": 0.7545831203460693, |
| "learning_rate": 0.0002995551123208805, |
| "loss": 3.612664794921875, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.02662401363149498, |
| "grad_norm": 1.4527435302734375, |
| "learning_rate": 0.0002995513831709281, |
| "loss": 3.0348556518554686, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.026726413683923807, |
| "grad_norm": 0.826316237449646, |
| "learning_rate": 0.00029954763848025244, |
| "loss": 3.7530322265625, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.02682881373635263, |
| "grad_norm": 0.7737396955490112, |
| "learning_rate": 0.0002995438782492426, |
| "loss": 4.3491796875, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.02693121378878146, |
| "grad_norm": 0.7360561490058899, |
| "learning_rate": 0.0002995401024782892, |
| "loss": 4.23507568359375, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.027033613841210287, |
| "grad_norm": 1.048795223236084, |
| "learning_rate": 0.00029953631116778483, |
| "loss": 4.128821716308594, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.027136013893639115, |
| "grad_norm": 0.744465172290802, |
| "learning_rate": 0.00029953250431812326, |
| "loss": 4.229864501953125, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.02723841394606794, |
| "grad_norm": 1.0225343704223633, |
| "learning_rate": 0.0002995286819297002, |
| "loss": 4.329259033203125, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.027340813998496766, |
| "grad_norm": 0.8426514863967896, |
| "learning_rate": 0.0002995248440029128, |
| "loss": 4.405516662597656, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.027443214050925594, |
| "grad_norm": 0.8175310492515564, |
| "learning_rate": 0.00029952099053815996, |
| "loss": 4.2612826538085935, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.027545614103354422, |
| "grad_norm": 0.9133870601654053, |
| "learning_rate": 0.000299517121535842, |
| "loss": 4.32334228515625, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.02764801415578325, |
| "grad_norm": 0.9261609315872192, |
| "learning_rate": 0.00029951323699636107, |
| "loss": 4.267542114257813, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.027750414208212074, |
| "grad_norm": 0.964561402797699, |
| "learning_rate": 0.00029950933692012076, |
| "loss": 4.246123657226563, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.0278528142606409, |
| "grad_norm": 1.1370861530303955, |
| "learning_rate": 0.00029950542130752634, |
| "loss": 4.350406188964843, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.02795521431306973, |
| "grad_norm": 0.8274940848350525, |
| "learning_rate": 0.00029950149015898483, |
| "loss": 4.124059448242187, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.028057614365498557, |
| "grad_norm": 1.0486522912979126, |
| "learning_rate": 0.0002994975434749046, |
| "loss": 4.241673278808594, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.02816001441792738, |
| "grad_norm": 0.8022660613059998, |
| "learning_rate": 0.0002994935812556958, |
| "loss": 3.647921447753906, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.02826241447035621, |
| "grad_norm": 1.1589747667312622, |
| "learning_rate": 0.00029948960350177026, |
| "loss": 4.2052005004882815, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.028364814522785036, |
| "grad_norm": 0.6878979802131653, |
| "learning_rate": 0.0002994856102135412, |
| "loss": 4.0764639282226565, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.028467214575213864, |
| "grad_norm": 1.299386739730835, |
| "learning_rate": 0.0002994816013914236, |
| "loss": 3.8260293579101563, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.028569614627642688, |
| "grad_norm": 0.7897019982337952, |
| "learning_rate": 0.0002994775770358342, |
| "loss": 4.474502258300781, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.028672014680071516, |
| "grad_norm": 1.031049132347107, |
| "learning_rate": 0.000299473537147191, |
| "loss": 4.189122924804687, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.028774414732500343, |
| "grad_norm": 1.224804401397705, |
| "learning_rate": 0.0002994694817259139, |
| "loss": 4.223143615722656, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.02887681478492917, |
| "grad_norm": 0.8684813380241394, |
| "learning_rate": 0.00029946541077242433, |
| "loss": 4.23610107421875, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.028979214837357995, |
| "grad_norm": 0.6440140008926392, |
| "learning_rate": 0.0002994613242871453, |
| "loss": 3.841741638183594, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.029081614889786823, |
| "grad_norm": 0.49674278497695923, |
| "learning_rate": 0.0002994572222705014, |
| "loss": 2.3330259704589844, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.02918401494221565, |
| "grad_norm": 0.8202585577964783, |
| "learning_rate": 0.00029945310472291906, |
| "loss": 3.3214230346679687, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.029286414994644478, |
| "grad_norm": 0.9601882100105286, |
| "learning_rate": 0.00029944897164482597, |
| "loss": 4.437399291992188, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.029388815047073306, |
| "grad_norm": 0.8373337388038635, |
| "learning_rate": 0.00029944482303665175, |
| "loss": 4.476743469238281, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.02949121509950213, |
| "grad_norm": 0.7051481008529663, |
| "learning_rate": 0.0002994406588988274, |
| "loss": 4.182169189453125, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.029593615151930958, |
| "grad_norm": 1.0870895385742188, |
| "learning_rate": 0.00029943647923178575, |
| "loss": 4.3550872802734375, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.029696015204359785, |
| "grad_norm": 0.768278181552887, |
| "learning_rate": 0.00029943228403596107, |
| "loss": 4.4534228515625, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.029798415256788613, |
| "grad_norm": 0.8001137971878052, |
| "learning_rate": 0.00029942807331178933, |
| "loss": 4.255840148925781, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.029900815309217437, |
| "grad_norm": 0.779834508895874, |
| "learning_rate": 0.000299423847059708, |
| "loss": 4.268035888671875, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.030003215361646265, |
| "grad_norm": 0.7155870199203491, |
| "learning_rate": 0.00029941960528015644, |
| "loss": 3.9607696533203125, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.030105615414075092, |
| "grad_norm": 0.8414117693901062, |
| "learning_rate": 0.0002994153479735753, |
| "loss": 3.9243670654296876, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.03020801546650392, |
| "grad_norm": 1.1119954586029053, |
| "learning_rate": 0.00029941107514040694, |
| "loss": 4.47902099609375, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.030310415518932744, |
| "grad_norm": 1.0905205011367798, |
| "learning_rate": 0.00029940678678109546, |
| "loss": 4.391621398925781, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.030412815571361572, |
| "grad_norm": 0.9594521522521973, |
| "learning_rate": 0.00029940248289608655, |
| "loss": 4.156022644042968, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.0305152156237904, |
| "grad_norm": 0.830136775970459, |
| "learning_rate": 0.0002993981634858273, |
| "loss": 4.088116760253906, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.030617615676219227, |
| "grad_norm": 0.7149996161460876, |
| "learning_rate": 0.00029939382855076664, |
| "loss": 3.857545166015625, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.03072001572864805, |
| "grad_norm": 0.8593119978904724, |
| "learning_rate": 0.0002993894780913551, |
| "loss": 4.10996826171875, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.03082241578107688, |
| "grad_norm": 0.8296166658401489, |
| "learning_rate": 0.0002993851121080446, |
| "loss": 4.353337097167969, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.030924815833505707, |
| "grad_norm": 0.7708966732025146, |
| "learning_rate": 0.00029938073060128896, |
| "loss": 4.261842651367187, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.031027215885934534, |
| "grad_norm": 0.6590582132339478, |
| "learning_rate": 0.00029937633357154345, |
| "loss": 3.885545349121094, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.031129615938363362, |
| "grad_norm": 1.0012860298156738, |
| "learning_rate": 0.000299371921019265, |
| "loss": 4.263138427734375, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.031232015990792186, |
| "grad_norm": 0.9702419638633728, |
| "learning_rate": 0.00029936749294491214, |
| "loss": 4.242536926269532, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.031334416043221014, |
| "grad_norm": 1.740096092224121, |
| "learning_rate": 0.000299363049348945, |
| "loss": 3.794849853515625, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.03143681609564984, |
| "grad_norm": 0.8641059994697571, |
| "learning_rate": 0.0002993585902318254, |
| "loss": 3.7392898559570313, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.03153921614807867, |
| "grad_norm": 0.7307964563369751, |
| "learning_rate": 0.0002993541155940166, |
| "loss": 4.284304809570313, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.0316416162005075, |
| "grad_norm": 0.8395029902458191, |
| "learning_rate": 0.0002993496254359837, |
| "loss": 3.666776428222656, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.031744016252936325, |
| "grad_norm": 0.88369220495224, |
| "learning_rate": 0.00029934511975819323, |
| "loss": 4.232069396972657, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.031846416305365145, |
| "grad_norm": 0.900976836681366, |
| "learning_rate": 0.00029934059856111337, |
| "loss": 4.181927490234375, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.03194881635779397, |
| "grad_norm": 0.8746826648712158, |
| "learning_rate": 0.00029933606184521404, |
| "loss": 4.177504577636719, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.0320512164102228, |
| "grad_norm": 0.9220513105392456, |
| "learning_rate": 0.0002993315096109666, |
| "loss": 4.219546813964843, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.03215361646265163, |
| "grad_norm": 0.9001684784889221, |
| "learning_rate": 0.00029932694185884416, |
| "loss": 4.161190490722657, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.032256016515080456, |
| "grad_norm": 1.1615084409713745, |
| "learning_rate": 0.0002993223585893213, |
| "loss": 4.272937316894531, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.032358416567509284, |
| "grad_norm": 0.9227635860443115, |
| "learning_rate": 0.0002993177598028743, |
| "loss": 4.0247500610351565, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.03246081661993811, |
| "grad_norm": 0.9501990675926208, |
| "learning_rate": 0.0002993131454999812, |
| "loss": 3.6547119140625, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.03256321667236694, |
| "grad_norm": 0.8894864320755005, |
| "learning_rate": 0.0002993085156811213, |
| "loss": 4.238618469238281, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.03266561672479577, |
| "grad_norm": 1.1804680824279785, |
| "learning_rate": 0.0002993038703467758, |
| "loss": 4.274075317382812, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.03276801677722459, |
| "grad_norm": 0.9597388505935669, |
| "learning_rate": 0.00029929920949742743, |
| "loss": 2.690977783203125, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.032870416829653415, |
| "grad_norm": 0.8713410496711731, |
| "learning_rate": 0.0002992945331335605, |
| "loss": 4.216771850585937, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.03297281688208224, |
| "grad_norm": 0.7275038361549377, |
| "learning_rate": 0.000299289841255661, |
| "loss": 3.7311639404296875, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.03307521693451107, |
| "grad_norm": 0.987648069858551, |
| "learning_rate": 0.0002992851338642164, |
| "loss": 4.2468328857421875, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.0331776169869399, |
| "grad_norm": 0.8776699900627136, |
| "learning_rate": 0.00029928041095971593, |
| "loss": 4.107083435058594, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.033280017039368726, |
| "grad_norm": 1.0074553489685059, |
| "learning_rate": 0.00029927567254265037, |
| "loss": 4.185172119140625, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.03338241709179755, |
| "grad_norm": 0.8109734058380127, |
| "learning_rate": 0.00029927091861351216, |
| "loss": 4.268891296386719, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.03348481714422638, |
| "grad_norm": 1.1346583366394043, |
| "learning_rate": 0.00029926614917279523, |
| "loss": 4.282049865722656, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.0335872171966552, |
| "grad_norm": 0.8583949208259583, |
| "learning_rate": 0.0002992613642209952, |
| "loss": 4.328241577148438, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.03368961724908403, |
| "grad_norm": 0.8398747444152832, |
| "learning_rate": 0.0002992565637586094, |
| "loss": 4.186492004394531, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.03379201730151286, |
| "grad_norm": 0.643873929977417, |
| "learning_rate": 0.0002992517477861366, |
| "loss": 3.162231140136719, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.033894417353941685, |
| "grad_norm": 0.9688578248023987, |
| "learning_rate": 0.00029924691630407724, |
| "loss": 4.235280151367188, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.03399681740637051, |
| "grad_norm": 0.8266287446022034, |
| "learning_rate": 0.0002992420693129334, |
| "loss": 4.479638977050781, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.03409921745879934, |
| "grad_norm": 0.8200719356536865, |
| "learning_rate": 0.0002992372068132088, |
| "loss": 4.379118957519531, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.03420161751122817, |
| "grad_norm": 0.9193712472915649, |
| "learning_rate": 0.00029923232880540865, |
| "loss": 4.209988708496094, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.034304017563656995, |
| "grad_norm": 0.9132387638092041, |
| "learning_rate": 0.0002992274352900399, |
| "loss": 4.341851501464844, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.03440641761608582, |
| "grad_norm": 1.0033169984817505, |
| "learning_rate": 0.0002992225262676111, |
| "loss": 4.356620483398437, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.034508817668514644, |
| "grad_norm": 1.109008550643921, |
| "learning_rate": 0.0002992176017386323, |
| "loss": 4.189815368652344, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.03461121772094347, |
| "grad_norm": 1.2428394556045532, |
| "learning_rate": 0.00029921266170361533, |
| "loss": 4.286259460449219, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.0347136177733723, |
| "grad_norm": 0.9120133519172668, |
| "learning_rate": 0.0002992077061630734, |
| "loss": 4.392665405273437, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.03481601782580113, |
| "grad_norm": 1.2237519025802612, |
| "learning_rate": 0.0002992027351175216, |
| "loss": 4.461217041015625, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.034918417878229954, |
| "grad_norm": 0.9254854917526245, |
| "learning_rate": 0.00029919774856747636, |
| "loss": 4.2495333862304685, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.03502081793065878, |
| "grad_norm": 1.204923391342163, |
| "learning_rate": 0.000299192746513456, |
| "loss": 4.237576293945312, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.03512321798308761, |
| "grad_norm": 0.8846333026885986, |
| "learning_rate": 0.0002991877289559803, |
| "loss": 3.958520812988281, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.03522561803551644, |
| "grad_norm": 0.8742989897727966, |
| "learning_rate": 0.00029918269589557055, |
| "loss": 4.097115173339843, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.03532801808794526, |
| "grad_norm": 0.9790547490119934, |
| "learning_rate": 0.0002991776473327499, |
| "loss": 4.068385314941406, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.035430418140374086, |
| "grad_norm": 0.8808755278587341, |
| "learning_rate": 0.0002991725832680428, |
| "loss": 4.071025390625, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.03553281819280291, |
| "grad_norm": 0.9796196818351746, |
| "learning_rate": 0.00029916750370197567, |
| "loss": 3.7829425048828127, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.03563521824523174, |
| "grad_norm": 0.9726704955101013, |
| "learning_rate": 0.00029916240863507625, |
| "loss": 4.105780334472656, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.03573761829766057, |
| "grad_norm": 1.0631580352783203, |
| "learning_rate": 0.000299157298067874, |
| "loss": 4.146686401367187, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.035840018350089396, |
| "grad_norm": 0.8494559526443481, |
| "learning_rate": 0.0002991521720009001, |
| "loss": 4.303363342285156, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.035942418402518224, |
| "grad_norm": 1.3400248289108276, |
| "learning_rate": 0.00029914703043468704, |
| "loss": 4.124955749511718, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.03604481845494705, |
| "grad_norm": 1.2535253763198853, |
| "learning_rate": 0.00029914187336976925, |
| "loss": 3.625634765625, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.03614721850737588, |
| "grad_norm": 0.9625725746154785, |
| "learning_rate": 0.0002991367008066826, |
| "loss": 4.224259948730468, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.0362496185598047, |
| "grad_norm": 0.9419931769371033, |
| "learning_rate": 0.00029913151274596456, |
| "loss": 4.3089794921875, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.03635201861223353, |
| "grad_norm": 1.2326748371124268, |
| "learning_rate": 0.0002991263091881543, |
| "loss": 4.07185791015625, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.036454418664662355, |
| "grad_norm": 0.9051257967948914, |
| "learning_rate": 0.00029912109013379253, |
| "loss": 4.346282958984375, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.03655681871709118, |
| "grad_norm": 0.8675338625907898, |
| "learning_rate": 0.0002991158555834216, |
| "loss": 4.14196044921875, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.03665921876952001, |
| "grad_norm": 1.7800242900848389, |
| "learning_rate": 0.0002991106055375854, |
| "loss": 4.262186279296875, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.03676161882194884, |
| "grad_norm": 0.8730024099349976, |
| "learning_rate": 0.0002991053399968296, |
| "loss": 3.647480163574219, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.036864018874377666, |
| "grad_norm": 0.8715499639511108, |
| "learning_rate": 0.0002991000589617013, |
| "loss": 3.8062033081054687, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.036966418926806494, |
| "grad_norm": 1.1045186519622803, |
| "learning_rate": 0.0002990947624327493, |
| "loss": 3.142933349609375, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.037068818979235314, |
| "grad_norm": 0.9436252117156982, |
| "learning_rate": 0.000299089450410524, |
| "loss": 3.2230126953125, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.03717121903166414, |
| "grad_norm": 0.7957382798194885, |
| "learning_rate": 0.00029908412289557737, |
| "loss": 4.389481811523438, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.03727361908409297, |
| "grad_norm": 1.0775970220565796, |
| "learning_rate": 0.0002990787798884631, |
| "loss": 3.8384576416015626, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.0373760191365218, |
| "grad_norm": 0.9266685843467712, |
| "learning_rate": 0.00029907342138973627, |
| "loss": 4.209334106445312, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.037478419188950625, |
| "grad_norm": 0.9169478416442871, |
| "learning_rate": 0.00029906804739995385, |
| "loss": 4.067582092285156, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.03758081924137945, |
| "grad_norm": 0.8588764071464539, |
| "learning_rate": 0.0002990626579196742, |
| "loss": 4.140736694335938, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.03768321929380828, |
| "grad_norm": 1.0396775007247925, |
| "learning_rate": 0.0002990572529494574, |
| "loss": 4.312765502929688, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.03778561934623711, |
| "grad_norm": 1.0524662733078003, |
| "learning_rate": 0.0002990518324898652, |
| "loss": 3.2222711181640626, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.037888019398665936, |
| "grad_norm": 0.8703554272651672, |
| "learning_rate": 0.00029904639654146066, |
| "loss": 4.180811462402343, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.037990419451094756, |
| "grad_norm": 2.620311737060547, |
| "learning_rate": 0.00029904094510480885, |
| "loss": 4.130848388671875, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.038092819503523584, |
| "grad_norm": 0.8157406449317932, |
| "learning_rate": 0.0002990354781804762, |
| "loss": 3.6872372436523437, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.03819521955595241, |
| "grad_norm": 0.8512464165687561, |
| "learning_rate": 0.0002990299957690308, |
| "loss": 4.433642883300781, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.03829761960838124, |
| "grad_norm": 0.9459244012832642, |
| "learning_rate": 0.0002990244978710423, |
| "loss": 4.312282104492187, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.03840001966081007, |
| "grad_norm": 0.8191068768501282, |
| "learning_rate": 0.0002990189844870821, |
| "loss": 4.3835546875, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.038502419713238895, |
| "grad_norm": 0.9797852039337158, |
| "learning_rate": 0.0002990134556177231, |
| "loss": 4.277929077148437, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.03860481976566772, |
| "grad_norm": 0.957114040851593, |
| "learning_rate": 0.00029900791126353984, |
| "loss": 4.525142822265625, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.03870721981809655, |
| "grad_norm": 0.9237158894538879, |
| "learning_rate": 0.0002990023514251085, |
| "loss": 3.7692413330078125, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.03880961987052537, |
| "grad_norm": 1.055321455001831, |
| "learning_rate": 0.0002989967761030067, |
| "loss": 4.0058810424804685, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.0389120199229542, |
| "grad_norm": 0.9850941896438599, |
| "learning_rate": 0.000298991185297814, |
| "loss": 3.8927227783203127, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.039014419975383026, |
| "grad_norm": 0.8424584269523621, |
| "learning_rate": 0.0002989855790101112, |
| "loss": 4.3304986572265625, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.039116820027811854, |
| "grad_norm": 0.8309029936790466, |
| "learning_rate": 0.00029897995724048105, |
| "loss": 4.19474609375, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.03921922008024068, |
| "grad_norm": 0.8734010457992554, |
| "learning_rate": 0.00029897431998950763, |
| "loss": 4.056589965820312, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.03932162013266951, |
| "grad_norm": 1.723552942276001, |
| "learning_rate": 0.0002989686672577767, |
| "loss": 4.061507568359375, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.03942402018509834, |
| "grad_norm": 1.0202237367630005, |
| "learning_rate": 0.0002989629990458757, |
| "loss": 3.8971566772460937, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.039526420237527164, |
| "grad_norm": 1.2921315431594849, |
| "learning_rate": 0.00029895731535439367, |
| "loss": 3.0908432006835938, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.03962882028995599, |
| "grad_norm": 1.0007706880569458, |
| "learning_rate": 0.00029895161618392126, |
| "loss": 3.4613546752929687, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.03973122034238481, |
| "grad_norm": 1.0438216924667358, |
| "learning_rate": 0.00029894590153505066, |
| "loss": 3.344393615722656, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.03983362039481364, |
| "grad_norm": 1.0282576084136963, |
| "learning_rate": 0.0002989401714083757, |
| "loss": 3.807875671386719, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.03993602044724247, |
| "grad_norm": 1.20839262008667, |
| "learning_rate": 0.00029893442580449187, |
| "loss": 4.143163452148437, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.040038420499671296, |
| "grad_norm": 1.1626482009887695, |
| "learning_rate": 0.0002989286647239962, |
| "loss": 4.075806884765625, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.04014082055210012, |
| "grad_norm": 0.7632113695144653, |
| "learning_rate": 0.0002989228881674874, |
| "loss": 4.186883239746094, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.04024322060452895, |
| "grad_norm": 0.8571646213531494, |
| "learning_rate": 0.00029891709613556565, |
| "loss": 3.8722219848632813, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.04034562065695778, |
| "grad_norm": 1.4133912324905396, |
| "learning_rate": 0.0002989112886288329, |
| "loss": 3.877001953125, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.040448020709386606, |
| "grad_norm": 0.9766141176223755, |
| "learning_rate": 0.0002989054656478927, |
| "loss": 3.9540411376953126, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.04055042076181543, |
| "grad_norm": 0.8429685235023499, |
| "learning_rate": 0.00029889962719335003, |
| "loss": 4.412438049316406, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.040652820814244255, |
| "grad_norm": 0.7656176686286926, |
| "learning_rate": 0.0002988937732658116, |
| "loss": 4.269136657714844, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.04075522086667308, |
| "grad_norm": 1.1075332164764404, |
| "learning_rate": 0.0002988879038658859, |
| "loss": 4.419913330078125, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.04085762091910191, |
| "grad_norm": 0.8199209570884705, |
| "learning_rate": 0.0002988820189941826, |
| "loss": 4.36384765625, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.04096002097153074, |
| "grad_norm": 0.8144904375076294, |
| "learning_rate": 0.00029887611865131344, |
| "loss": 4.030648803710937, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.041062421023959565, |
| "grad_norm": 0.9372329711914062, |
| "learning_rate": 0.00029887020283789147, |
| "loss": 4.1174404907226565, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.04116482107638839, |
| "grad_norm": 0.8546763062477112, |
| "learning_rate": 0.0002988642715545314, |
| "loss": 4.441152648925781, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.04126722112881722, |
| "grad_norm": 1.333139181137085, |
| "learning_rate": 0.00029885832480184963, |
| "loss": 4.200628356933594, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.04136962118124605, |
| "grad_norm": 1.320517659187317, |
| "learning_rate": 0.0002988523625804641, |
| "loss": 3.89320068359375, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.04147202123367487, |
| "grad_norm": 0.9039347171783447, |
| "learning_rate": 0.0002988463848909944, |
| "loss": 3.9010406494140626, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.0415744212861037, |
| "grad_norm": 0.9151229858398438, |
| "learning_rate": 0.00029884039173406167, |
| "loss": 3.6283367919921874, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.041676821338532524, |
| "grad_norm": 0.8544915318489075, |
| "learning_rate": 0.00029883438311028876, |
| "loss": 4.021604919433594, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.04177922139096135, |
| "grad_norm": 1.2115877866744995, |
| "learning_rate": 0.0002988283590203, |
| "loss": 4.037056579589843, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.04188162144339018, |
| "grad_norm": 0.8434769511222839, |
| "learning_rate": 0.0002988223194647214, |
| "loss": 4.190481262207031, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.04198402149581901, |
| "grad_norm": 1.0086390972137451, |
| "learning_rate": 0.00029881626444418056, |
| "loss": 3.7280892944335937, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.042086421548247835, |
| "grad_norm": 1.0009269714355469, |
| "learning_rate": 0.0002988101939593067, |
| "loss": 4.065418090820312, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.04218882160067666, |
| "grad_norm": 0.7844799160957336, |
| "learning_rate": 0.0002988041080107307, |
| "loss": 3.97632080078125, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.04229122165310548, |
| "grad_norm": 0.9640885591506958, |
| "learning_rate": 0.00029879800659908485, |
| "loss": 4.065289916992188, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.04239362170553431, |
| "grad_norm": 0.8006758093833923, |
| "learning_rate": 0.0002987918897250033, |
| "loss": 4.137116088867187, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.04249602175796314, |
| "grad_norm": 0.624839186668396, |
| "learning_rate": 0.00029878575738912156, |
| "loss": 2.075597839355469, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.042598421810391966, |
| "grad_norm": 0.8152270317077637, |
| "learning_rate": 0.00029877960959207706, |
| "loss": 3.2935858154296875, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.042700821862820794, |
| "grad_norm": 0.9872801303863525, |
| "learning_rate": 0.0002987734463345085, |
| "loss": 3.3229608154296875, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.04280322191524962, |
| "grad_norm": 0.6640042066574097, |
| "learning_rate": 0.00029876726761705636, |
| "loss": 2.9013262939453126, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.04290562196767845, |
| "grad_norm": 0.6145225167274475, |
| "learning_rate": 0.00029876107344036277, |
| "loss": 2.4409584045410155, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.04300802202010728, |
| "grad_norm": 1.0556402206420898, |
| "learning_rate": 0.0002987548638050714, |
| "loss": 2.4114979553222655, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.043110422072536105, |
| "grad_norm": 0.9862767457962036, |
| "learning_rate": 0.00029874863871182745, |
| "loss": 3.802875671386719, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.043212822124964925, |
| "grad_norm": 0.852150559425354, |
| "learning_rate": 0.0002987423981612778, |
| "loss": 3.66058349609375, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.04331522217739375, |
| "grad_norm": 0.8836477398872375, |
| "learning_rate": 0.0002987361421540711, |
| "loss": 3.4694943237304687, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.04341762222982258, |
| "grad_norm": 1.5402307510375977, |
| "learning_rate": 0.00029872987069085727, |
| "loss": 3.277726135253906, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.04352002228225141, |
| "grad_norm": 0.9419423341751099, |
| "learning_rate": 0.0002987235837722881, |
| "loss": 3.5211444091796875, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.043622422334680236, |
| "grad_norm": 0.7486373782157898, |
| "learning_rate": 0.0002987172813990169, |
| "loss": 3.471663818359375, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.043724822387109064, |
| "grad_norm": 0.7535277605056763, |
| "learning_rate": 0.0002987109635716985, |
| "loss": 3.376907958984375, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.04382722243953789, |
| "grad_norm": 0.8332289457321167, |
| "learning_rate": 0.0002987046302909895, |
| "loss": 3.9842266845703125, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.04392962249196672, |
| "grad_norm": 1.322947382926941, |
| "learning_rate": 0.000298698281557548, |
| "loss": 3.1945089721679687, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.04403202254439554, |
| "grad_norm": 1.0296247005462646, |
| "learning_rate": 0.00029869191737203377, |
| "loss": 3.6288201904296873, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.04413442259682437, |
| "grad_norm": 0.9314439296722412, |
| "learning_rate": 0.0002986855377351081, |
| "loss": 3.4926687622070314, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.044236822649253195, |
| "grad_norm": 0.7597600221633911, |
| "learning_rate": 0.000298679142647434, |
| "loss": 2.996235046386719, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.04433922270168202, |
| "grad_norm": 1.4043519496917725, |
| "learning_rate": 0.00029867273210967593, |
| "loss": 3.252802429199219, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.04444162275411085, |
| "grad_norm": 3.3350236415863037, |
| "learning_rate": 0.00029866630612250013, |
| "loss": 3.2056927490234375, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.04454402280653968, |
| "grad_norm": 0.8740987777709961, |
| "learning_rate": 0.0002986598646865743, |
| "loss": 3.5895626831054686, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.044646422858968506, |
| "grad_norm": 1.1191177368164062, |
| "learning_rate": 0.00029865340780256777, |
| "loss": 3.456165466308594, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.04474882291139733, |
| "grad_norm": 0.8428330421447754, |
| "learning_rate": 0.0002986469354711516, |
| "loss": 3.3481961059570313, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.04485122296382616, |
| "grad_norm": 0.9282798767089844, |
| "learning_rate": 0.0002986404476929984, |
| "loss": 3.2974124145507813, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.04495362301625498, |
| "grad_norm": 1.1790461540222168, |
| "learning_rate": 0.00029863394446878223, |
| "loss": 2.619112854003906, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.04505602306868381, |
| "grad_norm": 0.905838131904602, |
| "learning_rate": 0.00029862742579917894, |
| "loss": 3.3288262939453124, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.04515842312111264, |
| "grad_norm": 0.7021234631538391, |
| "learning_rate": 0.00029862089168486596, |
| "loss": 3.40490234375, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.045260823173541465, |
| "grad_norm": 0.8678475618362427, |
| "learning_rate": 0.00029861434212652215, |
| "loss": 3.6314691162109374, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.04536322322597029, |
| "grad_norm": 0.9551572203636169, |
| "learning_rate": 0.00029860777712482824, |
| "loss": 3.654752197265625, |
| "step": 22150 |
| }, |
| { |
| "epoch": 0.04546562327839912, |
| "grad_norm": 1.1007713079452515, |
| "learning_rate": 0.00029860119668046636, |
| "loss": 3.439637451171875, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.04556802333082795, |
| "grad_norm": 0.8319056034088135, |
| "learning_rate": 0.0002985946007941204, |
| "loss": 3.5101995849609375, |
| "step": 22250 |
| }, |
| { |
| "epoch": 0.045670423383256775, |
| "grad_norm": 1.040257215499878, |
| "learning_rate": 0.0002985879894664757, |
| "loss": 3.7279443359375, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.045772823435685596, |
| "grad_norm": 1.222548246383667, |
| "learning_rate": 0.00029858136269821935, |
| "loss": 3.6467132568359375, |
| "step": 22350 |
| }, |
| { |
| "epoch": 0.045875223488114424, |
| "grad_norm": 0.7653852701187134, |
| "learning_rate": 0.00029857472049003993, |
| "loss": 3.789747619628906, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.04597762354054325, |
| "grad_norm": 1.0074176788330078, |
| "learning_rate": 0.00029856806284262767, |
| "loss": 3.3356439208984376, |
| "step": 22450 |
| }, |
| { |
| "epoch": 0.04608002359297208, |
| "grad_norm": 0.9829652309417725, |
| "learning_rate": 0.0002985613897566744, |
| "loss": 2.86457763671875, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.04618242364540091, |
| "grad_norm": 1.0552867650985718, |
| "learning_rate": 0.0002985547012328736, |
| "loss": 3.389576110839844, |
| "step": 22550 |
| }, |
| { |
| "epoch": 0.046284823697829734, |
| "grad_norm": 0.7977453470230103, |
| "learning_rate": 0.00029854799727192024, |
| "loss": 3.094827880859375, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.04638722375025856, |
| "grad_norm": 1.0439661741256714, |
| "learning_rate": 0.00029854127787451104, |
| "loss": 3.353898620605469, |
| "step": 22650 |
| }, |
| { |
| "epoch": 0.04648962380268739, |
| "grad_norm": 0.8338518738746643, |
| "learning_rate": 0.0002985345430413442, |
| "loss": 3.2231854248046874, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.04659202385511622, |
| "grad_norm": 1.1333472728729248, |
| "learning_rate": 0.0002985277927731196, |
| "loss": 3.30358642578125, |
| "step": 22750 |
| }, |
| { |
| "epoch": 0.04669442390754504, |
| "grad_norm": 0.8333401679992676, |
| "learning_rate": 0.0002985210270705387, |
| "loss": 3.2313726806640624, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.046796823959973866, |
| "grad_norm": 0.926623523235321, |
| "learning_rate": 0.0002985142459343045, |
| "loss": 3.3423468017578126, |
| "step": 22850 |
| }, |
| { |
| "epoch": 0.04689922401240269, |
| "grad_norm": 0.7728790640830994, |
| "learning_rate": 0.00029850744936512177, |
| "loss": 3.470130615234375, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.04700162406483152, |
| "grad_norm": 1.0513544082641602, |
| "learning_rate": 0.0002985006373636967, |
| "loss": 4.155077514648437, |
| "step": 22950 |
| }, |
| { |
| "epoch": 0.04710402411726035, |
| "grad_norm": 0.8886310458183289, |
| "learning_rate": 0.00029849380993073716, |
| "loss": 4.144877319335937, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.047206424169689176, |
| "grad_norm": 0.615044116973877, |
| "learning_rate": 0.0002984869670669527, |
| "loss": 4.217498779296875, |
| "step": 23050 |
| }, |
| { |
| "epoch": 0.047308824222118004, |
| "grad_norm": 1.0154633522033691, |
| "learning_rate": 0.00029848010877305437, |
| "loss": 3.5084097290039065, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.04741122427454683, |
| "grad_norm": 1.1519191265106201, |
| "learning_rate": 0.0002984732350497548, |
| "loss": 4.138232727050781, |
| "step": 23150 |
| }, |
| { |
| "epoch": 0.04751362432697565, |
| "grad_norm": 1.1761195659637451, |
| "learning_rate": 0.0002984663458977683, |
| "loss": 4.233868713378906, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.04761602437940448, |
| "grad_norm": 1.0882890224456787, |
| "learning_rate": 0.00029845944131781085, |
| "loss": 3.8094412231445314, |
| "step": 23250 |
| }, |
| { |
| "epoch": 0.04771842443183331, |
| "grad_norm": 1.145857810974121, |
| "learning_rate": 0.0002984525213105998, |
| "loss": 4.4981906127929685, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.047820824484262135, |
| "grad_norm": 1.0446664094924927, |
| "learning_rate": 0.0002984455858768544, |
| "loss": 3.7824630737304688, |
| "step": 23350 |
| }, |
| { |
| "epoch": 0.04792322453669096, |
| "grad_norm": 0.9234415292739868, |
| "learning_rate": 0.0002984386350172952, |
| "loss": 4.244895629882812, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.04802562458911979, |
| "grad_norm": 0.8664620518684387, |
| "learning_rate": 0.0002984316687326446, |
| "loss": 4.05336181640625, |
| "step": 23450 |
| }, |
| { |
| "epoch": 0.04812802464154862, |
| "grad_norm": 1.1607353687286377, |
| "learning_rate": 0.0002984246870236265, |
| "loss": 3.920790710449219, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.048230424693977446, |
| "grad_norm": 1.0881608724594116, |
| "learning_rate": 0.00029841768989096633, |
| "loss": 4.012793273925781, |
| "step": 23550 |
| }, |
| { |
| "epoch": 0.048332824746406274, |
| "grad_norm": 1.136512041091919, |
| "learning_rate": 0.0002984106773353913, |
| "loss": 3.7952926635742186, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.048435224798835094, |
| "grad_norm": 0.9657559990882874, |
| "learning_rate": 0.0002984036493576301, |
| "loss": 3.48884033203125, |
| "step": 23650 |
| }, |
| { |
| "epoch": 0.04853762485126392, |
| "grad_norm": 0.8505204319953918, |
| "learning_rate": 0.000298396605958413, |
| "loss": 3.842665710449219, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.04864002490369275, |
| "grad_norm": 0.9779611825942993, |
| "learning_rate": 0.00029838954713847193, |
| "loss": 3.847880859375, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.04874242495612158, |
| "grad_norm": 1.0220547914505005, |
| "learning_rate": 0.0002983824728985404, |
| "loss": 4.149264831542968, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.048844825008550405, |
| "grad_norm": 1.3035789728164673, |
| "learning_rate": 0.00029837538323935364, |
| "loss": 4.045937194824218, |
| "step": 23850 |
| }, |
| { |
| "epoch": 0.04894722506097923, |
| "grad_norm": 1.0806480646133423, |
| "learning_rate": 0.00029836827816164826, |
| "loss": 3.93858154296875, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.04904962511340806, |
| "grad_norm": 1.0183125734329224, |
| "learning_rate": 0.0002983611576661626, |
| "loss": 3.665546875, |
| "step": 23950 |
| }, |
| { |
| "epoch": 0.04915202516583689, |
| "grad_norm": 1.1539430618286133, |
| "learning_rate": 0.0002983540217536367, |
| "loss": 4.074727783203125, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.04925442521826571, |
| "grad_norm": 1.0822535753250122, |
| "learning_rate": 0.00029834687042481193, |
| "loss": 4.032168579101563, |
| "step": 24050 |
| }, |
| { |
| "epoch": 0.049356825270694536, |
| "grad_norm": 1.0588322877883911, |
| "learning_rate": 0.00029833970368043153, |
| "loss": 4.178402404785157, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.049459225323123364, |
| "grad_norm": 0.7627548575401306, |
| "learning_rate": 0.0002983325215212402, |
| "loss": 4.084798889160156, |
| "step": 24150 |
| }, |
| { |
| "epoch": 0.04956162537555219, |
| "grad_norm": 1.185702919960022, |
| "learning_rate": 0.0002983253239479843, |
| "loss": 4.136662292480469, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.04966402542798102, |
| "grad_norm": 2.4309804439544678, |
| "learning_rate": 0.0002983181109614118, |
| "loss": 4.230069885253906, |
| "step": 24250 |
| }, |
| { |
| "epoch": 0.04976642548040985, |
| "grad_norm": 1.0039188861846924, |
| "learning_rate": 0.00029831088256227216, |
| "loss": 3.9972125244140626, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.049868825532838675, |
| "grad_norm": 0.9414103627204895, |
| "learning_rate": 0.0002983036387513166, |
| "loss": 4.060273742675781, |
| "step": 24350 |
| }, |
| { |
| "epoch": 0.0499712255852675, |
| "grad_norm": 1.0714952945709229, |
| "learning_rate": 0.0002982963795292978, |
| "loss": 3.6833465576171873, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.05007362563769633, |
| "grad_norm": 0.924064576625824, |
| "learning_rate": 0.00029828910489697016, |
| "loss": 3.9215875244140626, |
| "step": 24450 |
| }, |
| { |
| "epoch": 0.05017602569012515, |
| "grad_norm": 0.9032275080680847, |
| "learning_rate": 0.00029828181485508956, |
| "loss": 4.0937020874023435, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.05027842574255398, |
| "grad_norm": 0.9629778861999512, |
| "learning_rate": 0.00029827450940441363, |
| "loss": 3.5827789306640625, |
| "step": 24550 |
| }, |
| { |
| "epoch": 0.050380825794982806, |
| "grad_norm": 1.0797669887542725, |
| "learning_rate": 0.00029826718854570147, |
| "loss": 3.6074313354492187, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.050483225847411634, |
| "grad_norm": 1.1837302446365356, |
| "learning_rate": 0.00029825985227971386, |
| "loss": 3.8778558349609376, |
| "step": 24650 |
| }, |
| { |
| "epoch": 0.05058562589984046, |
| "grad_norm": 1.0532505512237549, |
| "learning_rate": 0.0002982525006072131, |
| "loss": 4.007304382324219, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.05068802595226929, |
| "grad_norm": 1.024993896484375, |
| "learning_rate": 0.00029824513352896327, |
| "loss": 4.1383056640625, |
| "step": 24750 |
| }, |
| { |
| "epoch": 0.050790426004698117, |
| "grad_norm": 2.709007978439331, |
| "learning_rate": 0.00029823775104572976, |
| "loss": 3.71488525390625, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.050892826057126944, |
| "grad_norm": 0.9420567750930786, |
| "learning_rate": 0.0002982303531582799, |
| "loss": 4.161868591308593, |
| "step": 24850 |
| }, |
| { |
| "epoch": 0.050995226109555765, |
| "grad_norm": 1.638623595237732, |
| "learning_rate": 0.0002982229398673822, |
| "loss": 4.007568969726562, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.05109762616198459, |
| "grad_norm": 0.7433112859725952, |
| "learning_rate": 0.0002982155111738073, |
| "loss": 3.716796875, |
| "step": 24950 |
| }, |
| { |
| "epoch": 0.05120002621441342, |
| "grad_norm": 1.1634193658828735, |
| "learning_rate": 0.00029820806707832694, |
| "loss": 4.099712524414063, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.05130242626684225, |
| "grad_norm": 1.0174721479415894, |
| "learning_rate": 0.0002982006075817148, |
| "loss": 3.70357666015625, |
| "step": 25050 |
| }, |
| { |
| "epoch": 0.051404826319271076, |
| "grad_norm": 1.041905164718628, |
| "learning_rate": 0.00029819313268474593, |
| "loss": 3.85610107421875, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.0515072263716999, |
| "grad_norm": 1.108231782913208, |
| "learning_rate": 0.00029818564238819723, |
| "loss": 4.048504333496094, |
| "step": 25150 |
| }, |
| { |
| "epoch": 0.05160962642412873, |
| "grad_norm": 0.8780749440193176, |
| "learning_rate": 0.00029817813669284695, |
| "loss": 4.2607119750976565, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.05171202647655756, |
| "grad_norm": 1.0939981937408447, |
| "learning_rate": 0.0002981706155994751, |
| "loss": 4.242766723632813, |
| "step": 25250 |
| }, |
| { |
| "epoch": 0.051814426528986386, |
| "grad_norm": 0.9443891644477844, |
| "learning_rate": 0.00029816307910886323, |
| "loss": 4.077508850097656, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.05191682658141521, |
| "grad_norm": 0.8710380792617798, |
| "learning_rate": 0.00029815552722179447, |
| "loss": 3.954695739746094, |
| "step": 25350 |
| }, |
| { |
| "epoch": 0.052019226633844035, |
| "grad_norm": 0.9465594291687012, |
| "learning_rate": 0.0002981479599390536, |
| "loss": 3.9642620849609376, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.05212162668627286, |
| "grad_norm": 1.2072516679763794, |
| "learning_rate": 0.00029814037726142703, |
| "loss": 3.5950994873046875, |
| "step": 25450 |
| }, |
| { |
| "epoch": 0.05222402673870169, |
| "grad_norm": 0.9787052869796753, |
| "learning_rate": 0.0002981327791897026, |
| "loss": 3.669163818359375, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.05232642679113052, |
| "grad_norm": 0.9823593497276306, |
| "learning_rate": 0.00029812516572467, |
| "loss": 3.70659423828125, |
| "step": 25550 |
| }, |
| { |
| "epoch": 0.052428826843559345, |
| "grad_norm": 0.9548662304878235, |
| "learning_rate": 0.00029811753686712024, |
| "loss": 4.188983459472656, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.05253122689598817, |
| "grad_norm": 0.8237021565437317, |
| "learning_rate": 0.0002981098926178462, |
| "loss": 4.097180786132813, |
| "step": 25650 |
| }, |
| { |
| "epoch": 0.052633626948417, |
| "grad_norm": 0.8100720047950745, |
| "learning_rate": 0.00029810223297764224, |
| "loss": 4.057103271484375, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.05273602700084582, |
| "grad_norm": 0.9498805403709412, |
| "learning_rate": 0.00029809455794730424, |
| "loss": 3.9076028442382813, |
| "step": 25750 |
| }, |
| { |
| "epoch": 0.05283842705327465, |
| "grad_norm": 0.9514391422271729, |
| "learning_rate": 0.00029808686752762984, |
| "loss": 3.881569519042969, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.05294082710570348, |
| "grad_norm": 0.5591891407966614, |
| "learning_rate": 0.0002980791617194181, |
| "loss": 4.816184692382812, |
| "step": 25850 |
| }, |
| { |
| "epoch": 0.053043227158132304, |
| "grad_norm": 0.8840929269790649, |
| "learning_rate": 0.0002980714405234698, |
| "loss": 3.9826123046875, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.05314562721056113, |
| "grad_norm": 0.6732226610183716, |
| "learning_rate": 0.00029806370394058735, |
| "loss": 3.7573004150390625, |
| "step": 25950 |
| }, |
| { |
| "epoch": 0.05324802726298996, |
| "grad_norm": 1.1279404163360596, |
| "learning_rate": 0.0002980559519715747, |
| "loss": 3.7439083862304687, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.05335042731541879, |
| "grad_norm": 1.28814697265625, |
| "learning_rate": 0.0002980481846172372, |
| "loss": 3.40891357421875, |
| "step": 26050 |
| }, |
| { |
| "epoch": 0.053452827367847615, |
| "grad_norm": 0.8305365443229675, |
| "learning_rate": 0.0002980404018783823, |
| "loss": 3.9074551391601564, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.05355522742027644, |
| "grad_norm": 1.059561848640442, |
| "learning_rate": 0.0002980326037558186, |
| "loss": 3.3790802001953124, |
| "step": 26150 |
| }, |
| { |
| "epoch": 0.05365762747270526, |
| "grad_norm": 0.7863622903823853, |
| "learning_rate": 0.00029802479025035645, |
| "loss": 3.8910751342773438, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.05376002752513409, |
| "grad_norm": 0.8412345051765442, |
| "learning_rate": 0.0002980169613628078, |
| "loss": 3.905106201171875, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.05386242757756292, |
| "grad_norm": 0.6786169409751892, |
| "learning_rate": 0.0002980091170939862, |
| "loss": 3.6419586181640624, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.053964827629991746, |
| "grad_norm": 0.8411727547645569, |
| "learning_rate": 0.00029800125744470677, |
| "loss": 3.3573968505859373, |
| "step": 26350 |
| }, |
| { |
| "epoch": 0.054067227682420574, |
| "grad_norm": 0.9979608654975891, |
| "learning_rate": 0.0002979933824157863, |
| "loss": 3.6130526733398436, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.0541696277348494, |
| "grad_norm": 0.8738940358161926, |
| "learning_rate": 0.00029798549200804305, |
| "loss": 3.2773031616210937, |
| "step": 26450 |
| }, |
| { |
| "epoch": 0.05427202778727823, |
| "grad_norm": 0.8625099062919617, |
| "learning_rate": 0.0002979775862222971, |
| "loss": 3.92064453125, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.05437442783970706, |
| "grad_norm": 1.1380776166915894, |
| "learning_rate": 0.00029796966505936975, |
| "loss": 3.9016488647460936, |
| "step": 26550 |
| }, |
| { |
| "epoch": 0.05447682789213588, |
| "grad_norm": 0.8728241324424744, |
| "learning_rate": 0.0002979617285200844, |
| "loss": 4.155015258789063, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.054579227944564705, |
| "grad_norm": 1.174974799156189, |
| "learning_rate": 0.0002979537766052656, |
| "loss": 3.755271301269531, |
| "step": 26650 |
| }, |
| { |
| "epoch": 0.05468162799699353, |
| "grad_norm": 1.0797170400619507, |
| "learning_rate": 0.00029794580931573973, |
| "loss": 3.6002767944335936, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.05478402804942236, |
| "grad_norm": 0.8095331192016602, |
| "learning_rate": 0.0002979378266523347, |
| "loss": 3.9049578857421876, |
| "step": 26750 |
| }, |
| { |
| "epoch": 0.05488642810185119, |
| "grad_norm": 0.8785421252250671, |
| "learning_rate": 0.00029792982861588007, |
| "loss": 3.594248046875, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.054988828154280016, |
| "grad_norm": 0.8992822766304016, |
| "learning_rate": 0.0002979218152072069, |
| "loss": 4.156261901855469, |
| "step": 26850 |
| }, |
| { |
| "epoch": 0.055091228206708844, |
| "grad_norm": 1.633196234703064, |
| "learning_rate": 0.000297913786427148, |
| "loss": 3.608190612792969, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.05519362825913767, |
| "grad_norm": 1.1997803449630737, |
| "learning_rate": 0.0002979057422765376, |
| "loss": 3.6971340942382813, |
| "step": 26950 |
| }, |
| { |
| "epoch": 0.0552960283115665, |
| "grad_norm": 0.987196147441864, |
| "learning_rate": 0.00029789768275621163, |
| "loss": 3.6062017822265626, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.05539842836399532, |
| "grad_norm": 1.0470249652862549, |
| "learning_rate": 0.00029788960786700767, |
| "loss": 3.6216055297851564, |
| "step": 27050 |
| }, |
| { |
| "epoch": 0.05550082841642415, |
| "grad_norm": 1.3368786573410034, |
| "learning_rate": 0.00029788151760976473, |
| "loss": 3.4363177490234373, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.055603228468852975, |
| "grad_norm": 1.0057690143585205, |
| "learning_rate": 0.0002978734119853236, |
| "loss": 2.9398748779296877, |
| "step": 27150 |
| }, |
| { |
| "epoch": 0.0557056285212818, |
| "grad_norm": 1.0253512859344482, |
| "learning_rate": 0.0002978652909945265, |
| "loss": 3.5486212158203125, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.05580802857371063, |
| "grad_norm": 0.9567630887031555, |
| "learning_rate": 0.0002978571546382174, |
| "loss": 3.531204833984375, |
| "step": 27250 |
| }, |
| { |
| "epoch": 0.05591042862613946, |
| "grad_norm": 0.7189958691596985, |
| "learning_rate": 0.00029784900291724174, |
| "loss": 4.003550415039062, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.056012828678568286, |
| "grad_norm": 0.7804083228111267, |
| "learning_rate": 0.0002978408358324466, |
| "loss": 3.952115173339844, |
| "step": 27350 |
| }, |
| { |
| "epoch": 0.05611522873099711, |
| "grad_norm": 0.7131394743919373, |
| "learning_rate": 0.00029783265338468077, |
| "loss": 3.712818298339844, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.056217628783425934, |
| "grad_norm": 0.9421349167823792, |
| "learning_rate": 0.0002978244555747944, |
| "loss": 3.955911865234375, |
| "step": 27450 |
| }, |
| { |
| "epoch": 0.05632002883585476, |
| "grad_norm": 1.1702853441238403, |
| "learning_rate": 0.0002978162424036395, |
| "loss": 3.715908203125, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.05642242888828359, |
| "grad_norm": 1.0307793617248535, |
| "learning_rate": 0.0002978080138720694, |
| "loss": 4.063231506347656, |
| "step": 27550 |
| }, |
| { |
| "epoch": 0.05652482894071242, |
| "grad_norm": 1.0633025169372559, |
| "learning_rate": 0.00029779976998093926, |
| "loss": 3.9883132934570313, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.056627228993141245, |
| "grad_norm": 1.6195343732833862, |
| "learning_rate": 0.0002977915107311058, |
| "loss": 4.001260681152344, |
| "step": 27650 |
| }, |
| { |
| "epoch": 0.05672962904557007, |
| "grad_norm": 0.9477188587188721, |
| "learning_rate": 0.00029778323612342716, |
| "loss": 3.9925576782226564, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.0568320290979989, |
| "grad_norm": 0.7277911305427551, |
| "learning_rate": 0.00029777494615876337, |
| "loss": 3.8298355102539063, |
| "step": 27750 |
| }, |
| { |
| "epoch": 0.05693442915042773, |
| "grad_norm": 0.8074896931648254, |
| "learning_rate": 0.0002977666408379757, |
| "loss": 3.53470947265625, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.057036829202856555, |
| "grad_norm": 1.870801568031311, |
| "learning_rate": 0.0002977583201619273, |
| "loss": 4.093720703125, |
| "step": 27850 |
| }, |
| { |
| "epoch": 0.057139229255285376, |
| "grad_norm": 0.9061904549598694, |
| "learning_rate": 0.00029774998413148283, |
| "loss": 3.6751202392578124, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.057241629307714204, |
| "grad_norm": 0.766776978969574, |
| "learning_rate": 0.0002977416327475085, |
| "loss": 3.7472940063476563, |
| "step": 27950 |
| }, |
| { |
| "epoch": 0.05734402936014303, |
| "grad_norm": 0.9437297582626343, |
| "learning_rate": 0.0002977332660108722, |
| "loss": 3.1673342895507814, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.05744642941257186, |
| "grad_norm": 0.9875741004943848, |
| "learning_rate": 0.00029772488392244324, |
| "loss": 3.69399658203125, |
| "step": 28050 |
| }, |
| { |
| "epoch": 0.057548829465000687, |
| "grad_norm": 1.2089347839355469, |
| "learning_rate": 0.00029771648648309275, |
| "loss": 3.5663076782226564, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.057651229517429514, |
| "grad_norm": 1.0613031387329102, |
| "learning_rate": 0.00029770807369369334, |
| "loss": 3.696695556640625, |
| "step": 28150 |
| }, |
| { |
| "epoch": 0.05775362956985834, |
| "grad_norm": 1.1133229732513428, |
| "learning_rate": 0.00029769964555511925, |
| "loss": 3.527508544921875, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.05785602962228717, |
| "grad_norm": 1.0089772939682007, |
| "learning_rate": 0.0002976912020682463, |
| "loss": 3.744898376464844, |
| "step": 28250 |
| }, |
| { |
| "epoch": 0.05795842967471599, |
| "grad_norm": 0.9647061824798584, |
| "learning_rate": 0.00029768274323395183, |
| "loss": 3.6294049072265624, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.05806082972714482, |
| "grad_norm": 1.680829644203186, |
| "learning_rate": 0.00029767426905311485, |
| "loss": 3.6455474853515626, |
| "step": 28350 |
| }, |
| { |
| "epoch": 0.058163229779573646, |
| "grad_norm": 0.9101169109344482, |
| "learning_rate": 0.00029766577952661607, |
| "loss": 3.9211056518554686, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.05826562983200247, |
| "grad_norm": 1.0310935974121094, |
| "learning_rate": 0.00029765727465533764, |
| "loss": 3.7476397705078126, |
| "step": 28450 |
| }, |
| { |
| "epoch": 0.0583680298844313, |
| "grad_norm": 1.042888879776001, |
| "learning_rate": 0.00029764875444016325, |
| "loss": 4.1402108764648435, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.05847042993686013, |
| "grad_norm": 1.0709255933761597, |
| "learning_rate": 0.00029764021888197835, |
| "loss": 3.9610775756835936, |
| "step": 28550 |
| }, |
| { |
| "epoch": 0.058572829989288956, |
| "grad_norm": 1.027099370956421, |
| "learning_rate": 0.00029763166798166995, |
| "loss": 3.751552734375, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.058675230041717784, |
| "grad_norm": 0.7349804639816284, |
| "learning_rate": 0.0002976231017401266, |
| "loss": 3.742770080566406, |
| "step": 28650 |
| }, |
| { |
| "epoch": 0.05877763009414661, |
| "grad_norm": 1.0283441543579102, |
| "learning_rate": 0.0002976145201582384, |
| "loss": 3.7890921020507813, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.05888003014657543, |
| "grad_norm": 0.8082360029220581, |
| "learning_rate": 0.00029760592323689725, |
| "loss": 4.149041748046875, |
| "step": 28750 |
| }, |
| { |
| "epoch": 0.05898243019900426, |
| "grad_norm": 0.9537481665611267, |
| "learning_rate": 0.00029759731097699635, |
| "loss": 4.166469421386719, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.05908483025143309, |
| "grad_norm": 1.1642649173736572, |
| "learning_rate": 0.0002975886833794308, |
| "loss": 4.074107360839844, |
| "step": 28850 |
| }, |
| { |
| "epoch": 0.059187230303861915, |
| "grad_norm": 1.0695040225982666, |
| "learning_rate": 0.00029758004044509707, |
| "loss": 4.009411926269531, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.05928963035629074, |
| "grad_norm": 0.933382511138916, |
| "learning_rate": 0.00029757138217489324, |
| "loss": 3.857533264160156, |
| "step": 28950 |
| }, |
| { |
| "epoch": 0.05939203040871957, |
| "grad_norm": 1.0519219636917114, |
| "learning_rate": 0.0002975627085697191, |
| "loss": 3.5922341918945313, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.0594944304611484, |
| "grad_norm": 0.879135251045227, |
| "learning_rate": 0.00029755401963047596, |
| "loss": 4.271012268066406, |
| "step": 29050 |
| }, |
| { |
| "epoch": 0.059596830513577226, |
| "grad_norm": 1.0314289331436157, |
| "learning_rate": 0.0002975453153580667, |
| "loss": 3.891732177734375, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.05969923056600605, |
| "grad_norm": 0.9761302471160889, |
| "learning_rate": 0.000297536595753396, |
| "loss": 3.860959167480469, |
| "step": 29150 |
| }, |
| { |
| "epoch": 0.059801630618434874, |
| "grad_norm": 0.866371750831604, |
| "learning_rate": 0.0002975278608173697, |
| "loss": 3.8342303466796874, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.0599040306708637, |
| "grad_norm": 0.9015768766403198, |
| "learning_rate": 0.0002975191105508957, |
| "loss": 3.8901824951171875, |
| "step": 29250 |
| }, |
| { |
| "epoch": 0.06000643072329253, |
| "grad_norm": 0.9253438711166382, |
| "learning_rate": 0.0002975103449548832, |
| "loss": 3.8019094848632813, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.06010883077572136, |
| "grad_norm": 0.7289124727249146, |
| "learning_rate": 0.0002975015640302431, |
| "loss": 3.34075439453125, |
| "step": 29350 |
| }, |
| { |
| "epoch": 0.060211230828150185, |
| "grad_norm": 0.713688313961029, |
| "learning_rate": 0.0002974927677778879, |
| "loss": 3.235279235839844, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.06031363088057901, |
| "grad_norm": 0.6275246143341064, |
| "learning_rate": 0.0002974839561987316, |
| "loss": 3.8884927368164064, |
| "step": 29450 |
| }, |
| { |
| "epoch": 0.06041603093300784, |
| "grad_norm": 1.1090385913848877, |
| "learning_rate": 0.0002974751292936899, |
| "loss": 3.3796435546875, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.06051843098543667, |
| "grad_norm": 0.8206045031547546, |
| "learning_rate": 0.0002974662870636801, |
| "loss": 3.9724603271484376, |
| "step": 29550 |
| }, |
| { |
| "epoch": 0.06062083103786549, |
| "grad_norm": 1.3841317892074585, |
| "learning_rate": 0.00029745742950962095, |
| "loss": 3.951322021484375, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.060723231090294316, |
| "grad_norm": 0.9978547692298889, |
| "learning_rate": 0.000297448556632433, |
| "loss": 4.5418917846679685, |
| "step": 29650 |
| }, |
| { |
| "epoch": 0.060825631142723144, |
| "grad_norm": 0.9191545248031616, |
| "learning_rate": 0.0002974396684330382, |
| "loss": 3.5654345703125, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.06092803119515197, |
| "grad_norm": 1.4994065761566162, |
| "learning_rate": 0.0002974307649123602, |
| "loss": 3.7218731689453124, |
| "step": 29750 |
| }, |
| { |
| "epoch": 0.0610304312475808, |
| "grad_norm": 0.6516634821891785, |
| "learning_rate": 0.0002974218460713242, |
| "loss": 3.561522216796875, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.06113283130000963, |
| "grad_norm": 1.1022820472717285, |
| "learning_rate": 0.000297412911910857, |
| "loss": 4.207413024902344, |
| "step": 29850 |
| }, |
| { |
| "epoch": 0.061235231352438454, |
| "grad_norm": 0.861346960067749, |
| "learning_rate": 0.000297403962431887, |
| "loss": 3.959631042480469, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.06133763140486728, |
| "grad_norm": 0.8098173141479492, |
| "learning_rate": 0.0002973949976353442, |
| "loss": 4.688843383789062, |
| "step": 29950 |
| }, |
| { |
| "epoch": 0.0614400314572961, |
| "grad_norm": 0.8004640936851501, |
| "learning_rate": 0.0002973860175221603, |
| "loss": 4.384559631347656, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.06154243150972493, |
| "grad_norm": 1.4548406600952148, |
| "learning_rate": 0.0002973770220932683, |
| "loss": 4.232876281738282, |
| "step": 30050 |
| }, |
| { |
| "epoch": 0.06164483156215376, |
| "grad_norm": 1.1136951446533203, |
| "learning_rate": 0.00029736801134960296, |
| "loss": 4.017593994140625, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.061747231614582586, |
| "grad_norm": 0.9526700377464294, |
| "learning_rate": 0.00029735898529210074, |
| "loss": 3.694122619628906, |
| "step": 30150 |
| }, |
| { |
| "epoch": 0.061849631667011414, |
| "grad_norm": 0.9094407558441162, |
| "learning_rate": 0.0002973499439216996, |
| "loss": 2.5258544921875, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.06195203171944024, |
| "grad_norm": 0.9788475632667542, |
| "learning_rate": 0.000297340887239339, |
| "loss": 3.6749945068359375, |
| "step": 30250 |
| }, |
| { |
| "epoch": 0.06205443177186907, |
| "grad_norm": 0.9837728142738342, |
| "learning_rate": 0.00029733181524596006, |
| "loss": 3.9548965454101563, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.062156831824297896, |
| "grad_norm": 0.9949678778648376, |
| "learning_rate": 0.00029732272794250563, |
| "loss": 3.194211730957031, |
| "step": 30350 |
| }, |
| { |
| "epoch": 0.062259231876726724, |
| "grad_norm": 1.091620683670044, |
| "learning_rate": 0.00029731362532991985, |
| "loss": 3.8439263916015625, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.062361631929155545, |
| "grad_norm": 0.759272575378418, |
| "learning_rate": 0.0002973045074091488, |
| "loss": 3.965645751953125, |
| "step": 30450 |
| }, |
| { |
| "epoch": 0.06246403198158437, |
| "grad_norm": 0.9479434490203857, |
| "learning_rate": 0.0002972953741811398, |
| "loss": 3.6418606567382814, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.0625664320340132, |
| "grad_norm": 0.8087990880012512, |
| "learning_rate": 0.00029728622564684204, |
| "loss": 3.7622882080078126, |
| "step": 30550 |
| }, |
| { |
| "epoch": 0.06266883208644203, |
| "grad_norm": 1.2932571172714233, |
| "learning_rate": 0.0002972770618072062, |
| "loss": 4.1614468383789065, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.06277123213887086, |
| "grad_norm": 0.6852632761001587, |
| "learning_rate": 0.00029726788266318455, |
| "loss": 3.5135552978515623, |
| "step": 30650 |
| }, |
| { |
| "epoch": 0.06287363219129968, |
| "grad_norm": 0.9849332571029663, |
| "learning_rate": 0.0002972586882157309, |
| "loss": 3.3184869384765623, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.06297603224372851, |
| "grad_norm": 1.1004332304000854, |
| "learning_rate": 0.00029724947846580064, |
| "loss": 3.4316140747070314, |
| "step": 30750 |
| }, |
| { |
| "epoch": 0.06307843229615734, |
| "grad_norm": 0.9240966439247131, |
| "learning_rate": 0.00029724025341435097, |
| "loss": 4.058392333984375, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.06318083234858617, |
| "grad_norm": 0.8939677476882935, |
| "learning_rate": 0.0002972310130623404, |
| "loss": 4.048366088867187, |
| "step": 30850 |
| }, |
| { |
| "epoch": 0.063283232401015, |
| "grad_norm": 0.8218761086463928, |
| "learning_rate": 0.00029722175741072915, |
| "loss": 4.063833618164063, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.06338563245344382, |
| "grad_norm": 0.9675712585449219, |
| "learning_rate": 0.0002972124864604791, |
| "loss": 3.7749728393554687, |
| "step": 30950 |
| }, |
| { |
| "epoch": 0.06348803250587265, |
| "grad_norm": 1.2063570022583008, |
| "learning_rate": 0.0002972032002125536, |
| "loss": 3.5751220703125, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.06359043255830148, |
| "grad_norm": 1.0709924697875977, |
| "learning_rate": 0.00029719389866791755, |
| "loss": 3.7293637084960936, |
| "step": 31050 |
| }, |
| { |
| "epoch": 0.06369283261073029, |
| "grad_norm": 0.8866503834724426, |
| "learning_rate": 0.0002971845818275377, |
| "loss": 3.907535400390625, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.06379523266315912, |
| "grad_norm": 1.0057551860809326, |
| "learning_rate": 0.00029717524969238206, |
| "loss": 3.222738037109375, |
| "step": 31150 |
| }, |
| { |
| "epoch": 0.06389763271558795, |
| "grad_norm": 0.9129172563552856, |
| "learning_rate": 0.0002971659022634205, |
| "loss": 3.4403155517578123, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.06400003276801677, |
| "grad_norm": 0.9336997866630554, |
| "learning_rate": 0.0002971565395416243, |
| "loss": 3.30571044921875, |
| "step": 31250 |
| }, |
| { |
| "epoch": 0.0641024328204456, |
| "grad_norm": 1.1164926290512085, |
| "learning_rate": 0.0002971471615279664, |
| "loss": 3.8188116455078127, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.06420483287287443, |
| "grad_norm": 0.8115789890289307, |
| "learning_rate": 0.0002971377682234213, |
| "loss": 3.9151617431640626, |
| "step": 31350 |
| }, |
| { |
| "epoch": 0.06430723292530326, |
| "grad_norm": 0.9240061044692993, |
| "learning_rate": 0.00029712835962896514, |
| "loss": 3.709864196777344, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.06440963297773208, |
| "grad_norm": 1.6785798072814941, |
| "learning_rate": 0.0002971189357455756, |
| "loss": 3.689013671875, |
| "step": 31450 |
| }, |
| { |
| "epoch": 0.06451203303016091, |
| "grad_norm": 0.7833497524261475, |
| "learning_rate": 0.0002971094965742321, |
| "loss": 3.3715243530273438, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.06461443308258974, |
| "grad_norm": 0.8799951076507568, |
| "learning_rate": 0.0002971000421159153, |
| "loss": 4.09000244140625, |
| "step": 31550 |
| }, |
| { |
| "epoch": 0.06471683313501857, |
| "grad_norm": 0.7977895736694336, |
| "learning_rate": 0.0002970905723716078, |
| "loss": 4.248508911132813, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.0648192331874474, |
| "grad_norm": 0.8709924221038818, |
| "learning_rate": 0.00029708108734229365, |
| "loss": 3.489057922363281, |
| "step": 31650 |
| }, |
| { |
| "epoch": 0.06492163323987622, |
| "grad_norm": 0.8895650506019592, |
| "learning_rate": 0.00029707158702895847, |
| "loss": 3.898555908203125, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.06502403329230505, |
| "grad_norm": 0.8814746737480164, |
| "learning_rate": 0.00029706207143258945, |
| "loss": 3.7208917236328123, |
| "step": 31750 |
| }, |
| { |
| "epoch": 0.06512643334473388, |
| "grad_norm": 0.9977162480354309, |
| "learning_rate": 0.0002970525405541755, |
| "loss": 4.208245849609375, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.0652288333971627, |
| "grad_norm": 0.7882950901985168, |
| "learning_rate": 0.0002970429943947069, |
| "loss": 3.7341409301757813, |
| "step": 31850 |
| }, |
| { |
| "epoch": 0.06533123344959153, |
| "grad_norm": 0.9084259867668152, |
| "learning_rate": 0.00029703343295517577, |
| "loss": 3.782439880371094, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.06543363350202035, |
| "grad_norm": 1.0745272636413574, |
| "learning_rate": 0.0002970238562365756, |
| "loss": 3.530187072753906, |
| "step": 31950 |
| }, |
| { |
| "epoch": 0.06553603355444917, |
| "grad_norm": 0.7873273491859436, |
| "learning_rate": 0.0002970142642399017, |
| "loss": 3.5862966918945314, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.065638433606878, |
| "grad_norm": 0.9771028757095337, |
| "learning_rate": 0.0002970046569661506, |
| "loss": 3.8827175903320312, |
| "step": 32050 |
| }, |
| { |
| "epoch": 0.06574083365930683, |
| "grad_norm": 0.8443105816841125, |
| "learning_rate": 0.00029699503441632085, |
| "loss": 3.28268310546875, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.06584323371173566, |
| "grad_norm": 0.7213400602340698, |
| "learning_rate": 0.0002969853965914123, |
| "loss": 3.13387939453125, |
| "step": 32150 |
| }, |
| { |
| "epoch": 0.06594563376416449, |
| "grad_norm": 1.1795644760131836, |
| "learning_rate": 0.0002969757434924265, |
| "loss": 3.658702087402344, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.06604803381659331, |
| "grad_norm": 0.7857722640037537, |
| "learning_rate": 0.0002969660751203665, |
| "loss": 3.4446502685546876, |
| "step": 32250 |
| }, |
| { |
| "epoch": 0.06615043386902214, |
| "grad_norm": 1.0390616655349731, |
| "learning_rate": 0.00029695639147623703, |
| "loss": 3.644783630371094, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.06625283392145097, |
| "grad_norm": 0.7487825155258179, |
| "learning_rate": 0.00029694669256104446, |
| "loss": 3.63455810546875, |
| "step": 32350 |
| }, |
| { |
| "epoch": 0.0663552339738798, |
| "grad_norm": 0.8825246691703796, |
| "learning_rate": 0.0002969369783757965, |
| "loss": 3.3496524047851564, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.06645763402630862, |
| "grad_norm": 1.1626224517822266, |
| "learning_rate": 0.00029692724892150266, |
| "loss": 3.726259460449219, |
| "step": 32450 |
| }, |
| { |
| "epoch": 0.06656003407873745, |
| "grad_norm": 0.74493008852005, |
| "learning_rate": 0.00029691750419917406, |
| "loss": 3.7289053344726564, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.06666243413116628, |
| "grad_norm": 0.6749517917633057, |
| "learning_rate": 0.00029690774420982317, |
| "loss": 3.5053274536132815, |
| "step": 32550 |
| }, |
| { |
| "epoch": 0.0667648341835951, |
| "grad_norm": 1.099471926689148, |
| "learning_rate": 0.0002968979689544644, |
| "loss": 3.514427490234375, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.06686723423602393, |
| "grad_norm": 0.9038723111152649, |
| "learning_rate": 0.00029688817843411344, |
| "loss": 3.616097106933594, |
| "step": 32650 |
| }, |
| { |
| "epoch": 0.06696963428845276, |
| "grad_norm": 0.7338837385177612, |
| "learning_rate": 0.0002968783726497877, |
| "loss": 3.4425479125976564, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.06707203434088159, |
| "grad_norm": 1.254689335823059, |
| "learning_rate": 0.0002968685516025061, |
| "loss": 3.3777651977539063, |
| "step": 32750 |
| }, |
| { |
| "epoch": 0.0671744343933104, |
| "grad_norm": 0.8535405397415161, |
| "learning_rate": 0.00029685871529328933, |
| "loss": 4.319814758300781, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.06727683444573923, |
| "grad_norm": 0.9299177527427673, |
| "learning_rate": 0.00029684886372315935, |
| "loss": 3.78345458984375, |
| "step": 32850 |
| }, |
| { |
| "epoch": 0.06737923449816806, |
| "grad_norm": 1.0497288703918457, |
| "learning_rate": 0.0002968389968931401, |
| "loss": 3.619969787597656, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.06748163455059689, |
| "grad_norm": 0.9285115599632263, |
| "learning_rate": 0.00029682911480425673, |
| "loss": 3.488844909667969, |
| "step": 32950 |
| }, |
| { |
| "epoch": 0.06758403460302571, |
| "grad_norm": 1.2114810943603516, |
| "learning_rate": 0.0002968192174575362, |
| "loss": 3.8050308227539062, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.06768643465545454, |
| "grad_norm": 0.9714403748512268, |
| "learning_rate": 0.0002968093048540071, |
| "loss": 3.402801208496094, |
| "step": 33050 |
| }, |
| { |
| "epoch": 0.06778883470788337, |
| "grad_norm": 1.049149990081787, |
| "learning_rate": 0.00029679937699469934, |
| "loss": 3.4410101318359376, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.0678912347603122, |
| "grad_norm": 0.8005252480506897, |
| "learning_rate": 0.0002967894338806446, |
| "loss": 3.9667138671875, |
| "step": 33150 |
| }, |
| { |
| "epoch": 0.06799363481274102, |
| "grad_norm": 1.0901520252227783, |
| "learning_rate": 0.00029677947551287625, |
| "loss": 3.6659295654296873, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.06809603486516985, |
| "grad_norm": 0.9532211422920227, |
| "learning_rate": 0.000296769501892429, |
| "loss": 3.9132586669921876, |
| "step": 33250 |
| }, |
| { |
| "epoch": 0.06819843491759868, |
| "grad_norm": 1.3878906965255737, |
| "learning_rate": 0.0002967595130203394, |
| "loss": 3.927642822265625, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.06830083497002751, |
| "grad_norm": 1.046176552772522, |
| "learning_rate": 0.00029674950889764523, |
| "loss": 3.9409329223632814, |
| "step": 33350 |
| }, |
| { |
| "epoch": 0.06840323502245634, |
| "grad_norm": 0.8497132062911987, |
| "learning_rate": 0.0002967394895253863, |
| "loss": 3.60568359375, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.06850563507488516, |
| "grad_norm": 1.1313576698303223, |
| "learning_rate": 0.00029672945490460365, |
| "loss": 4.024774780273438, |
| "step": 33450 |
| }, |
| { |
| "epoch": 0.06860803512731399, |
| "grad_norm": 1.0038946866989136, |
| "learning_rate": 0.00029671940503634006, |
| "loss": 3.707646484375, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.06871043517974282, |
| "grad_norm": 1.1383546590805054, |
| "learning_rate": 0.0002967093399216399, |
| "loss": 3.2730068969726562, |
| "step": 33550 |
| }, |
| { |
| "epoch": 0.06881283523217165, |
| "grad_norm": 0.9146387577056885, |
| "learning_rate": 0.00029669925956154905, |
| "loss": 3.9269442749023438, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.06891523528460046, |
| "grad_norm": 0.6965939402580261, |
| "learning_rate": 0.000296689163957115, |
| "loss": 2.7985528564453124, |
| "step": 33650 |
| }, |
| { |
| "epoch": 0.06901763533702929, |
| "grad_norm": 0.8769970536231995, |
| "learning_rate": 0.00029667905310938695, |
| "loss": 4.186055908203125, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.06912003538945811, |
| "grad_norm": 0.8398081660270691, |
| "learning_rate": 0.0002966689270194154, |
| "loss": 3.677633056640625, |
| "step": 33750 |
| }, |
| { |
| "epoch": 0.06922243544188694, |
| "grad_norm": 0.7318697571754456, |
| "learning_rate": 0.00029665878568825284, |
| "loss": 4.001636352539062, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.06932483549431577, |
| "grad_norm": 1.0592197179794312, |
| "learning_rate": 0.00029664862911695286, |
| "loss": 3.9292962646484373, |
| "step": 33850 |
| }, |
| { |
| "epoch": 0.0694272355467446, |
| "grad_norm": 1.3345533609390259, |
| "learning_rate": 0.0002966384573065711, |
| "loss": 3.7566705322265626, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.06952963559917343, |
| "grad_norm": 0.9815147519111633, |
| "learning_rate": 0.00029662827025816443, |
| "loss": 3.8881317138671876, |
| "step": 33950 |
| }, |
| { |
| "epoch": 0.06963203565160225, |
| "grad_norm": 0.996683657169342, |
| "learning_rate": 0.00029661806797279147, |
| "loss": 3.9453826904296876, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.06973443570403108, |
| "grad_norm": 1.0983341932296753, |
| "learning_rate": 0.0002966078504515125, |
| "loss": 4.025393371582031, |
| "step": 34050 |
| }, |
| { |
| "epoch": 0.06983683575645991, |
| "grad_norm": 1.2514588832855225, |
| "learning_rate": 0.0002965976176953891, |
| "loss": 4.020445556640625, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.06993923580888874, |
| "grad_norm": 0.7997650504112244, |
| "learning_rate": 0.00029658736970548477, |
| "loss": 3.7041055297851564, |
| "step": 34150 |
| }, |
| { |
| "epoch": 0.07004163586131756, |
| "grad_norm": 0.7876397371292114, |
| "learning_rate": 0.00029657710648286437, |
| "loss": 3.2046856689453125, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.07014403591374639, |
| "grad_norm": 0.9293930530548096, |
| "learning_rate": 0.00029656682802859443, |
| "loss": 3.7819009399414063, |
| "step": 34250 |
| }, |
| { |
| "epoch": 0.07024643596617522, |
| "grad_norm": 0.6517935395240784, |
| "learning_rate": 0.000296556534343743, |
| "loss": 2.6689993286132814, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.07034883601860405, |
| "grad_norm": 1.03813898563385, |
| "learning_rate": 0.00029654622542937977, |
| "loss": 2.5518731689453125, |
| "step": 34350 |
| }, |
| { |
| "epoch": 0.07045123607103287, |
| "grad_norm": 0.7847388386726379, |
| "learning_rate": 0.00029653590128657603, |
| "loss": 3.8658258056640626, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.0705536361234617, |
| "grad_norm": 0.9255051612854004, |
| "learning_rate": 0.0002965255619164046, |
| "loss": 3.4440853881835936, |
| "step": 34450 |
| }, |
| { |
| "epoch": 0.07065603617589052, |
| "grad_norm": 0.8334102630615234, |
| "learning_rate": 0.00029651520731993993, |
| "loss": 3.837626647949219, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.07075843622831934, |
| "grad_norm": 1.0661958456039429, |
| "learning_rate": 0.000296504837498258, |
| "loss": 4.052589111328125, |
| "step": 34550 |
| }, |
| { |
| "epoch": 0.07086083628074817, |
| "grad_norm": 0.8307774662971497, |
| "learning_rate": 0.0002964944524524363, |
| "loss": 4.152563781738281, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.070963236333177, |
| "grad_norm": 1.1851427555084229, |
| "learning_rate": 0.00029648405218355415, |
| "loss": 3.877910461425781, |
| "step": 34650 |
| }, |
| { |
| "epoch": 0.07106563638560583, |
| "grad_norm": 1.024609088897705, |
| "learning_rate": 0.0002964736366926923, |
| "loss": 2.9125543212890626, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.07116803643803465, |
| "grad_norm": 1.091864824295044, |
| "learning_rate": 0.00029646320598093295, |
| "loss": 3.8221173095703125, |
| "step": 34750 |
| }, |
| { |
| "epoch": 0.07127043649046348, |
| "grad_norm": 0.9245619177818298, |
| "learning_rate": 0.0002964527600493601, |
| "loss": 2.984726867675781, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.07137283654289231, |
| "grad_norm": 0.9920394420623779, |
| "learning_rate": 0.0002964422988990592, |
| "loss": 3.581501159667969, |
| "step": 34850 |
| }, |
| { |
| "epoch": 0.07147523659532114, |
| "grad_norm": 0.7046719789505005, |
| "learning_rate": 0.0002964318225311174, |
| "loss": 2.9280935668945314, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.07157763664774996, |
| "grad_norm": 0.7766258120536804, |
| "learning_rate": 0.0002964213309466233, |
| "loss": 2.3795321655273436, |
| "step": 34950 |
| }, |
| { |
| "epoch": 0.07168003670017879, |
| "grad_norm": 1.3687994480133057, |
| "learning_rate": 0.0002964108241466672, |
| "loss": 3.43721923828125, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.07178243675260762, |
| "grad_norm": 1.0140045881271362, |
| "learning_rate": 0.00029640030213234084, |
| "loss": 3.19546875, |
| "step": 35050 |
| }, |
| { |
| "epoch": 0.07188483680503645, |
| "grad_norm": 0.8950518369674683, |
| "learning_rate": 0.0002963897649047376, |
| "loss": 3.794825134277344, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.07198723685746528, |
| "grad_norm": 1.7291576862335205, |
| "learning_rate": 0.0002963792124649526, |
| "loss": 4.249531555175781, |
| "step": 35150 |
| }, |
| { |
| "epoch": 0.0720896369098941, |
| "grad_norm": 1.7866417169570923, |
| "learning_rate": 0.0002963686448140823, |
| "loss": 3.9559259033203125, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.07219203696232293, |
| "grad_norm": 0.9784793257713318, |
| "learning_rate": 0.0002963580619532249, |
| "loss": 3.6990866088867187, |
| "step": 35250 |
| }, |
| { |
| "epoch": 0.07229443701475176, |
| "grad_norm": 1.6409136056900024, |
| "learning_rate": 0.00029634746388348005, |
| "loss": 3.6978335571289063, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.07239683706718057, |
| "grad_norm": 1.109778642654419, |
| "learning_rate": 0.00029633685060594914, |
| "loss": 3.7638284301757814, |
| "step": 35350 |
| }, |
| { |
| "epoch": 0.0724992371196094, |
| "grad_norm": 1.3247849941253662, |
| "learning_rate": 0.000296326222121735, |
| "loss": 4.101665954589844, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.07260163717203823, |
| "grad_norm": 1.0803288221359253, |
| "learning_rate": 0.0002963155784319421, |
| "loss": 3.9325439453125, |
| "step": 35450 |
| }, |
| { |
| "epoch": 0.07270403722446706, |
| "grad_norm": 1.2640902996063232, |
| "learning_rate": 0.00029630491953767647, |
| "loss": 3.4765811157226563, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.07280643727689588, |
| "grad_norm": 1.0323841571807861, |
| "learning_rate": 0.0002962942454400458, |
| "loss": 3.8790185546875, |
| "step": 35550 |
| }, |
| { |
| "epoch": 0.07290883732932471, |
| "grad_norm": 0.9365559816360474, |
| "learning_rate": 0.0002962835561401592, |
| "loss": 3.8441122436523436, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.07301123738175354, |
| "grad_norm": 0.9189864993095398, |
| "learning_rate": 0.00029627285163912753, |
| "loss": 3.819436340332031, |
| "step": 35650 |
| }, |
| { |
| "epoch": 0.07311363743418237, |
| "grad_norm": 1.2897831201553345, |
| "learning_rate": 0.00029626213193806317, |
| "loss": 3.544706115722656, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.0732160374866112, |
| "grad_norm": 0.86373370885849, |
| "learning_rate": 0.00029625139703807996, |
| "loss": 3.7399945068359375, |
| "step": 35750 |
| }, |
| { |
| "epoch": 0.07331843753904002, |
| "grad_norm": 1.0938329696655273, |
| "learning_rate": 0.00029624064694029357, |
| "loss": 3.89250244140625, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.07342083759146885, |
| "grad_norm": 0.9408879280090332, |
| "learning_rate": 0.000296229881645821, |
| "loss": 3.056258239746094, |
| "step": 35850 |
| }, |
| { |
| "epoch": 0.07352323764389768, |
| "grad_norm": 1.1271533966064453, |
| "learning_rate": 0.0002962191011557809, |
| "loss": 3.544586181640625, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.0736256376963265, |
| "grad_norm": 1.011702537536621, |
| "learning_rate": 0.0002962083054712936, |
| "loss": 3.683125305175781, |
| "step": 35950 |
| }, |
| { |
| "epoch": 0.07372803774875533, |
| "grad_norm": 0.8757224678993225, |
| "learning_rate": 0.000296197494593481, |
| "loss": 3.3673382568359376, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.07383043780118416, |
| "grad_norm": 0.6535724997520447, |
| "learning_rate": 0.00029618666852346644, |
| "loss": 4.935340881347656, |
| "step": 36050 |
| }, |
| { |
| "epoch": 0.07393283785361299, |
| "grad_norm": 0.6584002375602722, |
| "learning_rate": 0.0002961758272623749, |
| "loss": 4.499714660644531, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.07403523790604181, |
| "grad_norm": 0.8999959230422974, |
| "learning_rate": 0.000296164970811333, |
| "loss": 4.462132568359375, |
| "step": 36150 |
| }, |
| { |
| "epoch": 0.07413763795847063, |
| "grad_norm": 1.0016320943832397, |
| "learning_rate": 0.00029615409917146886, |
| "loss": 3.6168402099609374, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.07424003801089946, |
| "grad_norm": 0.9233262538909912, |
| "learning_rate": 0.0002961432123439122, |
| "loss": 3.7079287719726564, |
| "step": 36250 |
| }, |
| { |
| "epoch": 0.07434243806332828, |
| "grad_norm": 1.2862437963485718, |
| "learning_rate": 0.0002961323103297944, |
| "loss": 3.554483642578125, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.07444483811575711, |
| "grad_norm": 1.0531319379806519, |
| "learning_rate": 0.0002961213931302483, |
| "loss": 3.9057122802734376, |
| "step": 36350 |
| }, |
| { |
| "epoch": 0.07454723816818594, |
| "grad_norm": 1.0585157871246338, |
| "learning_rate": 0.00029611046074640835, |
| "loss": 4.065590209960938, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.07464963822061477, |
| "grad_norm": 0.9923078417778015, |
| "learning_rate": 0.00029609951317941067, |
| "loss": 3.753091125488281, |
| "step": 36450 |
| }, |
| { |
| "epoch": 0.0747520382730436, |
| "grad_norm": 1.4187533855438232, |
| "learning_rate": 0.0002960885504303928, |
| "loss": 3.81512939453125, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.07485443832547242, |
| "grad_norm": 0.9602039456367493, |
| "learning_rate": 0.000296077572500494, |
| "loss": 3.3714874267578123, |
| "step": 36550 |
| }, |
| { |
| "epoch": 0.07495683837790125, |
| "grad_norm": 0.9583538770675659, |
| "learning_rate": 0.000296066579390855, |
| "loss": 4.172694702148437, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.07505923843033008, |
| "grad_norm": 0.9498001933097839, |
| "learning_rate": 0.0002960555711026182, |
| "loss": 3.799460144042969, |
| "step": 36650 |
| }, |
| { |
| "epoch": 0.0751616384827589, |
| "grad_norm": 1.037429928779602, |
| "learning_rate": 0.00029604454763692753, |
| "loss": 3.3060308837890626, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.07526403853518773, |
| "grad_norm": 0.9222440123558044, |
| "learning_rate": 0.0002960335089949284, |
| "loss": 3.724703063964844, |
| "step": 36750 |
| }, |
| { |
| "epoch": 0.07536643858761656, |
| "grad_norm": 0.891686201095581, |
| "learning_rate": 0.0002960224551777681, |
| "loss": 3.8415121459960937, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.07546883864004539, |
| "grad_norm": 1.1739381551742554, |
| "learning_rate": 0.0002960113861865951, |
| "loss": 3.6421640014648435, |
| "step": 36850 |
| }, |
| { |
| "epoch": 0.07557123869247422, |
| "grad_norm": 1.118273138999939, |
| "learning_rate": 0.0002960003020225598, |
| "loss": 4.042873229980469, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.07567363874490304, |
| "grad_norm": 1.1903246641159058, |
| "learning_rate": 0.00029598920268681387, |
| "loss": 3.7228439331054686, |
| "step": 36950 |
| }, |
| { |
| "epoch": 0.07577603879733187, |
| "grad_norm": 0.8074274063110352, |
| "learning_rate": 0.00029597808818051076, |
| "loss": 3.74279296875, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.07587843884976068, |
| "grad_norm": 0.7993521690368652, |
| "learning_rate": 0.00029596695850480547, |
| "loss": 3.5909658813476564, |
| "step": 37050 |
| }, |
| { |
| "epoch": 0.07598083890218951, |
| "grad_norm": 0.9763518571853638, |
| "learning_rate": 0.0002959558136608545, |
| "loss": 3.4760845947265624, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.07608323895461834, |
| "grad_norm": 0.9700019359588623, |
| "learning_rate": 0.000295944653649816, |
| "loss": 3.5362197875976564, |
| "step": 37150 |
| }, |
| { |
| "epoch": 0.07618563900704717, |
| "grad_norm": 0.9611456990242004, |
| "learning_rate": 0.0002959334784728497, |
| "loss": 3.392528381347656, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.076288039059476, |
| "grad_norm": 1.0106040239334106, |
| "learning_rate": 0.0002959222881311168, |
| "loss": 3.9602230834960936, |
| "step": 37250 |
| }, |
| { |
| "epoch": 0.07639043911190482, |
| "grad_norm": 0.9530378580093384, |
| "learning_rate": 0.00029591108262578023, |
| "loss": 3.755385437011719, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.07649283916433365, |
| "grad_norm": 1.2167539596557617, |
| "learning_rate": 0.0002958998619580044, |
| "loss": 3.5471917724609376, |
| "step": 37350 |
| }, |
| { |
| "epoch": 0.07659523921676248, |
| "grad_norm": 0.6693082451820374, |
| "learning_rate": 0.0002958886261289553, |
| "loss": 2.953871154785156, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.0766976392691913, |
| "grad_norm": 0.8044131398200989, |
| "learning_rate": 0.0002958773751398004, |
| "loss": 3.6775543212890627, |
| "step": 37450 |
| }, |
| { |
| "epoch": 0.07680003932162013, |
| "grad_norm": 0.9389724731445312, |
| "learning_rate": 0.00029586610899170904, |
| "loss": 3.951288757324219, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.07690243937404896, |
| "grad_norm": 0.9143916964530945, |
| "learning_rate": 0.0002958548276858519, |
| "loss": 3.773663330078125, |
| "step": 37550 |
| }, |
| { |
| "epoch": 0.07700483942647779, |
| "grad_norm": 1.3429774045944214, |
| "learning_rate": 0.0002958435312234012, |
| "loss": 3.516551818847656, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.07710723947890662, |
| "grad_norm": 0.9084817171096802, |
| "learning_rate": 0.00029583221960553086, |
| "loss": 3.5966671752929686, |
| "step": 37650 |
| }, |
| { |
| "epoch": 0.07720963953133544, |
| "grad_norm": 0.9403077363967896, |
| "learning_rate": 0.0002958208928334164, |
| "loss": 3.7960610961914063, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.07731203958376427, |
| "grad_norm": 0.9307132363319397, |
| "learning_rate": 0.0002958095509082347, |
| "loss": 3.526631164550781, |
| "step": 37750 |
| }, |
| { |
| "epoch": 0.0774144396361931, |
| "grad_norm": 1.0403499603271484, |
| "learning_rate": 0.0002957981938311645, |
| "loss": 3.657856140136719, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.07751683968862193, |
| "grad_norm": 0.8535223007202148, |
| "learning_rate": 0.00029578682160338594, |
| "loss": 3.4064453125, |
| "step": 37850 |
| }, |
| { |
| "epoch": 0.07761923974105074, |
| "grad_norm": 0.6557066440582275, |
| "learning_rate": 0.00029577543422608073, |
| "loss": 3.3173226928710937, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.07772163979347957, |
| "grad_norm": 1.1363869905471802, |
| "learning_rate": 0.0002957640317004323, |
| "loss": 3.3008172607421873, |
| "step": 37950 |
| }, |
| { |
| "epoch": 0.0778240398459084, |
| "grad_norm": 0.9868215322494507, |
| "learning_rate": 0.0002957526140276254, |
| "loss": 3.6425216674804686, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.07792643989833722, |
| "grad_norm": 0.6904926896095276, |
| "learning_rate": 0.00029574118120884657, |
| "loss": 3.5323916625976564, |
| "step": 38050 |
| }, |
| { |
| "epoch": 0.07802883995076605, |
| "grad_norm": 1.4620712995529175, |
| "learning_rate": 0.00029572973324528394, |
| "loss": 3.751639709472656, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.07813124000319488, |
| "grad_norm": 0.9893333315849304, |
| "learning_rate": 0.000295718270138127, |
| "loss": 3.4107330322265623, |
| "step": 38150 |
| }, |
| { |
| "epoch": 0.07823364005562371, |
| "grad_norm": 0.8329883217811584, |
| "learning_rate": 0.00029570679188856705, |
| "loss": 3.1873550415039062, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.07833604010805253, |
| "grad_norm": 1.5774136781692505, |
| "learning_rate": 0.00029569529849779685, |
| "loss": 3.432158203125, |
| "step": 38250 |
| }, |
| { |
| "epoch": 0.07843844016048136, |
| "grad_norm": 0.8458206057548523, |
| "learning_rate": 0.0002956837899670107, |
| "loss": 3.302817077636719, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.07854084021291019, |
| "grad_norm": 1.3035576343536377, |
| "learning_rate": 0.00029567226629740445, |
| "loss": 3.1465521240234375, |
| "step": 38350 |
| }, |
| { |
| "epoch": 0.07864324026533902, |
| "grad_norm": 0.9802455902099609, |
| "learning_rate": 0.00029566072749017574, |
| "loss": 3.3001138305664064, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.07874564031776785, |
| "grad_norm": 0.9335483312606812, |
| "learning_rate": 0.00029564917354652355, |
| "loss": 3.2266500854492186, |
| "step": 38450 |
| }, |
| { |
| "epoch": 0.07884804037019667, |
| "grad_norm": 1.0493320226669312, |
| "learning_rate": 0.0002956376044676485, |
| "loss": 3.0587277221679687, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.0789504404226255, |
| "grad_norm": 1.096993088722229, |
| "learning_rate": 0.00029562602025475285, |
| "loss": 4.07334716796875, |
| "step": 38550 |
| }, |
| { |
| "epoch": 0.07905284047505433, |
| "grad_norm": 0.7419833540916443, |
| "learning_rate": 0.0002956144209090403, |
| "loss": 3.633370056152344, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.07915524052748316, |
| "grad_norm": 1.1980725526809692, |
| "learning_rate": 0.00029560280643171633, |
| "loss": 3.7685275268554688, |
| "step": 38650 |
| }, |
| { |
| "epoch": 0.07925764057991198, |
| "grad_norm": 2.977545738220215, |
| "learning_rate": 0.00029559117682398774, |
| "loss": 3.9755072021484374, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.0793600406323408, |
| "grad_norm": 1.0279217958450317, |
| "learning_rate": 0.0002955795320870631, |
| "loss": 3.4383935546875, |
| "step": 38750 |
| }, |
| { |
| "epoch": 0.07946244068476963, |
| "grad_norm": 0.9934809803962708, |
| "learning_rate": 0.00029556787222215247, |
| "loss": 3.663726501464844, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.07956484073719845, |
| "grad_norm": 1.145448088645935, |
| "learning_rate": 0.00029555619723046746, |
| "loss": 3.51242431640625, |
| "step": 38850 |
| }, |
| { |
| "epoch": 0.07966724078962728, |
| "grad_norm": 0.6591020226478577, |
| "learning_rate": 0.00029554450711322133, |
| "loss": 3.402906494140625, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.07976964084205611, |
| "grad_norm": 0.9392556548118591, |
| "learning_rate": 0.00029553280187162876, |
| "loss": 3.1334713745117186, |
| "step": 38950 |
| }, |
| { |
| "epoch": 0.07987204089448494, |
| "grad_norm": 1.0616618394851685, |
| "learning_rate": 0.0002955210815069063, |
| "loss": 3.934781494140625, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.07997444094691376, |
| "grad_norm": 0.7064653038978577, |
| "learning_rate": 0.0002955093460202717, |
| "loss": 3.5139471435546876, |
| "step": 39050 |
| }, |
| { |
| "epoch": 0.08007684099934259, |
| "grad_norm": 0.8327042460441589, |
| "learning_rate": 0.0002954975954129445, |
| "loss": 3.4110308837890626, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.08017924105177142, |
| "grad_norm": 1.7272720336914062, |
| "learning_rate": 0.0002954858296861459, |
| "loss": 3.403736877441406, |
| "step": 39150 |
| }, |
| { |
| "epoch": 0.08028164110420025, |
| "grad_norm": 0.7907924056053162, |
| "learning_rate": 0.00029547404884109837, |
| "loss": 2.493211212158203, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.08038404115662907, |
| "grad_norm": 0.8305758237838745, |
| "learning_rate": 0.00029546225287902623, |
| "loss": 3.276422119140625, |
| "step": 39250 |
| }, |
| { |
| "epoch": 0.0804864412090579, |
| "grad_norm": 1.02776300907135, |
| "learning_rate": 0.0002954504418011552, |
| "loss": 3.471695861816406, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.08058884126148673, |
| "grad_norm": 1.1195554733276367, |
| "learning_rate": 0.0002954386156087127, |
| "loss": 3.8007080078125, |
| "step": 39350 |
| }, |
| { |
| "epoch": 0.08069124131391556, |
| "grad_norm": 1.0131675004959106, |
| "learning_rate": 0.00029542677430292755, |
| "loss": 3.80172119140625, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.08079364136634438, |
| "grad_norm": 1.0135658979415894, |
| "learning_rate": 0.0002954149178850304, |
| "loss": 3.3349169921875, |
| "step": 39450 |
| }, |
| { |
| "epoch": 0.08089604141877321, |
| "grad_norm": 0.9203832149505615, |
| "learning_rate": 0.00029540304635625316, |
| "loss": 3.532286376953125, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.08099844147120204, |
| "grad_norm": 0.9541352987289429, |
| "learning_rate": 0.0002953911597178296, |
| "loss": 3.9118218994140626, |
| "step": 39550 |
| }, |
| { |
| "epoch": 0.08110084152363085, |
| "grad_norm": 0.8768864870071411, |
| "learning_rate": 0.0002953792579709948, |
| "loss": 2.9628286743164063, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.08120324157605968, |
| "grad_norm": 0.9336755275726318, |
| "learning_rate": 0.00029536734111698567, |
| "loss": 3.8077597045898437, |
| "step": 39650 |
| }, |
| { |
| "epoch": 0.08130564162848851, |
| "grad_norm": 0.7618170380592346, |
| "learning_rate": 0.00029535540915704046, |
| "loss": 3.6045367431640627, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.08140804168091734, |
| "grad_norm": 0.7901123762130737, |
| "learning_rate": 0.0002953434620923991, |
| "loss": 3.8127349853515624, |
| "step": 39750 |
| }, |
| { |
| "epoch": 0.08151044173334616, |
| "grad_norm": 0.90858393907547, |
| "learning_rate": 0.0002953314999243032, |
| "loss": 3.246180419921875, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.08161284178577499, |
| "grad_norm": 1.1294665336608887, |
| "learning_rate": 0.00029531952265399565, |
| "loss": 3.8714260864257812, |
| "step": 39850 |
| }, |
| { |
| "epoch": 0.08171524183820382, |
| "grad_norm": 0.8707161545753479, |
| "learning_rate": 0.0002953075302827211, |
| "loss": 3.1378076171875, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.08181764189063265, |
| "grad_norm": 0.9953368902206421, |
| "learning_rate": 0.0002952955228117258, |
| "loss": 3.7785629272460937, |
| "step": 39950 |
| }, |
| { |
| "epoch": 0.08192004194306148, |
| "grad_norm": 1.116952657699585, |
| "learning_rate": 0.00029528350024225753, |
| "loss": 3.962169494628906, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.0820224419954903, |
| "grad_norm": 1.140546202659607, |
| "learning_rate": 0.0002952714625755656, |
| "loss": 4.067502136230469, |
| "step": 40050 |
| }, |
| { |
| "epoch": 0.08212484204791913, |
| "grad_norm": 0.8150861859321594, |
| "learning_rate": 0.0002952594098129008, |
| "loss": 3.7707586669921875, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.08222724210034796, |
| "grad_norm": 1.0610780715942383, |
| "learning_rate": 0.00029524734195551577, |
| "loss": 3.650087890625, |
| "step": 40150 |
| }, |
| { |
| "epoch": 0.08232964215277679, |
| "grad_norm": 1.0550084114074707, |
| "learning_rate": 0.00029523525900466453, |
| "loss": 3.259920959472656, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.08243204220520561, |
| "grad_norm": 0.9846071004867554, |
| "learning_rate": 0.00029522316096160256, |
| "loss": 3.823460998535156, |
| "step": 40250 |
| }, |
| { |
| "epoch": 0.08253444225763444, |
| "grad_norm": 0.932036280632019, |
| "learning_rate": 0.00029521104782758714, |
| "loss": 3.7148446655273437, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.08263684231006327, |
| "grad_norm": 0.7645614743232727, |
| "learning_rate": 0.00029519891960387703, |
| "loss": 2.9853546142578127, |
| "step": 40350 |
| }, |
| { |
| "epoch": 0.0827392423624921, |
| "grad_norm": 0.9208267331123352, |
| "learning_rate": 0.00029518677629173246, |
| "loss": 3.2360791015625, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.08284164241492091, |
| "grad_norm": 0.6297294497489929, |
| "learning_rate": 0.0002951746178924153, |
| "loss": 3.471868896484375, |
| "step": 40450 |
| }, |
| { |
| "epoch": 0.08294404246734974, |
| "grad_norm": 0.7016891241073608, |
| "learning_rate": 0.0002951624444071891, |
| "loss": 1.757879180908203, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.08304644251977857, |
| "grad_norm": 0.9499281048774719, |
| "learning_rate": 0.00029515025583731877, |
| "loss": 3.28075439453125, |
| "step": 40550 |
| }, |
| { |
| "epoch": 0.0831488425722074, |
| "grad_norm": 1.3156087398529053, |
| "learning_rate": 0.00029513805218407105, |
| "loss": 3.5208966064453127, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.08325124262463622, |
| "grad_norm": 0.9431557059288025, |
| "learning_rate": 0.00029512583344871383, |
| "loss": 3.794385986328125, |
| "step": 40650 |
| }, |
| { |
| "epoch": 0.08335364267706505, |
| "grad_norm": 0.9936553835868835, |
| "learning_rate": 0.0002951135996325171, |
| "loss": 3.725905456542969, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.08345604272949388, |
| "grad_norm": 0.9810524582862854, |
| "learning_rate": 0.00029510135073675196, |
| "loss": 3.7150784301757813, |
| "step": 40750 |
| }, |
| { |
| "epoch": 0.0835584427819227, |
| "grad_norm": 0.9625670313835144, |
| "learning_rate": 0.0002950890867626914, |
| "loss": 3.2056814575195314, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.08366084283435153, |
| "grad_norm": 0.8976542949676514, |
| "learning_rate": 0.0002950768077116097, |
| "loss": 3.6317849731445313, |
| "step": 40850 |
| }, |
| { |
| "epoch": 0.08376324288678036, |
| "grad_norm": 0.835045576095581, |
| "learning_rate": 0.00029506451358478293, |
| "loss": 3.50963623046875, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.08386564293920919, |
| "grad_norm": 0.9474772810935974, |
| "learning_rate": 0.0002950522043834886, |
| "loss": 3.357117919921875, |
| "step": 40950 |
| }, |
| { |
| "epoch": 0.08396804299163801, |
| "grad_norm": 0.9703244566917419, |
| "learning_rate": 0.0002950398801090059, |
| "loss": 3.6155599975585937, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.08407044304406684, |
| "grad_norm": 0.9425392746925354, |
| "learning_rate": 0.0002950275407626154, |
| "loss": 3.6346676635742186, |
| "step": 41050 |
| }, |
| { |
| "epoch": 0.08417284309649567, |
| "grad_norm": 0.8194516897201538, |
| "learning_rate": 0.00029501518634559947, |
| "loss": 3.578563232421875, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.0842752431489245, |
| "grad_norm": 0.9572771191596985, |
| "learning_rate": 0.00029500281685924186, |
| "loss": 3.8818609619140627, |
| "step": 41150 |
| }, |
| { |
| "epoch": 0.08437764320135333, |
| "grad_norm": 0.9095619320869446, |
| "learning_rate": 0.0002949904323048279, |
| "loss": 4.035207214355469, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.08448004325378215, |
| "grad_norm": 1.0001695156097412, |
| "learning_rate": 0.0002949780326836447, |
| "loss": 3.6542062377929687, |
| "step": 41250 |
| }, |
| { |
| "epoch": 0.08458244330621097, |
| "grad_norm": 0.7178096175193787, |
| "learning_rate": 0.00029496561799698064, |
| "loss": 4.071335754394531, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.0846848433586398, |
| "grad_norm": 1.246907114982605, |
| "learning_rate": 0.0002949531882461258, |
| "loss": 3.1734967041015625, |
| "step": 41350 |
| }, |
| { |
| "epoch": 0.08478724341106862, |
| "grad_norm": 0.930445671081543, |
| "learning_rate": 0.0002949407434323719, |
| "loss": 3.6632540893554686, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.08488964346349745, |
| "grad_norm": 1.0094538927078247, |
| "learning_rate": 0.0002949282835570121, |
| "loss": 3.3916510009765624, |
| "step": 41450 |
| }, |
| { |
| "epoch": 0.08499204351592628, |
| "grad_norm": 0.8062929511070251, |
| "learning_rate": 0.0002949158086213412, |
| "loss": 3.894981689453125, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.0850944435683551, |
| "grad_norm": 0.9255414009094238, |
| "learning_rate": 0.0002949033186266555, |
| "loss": 2.9597015380859375, |
| "step": 41550 |
| }, |
| { |
| "epoch": 0.08519684362078393, |
| "grad_norm": 1.0212881565093994, |
| "learning_rate": 0.00029489081357425296, |
| "loss": 3.641199951171875, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.08529924367321276, |
| "grad_norm": 1.0403817892074585, |
| "learning_rate": 0.000294878293465433, |
| "loss": 3.671487121582031, |
| "step": 41650 |
| }, |
| { |
| "epoch": 0.08540164372564159, |
| "grad_norm": 1.1286342144012451, |
| "learning_rate": 0.0002948657583014967, |
| "loss": 3.9286517333984374, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.08550404377807042, |
| "grad_norm": 0.8692294359207153, |
| "learning_rate": 0.00029485320808374666, |
| "loss": 3.6199725341796873, |
| "step": 41750 |
| }, |
| { |
| "epoch": 0.08560644383049924, |
| "grad_norm": 0.9791249632835388, |
| "learning_rate": 0.000294840642813487, |
| "loss": 4.0375192260742185, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.08570884388292807, |
| "grad_norm": 0.9460155367851257, |
| "learning_rate": 0.0002948280624920234, |
| "loss": 3.7891888427734375, |
| "step": 41850 |
| }, |
| { |
| "epoch": 0.0858112439353569, |
| "grad_norm": 0.9373695254325867, |
| "learning_rate": 0.0002948154671206633, |
| "loss": 3.796607666015625, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.08591364398778573, |
| "grad_norm": 0.9017443656921387, |
| "learning_rate": 0.0002948028567007155, |
| "loss": 3.572817077636719, |
| "step": 41950 |
| }, |
| { |
| "epoch": 0.08601604404021455, |
| "grad_norm": 1.0376108884811401, |
| "learning_rate": 0.0002947902312334904, |
| "loss": 3.006630859375, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.08611844409264338, |
| "grad_norm": 1.2481657266616821, |
| "learning_rate": 0.00029477759072029985, |
| "loss": 3.8225225830078124, |
| "step": 42050 |
| }, |
| { |
| "epoch": 0.08622084414507221, |
| "grad_norm": 0.9803751707077026, |
| "learning_rate": 0.00029476493516245766, |
| "loss": 3.7674041748046876, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.08632324419750102, |
| "grad_norm": 0.6287221908569336, |
| "learning_rate": 0.00029475226456127877, |
| "loss": 3.707611083984375, |
| "step": 42150 |
| }, |
| { |
| "epoch": 0.08642564424992985, |
| "grad_norm": 0.8981334567070007, |
| "learning_rate": 0.00029473957891807984, |
| "loss": 3.517123107910156, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.08652804430235868, |
| "grad_norm": 0.8311030864715576, |
| "learning_rate": 0.0002947268782341792, |
| "loss": 3.3404605102539064, |
| "step": 42250 |
| }, |
| { |
| "epoch": 0.0866304443547875, |
| "grad_norm": 0.9770768284797668, |
| "learning_rate": 0.00029471416251089657, |
| "loss": 3.50871337890625, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.08673284440721633, |
| "grad_norm": 0.9695789813995361, |
| "learning_rate": 0.0002947014317495534, |
| "loss": 2.9593045043945314, |
| "step": 42350 |
| }, |
| { |
| "epoch": 0.08683524445964516, |
| "grad_norm": 1.279931902885437, |
| "learning_rate": 0.0002946886859514726, |
| "loss": 3.4032522583007814, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.08693764451207399, |
| "grad_norm": 0.846924364566803, |
| "learning_rate": 0.00029467592511797853, |
| "loss": 3.4247207641601562, |
| "step": 42450 |
| }, |
| { |
| "epoch": 0.08704004456450282, |
| "grad_norm": 0.9505274891853333, |
| "learning_rate": 0.0002946631492503974, |
| "loss": 3.43554931640625, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.08714244461693164, |
| "grad_norm": 0.8762588500976562, |
| "learning_rate": 0.00029465035835005664, |
| "loss": 3.82656982421875, |
| "step": 42550 |
| }, |
| { |
| "epoch": 0.08724484466936047, |
| "grad_norm": 1.11127769947052, |
| "learning_rate": 0.0002946375524182856, |
| "loss": 3.80216796875, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.0873472447217893, |
| "grad_norm": 0.989086925983429, |
| "learning_rate": 0.00029462473145641497, |
| "loss": 3.4192626953125, |
| "step": 42650 |
| }, |
| { |
| "epoch": 0.08744964477421813, |
| "grad_norm": 1.940375804901123, |
| "learning_rate": 0.000294611895465777, |
| "loss": 3.6263909912109376, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.08755204482664695, |
| "grad_norm": 0.9889708161354065, |
| "learning_rate": 0.0002945990444477056, |
| "loss": 3.809459533691406, |
| "step": 42750 |
| }, |
| { |
| "epoch": 0.08765444487907578, |
| "grad_norm": 0.7751711010932922, |
| "learning_rate": 0.0002945861784035362, |
| "loss": 3.572983703613281, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.08775684493150461, |
| "grad_norm": 0.9419236779212952, |
| "learning_rate": 0.0002945732973346057, |
| "loss": 2.524838714599609, |
| "step": 42850 |
| }, |
| { |
| "epoch": 0.08785924498393344, |
| "grad_norm": 0.8961177468299866, |
| "learning_rate": 0.0002945604012422527, |
| "loss": 3.410054016113281, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.08796164503636227, |
| "grad_norm": 1.059244155883789, |
| "learning_rate": 0.00029454749012781733, |
| "loss": 3.40218994140625, |
| "step": 42950 |
| }, |
| { |
| "epoch": 0.08806404508879108, |
| "grad_norm": 1.054032325744629, |
| "learning_rate": 0.0002945345639926412, |
| "loss": 3.8609942626953124, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.0881664451412199, |
| "grad_norm": 1.5438601970672607, |
| "learning_rate": 0.00029452162283806764, |
| "loss": 3.6072705078125, |
| "step": 43050 |
| }, |
| { |
| "epoch": 0.08826884519364873, |
| "grad_norm": 1.3585573434829712, |
| "learning_rate": 0.0002945086666654413, |
| "loss": 2.9131259155273437, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.08837124524607756, |
| "grad_norm": 0.9829738736152649, |
| "learning_rate": 0.0002944956954761086, |
| "loss": 3.900540771484375, |
| "step": 43150 |
| }, |
| { |
| "epoch": 0.08847364529850639, |
| "grad_norm": 0.8328433036804199, |
| "learning_rate": 0.00029448270927141747, |
| "loss": 2.7704718017578127, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.08857604535093522, |
| "grad_norm": 0.9175614714622498, |
| "learning_rate": 0.0002944697080527173, |
| "loss": 3.7308444213867187, |
| "step": 43250 |
| }, |
| { |
| "epoch": 0.08867844540336405, |
| "grad_norm": 0.991267204284668, |
| "learning_rate": 0.0002944566918213592, |
| "loss": 3.726257019042969, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.08878084545579287, |
| "grad_norm": 0.8164985775947571, |
| "learning_rate": 0.0002944436605786958, |
| "loss": 3.6927761840820312, |
| "step": 43350 |
| }, |
| { |
| "epoch": 0.0888832455082217, |
| "grad_norm": 0.6943197846412659, |
| "learning_rate": 0.00029443061432608104, |
| "loss": 3.4184146118164063, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.08898564556065053, |
| "grad_norm": 0.7790623307228088, |
| "learning_rate": 0.00029441755306487086, |
| "loss": 2.976038818359375, |
| "step": 43450 |
| }, |
| { |
| "epoch": 0.08908804561307936, |
| "grad_norm": 1.6038352251052856, |
| "learning_rate": 0.00029440447679642245, |
| "loss": 3.82299560546875, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.08919044566550818, |
| "grad_norm": 0.8337633013725281, |
| "learning_rate": 0.0002943913855220946, |
| "loss": 3.1569622802734374, |
| "step": 43550 |
| }, |
| { |
| "epoch": 0.08929284571793701, |
| "grad_norm": 0.7999888062477112, |
| "learning_rate": 0.0002943782792432477, |
| "loss": 1.872430877685547, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.08939524577036584, |
| "grad_norm": 1.0902249813079834, |
| "learning_rate": 0.00029436515796124374, |
| "loss": 3.7972311401367187, |
| "step": 43650 |
| }, |
| { |
| "epoch": 0.08949764582279467, |
| "grad_norm": 1.0413930416107178, |
| "learning_rate": 0.0002943520216774462, |
| "loss": 3.2716705322265627, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.0896000458752235, |
| "grad_norm": 1.2011021375656128, |
| "learning_rate": 0.00029433887039322017, |
| "loss": 3.626478271484375, |
| "step": 43750 |
| }, |
| { |
| "epoch": 0.08970244592765232, |
| "grad_norm": 0.9314827919006348, |
| "learning_rate": 0.00029432570410993226, |
| "loss": 2.8823446655273437, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.08980484598008114, |
| "grad_norm": 0.7399948835372925, |
| "learning_rate": 0.0002943125228289507, |
| "loss": 4.1010568237304685, |
| "step": 43850 |
| }, |
| { |
| "epoch": 0.08990724603250996, |
| "grad_norm": 0.8250963687896729, |
| "learning_rate": 0.0002942993265516451, |
| "loss": 3.1346914672851565, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.09000964608493879, |
| "grad_norm": 0.9391615390777588, |
| "learning_rate": 0.00029428611527938683, |
| "loss": 3.304781188964844, |
| "step": 43950 |
| }, |
| { |
| "epoch": 0.09011204613736762, |
| "grad_norm": 1.3079005479812622, |
| "learning_rate": 0.0002942728890135488, |
| "loss": 3.67544189453125, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.09021444618979645, |
| "grad_norm": 0.8453630208969116, |
| "learning_rate": 0.0002942596477555054, |
| "loss": 3.5969857788085937, |
| "step": 44050 |
| }, |
| { |
| "epoch": 0.09031684624222527, |
| "grad_norm": 0.8319234251976013, |
| "learning_rate": 0.0002942463915066326, |
| "loss": 3.4878445434570313, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.0904192462946541, |
| "grad_norm": 0.8988983035087585, |
| "learning_rate": 0.0002942331202683078, |
| "loss": 3.648201599121094, |
| "step": 44150 |
| }, |
| { |
| "epoch": 0.09052164634708293, |
| "grad_norm": 0.9342271089553833, |
| "learning_rate": 0.00029421983404191027, |
| "loss": 3.274960632324219, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.09062404639951176, |
| "grad_norm": 0.6616138219833374, |
| "learning_rate": 0.0002942065328288206, |
| "loss": 3.499600830078125, |
| "step": 44250 |
| }, |
| { |
| "epoch": 0.09072644645194058, |
| "grad_norm": 0.5895013213157654, |
| "learning_rate": 0.00029419321663042106, |
| "loss": 3.312397155761719, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.09082884650436941, |
| "grad_norm": 0.9072443246841431, |
| "learning_rate": 0.0002941798854480952, |
| "loss": 3.4259283447265627, |
| "step": 44350 |
| }, |
| { |
| "epoch": 0.09093124655679824, |
| "grad_norm": 0.9704260230064392, |
| "learning_rate": 0.00029416653928322854, |
| "loss": 3.4393576049804686, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.09103364660922707, |
| "grad_norm": 1.2480257749557495, |
| "learning_rate": 0.0002941531781372079, |
| "loss": 3.769176025390625, |
| "step": 44450 |
| }, |
| { |
| "epoch": 0.0911360466616559, |
| "grad_norm": 0.7178895473480225, |
| "learning_rate": 0.0002941398020114217, |
| "loss": 3.466965637207031, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.09123844671408472, |
| "grad_norm": 0.870692789554596, |
| "learning_rate": 0.0002941264109072599, |
| "loss": 3.033865661621094, |
| "step": 44550 |
| }, |
| { |
| "epoch": 0.09134084676651355, |
| "grad_norm": 0.8359827399253845, |
| "learning_rate": 0.0002941130048261141, |
| "loss": 2.9268179321289063, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.09144324681894238, |
| "grad_norm": 0.8175760507583618, |
| "learning_rate": 0.0002940995837693774, |
| "loss": 3.4120257568359373, |
| "step": 44650 |
| }, |
| { |
| "epoch": 0.09154564687137119, |
| "grad_norm": 0.9331879615783691, |
| "learning_rate": 0.00029408614773844435, |
| "loss": 3.2513809204101562, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.09164804692380002, |
| "grad_norm": 1.0784333944320679, |
| "learning_rate": 0.0002940726967347113, |
| "loss": 3.6439849853515627, |
| "step": 44750 |
| }, |
| { |
| "epoch": 0.09175044697622885, |
| "grad_norm": 1.1995110511779785, |
| "learning_rate": 0.000294059230759576, |
| "loss": 3.4360501098632814, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.09185284702865767, |
| "grad_norm": 1.0588244199752808, |
| "learning_rate": 0.0002940457498144377, |
| "loss": 3.7745849609375, |
| "step": 44850 |
| }, |
| { |
| "epoch": 0.0919552470810865, |
| "grad_norm": 0.8767450451850891, |
| "learning_rate": 0.0002940322539006973, |
| "loss": 3.844217529296875, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.09205764713351533, |
| "grad_norm": 1.0808109045028687, |
| "learning_rate": 0.00029401874301975727, |
| "loss": 3.6274505615234376, |
| "step": 44950 |
| }, |
| { |
| "epoch": 0.09216004718594416, |
| "grad_norm": 0.7221155762672424, |
| "learning_rate": 0.00029400521717302166, |
| "loss": 3.7380535888671873, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.09226244723837299, |
| "grad_norm": 0.847489595413208, |
| "learning_rate": 0.0002939916763618958, |
| "loss": 3.911776428222656, |
| "step": 45050 |
| }, |
| { |
| "epoch": 0.09236484729080181, |
| "grad_norm": 0.9442451596260071, |
| "learning_rate": 0.00029397812058778707, |
| "loss": 3.614713439941406, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.09246724734323064, |
| "grad_norm": 0.8224995136260986, |
| "learning_rate": 0.0002939645498521039, |
| "loss": 2.9758087158203126, |
| "step": 45150 |
| }, |
| { |
| "epoch": 0.09256964739565947, |
| "grad_norm": 0.5964682698249817, |
| "learning_rate": 0.0002939509641562567, |
| "loss": 1.742069854736328, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.0926720474480883, |
| "grad_norm": 0.5689894556999207, |
| "learning_rate": 0.000293937363501657, |
| "loss": 2.5378482055664064, |
| "step": 45250 |
| }, |
| { |
| "epoch": 0.09277444750051712, |
| "grad_norm": 1.1072094440460205, |
| "learning_rate": 0.00029392374788971833, |
| "loss": 3.6468490600585937, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.09287684755294595, |
| "grad_norm": 0.8903659582138062, |
| "learning_rate": 0.0002939101173218555, |
| "loss": 3.3196234130859374, |
| "step": 45350 |
| }, |
| { |
| "epoch": 0.09297924760537478, |
| "grad_norm": 0.7800249457359314, |
| "learning_rate": 0.0002938964717994849, |
| "loss": 3.895177001953125, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.0930816476578036, |
| "grad_norm": 0.8999978303909302, |
| "learning_rate": 0.00029388281132402454, |
| "loss": 3.9075274658203125, |
| "step": 45450 |
| }, |
| { |
| "epoch": 0.09318404771023243, |
| "grad_norm": 0.8825941681861877, |
| "learning_rate": 0.00029386913589689393, |
| "loss": 3.787184753417969, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.09328644776266125, |
| "grad_norm": 0.8387885689735413, |
| "learning_rate": 0.0002938554455195142, |
| "loss": 3.0614547729492188, |
| "step": 45550 |
| }, |
| { |
| "epoch": 0.09338884781509008, |
| "grad_norm": 0.8926045894622803, |
| "learning_rate": 0.000293841740193308, |
| "loss": 3.60509765625, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.0934912478675189, |
| "grad_norm": 0.8237414956092834, |
| "learning_rate": 0.00029382801991969945, |
| "loss": 3.7173092651367186, |
| "step": 45650 |
| }, |
| { |
| "epoch": 0.09359364791994773, |
| "grad_norm": 0.9613683223724365, |
| "learning_rate": 0.0002938142847001144, |
| "loss": 3.6430303955078127, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.09369604797237656, |
| "grad_norm": 0.6676517724990845, |
| "learning_rate": 0.0002938005345359801, |
| "loss": 3.367287292480469, |
| "step": 45750 |
| }, |
| { |
| "epoch": 0.09379844802480539, |
| "grad_norm": 1.0207992792129517, |
| "learning_rate": 0.0002937867694287254, |
| "loss": 3.720477294921875, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.09390084807723421, |
| "grad_norm": 1.102271318435669, |
| "learning_rate": 0.00029377298937978077, |
| "loss": 3.620904846191406, |
| "step": 45850 |
| }, |
| { |
| "epoch": 0.09400324812966304, |
| "grad_norm": 0.7163046598434448, |
| "learning_rate": 0.0002937591943905781, |
| "loss": 2.606007080078125, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.09410564818209187, |
| "grad_norm": 0.4335630238056183, |
| "learning_rate": 0.0002937453844625509, |
| "loss": 1.4070957946777343, |
| "step": 45950 |
| }, |
| { |
| "epoch": 0.0942080482345207, |
| "grad_norm": 0.6361094117164612, |
| "learning_rate": 0.0002937315595971343, |
| "loss": 3.254652099609375, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.09431044828694952, |
| "grad_norm": 0.8255375027656555, |
| "learning_rate": 0.0002937177197957649, |
| "loss": 3.422788391113281, |
| "step": 46050 |
| }, |
| { |
| "epoch": 0.09441284833937835, |
| "grad_norm": 0.5633572340011597, |
| "learning_rate": 0.0002937038650598809, |
| "loss": 3.6044976806640623, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.09451524839180718, |
| "grad_norm": 1.2230876684188843, |
| "learning_rate": 0.0002936899953909219, |
| "loss": 3.22015869140625, |
| "step": 46150 |
| }, |
| { |
| "epoch": 0.09461764844423601, |
| "grad_norm": 0.8899142742156982, |
| "learning_rate": 0.0002936761107903293, |
| "loss": 4.0206103515625, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.09472004849666484, |
| "grad_norm": 0.9843432307243347, |
| "learning_rate": 0.00029366221125954586, |
| "loss": 3.70310791015625, |
| "step": 46250 |
| }, |
| { |
| "epoch": 0.09482244854909366, |
| "grad_norm": 0.9883196353912354, |
| "learning_rate": 0.000293648296800016, |
| "loss": 3.84428466796875, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.09492484860152249, |
| "grad_norm": 0.8828408718109131, |
| "learning_rate": 0.0002936343674131856, |
| "loss": 3.496847839355469, |
| "step": 46350 |
| }, |
| { |
| "epoch": 0.0950272486539513, |
| "grad_norm": 0.6513479351997375, |
| "learning_rate": 0.0002936204231005023, |
| "loss": 3.3328936767578123, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.09512964870638013, |
| "grad_norm": 0.9128335118293762, |
| "learning_rate": 0.0002936064638634149, |
| "loss": 2.726371154785156, |
| "step": 46450 |
| }, |
| { |
| "epoch": 0.09523204875880896, |
| "grad_norm": 0.9786936044692993, |
| "learning_rate": 0.00029359248970337406, |
| "loss": 3.190602111816406, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.09533444881123779, |
| "grad_norm": 0.8290608525276184, |
| "learning_rate": 0.00029357850062183203, |
| "loss": 3.8881295776367186, |
| "step": 46550 |
| }, |
| { |
| "epoch": 0.09543684886366662, |
| "grad_norm": 0.9058592319488525, |
| "learning_rate": 0.0002935644966202424, |
| "loss": 3.583518371582031, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.09553924891609544, |
| "grad_norm": 1.0789927244186401, |
| "learning_rate": 0.00029355047770006034, |
| "loss": 3.5978643798828127, |
| "step": 46650 |
| }, |
| { |
| "epoch": 0.09564164896852427, |
| "grad_norm": 0.9313961863517761, |
| "learning_rate": 0.00029353644386274273, |
| "loss": 3.6306307983398436, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.0957440490209531, |
| "grad_norm": 0.9315224289894104, |
| "learning_rate": 0.00029352239510974787, |
| "loss": 3.5369802856445314, |
| "step": 46750 |
| }, |
| { |
| "epoch": 0.09584644907338193, |
| "grad_norm": 0.8773080110549927, |
| "learning_rate": 0.0002935083314425357, |
| "loss": 3.766584777832031, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.09594884912581075, |
| "grad_norm": 0.8457773923873901, |
| "learning_rate": 0.00029349425286256763, |
| "loss": 3.876020812988281, |
| "step": 46850 |
| }, |
| { |
| "epoch": 0.09605124917823958, |
| "grad_norm": 0.9530948400497437, |
| "learning_rate": 0.00029348015937130656, |
| "loss": 3.862485046386719, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.09615364923066841, |
| "grad_norm": 1.1303527355194092, |
| "learning_rate": 0.0002934660509702171, |
| "loss": 2.8852374267578127, |
| "step": 46950 |
| }, |
| { |
| "epoch": 0.09625604928309724, |
| "grad_norm": 1.0009031295776367, |
| "learning_rate": 0.0002934519276607653, |
| "loss": 3.7352252197265625, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.09635844933552606, |
| "grad_norm": 0.5930376052856445, |
| "learning_rate": 0.00029343778944441887, |
| "loss": 2.9531982421875, |
| "step": 47050 |
| }, |
| { |
| "epoch": 0.09646084938795489, |
| "grad_norm": 0.8898753523826599, |
| "learning_rate": 0.0002934236363226469, |
| "loss": 3.4945404052734377, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.09656324944038372, |
| "grad_norm": 1.1294547319412231, |
| "learning_rate": 0.00029340946829692013, |
| "loss": 3.6753500366210936, |
| "step": 47150 |
| }, |
| { |
| "epoch": 0.09666564949281255, |
| "grad_norm": 1.0108319520950317, |
| "learning_rate": 0.00029339528536871087, |
| "loss": 3.531564025878906, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.09676804954524136, |
| "grad_norm": 1.2252336740493774, |
| "learning_rate": 0.00029338108753949296, |
| "loss": 3.4618963623046874, |
| "step": 47250 |
| }, |
| { |
| "epoch": 0.09687044959767019, |
| "grad_norm": 0.975235641002655, |
| "learning_rate": 0.0002933668748107418, |
| "loss": 3.814194641113281, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.09697284965009902, |
| "grad_norm": 0.9449312090873718, |
| "learning_rate": 0.00029335264718393424, |
| "loss": 3.584350891113281, |
| "step": 47350 |
| }, |
| { |
| "epoch": 0.09707524970252784, |
| "grad_norm": 0.677931010723114, |
| "learning_rate": 0.00029333840466054875, |
| "loss": 3.3688113403320314, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.09717764975495667, |
| "grad_norm": 0.8441532254219055, |
| "learning_rate": 0.0002933241472420654, |
| "loss": 3.5264968872070312, |
| "step": 47450 |
| }, |
| { |
| "epoch": 0.0972800498073855, |
| "grad_norm": 1.312727451324463, |
| "learning_rate": 0.0002933098749299657, |
| "loss": 3.4481561279296873, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.09738244985981433, |
| "grad_norm": 1.3327820301055908, |
| "learning_rate": 0.0002932955877257329, |
| "loss": 3.394440002441406, |
| "step": 47550 |
| }, |
| { |
| "epoch": 0.09748484991224315, |
| "grad_norm": 0.9248800277709961, |
| "learning_rate": 0.00029328128563085154, |
| "loss": 3.8233456420898437, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.09758724996467198, |
| "grad_norm": 0.8401134014129639, |
| "learning_rate": 0.00029326696864680787, |
| "loss": 3.518874206542969, |
| "step": 47650 |
| }, |
| { |
| "epoch": 0.09768965001710081, |
| "grad_norm": 1.0612273216247559, |
| "learning_rate": 0.0002932526367750896, |
| "loss": 3.5591195678710936, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.09779205006952964, |
| "grad_norm": 0.7437798380851746, |
| "learning_rate": 0.00029323829001718613, |
| "loss": 3.0408529663085937, |
| "step": 47750 |
| }, |
| { |
| "epoch": 0.09789445012195847, |
| "grad_norm": 0.8572849631309509, |
| "learning_rate": 0.0002932239283745882, |
| "loss": 3.37136474609375, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.09799685017438729, |
| "grad_norm": 1.062315583229065, |
| "learning_rate": 0.0002932095518487883, |
| "loss": 3.3033380126953125, |
| "step": 47850 |
| }, |
| { |
| "epoch": 0.09809925022681612, |
| "grad_norm": 1.2052414417266846, |
| "learning_rate": 0.0002931951604412804, |
| "loss": 2.8181661987304687, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.09820165027924495, |
| "grad_norm": 1.1409345865249634, |
| "learning_rate": 0.00029318075415355984, |
| "loss": 3.9486019897460936, |
| "step": 47950 |
| }, |
| { |
| "epoch": 0.09830405033167378, |
| "grad_norm": 1.0399415493011475, |
| "learning_rate": 0.0002931663329871238, |
| "loss": 3.3418069458007813, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.0984064503841026, |
| "grad_norm": 0.919865608215332, |
| "learning_rate": 0.0002931518969434708, |
| "loss": 3.688287658691406, |
| "step": 48050 |
| }, |
| { |
| "epoch": 0.09850885043653142, |
| "grad_norm": 0.994552493095398, |
| "learning_rate": 0.000293137446024101, |
| "loss": 3.589768371582031, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.09861125048896024, |
| "grad_norm": 0.9309687614440918, |
| "learning_rate": 0.00029312298023051605, |
| "loss": 3.7281314086914064, |
| "step": 48150 |
| }, |
| { |
| "epoch": 0.09871365054138907, |
| "grad_norm": 1.0229836702346802, |
| "learning_rate": 0.0002931084995642192, |
| "loss": 3.7916598510742188, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.0988160505938179, |
| "grad_norm": 0.7249611616134644, |
| "learning_rate": 0.0002930940040267152, |
| "loss": 3.300664367675781, |
| "step": 48250 |
| }, |
| { |
| "epoch": 0.09891845064624673, |
| "grad_norm": 1.0371336936950684, |
| "learning_rate": 0.0002930794936195104, |
| "loss": 3.6068963623046875, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.09902085069867556, |
| "grad_norm": 1.082552194595337, |
| "learning_rate": 0.0002930649683441126, |
| "loss": 3.424382629394531, |
| "step": 48350 |
| }, |
| { |
| "epoch": 0.09912325075110438, |
| "grad_norm": 1.1194738149642944, |
| "learning_rate": 0.0002930504282020312, |
| "loss": 3.5506494140625, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.09922565080353321, |
| "grad_norm": 0.8479589223861694, |
| "learning_rate": 0.00029303587319477715, |
| "loss": 3.7008261108398437, |
| "step": 48450 |
| }, |
| { |
| "epoch": 0.09932805085596204, |
| "grad_norm": 1.6099497079849243, |
| "learning_rate": 0.00029302130332386307, |
| "loss": 3.1615875244140623, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.09943045090839087, |
| "grad_norm": 0.9683935046195984, |
| "learning_rate": 0.00029300671859080275, |
| "loss": 3.681039123535156, |
| "step": 48550 |
| }, |
| { |
| "epoch": 0.0995328509608197, |
| "grad_norm": 1.2820624113082886, |
| "learning_rate": 0.000292992118997112, |
| "loss": 3.5511508178710938, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.09963525101324852, |
| "grad_norm": 0.9168037176132202, |
| "learning_rate": 0.00029297750454430785, |
| "loss": 3.657781677246094, |
| "step": 48650 |
| }, |
| { |
| "epoch": 0.09973765106567735, |
| "grad_norm": 4.044058322906494, |
| "learning_rate": 0.000292962875233909, |
| "loss": 3.3354425048828125, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.09984005111810618, |
| "grad_norm": 1.3989704847335815, |
| "learning_rate": 0.00029294823106743565, |
| "loss": 3.1698623657226563, |
| "step": 48750 |
| }, |
| { |
| "epoch": 0.099942451170535, |
| "grad_norm": 1.0335566997528076, |
| "learning_rate": 0.00029293357204640953, |
| "loss": 2.8218838500976564, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.10004485122296383, |
| "grad_norm": 0.9593238234519958, |
| "learning_rate": 0.00029291889817235396, |
| "loss": 3.712968444824219, |
| "step": 48850 |
| }, |
| { |
| "epoch": 0.10014725127539266, |
| "grad_norm": 0.8883773684501648, |
| "learning_rate": 0.0002929042094467938, |
| "loss": 3.7107012939453123, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.10024965132782147, |
| "grad_norm": 0.817356526851654, |
| "learning_rate": 0.00029288950587125543, |
| "loss": 3.442810363769531, |
| "step": 48950 |
| }, |
| { |
| "epoch": 0.1003520513802503, |
| "grad_norm": 0.7103580236434937, |
| "learning_rate": 0.0002928747874472667, |
| "loss": 3.518086242675781, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.10045445143267913, |
| "grad_norm": 0.9503324031829834, |
| "learning_rate": 0.0002928600541763573, |
| "loss": 3.3786196899414063, |
| "step": 49050 |
| }, |
| { |
| "epoch": 0.10055685148510796, |
| "grad_norm": 1.2780108451843262, |
| "learning_rate": 0.000292845306060058, |
| "loss": 3.57686767578125, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.10065925153753678, |
| "grad_norm": 0.7724966406822205, |
| "learning_rate": 0.0002928305430999015, |
| "loss": 3.4349874877929687, |
| "step": 49150 |
| }, |
| { |
| "epoch": 0.10076165158996561, |
| "grad_norm": 0.73604816198349, |
| "learning_rate": 0.0002928157652974219, |
| "loss": 2.7600396728515624, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.10086405164239444, |
| "grad_norm": 1.0744940042495728, |
| "learning_rate": 0.00029280097265415477, |
| "loss": 3.0249954223632813, |
| "step": 49250 |
| }, |
| { |
| "epoch": 0.10096645169482327, |
| "grad_norm": 1.065955400466919, |
| "learning_rate": 0.0002927861651716373, |
| "loss": 3.8623785400390624, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.1010688517472521, |
| "grad_norm": 0.9593607783317566, |
| "learning_rate": 0.00029277134285140833, |
| "loss": 3.2714468383789064, |
| "step": 49350 |
| }, |
| { |
| "epoch": 0.10117125179968092, |
| "grad_norm": 0.875238835811615, |
| "learning_rate": 0.00029275650569500803, |
| "loss": 3.738236999511719, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.10127365185210975, |
| "grad_norm": 0.6924005150794983, |
| "learning_rate": 0.00029274165370397827, |
| "loss": 3.841283874511719, |
| "step": 49450 |
| }, |
| { |
| "epoch": 0.10137605190453858, |
| "grad_norm": 0.8927252292633057, |
| "learning_rate": 0.00029272678687986236, |
| "loss": 3.7357077026367187, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.1014784519569674, |
| "grad_norm": 0.9974352121353149, |
| "learning_rate": 0.0002927119052242052, |
| "loss": 3.653075866699219, |
| "step": 49550 |
| }, |
| { |
| "epoch": 0.10158085200939623, |
| "grad_norm": 0.9928423762321472, |
| "learning_rate": 0.00029269700873855325, |
| "loss": 3.923564147949219, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.10168325206182506, |
| "grad_norm": 0.727768063545227, |
| "learning_rate": 0.0002926820974244544, |
| "loss": 3.398980712890625, |
| "step": 49650 |
| }, |
| { |
| "epoch": 0.10178565211425389, |
| "grad_norm": 0.5926229357719421, |
| "learning_rate": 0.00029266717128345837, |
| "loss": 2.8432931518554687, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.10188805216668272, |
| "grad_norm": 0.7883087992668152, |
| "learning_rate": 0.000292652230317116, |
| "loss": 3.0143255615234374, |
| "step": 49750 |
| }, |
| { |
| "epoch": 0.10199045221911153, |
| "grad_norm": 0.7131246328353882, |
| "learning_rate": 0.00029263727452698, |
| "loss": 2.733319091796875, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.10209285227154036, |
| "grad_norm": 0.6186954379081726, |
| "learning_rate": 0.0002926223039146045, |
| "loss": 2.7473519897460936, |
| "step": 49850 |
| }, |
| { |
| "epoch": 0.10219525232396919, |
| "grad_norm": 1.1395238637924194, |
| "learning_rate": 0.0002926073184815452, |
| "loss": 3.2758560180664062, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.10229765237639801, |
| "grad_norm": 0.9928449988365173, |
| "learning_rate": 0.0002925923182293592, |
| "loss": 4.000916442871094, |
| "step": 49950 |
| }, |
| { |
| "epoch": 0.10240005242882684, |
| "grad_norm": 2.3027689456939697, |
| "learning_rate": 0.00029257730315960547, |
| "loss": 3.35286865234375, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.10250245248125567, |
| "grad_norm": 0.7039443254470825, |
| "learning_rate": 0.0002925622732738441, |
| "loss": 3.695789794921875, |
| "step": 50050 |
| }, |
| { |
| "epoch": 0.1026048525336845, |
| "grad_norm": 0.8762661218643188, |
| "learning_rate": 0.00029254722857363706, |
| "loss": 3.2338931274414064, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.10270725258611332, |
| "grad_norm": 1.2041362524032593, |
| "learning_rate": 0.00029253216906054765, |
| "loss": 3.807637023925781, |
| "step": 50150 |
| }, |
| { |
| "epoch": 0.10280965263854215, |
| "grad_norm": 0.7898900508880615, |
| "learning_rate": 0.0002925170947361409, |
| "loss": 3.60753662109375, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.10291205269097098, |
| "grad_norm": 1.3529953956604004, |
| "learning_rate": 0.00029250200560198316, |
| "loss": 3.5016552734375, |
| "step": 50250 |
| }, |
| { |
| "epoch": 0.1030144527433998, |
| "grad_norm": 1.0065248012542725, |
| "learning_rate": 0.00029248690165964246, |
| "loss": 3.634730224609375, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.10311685279582863, |
| "grad_norm": 0.8603098392486572, |
| "learning_rate": 0.00029247178291068836, |
| "loss": 3.8783328247070314, |
| "step": 50350 |
| }, |
| { |
| "epoch": 0.10321925284825746, |
| "grad_norm": 0.9008740186691284, |
| "learning_rate": 0.00029245664935669186, |
| "loss": 3.682059631347656, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.10332165290068629, |
| "grad_norm": 1.109923243522644, |
| "learning_rate": 0.00029244150099922567, |
| "loss": 3.8022805786132814, |
| "step": 50450 |
| }, |
| { |
| "epoch": 0.10342405295311512, |
| "grad_norm": 0.9621108770370483, |
| "learning_rate": 0.0002924263378398639, |
| "loss": 4.338629455566406, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.10352645300554394, |
| "grad_norm": 0.8833173513412476, |
| "learning_rate": 0.00029241115988018224, |
| "loss": 3.44856689453125, |
| "step": 50550 |
| }, |
| { |
| "epoch": 0.10362885305797277, |
| "grad_norm": 0.8892256617546082, |
| "learning_rate": 0.0002923959671217579, |
| "loss": 3.9488174438476564, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.10373125311040159, |
| "grad_norm": 0.8669362664222717, |
| "learning_rate": 0.00029238075956616963, |
| "loss": 3.4380224609375, |
| "step": 50650 |
| }, |
| { |
| "epoch": 0.10383365316283041, |
| "grad_norm": 0.8463394045829773, |
| "learning_rate": 0.0002923655372149978, |
| "loss": 3.5007855224609377, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.10393605321525924, |
| "grad_norm": 1.0633169412612915, |
| "learning_rate": 0.00029235030006982416, |
| "loss": 3.4543692016601564, |
| "step": 50750 |
| }, |
| { |
| "epoch": 0.10403845326768807, |
| "grad_norm": 1.0498319864273071, |
| "learning_rate": 0.0002923350481322322, |
| "loss": 3.207664794921875, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.1041408533201169, |
| "grad_norm": 1.1241377592086792, |
| "learning_rate": 0.00029231978140380676, |
| "loss": 3.3383258056640623, |
| "step": 50850 |
| }, |
| { |
| "epoch": 0.10424325337254572, |
| "grad_norm": 0.990468442440033, |
| "learning_rate": 0.0002923044998861343, |
| "loss": 3.237965393066406, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.10434565342497455, |
| "grad_norm": 1.204306721687317, |
| "learning_rate": 0.0002922892035808027, |
| "loss": 3.101645812988281, |
| "step": 50950 |
| }, |
| { |
| "epoch": 0.10444805347740338, |
| "grad_norm": 0.9017521739006042, |
| "learning_rate": 0.00029227389248940173, |
| "loss": 2.567582702636719, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.10455045352983221, |
| "grad_norm": 1.0600897073745728, |
| "learning_rate": 0.00029225856661352226, |
| "loss": 3.3536370849609374, |
| "step": 51050 |
| }, |
| { |
| "epoch": 0.10465285358226104, |
| "grad_norm": 0.9761303663253784, |
| "learning_rate": 0.00029224322595475694, |
| "loss": 3.4230682373046877, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.10475525363468986, |
| "grad_norm": 0.7504201531410217, |
| "learning_rate": 0.0002922278705147, |
| "loss": 3.14410888671875, |
| "step": 51150 |
| }, |
| { |
| "epoch": 0.10485765368711869, |
| "grad_norm": 1.2177993059158325, |
| "learning_rate": 0.00029221250029494694, |
| "loss": 3.2004080200195313, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.10496005373954752, |
| "grad_norm": 0.744257926940918, |
| "learning_rate": 0.000292197115297095, |
| "loss": 3.388042297363281, |
| "step": 51250 |
| }, |
| { |
| "epoch": 0.10506245379197635, |
| "grad_norm": 0.8854607939720154, |
| "learning_rate": 0.000292181715522743, |
| "loss": 3.436142578125, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.10516485384440517, |
| "grad_norm": 1.030616283416748, |
| "learning_rate": 0.00029216630097349125, |
| "loss": 3.2875115966796873, |
| "step": 51350 |
| }, |
| { |
| "epoch": 0.105267253896834, |
| "grad_norm": 0.8778656721115112, |
| "learning_rate": 0.00029215087165094145, |
| "loss": 3.6679806518554687, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.10536965394926283, |
| "grad_norm": 0.6675243377685547, |
| "learning_rate": 0.000292135427556697, |
| "loss": 2.4680940246582033, |
| "step": 51450 |
| }, |
| { |
| "epoch": 0.10547205400169164, |
| "grad_norm": 0.7153152227401733, |
| "learning_rate": 0.0002921199686923628, |
| "loss": 2.0736355590820312, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.10557445405412047, |
| "grad_norm": 0.9837950468063354, |
| "learning_rate": 0.0002921044950595452, |
| "loss": 3.5398410034179686, |
| "step": 51550 |
| }, |
| { |
| "epoch": 0.1056768541065493, |
| "grad_norm": 0.9022551774978638, |
| "learning_rate": 0.00029208900665985213, |
| "loss": 3.0752154541015626, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.10577925415897813, |
| "grad_norm": 0.802068293094635, |
| "learning_rate": 0.0002920735034948932, |
| "loss": 3.4963739013671873, |
| "step": 51650 |
| }, |
| { |
| "epoch": 0.10588165421140695, |
| "grad_norm": 0.8395520448684692, |
| "learning_rate": 0.00029205798556627944, |
| "loss": 3.05790771484375, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.10598405426383578, |
| "grad_norm": 0.8915894627571106, |
| "learning_rate": 0.0002920424528756233, |
| "loss": 3.4001889038085937, |
| "step": 51750 |
| }, |
| { |
| "epoch": 0.10608645431626461, |
| "grad_norm": 0.6118106842041016, |
| "learning_rate": 0.00029202690542453886, |
| "loss": 2.7612185668945313, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.10618885436869344, |
| "grad_norm": 0.8468356132507324, |
| "learning_rate": 0.00029201134321464177, |
| "loss": 3.045502014160156, |
| "step": 51850 |
| }, |
| { |
| "epoch": 0.10629125442112226, |
| "grad_norm": 0.837311327457428, |
| "learning_rate": 0.00029199576624754927, |
| "loss": 2.9287734985351563, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.10639365447355109, |
| "grad_norm": 0.8536468744277954, |
| "learning_rate": 0.00029198017452487996, |
| "loss": 3.28405517578125, |
| "step": 51950 |
| }, |
| { |
| "epoch": 0.10649605452597992, |
| "grad_norm": 0.5843203067779541, |
| "learning_rate": 0.0002919645680482541, |
| "loss": 3.158900146484375, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.10659845457840875, |
| "grad_norm": 0.8552372455596924, |
| "learning_rate": 0.0002919489468192934, |
| "loss": 2.7685336303710937, |
| "step": 52050 |
| }, |
| { |
| "epoch": 0.10670085463083757, |
| "grad_norm": 0.9539399743080139, |
| "learning_rate": 0.00029193331083962127, |
| "loss": 3.3179580688476564, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.1068032546832664, |
| "grad_norm": 0.9066966772079468, |
| "learning_rate": 0.00029191766011086234, |
| "loss": 3.2776177978515624, |
| "step": 52150 |
| }, |
| { |
| "epoch": 0.10690565473569523, |
| "grad_norm": 1.242811918258667, |
| "learning_rate": 0.0002919019946346431, |
| "loss": 3.0137930297851563, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.10700805478812406, |
| "grad_norm": 0.7365036606788635, |
| "learning_rate": 0.0002918863144125915, |
| "loss": 3.2038583374023437, |
| "step": 52250 |
| }, |
| { |
| "epoch": 0.10711045484055289, |
| "grad_norm": 1.0112019777297974, |
| "learning_rate": 0.00029187061944633674, |
| "loss": 3.0209481811523435, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.1072128548929817, |
| "grad_norm": 0.9452877640724182, |
| "learning_rate": 0.00029185490973751, |
| "loss": 3.137978210449219, |
| "step": 52350 |
| }, |
| { |
| "epoch": 0.10731525494541053, |
| "grad_norm": 0.6544405817985535, |
| "learning_rate": 0.0002918391852877436, |
| "loss": 3.417147521972656, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.10741765499783935, |
| "grad_norm": 0.5501437783241272, |
| "learning_rate": 0.0002918234460986717, |
| "loss": 2.9117431640625, |
| "step": 52450 |
| }, |
| { |
| "epoch": 0.10752005505026818, |
| "grad_norm": 0.8474003076553345, |
| "learning_rate": 0.0002918076921719297, |
| "loss": 3.441578063964844, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.10762245510269701, |
| "grad_norm": 1.1429380178451538, |
| "learning_rate": 0.0002917919235091548, |
| "loss": 3.6524429321289062, |
| "step": 52550 |
| }, |
| { |
| "epoch": 0.10772485515512584, |
| "grad_norm": 0.6069464683532715, |
| "learning_rate": 0.0002917761401119855, |
| "loss": 3.335174865722656, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.10782725520755466, |
| "grad_norm": 0.880016028881073, |
| "learning_rate": 0.00029176034198206204, |
| "loss": 3.1002215576171874, |
| "step": 52650 |
| }, |
| { |
| "epoch": 0.10792965525998349, |
| "grad_norm": 0.8357744812965393, |
| "learning_rate": 0.000291744529121026, |
| "loss": 3.146689147949219, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.10803205531241232, |
| "grad_norm": 1.0419034957885742, |
| "learning_rate": 0.0002917287015305207, |
| "loss": 2.675668029785156, |
| "step": 52750 |
| }, |
| { |
| "epoch": 0.10813445536484115, |
| "grad_norm": 1.0537099838256836, |
| "learning_rate": 0.0002917128592121908, |
| "loss": 3.45556640625, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.10823685541726998, |
| "grad_norm": 0.8088594675064087, |
| "learning_rate": 0.0002916970021676825, |
| "loss": 3.272125244140625, |
| "step": 52850 |
| }, |
| { |
| "epoch": 0.1083392554696988, |
| "grad_norm": 0.9163171648979187, |
| "learning_rate": 0.0002916811303986437, |
| "loss": 3.303933410644531, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.10844165552212763, |
| "grad_norm": 1.4584760665893555, |
| "learning_rate": 0.00029166524390672374, |
| "loss": 3.420548095703125, |
| "step": 52950 |
| }, |
| { |
| "epoch": 0.10854405557455646, |
| "grad_norm": 0.9255147576332092, |
| "learning_rate": 0.0002916493426935734, |
| "loss": 3.422330017089844, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.10864645562698529, |
| "grad_norm": 0.7951823472976685, |
| "learning_rate": 0.0002916334267608451, |
| "loss": 3.435027770996094, |
| "step": 53050 |
| }, |
| { |
| "epoch": 0.10874885567941411, |
| "grad_norm": 0.8804548978805542, |
| "learning_rate": 0.00029161749611019273, |
| "loss": 3.147249755859375, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.10885125573184294, |
| "grad_norm": 0.6578207015991211, |
| "learning_rate": 0.00029160155074327174, |
| "loss": 3.2707638549804687, |
| "step": 53150 |
| }, |
| { |
| "epoch": 0.10895365578427176, |
| "grad_norm": 0.9386240243911743, |
| "learning_rate": 0.0002915855906617391, |
| "loss": 3.227253723144531, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.10905605583670058, |
| "grad_norm": 1.0355428457260132, |
| "learning_rate": 0.00029156961586725334, |
| "loss": 3.180726318359375, |
| "step": 53250 |
| }, |
| { |
| "epoch": 0.10915845588912941, |
| "grad_norm": 0.7636610269546509, |
| "learning_rate": 0.0002915536263614745, |
| "loss": 3.335189208984375, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.10926085594155824, |
| "grad_norm": 0.8508041501045227, |
| "learning_rate": 0.0002915376221460641, |
| "loss": 3.4625811767578125, |
| "step": 53350 |
| }, |
| { |
| "epoch": 0.10936325599398707, |
| "grad_norm": 0.8359129428863525, |
| "learning_rate": 0.0002915216032226852, |
| "loss": 3.2652053833007812, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.1094656560464159, |
| "grad_norm": 0.7385704517364502, |
| "learning_rate": 0.0002915055695930025, |
| "loss": 3.322744140625, |
| "step": 53450 |
| }, |
| { |
| "epoch": 0.10956805609884472, |
| "grad_norm": 0.8769361972808838, |
| "learning_rate": 0.0002914895212586821, |
| "loss": 2.853853454589844, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.10967045615127355, |
| "grad_norm": 0.8474721908569336, |
| "learning_rate": 0.00029147345822139165, |
| "loss": 3.6555825805664064, |
| "step": 53550 |
| }, |
| { |
| "epoch": 0.10977285620370238, |
| "grad_norm": 1.0334205627441406, |
| "learning_rate": 0.0002914573804828004, |
| "loss": 3.35653564453125, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.1098752562561312, |
| "grad_norm": 0.9623045325279236, |
| "learning_rate": 0.000291441288044579, |
| "loss": 3.910505676269531, |
| "step": 53650 |
| }, |
| { |
| "epoch": 0.10997765630856003, |
| "grad_norm": 1.3744142055511475, |
| "learning_rate": 0.0002914251809083998, |
| "loss": 3.520959777832031, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.11008005636098886, |
| "grad_norm": 0.7532607316970825, |
| "learning_rate": 0.00029140905907593654, |
| "loss": 3.816366882324219, |
| "step": 53750 |
| }, |
| { |
| "epoch": 0.11018245641341769, |
| "grad_norm": 0.8342536091804504, |
| "learning_rate": 0.00029139292254886447, |
| "loss": 3.52778076171875, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.11028485646584651, |
| "grad_norm": 0.928033173084259, |
| "learning_rate": 0.0002913767713288606, |
| "loss": 3.7655780029296877, |
| "step": 53850 |
| }, |
| { |
| "epoch": 0.11038725651827534, |
| "grad_norm": 0.7441654801368713, |
| "learning_rate": 0.00029136060541760304, |
| "loss": 3.655460205078125, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.11048965657070417, |
| "grad_norm": 0.9803574085235596, |
| "learning_rate": 0.0002913444248167719, |
| "loss": 3.4816769409179686, |
| "step": 53950 |
| }, |
| { |
| "epoch": 0.110592056623133, |
| "grad_norm": 0.9637227654457092, |
| "learning_rate": 0.00029132822952804846, |
| "loss": 3.7213009643554686, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.11069445667556181, |
| "grad_norm": 1.0016183853149414, |
| "learning_rate": 0.0002913120195531158, |
| "loss": 3.2758993530273437, |
| "step": 54050 |
| }, |
| { |
| "epoch": 0.11079685672799064, |
| "grad_norm": 0.8481519818305969, |
| "learning_rate": 0.0002912957948936583, |
| "loss": 4.208623962402344, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.11089925678041947, |
| "grad_norm": 0.9075637459754944, |
| "learning_rate": 0.00029127955555136194, |
| "loss": 3.764527587890625, |
| "step": 54150 |
| }, |
| { |
| "epoch": 0.1110016568328483, |
| "grad_norm": 0.648526132106781, |
| "learning_rate": 0.0002912633015279143, |
| "loss": 3.3832241821289064, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.11110405688527712, |
| "grad_norm": 0.9115505218505859, |
| "learning_rate": 0.0002912470328250044, |
| "loss": 3.866526794433594, |
| "step": 54250 |
| }, |
| { |
| "epoch": 0.11120645693770595, |
| "grad_norm": 1.0018901824951172, |
| "learning_rate": 0.00029123074944432275, |
| "loss": 3.3686892700195314, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.11130885699013478, |
| "grad_norm": 0.9239192605018616, |
| "learning_rate": 0.0002912144513875615, |
| "loss": 3.808809509277344, |
| "step": 54350 |
| }, |
| { |
| "epoch": 0.1114112570425636, |
| "grad_norm": 0.7452714443206787, |
| "learning_rate": 0.0002911981386564143, |
| "loss": 3.228931884765625, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.11151365709499243, |
| "grad_norm": 1.3459135293960571, |
| "learning_rate": 0.0002911818112525763, |
| "loss": 3.20574951171875, |
| "step": 54450 |
| }, |
| { |
| "epoch": 0.11161605714742126, |
| "grad_norm": 1.0211737155914307, |
| "learning_rate": 0.0002911654691777441, |
| "loss": 2.2418772888183596, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.11171845719985009, |
| "grad_norm": 1.1944788694381714, |
| "learning_rate": 0.00029114911243361595, |
| "loss": 3.102964172363281, |
| "step": 54550 |
| }, |
| { |
| "epoch": 0.11182085725227892, |
| "grad_norm": 0.8922857642173767, |
| "learning_rate": 0.0002911327410218916, |
| "loss": 3.3322735595703126, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.11192325730470774, |
| "grad_norm": 0.8530144095420837, |
| "learning_rate": 0.0002911163549442722, |
| "loss": 3.8322744750976563, |
| "step": 54650 |
| }, |
| { |
| "epoch": 0.11202565735713657, |
| "grad_norm": 0.9170437455177307, |
| "learning_rate": 0.00029109995420246066, |
| "loss": 4.006968994140625, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.1121280574095654, |
| "grad_norm": 0.8506373763084412, |
| "learning_rate": 0.0002910835387981612, |
| "loss": 3.5530404663085937, |
| "step": 54750 |
| }, |
| { |
| "epoch": 0.11223045746199423, |
| "grad_norm": 1.0387393236160278, |
| "learning_rate": 0.00029106710873307956, |
| "loss": 3.33231201171875, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.11233285751442305, |
| "grad_norm": 0.5560868382453918, |
| "learning_rate": 0.00029105066400892315, |
| "loss": 2.382226867675781, |
| "step": 54850 |
| }, |
| { |
| "epoch": 0.11243525756685187, |
| "grad_norm": 0.9407156705856323, |
| "learning_rate": 0.00029103420462740087, |
| "loss": 3.685501708984375, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.1125376576192807, |
| "grad_norm": 0.6936938166618347, |
| "learning_rate": 0.000291017730590223, |
| "loss": 3.4359500122070314, |
| "step": 54950 |
| }, |
| { |
| "epoch": 0.11264005767170952, |
| "grad_norm": 0.9318833947181702, |
| "learning_rate": 0.0002910012418991016, |
| "loss": 3.430309143066406, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.11274245772413835, |
| "grad_norm": 1.0374469757080078, |
| "learning_rate": 0.00029098473855574997, |
| "loss": 3.467359619140625, |
| "step": 55050 |
| }, |
| { |
| "epoch": 0.11284485777656718, |
| "grad_norm": 1.1647804975509644, |
| "learning_rate": 0.0002909682205618831, |
| "loss": 3.3597537231445314, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.112947257828996, |
| "grad_norm": 1.8363399505615234, |
| "learning_rate": 0.00029095168791921753, |
| "loss": 3.4801220703125, |
| "step": 55150 |
| }, |
| { |
| "epoch": 0.11304965788142483, |
| "grad_norm": 0.8499084711074829, |
| "learning_rate": 0.0002909351406294712, |
| "loss": 3.76212158203125, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.11315205793385366, |
| "grad_norm": 0.6988071203231812, |
| "learning_rate": 0.0002909185786943636, |
| "loss": 3.5692535400390626, |
| "step": 55250 |
| }, |
| { |
| "epoch": 0.11325445798628249, |
| "grad_norm": 0.6894858479499817, |
| "learning_rate": 0.0002909020021156159, |
| "loss": 3.47705810546875, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.11335685803871132, |
| "grad_norm": 1.0136258602142334, |
| "learning_rate": 0.00029088541089495056, |
| "loss": 3.2412789916992186, |
| "step": 55350 |
| }, |
| { |
| "epoch": 0.11345925809114014, |
| "grad_norm": 0.6407994031906128, |
| "learning_rate": 0.00029086880503409164, |
| "loss": 3.150567626953125, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.11356165814356897, |
| "grad_norm": 0.8704736828804016, |
| "learning_rate": 0.00029085218453476483, |
| "loss": 3.5778497314453124, |
| "step": 55450 |
| }, |
| { |
| "epoch": 0.1136640581959978, |
| "grad_norm": 0.9866794943809509, |
| "learning_rate": 0.00029083554939869725, |
| "loss": 3.52720703125, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.11376645824842663, |
| "grad_norm": 0.4646882712841034, |
| "learning_rate": 0.0002908188996276175, |
| "loss": 2.8131982421875, |
| "step": 55550 |
| }, |
| { |
| "epoch": 0.11386885830085546, |
| "grad_norm": 0.7451179623603821, |
| "learning_rate": 0.00029080223522325575, |
| "loss": 3.3548162841796874, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.11397125835328428, |
| "grad_norm": 1.027496337890625, |
| "learning_rate": 0.0002907855561873438, |
| "loss": 3.047060852050781, |
| "step": 55650 |
| }, |
| { |
| "epoch": 0.11407365840571311, |
| "grad_norm": 1.0272102355957031, |
| "learning_rate": 0.0002907688625216147, |
| "loss": 3.21407958984375, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.11417605845814192, |
| "grad_norm": 0.8392390012741089, |
| "learning_rate": 0.0002907521542278033, |
| "loss": 3.5421328735351563, |
| "step": 55750 |
| }, |
| { |
| "epoch": 0.11427845851057075, |
| "grad_norm": 0.8752363324165344, |
| "learning_rate": 0.0002907354313076458, |
| "loss": 3.536468811035156, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.11438085856299958, |
| "grad_norm": 0.6718413233757019, |
| "learning_rate": 0.00029071869376288, |
| "loss": 3.5975299072265625, |
| "step": 55850 |
| }, |
| { |
| "epoch": 0.11448325861542841, |
| "grad_norm": 0.8909393548965454, |
| "learning_rate": 0.0002907019415952452, |
| "loss": 3.8420440673828127, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.11458565866785723, |
| "grad_norm": 0.7395539879798889, |
| "learning_rate": 0.00029068517480648217, |
| "loss": 3.4465701293945314, |
| "step": 55950 |
| }, |
| { |
| "epoch": 0.11468805872028606, |
| "grad_norm": 0.7831642627716064, |
| "learning_rate": 0.00029066839339833333, |
| "loss": 3.2164300537109374, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.11479045877271489, |
| "grad_norm": 0.8047283291816711, |
| "learning_rate": 0.0002906515973725424, |
| "loss": 3.697811279296875, |
| "step": 56050 |
| }, |
| { |
| "epoch": 0.11489285882514372, |
| "grad_norm": 0.7210569977760315, |
| "learning_rate": 0.00029063478673085484, |
| "loss": 3.0727462768554688, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.11499525887757255, |
| "grad_norm": 0.9832913875579834, |
| "learning_rate": 0.0002906179614750175, |
| "loss": 3.4165048217773437, |
| "step": 56150 |
| }, |
| { |
| "epoch": 0.11509765893000137, |
| "grad_norm": 0.9115371108055115, |
| "learning_rate": 0.0002906011216067788, |
| "loss": 3.485976257324219, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.1152000589824302, |
| "grad_norm": 0.9409294724464417, |
| "learning_rate": 0.0002905842671278887, |
| "loss": 2.942160339355469, |
| "step": 56250 |
| }, |
| { |
| "epoch": 0.11530245903485903, |
| "grad_norm": 1.1528805494308472, |
| "learning_rate": 0.0002905673980400986, |
| "loss": 3.5174395751953127, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.11540485908728786, |
| "grad_norm": 0.740906834602356, |
| "learning_rate": 0.0002905505143451614, |
| "loss": 3.086756286621094, |
| "step": 56350 |
| }, |
| { |
| "epoch": 0.11550725913971668, |
| "grad_norm": 0.888832688331604, |
| "learning_rate": 0.00029053361604483173, |
| "loss": 3.832029113769531, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.11560965919214551, |
| "grad_norm": 0.889111340045929, |
| "learning_rate": 0.00029051670314086546, |
| "loss": 3.207186584472656, |
| "step": 56450 |
| }, |
| { |
| "epoch": 0.11571205924457434, |
| "grad_norm": 0.7387615442276001, |
| "learning_rate": 0.0002904997756350202, |
| "loss": 3.152142333984375, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.11581445929700317, |
| "grad_norm": 0.8200859427452087, |
| "learning_rate": 0.00029048283352905486, |
| "loss": 3.671814270019531, |
| "step": 56550 |
| }, |
| { |
| "epoch": 0.11591685934943198, |
| "grad_norm": 1.1036324501037598, |
| "learning_rate": 0.0002904658768247301, |
| "loss": 3.4229196166992186, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.11601925940186081, |
| "grad_norm": 0.8785697221755981, |
| "learning_rate": 0.00029044890552380796, |
| "loss": 3.5630813598632813, |
| "step": 56650 |
| }, |
| { |
| "epoch": 0.11612165945428964, |
| "grad_norm": 1.0525559186935425, |
| "learning_rate": 0.000290431919628052, |
| "loss": 3.124271240234375, |
| "step": 56700 |
| }, |
| { |
| "epoch": 0.11622405950671846, |
| "grad_norm": 0.9268920421600342, |
| "learning_rate": 0.00029041491913922736, |
| "loss": 3.26138916015625, |
| "step": 56750 |
| }, |
| { |
| "epoch": 0.11632645955914729, |
| "grad_norm": 0.8036125898361206, |
| "learning_rate": 0.0002903979040591006, |
| "loss": 3.1208505249023437, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.11642885961157612, |
| "grad_norm": 0.871330976486206, |
| "learning_rate": 0.0002903808743894399, |
| "loss": 3.6094674682617187, |
| "step": 56850 |
| }, |
| { |
| "epoch": 0.11653125966400495, |
| "grad_norm": 0.7573062181472778, |
| "learning_rate": 0.00029036383013201486, |
| "loss": 3.4403109741210938, |
| "step": 56900 |
| }, |
| { |
| "epoch": 0.11663365971643377, |
| "grad_norm": 0.8866212964057922, |
| "learning_rate": 0.0002903467712885967, |
| "loss": 3.023941955566406, |
| "step": 56950 |
| }, |
| { |
| "epoch": 0.1167360597688626, |
| "grad_norm": 1.240868330001831, |
| "learning_rate": 0.00029032969786095807, |
| "loss": 3.81320556640625, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.11683845982129143, |
| "grad_norm": 0.9338199496269226, |
| "learning_rate": 0.0002903126098508732, |
| "loss": 3.1648443603515624, |
| "step": 57050 |
| }, |
| { |
| "epoch": 0.11694085987372026, |
| "grad_norm": 0.8921442031860352, |
| "learning_rate": 0.0002902955072601177, |
| "loss": 3.7754312133789063, |
| "step": 57100 |
| }, |
| { |
| "epoch": 0.11704325992614908, |
| "grad_norm": 0.7555287480354309, |
| "learning_rate": 0.00029027839009046887, |
| "loss": 3.6020452880859377, |
| "step": 57150 |
| }, |
| { |
| "epoch": 0.11714565997857791, |
| "grad_norm": 0.8668673038482666, |
| "learning_rate": 0.00029026125834370547, |
| "loss": 3.1613735961914062, |
| "step": 57200 |
| }, |
| { |
| "epoch": 0.11724806003100674, |
| "grad_norm": 0.8572468757629395, |
| "learning_rate": 0.00029024411202160775, |
| "loss": 3.5449398803710936, |
| "step": 57250 |
| }, |
| { |
| "epoch": 0.11735046008343557, |
| "grad_norm": 1.0183916091918945, |
| "learning_rate": 0.0002902269511259575, |
| "loss": 3.4537921142578125, |
| "step": 57300 |
| }, |
| { |
| "epoch": 0.1174528601358644, |
| "grad_norm": 0.7662498354911804, |
| "learning_rate": 0.00029020977565853793, |
| "loss": 3.6329010009765623, |
| "step": 57350 |
| }, |
| { |
| "epoch": 0.11755526018829322, |
| "grad_norm": 0.7248380780220032, |
| "learning_rate": 0.0002901925856211339, |
| "loss": 3.476121826171875, |
| "step": 57400 |
| }, |
| { |
| "epoch": 0.11765766024072204, |
| "grad_norm": 0.5883516073226929, |
| "learning_rate": 0.0002901753810155316, |
| "loss": 3.1229867553710937, |
| "step": 57450 |
| }, |
| { |
| "epoch": 0.11776006029315086, |
| "grad_norm": 1.3006634712219238, |
| "learning_rate": 0.00029015816184351905, |
| "loss": 3.42736572265625, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.11786246034557969, |
| "grad_norm": 0.9047501683235168, |
| "learning_rate": 0.0002901409281068855, |
| "loss": 3.190472412109375, |
| "step": 57550 |
| }, |
| { |
| "epoch": 0.11796486039800852, |
| "grad_norm": 0.8864745497703552, |
| "learning_rate": 0.00029012367980742177, |
| "loss": 3.5744329833984376, |
| "step": 57600 |
| }, |
| { |
| "epoch": 0.11806726045043735, |
| "grad_norm": 0.9945940971374512, |
| "learning_rate": 0.0002901064169469203, |
| "loss": 3.613050537109375, |
| "step": 57650 |
| }, |
| { |
| "epoch": 0.11816966050286618, |
| "grad_norm": 0.9474062919616699, |
| "learning_rate": 0.00029008913952717486, |
| "loss": 3.755731201171875, |
| "step": 57700 |
| }, |
| { |
| "epoch": 0.118272060555295, |
| "grad_norm": 0.9160423874855042, |
| "learning_rate": 0.000290071847549981, |
| "loss": 3.7074078369140624, |
| "step": 57750 |
| }, |
| { |
| "epoch": 0.11837446060772383, |
| "grad_norm": 0.7401031851768494, |
| "learning_rate": 0.0002900545410171355, |
| "loss": 3.9821441650390623, |
| "step": 57800 |
| }, |
| { |
| "epoch": 0.11847686066015266, |
| "grad_norm": 0.9346218705177307, |
| "learning_rate": 0.00029003721993043686, |
| "loss": 3.6518328857421873, |
| "step": 57850 |
| }, |
| { |
| "epoch": 0.11857926071258149, |
| "grad_norm": 0.8762102723121643, |
| "learning_rate": 0.0002900198842916849, |
| "loss": 3.3181643676757813, |
| "step": 57900 |
| }, |
| { |
| "epoch": 0.11868166076501031, |
| "grad_norm": 0.8260309100151062, |
| "learning_rate": 0.00029000253410268117, |
| "loss": 3.745126953125, |
| "step": 57950 |
| }, |
| { |
| "epoch": 0.11878406081743914, |
| "grad_norm": 0.728134274482727, |
| "learning_rate": 0.00028998516936522864, |
| "loss": 3.5524822998046877, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.11888646086986797, |
| "grad_norm": 1.0089305639266968, |
| "learning_rate": 0.0002899677900811316, |
| "loss": 3.7944915771484373, |
| "step": 58050 |
| }, |
| { |
| "epoch": 0.1189888609222968, |
| "grad_norm": 0.8292215466499329, |
| "learning_rate": 0.0002899503962521963, |
| "loss": 3.89322021484375, |
| "step": 58100 |
| }, |
| { |
| "epoch": 0.11909126097472562, |
| "grad_norm": 0.8529530167579651, |
| "learning_rate": 0.00028993298788023005, |
| "loss": 2.9111569213867186, |
| "step": 58150 |
| }, |
| { |
| "epoch": 0.11919366102715445, |
| "grad_norm": 0.8902004361152649, |
| "learning_rate": 0.00028991556496704186, |
| "loss": 3.6739492797851563, |
| "step": 58200 |
| }, |
| { |
| "epoch": 0.11929606107958328, |
| "grad_norm": 0.9264180660247803, |
| "learning_rate": 0.0002898981275144423, |
| "loss": 4.041621704101562, |
| "step": 58250 |
| }, |
| { |
| "epoch": 0.1193984611320121, |
| "grad_norm": 0.8773983716964722, |
| "learning_rate": 0.0002898806755242433, |
| "loss": 3.3137640380859374, |
| "step": 58300 |
| }, |
| { |
| "epoch": 0.11950086118444092, |
| "grad_norm": 0.8258083462715149, |
| "learning_rate": 0.00028986320899825855, |
| "loss": 3.7007760620117187, |
| "step": 58350 |
| }, |
| { |
| "epoch": 0.11960326123686975, |
| "grad_norm": 0.4484880566596985, |
| "learning_rate": 0.00028984572793830295, |
| "loss": 2.6619467163085937, |
| "step": 58400 |
| }, |
| { |
| "epoch": 0.11970566128929858, |
| "grad_norm": 0.7841198444366455, |
| "learning_rate": 0.0002898282323461931, |
| "loss": 3.9329864501953127, |
| "step": 58450 |
| }, |
| { |
| "epoch": 0.1198080613417274, |
| "grad_norm": 1.0872740745544434, |
| "learning_rate": 0.0002898107222237471, |
| "loss": 3.7529037475585936, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.11991046139415623, |
| "grad_norm": 1.1370536088943481, |
| "learning_rate": 0.0002897931975727845, |
| "loss": 3.598294372558594, |
| "step": 58550 |
| }, |
| { |
| "epoch": 0.12001286144658506, |
| "grad_norm": 0.5730462670326233, |
| "learning_rate": 0.0002897756583951264, |
| "loss": 3.4985086059570314, |
| "step": 58600 |
| }, |
| { |
| "epoch": 0.12011526149901389, |
| "grad_norm": 0.8920771479606628, |
| "learning_rate": 0.00028975810469259535, |
| "loss": 3.025179443359375, |
| "step": 58650 |
| }, |
| { |
| "epoch": 0.12021766155144271, |
| "grad_norm": 0.877116858959198, |
| "learning_rate": 0.0002897405364670155, |
| "loss": 3.4373843383789064, |
| "step": 58700 |
| }, |
| { |
| "epoch": 0.12032006160387154, |
| "grad_norm": 0.9569665193557739, |
| "learning_rate": 0.0002897229537202124, |
| "loss": 4.03067626953125, |
| "step": 58750 |
| }, |
| { |
| "epoch": 0.12042246165630037, |
| "grad_norm": 0.9027877449989319, |
| "learning_rate": 0.00028970535645401324, |
| "loss": 3.0247479248046876, |
| "step": 58800 |
| }, |
| { |
| "epoch": 0.1205248617087292, |
| "grad_norm": 0.7448411583900452, |
| "learning_rate": 0.0002896877446702467, |
| "loss": 3.632384948730469, |
| "step": 58850 |
| }, |
| { |
| "epoch": 0.12062726176115803, |
| "grad_norm": 0.8100590705871582, |
| "learning_rate": 0.0002896701183707428, |
| "loss": 3.263778076171875, |
| "step": 58900 |
| }, |
| { |
| "epoch": 0.12072966181358685, |
| "grad_norm": 1.191540241241455, |
| "learning_rate": 0.0002896524775573332, |
| "loss": 3.6475961303710935, |
| "step": 58950 |
| }, |
| { |
| "epoch": 0.12083206186601568, |
| "grad_norm": 0.7784574031829834, |
| "learning_rate": 0.00028963482223185106, |
| "loss": 3.554160461425781, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.12093446191844451, |
| "grad_norm": 0.5998643040657043, |
| "learning_rate": 0.0002896171523961312, |
| "loss": 3.2943960571289064, |
| "step": 59050 |
| }, |
| { |
| "epoch": 0.12103686197087334, |
| "grad_norm": 0.8640596270561218, |
| "learning_rate": 0.0002895994680520096, |
| "loss": 2.6187591552734375, |
| "step": 59100 |
| }, |
| { |
| "epoch": 0.12113926202330215, |
| "grad_norm": 0.6376426219940186, |
| "learning_rate": 0.00028958176920132396, |
| "loss": 3.336057434082031, |
| "step": 59150 |
| }, |
| { |
| "epoch": 0.12124166207573098, |
| "grad_norm": 1.1029490232467651, |
| "learning_rate": 0.0002895640558459136, |
| "loss": 3.192468566894531, |
| "step": 59200 |
| }, |
| { |
| "epoch": 0.1213440621281598, |
| "grad_norm": 0.9253978729248047, |
| "learning_rate": 0.00028954632798761906, |
| "loss": 3.82802490234375, |
| "step": 59250 |
| }, |
| { |
| "epoch": 0.12144646218058863, |
| "grad_norm": 1.0808192491531372, |
| "learning_rate": 0.0002895285856282826, |
| "loss": 3.537474365234375, |
| "step": 59300 |
| }, |
| { |
| "epoch": 0.12154886223301746, |
| "grad_norm": 0.7610458731651306, |
| "learning_rate": 0.000289510828769748, |
| "loss": 3.7149560546875, |
| "step": 59350 |
| }, |
| { |
| "epoch": 0.12165126228544629, |
| "grad_norm": 1.0239511728286743, |
| "learning_rate": 0.0002894930574138604, |
| "loss": 3.168520202636719, |
| "step": 59400 |
| }, |
| { |
| "epoch": 0.12175366233787512, |
| "grad_norm": 1.482177495956421, |
| "learning_rate": 0.0002894752715624665, |
| "loss": 3.8551751708984376, |
| "step": 59450 |
| }, |
| { |
| "epoch": 0.12185606239030394, |
| "grad_norm": 0.8012579083442688, |
| "learning_rate": 0.00028945747121741455, |
| "loss": 3.244693603515625, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.12195846244273277, |
| "grad_norm": 0.6927148699760437, |
| "learning_rate": 0.0002894396563805543, |
| "loss": 3.722396240234375, |
| "step": 59550 |
| }, |
| { |
| "epoch": 0.1220608624951616, |
| "grad_norm": 0.7614629864692688, |
| "learning_rate": 0.00028942182705373707, |
| "loss": 3.183421630859375, |
| "step": 59600 |
| }, |
| { |
| "epoch": 0.12216326254759043, |
| "grad_norm": 0.8808593153953552, |
| "learning_rate": 0.0002894039832388154, |
| "loss": 3.444737854003906, |
| "step": 59650 |
| }, |
| { |
| "epoch": 0.12226566260001925, |
| "grad_norm": 0.9153810143470764, |
| "learning_rate": 0.0002893861249376437, |
| "loss": 3.483736267089844, |
| "step": 59700 |
| }, |
| { |
| "epoch": 0.12236806265244808, |
| "grad_norm": 0.8851150870323181, |
| "learning_rate": 0.0002893682521520777, |
| "loss": 3.7175869750976562, |
| "step": 59750 |
| }, |
| { |
| "epoch": 0.12247046270487691, |
| "grad_norm": 0.7266696095466614, |
| "learning_rate": 0.00028935036488397466, |
| "loss": 3.335245361328125, |
| "step": 59800 |
| }, |
| { |
| "epoch": 0.12257286275730574, |
| "grad_norm": 0.9137750864028931, |
| "learning_rate": 0.0002893324631351933, |
| "loss": 2.9954302978515623, |
| "step": 59850 |
| }, |
| { |
| "epoch": 0.12267526280973456, |
| "grad_norm": 0.8360620141029358, |
| "learning_rate": 0.00028931454690759396, |
| "loss": 3.0706732177734377, |
| "step": 59900 |
| }, |
| { |
| "epoch": 0.12277766286216339, |
| "grad_norm": 0.8443347811698914, |
| "learning_rate": 0.00028929661620303833, |
| "loss": 3.848203430175781, |
| "step": 59950 |
| }, |
| { |
| "epoch": 0.1228800629145922, |
| "grad_norm": 0.9306533932685852, |
| "learning_rate": 0.0002892786710233898, |
| "loss": 2.975295104980469, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.12298246296702103, |
| "grad_norm": 0.9441879391670227, |
| "learning_rate": 0.00028926071137051307, |
| "loss": 3.9100912475585936, |
| "step": 60050 |
| }, |
| { |
| "epoch": 0.12308486301944986, |
| "grad_norm": 0.7004597187042236, |
| "learning_rate": 0.00028924273724627444, |
| "loss": 3.670945739746094, |
| "step": 60100 |
| }, |
| { |
| "epoch": 0.12318726307187869, |
| "grad_norm": 0.7978895306587219, |
| "learning_rate": 0.00028922474865254174, |
| "loss": 3.39288818359375, |
| "step": 60150 |
| }, |
| { |
| "epoch": 0.12328966312430752, |
| "grad_norm": 0.8944730758666992, |
| "learning_rate": 0.0002892067455911842, |
| "loss": 3.4790631103515626, |
| "step": 60200 |
| }, |
| { |
| "epoch": 0.12339206317673634, |
| "grad_norm": 0.9302740097045898, |
| "learning_rate": 0.0002891887280640727, |
| "loss": 3.564194641113281, |
| "step": 60250 |
| }, |
| { |
| "epoch": 0.12349446322916517, |
| "grad_norm": 0.7751696109771729, |
| "learning_rate": 0.0002891706960730795, |
| "loss": 3.4631011962890623, |
| "step": 60300 |
| }, |
| { |
| "epoch": 0.123596863281594, |
| "grad_norm": 0.998839795589447, |
| "learning_rate": 0.00028915264962007836, |
| "loss": 3.490992126464844, |
| "step": 60350 |
| }, |
| { |
| "epoch": 0.12369926333402283, |
| "grad_norm": 1.2390878200531006, |
| "learning_rate": 0.0002891345887069447, |
| "loss": 3.6483099365234377, |
| "step": 60400 |
| }, |
| { |
| "epoch": 0.12380166338645165, |
| "grad_norm": 0.8795660138130188, |
| "learning_rate": 0.0002891165133355553, |
| "loss": 3.6523648071289063, |
| "step": 60450 |
| }, |
| { |
| "epoch": 0.12390406343888048, |
| "grad_norm": 0.8491701483726501, |
| "learning_rate": 0.00028909842350778836, |
| "loss": 3.5479266357421877, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.12400646349130931, |
| "grad_norm": 0.7775550484657288, |
| "learning_rate": 0.00028908031922552377, |
| "loss": 3.1797994995117187, |
| "step": 60550 |
| }, |
| { |
| "epoch": 0.12410886354373814, |
| "grad_norm": 0.7711923718452454, |
| "learning_rate": 0.0002890622004906429, |
| "loss": 3.398070068359375, |
| "step": 60600 |
| }, |
| { |
| "epoch": 0.12421126359616697, |
| "grad_norm": 0.490875244140625, |
| "learning_rate": 0.0002890440673050285, |
| "loss": 3.022109069824219, |
| "step": 60650 |
| }, |
| { |
| "epoch": 0.12431366364859579, |
| "grad_norm": 0.7348693609237671, |
| "learning_rate": 0.0002890259196705649, |
| "loss": 3.46414794921875, |
| "step": 60700 |
| }, |
| { |
| "epoch": 0.12441606370102462, |
| "grad_norm": 0.9327791929244995, |
| "learning_rate": 0.000289007757589138, |
| "loss": 3.681882629394531, |
| "step": 60750 |
| }, |
| { |
| "epoch": 0.12451846375345345, |
| "grad_norm": 0.8426567912101746, |
| "learning_rate": 0.000288989581062635, |
| "loss": 4.021507568359375, |
| "step": 60800 |
| }, |
| { |
| "epoch": 0.12462086380588226, |
| "grad_norm": 0.9796308875083923, |
| "learning_rate": 0.0002889713900929448, |
| "loss": 3.8382940673828125, |
| "step": 60850 |
| }, |
| { |
| "epoch": 0.12472326385831109, |
| "grad_norm": 0.7347166538238525, |
| "learning_rate": 0.0002889531846819577, |
| "loss": 3.32147216796875, |
| "step": 60900 |
| }, |
| { |
| "epoch": 0.12482566391073992, |
| "grad_norm": 0.770237147808075, |
| "learning_rate": 0.0002889349648315655, |
| "loss": 3.648823547363281, |
| "step": 60950 |
| }, |
| { |
| "epoch": 0.12492806396316875, |
| "grad_norm": 0.6420400738716125, |
| "learning_rate": 0.00028891673054366165, |
| "loss": 3.17007568359375, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.1250304640155976, |
| "grad_norm": 0.7027015089988708, |
| "learning_rate": 0.00028889848182014086, |
| "loss": 3.246382141113281, |
| "step": 61050 |
| }, |
| { |
| "epoch": 0.1251328640680264, |
| "grad_norm": 0.868607759475708, |
| "learning_rate": 0.0002888802186628995, |
| "loss": 3.6044903564453126, |
| "step": 61100 |
| }, |
| { |
| "epoch": 0.12523526412045524, |
| "grad_norm": 0.8410335183143616, |
| "learning_rate": 0.00028886194107383535, |
| "loss": 3.066201171875, |
| "step": 61150 |
| }, |
| { |
| "epoch": 0.12533766417288406, |
| "grad_norm": 1.0808706283569336, |
| "learning_rate": 0.00028884364905484784, |
| "loss": 3.1906118774414063, |
| "step": 61200 |
| }, |
| { |
| "epoch": 0.12544006422531287, |
| "grad_norm": 0.872553825378418, |
| "learning_rate": 0.00028882534260783765, |
| "loss": 3.3807113647460936, |
| "step": 61250 |
| }, |
| { |
| "epoch": 0.1255424642777417, |
| "grad_norm": 0.9935702681541443, |
| "learning_rate": 0.0002888070217347072, |
| "loss": 3.3980447387695314, |
| "step": 61300 |
| }, |
| { |
| "epoch": 0.12564486433017052, |
| "grad_norm": 0.8990649580955505, |
| "learning_rate": 0.0002887886864373603, |
| "loss": 3.4861651611328126, |
| "step": 61350 |
| }, |
| { |
| "epoch": 0.12574726438259937, |
| "grad_norm": 0.8892736434936523, |
| "learning_rate": 0.0002887703367177023, |
| "loss": 3.9071136474609376, |
| "step": 61400 |
| }, |
| { |
| "epoch": 0.12584966443502818, |
| "grad_norm": 0.7861908078193665, |
| "learning_rate": 0.00028875197257763997, |
| "loss": 3.886827392578125, |
| "step": 61450 |
| }, |
| { |
| "epoch": 0.12595206448745702, |
| "grad_norm": 0.8096019625663757, |
| "learning_rate": 0.0002887335940190817, |
| "loss": 2.9763027954101564, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.12605446453988584, |
| "grad_norm": 0.8015087246894836, |
| "learning_rate": 0.00028871520104393724, |
| "loss": 3.5265399169921876, |
| "step": 61550 |
| }, |
| { |
| "epoch": 0.12615686459231468, |
| "grad_norm": 1.0955448150634766, |
| "learning_rate": 0.00028869679365411786, |
| "loss": 3.746468811035156, |
| "step": 61600 |
| }, |
| { |
| "epoch": 0.1262592646447435, |
| "grad_norm": 0.9293431043624878, |
| "learning_rate": 0.00028867837185153654, |
| "loss": 3.725838317871094, |
| "step": 61650 |
| }, |
| { |
| "epoch": 0.12636166469717233, |
| "grad_norm": 0.881248950958252, |
| "learning_rate": 0.0002886599356381075, |
| "loss": 3.873548583984375, |
| "step": 61700 |
| }, |
| { |
| "epoch": 0.12646406474960115, |
| "grad_norm": 0.7995479702949524, |
| "learning_rate": 0.00028864148501574655, |
| "loss": 3.55103515625, |
| "step": 61750 |
| }, |
| { |
| "epoch": 0.12656646480203, |
| "grad_norm": 0.7834081053733826, |
| "learning_rate": 0.00028862301998637096, |
| "loss": 3.5016546630859375, |
| "step": 61800 |
| }, |
| { |
| "epoch": 0.1266688648544588, |
| "grad_norm": 0.8396415710449219, |
| "learning_rate": 0.00028860454055189955, |
| "loss": 3.15347900390625, |
| "step": 61850 |
| }, |
| { |
| "epoch": 0.12677126490688764, |
| "grad_norm": 0.7540357112884521, |
| "learning_rate": 0.00028858604671425266, |
| "loss": 3.5248077392578123, |
| "step": 61900 |
| }, |
| { |
| "epoch": 0.12687366495931646, |
| "grad_norm": 1.1297228336334229, |
| "learning_rate": 0.00028856753847535213, |
| "loss": 3.4668838500976564, |
| "step": 61950 |
| }, |
| { |
| "epoch": 0.1269760650117453, |
| "grad_norm": 0.7924526929855347, |
| "learning_rate": 0.0002885490158371212, |
| "loss": 3.7679620361328126, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.1270784650641741, |
| "grad_norm": 0.8227761387825012, |
| "learning_rate": 0.0002885304788014846, |
| "loss": 3.809046325683594, |
| "step": 62050 |
| }, |
| { |
| "epoch": 0.12718086511660295, |
| "grad_norm": 0.8400523662567139, |
| "learning_rate": 0.0002885119273703687, |
| "loss": 3.74009765625, |
| "step": 62100 |
| }, |
| { |
| "epoch": 0.12728326516903177, |
| "grad_norm": 1.1692306995391846, |
| "learning_rate": 0.0002884933615457012, |
| "loss": 4.000062866210937, |
| "step": 62150 |
| }, |
| { |
| "epoch": 0.12738566522146058, |
| "grad_norm": 0.7943342328071594, |
| "learning_rate": 0.00028847478132941153, |
| "loss": 3.8031546020507814, |
| "step": 62200 |
| }, |
| { |
| "epoch": 0.12748806527388942, |
| "grad_norm": 0.9809468984603882, |
| "learning_rate": 0.0002884561867234303, |
| "loss": 3.805234680175781, |
| "step": 62250 |
| }, |
| { |
| "epoch": 0.12759046532631824, |
| "grad_norm": 0.9183539748191833, |
| "learning_rate": 0.00028843757772968994, |
| "loss": 4.105808715820313, |
| "step": 62300 |
| }, |
| { |
| "epoch": 0.12769286537874708, |
| "grad_norm": 0.9354544281959534, |
| "learning_rate": 0.0002884189543501241, |
| "loss": 3.7343814086914064, |
| "step": 62350 |
| }, |
| { |
| "epoch": 0.1277952654311759, |
| "grad_norm": 0.8216899633407593, |
| "learning_rate": 0.00028840031658666803, |
| "loss": 3.678810729980469, |
| "step": 62400 |
| }, |
| { |
| "epoch": 0.12789766548360473, |
| "grad_norm": 0.8827342987060547, |
| "learning_rate": 0.00028838166444125857, |
| "loss": 3.634096374511719, |
| "step": 62450 |
| }, |
| { |
| "epoch": 0.12800006553603355, |
| "grad_norm": 0.9468240141868591, |
| "learning_rate": 0.00028836299791583386, |
| "loss": 3.0597830200195313, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.1281024655884624, |
| "grad_norm": 0.9269732236862183, |
| "learning_rate": 0.00028834431701233376, |
| "loss": 3.667522277832031, |
| "step": 62550 |
| }, |
| { |
| "epoch": 0.1282048656408912, |
| "grad_norm": 0.7396625280380249, |
| "learning_rate": 0.0002883256217326994, |
| "loss": 3.823531494140625, |
| "step": 62600 |
| }, |
| { |
| "epoch": 0.12830726569332004, |
| "grad_norm": 0.653838574886322, |
| "learning_rate": 0.0002883069120788737, |
| "loss": 3.5563314819335936, |
| "step": 62650 |
| }, |
| { |
| "epoch": 0.12840966574574886, |
| "grad_norm": 0.8573964834213257, |
| "learning_rate": 0.0002882881880528006, |
| "loss": 3.9133944702148438, |
| "step": 62700 |
| }, |
| { |
| "epoch": 0.1285120657981777, |
| "grad_norm": 0.8567407727241516, |
| "learning_rate": 0.00028826944965642604, |
| "loss": 3.63771484375, |
| "step": 62750 |
| }, |
| { |
| "epoch": 0.1286144658506065, |
| "grad_norm": 0.8221452832221985, |
| "learning_rate": 0.00028825069689169706, |
| "loss": 3.7375106811523438, |
| "step": 62800 |
| }, |
| { |
| "epoch": 0.12871686590303535, |
| "grad_norm": 0.8458483815193176, |
| "learning_rate": 0.0002882319297605626, |
| "loss": 3.4764666748046875, |
| "step": 62850 |
| }, |
| { |
| "epoch": 0.12881926595546417, |
| "grad_norm": 0.5829837322235107, |
| "learning_rate": 0.0002882131482649727, |
| "loss": 3.4318243408203126, |
| "step": 62900 |
| }, |
| { |
| "epoch": 0.128921666007893, |
| "grad_norm": 0.6864559054374695, |
| "learning_rate": 0.000288194352406879, |
| "loss": 2.9922601318359376, |
| "step": 62950 |
| }, |
| { |
| "epoch": 0.12902406606032182, |
| "grad_norm": 0.7200921177864075, |
| "learning_rate": 0.0002881755421882348, |
| "loss": 3.436331481933594, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.12912646611275064, |
| "grad_norm": 0.8018766045570374, |
| "learning_rate": 0.00028815671761099474, |
| "loss": 3.8753070068359374, |
| "step": 63050 |
| }, |
| { |
| "epoch": 0.12922886616517948, |
| "grad_norm": 0.7417867183685303, |
| "learning_rate": 0.00028813787867711495, |
| "loss": 3.4831881713867188, |
| "step": 63100 |
| }, |
| { |
| "epoch": 0.1293312662176083, |
| "grad_norm": 0.8872492909431458, |
| "learning_rate": 0.0002881190253885531, |
| "loss": 3.471279296875, |
| "step": 63150 |
| }, |
| { |
| "epoch": 0.12943366627003713, |
| "grad_norm": 0.9026205539703369, |
| "learning_rate": 0.00028810015774726844, |
| "loss": 3.930486755371094, |
| "step": 63200 |
| }, |
| { |
| "epoch": 0.12953606632246595, |
| "grad_norm": 0.8319406509399414, |
| "learning_rate": 0.0002880812757552215, |
| "loss": 3.876917419433594, |
| "step": 63250 |
| }, |
| { |
| "epoch": 0.1296384663748948, |
| "grad_norm": 0.7153857946395874, |
| "learning_rate": 0.00028806237941437444, |
| "loss": 3.4448760986328124, |
| "step": 63300 |
| }, |
| { |
| "epoch": 0.1297408664273236, |
| "grad_norm": 0.7869312763214111, |
| "learning_rate": 0.00028804346872669085, |
| "loss": 3.9350848388671875, |
| "step": 63350 |
| }, |
| { |
| "epoch": 0.12984326647975244, |
| "grad_norm": 0.7719307541847229, |
| "learning_rate": 0.00028802454369413594, |
| "loss": 3.8888482666015625, |
| "step": 63400 |
| }, |
| { |
| "epoch": 0.12994566653218126, |
| "grad_norm": 1.150686502456665, |
| "learning_rate": 0.00028800560431867624, |
| "loss": 2.990634765625, |
| "step": 63450 |
| }, |
| { |
| "epoch": 0.1300480665846101, |
| "grad_norm": 0.7204848527908325, |
| "learning_rate": 0.00028798665060227984, |
| "loss": 2.155850067138672, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.13015046663703891, |
| "grad_norm": 1.2251051664352417, |
| "learning_rate": 0.0002879676825469164, |
| "loss": 3.9703302001953125, |
| "step": 63550 |
| }, |
| { |
| "epoch": 0.13025286668946776, |
| "grad_norm": 0.7243852615356445, |
| "learning_rate": 0.00028794870015455695, |
| "loss": 3.6895037841796876, |
| "step": 63600 |
| }, |
| { |
| "epoch": 0.13035526674189657, |
| "grad_norm": 0.8284658193588257, |
| "learning_rate": 0.00028792970342717407, |
| "loss": 3.7690008544921874, |
| "step": 63650 |
| }, |
| { |
| "epoch": 0.1304576667943254, |
| "grad_norm": 0.05757651478052139, |
| "learning_rate": 0.0002879106923667418, |
| "loss": 1.9905595397949218, |
| "step": 63700 |
| }, |
| { |
| "epoch": 0.13056006684675422, |
| "grad_norm": 0.7437557578086853, |
| "learning_rate": 0.0002878916669752357, |
| "loss": 1.6622731018066406, |
| "step": 63750 |
| }, |
| { |
| "epoch": 0.13066246689918307, |
| "grad_norm": 0.8517412543296814, |
| "learning_rate": 0.0002878726272546328, |
| "loss": 4.094966430664062, |
| "step": 63800 |
| }, |
| { |
| "epoch": 0.13076486695161188, |
| "grad_norm": 0.8423788547515869, |
| "learning_rate": 0.00028785357320691154, |
| "loss": 4.379864196777344, |
| "step": 63850 |
| }, |
| { |
| "epoch": 0.1308672670040407, |
| "grad_norm": 0.853164792060852, |
| "learning_rate": 0.0002878345048340521, |
| "loss": 3.3010690307617185, |
| "step": 63900 |
| }, |
| { |
| "epoch": 0.13096966705646954, |
| "grad_norm": 0.7724624872207642, |
| "learning_rate": 0.00028781542213803587, |
| "loss": 3.83298095703125, |
| "step": 63950 |
| }, |
| { |
| "epoch": 0.13107206710889835, |
| "grad_norm": 0.599814236164093, |
| "learning_rate": 0.0002877963251208459, |
| "loss": 3.6117398071289064, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.1311744671613272, |
| "grad_norm": 0.87944096326828, |
| "learning_rate": 0.00028777721378446655, |
| "loss": 3.77650390625, |
| "step": 64050 |
| }, |
| { |
| "epoch": 0.131276867213756, |
| "grad_norm": 0.8216091990470886, |
| "learning_rate": 0.000287758088130884, |
| "loss": 3.966680603027344, |
| "step": 64100 |
| }, |
| { |
| "epoch": 0.13137926726618485, |
| "grad_norm": 0.9879843592643738, |
| "learning_rate": 0.00028773894816208547, |
| "loss": 3.368244323730469, |
| "step": 64150 |
| }, |
| { |
| "epoch": 0.13148166731861366, |
| "grad_norm": 0.7889556288719177, |
| "learning_rate": 0.00028771979388006, |
| "loss": 3.514817199707031, |
| "step": 64200 |
| }, |
| { |
| "epoch": 0.1315840673710425, |
| "grad_norm": 0.7712281942367554, |
| "learning_rate": 0.00028770062528679814, |
| "loss": 3.8969122314453126, |
| "step": 64250 |
| }, |
| { |
| "epoch": 0.13168646742347132, |
| "grad_norm": 0.6825302243232727, |
| "learning_rate": 0.0002876814423842916, |
| "loss": 2.702755126953125, |
| "step": 64300 |
| }, |
| { |
| "epoch": 0.13178886747590016, |
| "grad_norm": 0.7740472555160522, |
| "learning_rate": 0.0002876622451745339, |
| "loss": 3.027957763671875, |
| "step": 64350 |
| }, |
| { |
| "epoch": 0.13189126752832897, |
| "grad_norm": 0.7272697687149048, |
| "learning_rate": 0.00028764303365951986, |
| "loss": 3.3588211059570314, |
| "step": 64400 |
| }, |
| { |
| "epoch": 0.1319936675807578, |
| "grad_norm": 0.8405432105064392, |
| "learning_rate": 0.00028762380784124597, |
| "loss": 3.6030569458007813, |
| "step": 64450 |
| }, |
| { |
| "epoch": 0.13209606763318663, |
| "grad_norm": 0.8467888236045837, |
| "learning_rate": 0.00028760456772171004, |
| "loss": 3.6072647094726564, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.13219846768561547, |
| "grad_norm": 0.7771287560462952, |
| "learning_rate": 0.0002875853133029113, |
| "loss": 3.8847897338867186, |
| "step": 64550 |
| }, |
| { |
| "epoch": 0.13230086773804428, |
| "grad_norm": 0.9590752720832825, |
| "learning_rate": 0.0002875660445868507, |
| "loss": 3.6549798583984376, |
| "step": 64600 |
| }, |
| { |
| "epoch": 0.13240326779047312, |
| "grad_norm": 0.7539810538291931, |
| "learning_rate": 0.0002875467615755306, |
| "loss": 3.3866226196289064, |
| "step": 64650 |
| }, |
| { |
| "epoch": 0.13250566784290194, |
| "grad_norm": 0.8308656215667725, |
| "learning_rate": 0.0002875274642709548, |
| "loss": 3.721044006347656, |
| "step": 64700 |
| }, |
| { |
| "epoch": 0.13260806789533075, |
| "grad_norm": 0.8056835532188416, |
| "learning_rate": 0.00028750815267512847, |
| "loss": 3.9817669677734373, |
| "step": 64750 |
| }, |
| { |
| "epoch": 0.1327104679477596, |
| "grad_norm": 1.1581485271453857, |
| "learning_rate": 0.0002874888267900585, |
| "loss": 3.785094909667969, |
| "step": 64800 |
| }, |
| { |
| "epoch": 0.1328128680001884, |
| "grad_norm": 0.6927155256271362, |
| "learning_rate": 0.0002874694866177531, |
| "loss": 3.867703857421875, |
| "step": 64850 |
| }, |
| { |
| "epoch": 0.13291526805261725, |
| "grad_norm": 1.1712969541549683, |
| "learning_rate": 0.00028745013216022197, |
| "loss": 3.79897705078125, |
| "step": 64900 |
| }, |
| { |
| "epoch": 0.13301766810504606, |
| "grad_norm": 0.7104830741882324, |
| "learning_rate": 0.0002874307634194765, |
| "loss": 3.595622863769531, |
| "step": 64950 |
| }, |
| { |
| "epoch": 0.1331200681574749, |
| "grad_norm": 0.8754029273986816, |
| "learning_rate": 0.00028741138039752923, |
| "loss": 3.6854147338867187, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.13322246820990372, |
| "grad_norm": 0.8316354751586914, |
| "learning_rate": 0.00028739198309639445, |
| "loss": 3.955341491699219, |
| "step": 65050 |
| }, |
| { |
| "epoch": 0.13332486826233256, |
| "grad_norm": 0.7100203037261963, |
| "learning_rate": 0.00028737257151808783, |
| "loss": 3.53195556640625, |
| "step": 65100 |
| }, |
| { |
| "epoch": 0.13342726831476137, |
| "grad_norm": 0.7703724503517151, |
| "learning_rate": 0.00028735314566462653, |
| "loss": 3.5027481079101563, |
| "step": 65150 |
| }, |
| { |
| "epoch": 0.1335296683671902, |
| "grad_norm": 0.6825149059295654, |
| "learning_rate": 0.00028733370553802917, |
| "loss": 2.5823513793945314, |
| "step": 65200 |
| }, |
| { |
| "epoch": 0.13363206841961903, |
| "grad_norm": 0.7070282101631165, |
| "learning_rate": 0.00028731425114031595, |
| "loss": 3.5302462768554688, |
| "step": 65250 |
| }, |
| { |
| "epoch": 0.13373446847204787, |
| "grad_norm": 0.8907217383384705, |
| "learning_rate": 0.0002872947824735084, |
| "loss": 3.7343438720703124, |
| "step": 65300 |
| }, |
| { |
| "epoch": 0.13383686852447668, |
| "grad_norm": 0.6310061812400818, |
| "learning_rate": 0.00028727529953962973, |
| "loss": 3.046968688964844, |
| "step": 65350 |
| }, |
| { |
| "epoch": 0.13393926857690552, |
| "grad_norm": 0.830430269241333, |
| "learning_rate": 0.00028725580234070444, |
| "loss": 3.7792376708984374, |
| "step": 65400 |
| }, |
| { |
| "epoch": 0.13404166862933434, |
| "grad_norm": 0.7595807313919067, |
| "learning_rate": 0.0002872362908787586, |
| "loss": 4.01267578125, |
| "step": 65450 |
| }, |
| { |
| "epoch": 0.13414406868176318, |
| "grad_norm": 0.939785897731781, |
| "learning_rate": 0.00028721676515581975, |
| "loss": 3.7015313720703125, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.134246468734192, |
| "grad_norm": 0.7830142378807068, |
| "learning_rate": 0.00028719722517391694, |
| "loss": 3.7573947143554687, |
| "step": 65550 |
| }, |
| { |
| "epoch": 0.1343488687866208, |
| "grad_norm": 0.8249261379241943, |
| "learning_rate": 0.00028717767093508066, |
| "loss": 2.924357604980469, |
| "step": 65600 |
| }, |
| { |
| "epoch": 0.13445126883904965, |
| "grad_norm": 0.8103399276733398, |
| "learning_rate": 0.00028715810244134293, |
| "loss": 3.1508941650390625, |
| "step": 65650 |
| }, |
| { |
| "epoch": 0.13455366889147846, |
| "grad_norm": 1.0751904249191284, |
| "learning_rate": 0.0002871385196947372, |
| "loss": 3.2744952392578126, |
| "step": 65700 |
| }, |
| { |
| "epoch": 0.1346560689439073, |
| "grad_norm": 0.8905739188194275, |
| "learning_rate": 0.0002871189226972984, |
| "loss": 3.7018252563476564, |
| "step": 65750 |
| }, |
| { |
| "epoch": 0.13475846899633612, |
| "grad_norm": 0.9014281630516052, |
| "learning_rate": 0.00028709931145106304, |
| "loss": 3.712538757324219, |
| "step": 65800 |
| }, |
| { |
| "epoch": 0.13486086904876496, |
| "grad_norm": 0.9917147159576416, |
| "learning_rate": 0.000287079685958069, |
| "loss": 3.321624755859375, |
| "step": 65850 |
| }, |
| { |
| "epoch": 0.13496326910119377, |
| "grad_norm": 0.9449427127838135, |
| "learning_rate": 0.0002870600462203556, |
| "loss": 3.589186096191406, |
| "step": 65900 |
| }, |
| { |
| "epoch": 0.13506566915362261, |
| "grad_norm": 0.8208171725273132, |
| "learning_rate": 0.00028704039223996383, |
| "loss": 3.7818731689453124, |
| "step": 65950 |
| }, |
| { |
| "epoch": 0.13516806920605143, |
| "grad_norm": 0.8270769119262695, |
| "learning_rate": 0.0002870207240189359, |
| "loss": 3.731416015625, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.13527046925848027, |
| "grad_norm": 1.045253038406372, |
| "learning_rate": 0.0002870010415593159, |
| "loss": 3.7312091064453123, |
| "step": 66050 |
| }, |
| { |
| "epoch": 0.13537286931090908, |
| "grad_norm": 0.7662860155105591, |
| "learning_rate": 0.00028698134486314884, |
| "loss": 3.7503961181640624, |
| "step": 66100 |
| }, |
| { |
| "epoch": 0.13547526936333792, |
| "grad_norm": 0.7599702477455139, |
| "learning_rate": 0.0002869616339324817, |
| "loss": 2.9531689453125, |
| "step": 66150 |
| }, |
| { |
| "epoch": 0.13557766941576674, |
| "grad_norm": 0.9016150236129761, |
| "learning_rate": 0.00028694190876936274, |
| "loss": 3.9108657836914062, |
| "step": 66200 |
| }, |
| { |
| "epoch": 0.13568006946819558, |
| "grad_norm": 0.9253189563751221, |
| "learning_rate": 0.00028692216937584164, |
| "loss": 3.645496520996094, |
| "step": 66250 |
| }, |
| { |
| "epoch": 0.1357824695206244, |
| "grad_norm": 0.9780471324920654, |
| "learning_rate": 0.0002869024157539697, |
| "loss": 3.7777984619140623, |
| "step": 66300 |
| }, |
| { |
| "epoch": 0.13588486957305324, |
| "grad_norm": 0.7383785843849182, |
| "learning_rate": 0.00028688264790579956, |
| "loss": 3.588190002441406, |
| "step": 66350 |
| }, |
| { |
| "epoch": 0.13598726962548205, |
| "grad_norm": 0.8228618502616882, |
| "learning_rate": 0.00028686286583338554, |
| "loss": 3.2836099243164063, |
| "step": 66400 |
| }, |
| { |
| "epoch": 0.13608966967791086, |
| "grad_norm": 0.989874541759491, |
| "learning_rate": 0.00028684306953878316, |
| "loss": 3.2741690063476563, |
| "step": 66450 |
| }, |
| { |
| "epoch": 0.1361920697303397, |
| "grad_norm": 0.6227463483810425, |
| "learning_rate": 0.00028682325902404957, |
| "loss": 3.5655419921875, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.13629446978276852, |
| "grad_norm": 0.9205330014228821, |
| "learning_rate": 0.00028680343429124356, |
| "loss": 3.891072998046875, |
| "step": 66550 |
| }, |
| { |
| "epoch": 0.13639686983519736, |
| "grad_norm": 0.9149171113967896, |
| "learning_rate": 0.000286783595342425, |
| "loss": 3.59095458984375, |
| "step": 66600 |
| }, |
| { |
| "epoch": 0.13649926988762617, |
| "grad_norm": 0.9638737440109253, |
| "learning_rate": 0.00028676374217965567, |
| "loss": 3.623572998046875, |
| "step": 66650 |
| }, |
| { |
| "epoch": 0.13660166994005501, |
| "grad_norm": 1.3770073652267456, |
| "learning_rate": 0.0002867438748049985, |
| "loss": 3.716294250488281, |
| "step": 66700 |
| }, |
| { |
| "epoch": 0.13670406999248383, |
| "grad_norm": 0.7525309324264526, |
| "learning_rate": 0.000286723993220518, |
| "loss": 4.217930603027344, |
| "step": 66750 |
| }, |
| { |
| "epoch": 0.13680647004491267, |
| "grad_norm": 0.8076726198196411, |
| "learning_rate": 0.0002867040974282803, |
| "loss": 3.8803009033203124, |
| "step": 66800 |
| }, |
| { |
| "epoch": 0.13690887009734148, |
| "grad_norm": 0.6948472261428833, |
| "learning_rate": 0.00028668418743035275, |
| "loss": 3.5436331176757814, |
| "step": 66850 |
| }, |
| { |
| "epoch": 0.13701127014977033, |
| "grad_norm": 0.8509873151779175, |
| "learning_rate": 0.00028666426322880443, |
| "loss": 3.499276428222656, |
| "step": 66900 |
| }, |
| { |
| "epoch": 0.13711367020219914, |
| "grad_norm": 0.734075665473938, |
| "learning_rate": 0.0002866443248257057, |
| "loss": 3.3526876831054686, |
| "step": 66950 |
| }, |
| { |
| "epoch": 0.13721607025462798, |
| "grad_norm": 0.8169065713882446, |
| "learning_rate": 0.0002866243722231285, |
| "loss": 3.3126312255859376, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.1373184703070568, |
| "grad_norm": 0.8438522219657898, |
| "learning_rate": 0.0002866044054231462, |
| "loss": 3.438792419433594, |
| "step": 67050 |
| }, |
| { |
| "epoch": 0.13742087035948564, |
| "grad_norm": 0.8047662973403931, |
| "learning_rate": 0.00028658442442783364, |
| "loss": 3.4803237915039062, |
| "step": 67100 |
| }, |
| { |
| "epoch": 0.13752327041191445, |
| "grad_norm": 0.7526935338973999, |
| "learning_rate": 0.00028656442923926723, |
| "loss": 3.5479522705078126, |
| "step": 67150 |
| }, |
| { |
| "epoch": 0.1376256704643433, |
| "grad_norm": 0.8287502527236938, |
| "learning_rate": 0.0002865444198595247, |
| "loss": 3.7390045166015624, |
| "step": 67200 |
| }, |
| { |
| "epoch": 0.1377280705167721, |
| "grad_norm": 0.6148055791854858, |
| "learning_rate": 0.00028652439629068535, |
| "loss": 3.7372897338867186, |
| "step": 67250 |
| }, |
| { |
| "epoch": 0.13783047056920092, |
| "grad_norm": 0.8581375479698181, |
| "learning_rate": 0.00028650435853483006, |
| "loss": 2.9981643676757814, |
| "step": 67300 |
| }, |
| { |
| "epoch": 0.13793287062162976, |
| "grad_norm": 1.0106624364852905, |
| "learning_rate": 0.0002864843065940409, |
| "loss": 3.4451068115234373, |
| "step": 67350 |
| }, |
| { |
| "epoch": 0.13803527067405857, |
| "grad_norm": 0.938605785369873, |
| "learning_rate": 0.0002864642404704017, |
| "loss": 3.6765966796875, |
| "step": 67400 |
| }, |
| { |
| "epoch": 0.13813767072648742, |
| "grad_norm": 1.1929186582565308, |
| "learning_rate": 0.0002864441601659975, |
| "loss": 3.147588195800781, |
| "step": 67450 |
| }, |
| { |
| "epoch": 0.13824007077891623, |
| "grad_norm": 0.5836741328239441, |
| "learning_rate": 0.00028642406568291513, |
| "loss": 2.8205252075195313, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.13834247083134507, |
| "grad_norm": 0.8532480001449585, |
| "learning_rate": 0.0002864039570232426, |
| "loss": 3.003614501953125, |
| "step": 67550 |
| }, |
| { |
| "epoch": 0.13844487088377389, |
| "grad_norm": 0.7523052096366882, |
| "learning_rate": 0.0002863838341890696, |
| "loss": 3.6742901611328125, |
| "step": 67600 |
| }, |
| { |
| "epoch": 0.13854727093620273, |
| "grad_norm": 0.8439714908599854, |
| "learning_rate": 0.0002863636971824872, |
| "loss": 3.3492770385742188, |
| "step": 67650 |
| }, |
| { |
| "epoch": 0.13864967098863154, |
| "grad_norm": 0.8030802607536316, |
| "learning_rate": 0.00028634354600558785, |
| "loss": 3.6775198364257813, |
| "step": 67700 |
| }, |
| { |
| "epoch": 0.13875207104106038, |
| "grad_norm": 0.8020223379135132, |
| "learning_rate": 0.00028632338066046566, |
| "loss": 3.622167663574219, |
| "step": 67750 |
| }, |
| { |
| "epoch": 0.1388544710934892, |
| "grad_norm": 0.7629789710044861, |
| "learning_rate": 0.00028630320114921606, |
| "loss": 3.1032611083984376, |
| "step": 67800 |
| }, |
| { |
| "epoch": 0.13895687114591804, |
| "grad_norm": 0.8953397274017334, |
| "learning_rate": 0.0002862830074739361, |
| "loss": 3.3124514770507814, |
| "step": 67850 |
| }, |
| { |
| "epoch": 0.13905927119834685, |
| "grad_norm": 0.7486206293106079, |
| "learning_rate": 0.00028626279963672415, |
| "loss": 3.154571838378906, |
| "step": 67900 |
| }, |
| { |
| "epoch": 0.1391616712507757, |
| "grad_norm": 0.8250375986099243, |
| "learning_rate": 0.00028624257763968015, |
| "loss": 3.6296453857421875, |
| "step": 67950 |
| }, |
| { |
| "epoch": 0.1392640713032045, |
| "grad_norm": 1.0587407350540161, |
| "learning_rate": 0.00028622234148490544, |
| "loss": 3.5324700927734374, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.13936647135563335, |
| "grad_norm": 0.7875683903694153, |
| "learning_rate": 0.00028620209117450295, |
| "loss": 3.170576477050781, |
| "step": 68050 |
| }, |
| { |
| "epoch": 0.13946887140806216, |
| "grad_norm": 1.1913716793060303, |
| "learning_rate": 0.00028618182671057694, |
| "loss": 3.6836483764648436, |
| "step": 68100 |
| }, |
| { |
| "epoch": 0.13957127146049098, |
| "grad_norm": 0.8803089261054993, |
| "learning_rate": 0.00028616154809523326, |
| "loss": 3.468567199707031, |
| "step": 68150 |
| }, |
| { |
| "epoch": 0.13967367151291982, |
| "grad_norm": 0.6812267303466797, |
| "learning_rate": 0.00028614125533057906, |
| "loss": 3.56980712890625, |
| "step": 68200 |
| }, |
| { |
| "epoch": 0.13977607156534863, |
| "grad_norm": 0.6622804999351501, |
| "learning_rate": 0.0002861209484187232, |
| "loss": 3.2988763427734376, |
| "step": 68250 |
| }, |
| { |
| "epoch": 0.13987847161777747, |
| "grad_norm": 0.8914295434951782, |
| "learning_rate": 0.0002861006273617758, |
| "loss": 3.1821719360351564, |
| "step": 68300 |
| }, |
| { |
| "epoch": 0.13998087167020629, |
| "grad_norm": 0.9383370876312256, |
| "learning_rate": 0.00028608029216184867, |
| "loss": 3.6463201904296874, |
| "step": 68350 |
| }, |
| { |
| "epoch": 0.14008327172263513, |
| "grad_norm": 0.795408308506012, |
| "learning_rate": 0.0002860599428210548, |
| "loss": 3.4441323852539063, |
| "step": 68400 |
| }, |
| { |
| "epoch": 0.14018567177506394, |
| "grad_norm": 0.9368188381195068, |
| "learning_rate": 0.0002860395793415088, |
| "loss": 3.4534127807617185, |
| "step": 68450 |
| }, |
| { |
| "epoch": 0.14028807182749278, |
| "grad_norm": 0.9888190627098083, |
| "learning_rate": 0.0002860192017253269, |
| "loss": 3.839812927246094, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.1403904718799216, |
| "grad_norm": 1.1791257858276367, |
| "learning_rate": 0.0002859988099746266, |
| "loss": 3.30308837890625, |
| "step": 68550 |
| }, |
| { |
| "epoch": 0.14049287193235044, |
| "grad_norm": 0.8144651651382446, |
| "learning_rate": 0.00028597840409152683, |
| "loss": 3.5844757080078127, |
| "step": 68600 |
| }, |
| { |
| "epoch": 0.14059527198477925, |
| "grad_norm": 0.8788326382637024, |
| "learning_rate": 0.00028595798407814817, |
| "loss": 3.5440103149414064, |
| "step": 68650 |
| }, |
| { |
| "epoch": 0.1406976720372081, |
| "grad_norm": 0.754426121711731, |
| "learning_rate": 0.00028593754993661247, |
| "loss": 3.38293701171875, |
| "step": 68700 |
| }, |
| { |
| "epoch": 0.1408000720896369, |
| "grad_norm": 0.8822509050369263, |
| "learning_rate": 0.0002859171016690433, |
| "loss": 3.699421691894531, |
| "step": 68750 |
| }, |
| { |
| "epoch": 0.14090247214206575, |
| "grad_norm": 0.6882439255714417, |
| "learning_rate": 0.00028589663927756546, |
| "loss": 3.33095947265625, |
| "step": 68800 |
| }, |
| { |
| "epoch": 0.14100487219449456, |
| "grad_norm": 0.8108435273170471, |
| "learning_rate": 0.00028587616276430536, |
| "loss": 3.5015853881835937, |
| "step": 68850 |
| }, |
| { |
| "epoch": 0.1411072722469234, |
| "grad_norm": 0.6340552568435669, |
| "learning_rate": 0.00028585567213139075, |
| "loss": 3.374276123046875, |
| "step": 68900 |
| }, |
| { |
| "epoch": 0.14120967229935222, |
| "grad_norm": 0.6358705163002014, |
| "learning_rate": 0.0002858351673809511, |
| "loss": 3.372686462402344, |
| "step": 68950 |
| }, |
| { |
| "epoch": 0.14131207235178103, |
| "grad_norm": 0.6987962126731873, |
| "learning_rate": 0.000285814648515117, |
| "loss": 3.635752868652344, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.14141447240420987, |
| "grad_norm": 0.9426242113113403, |
| "learning_rate": 0.0002857941155360207, |
| "loss": 3.790219421386719, |
| "step": 69050 |
| }, |
| { |
| "epoch": 0.1415168724566387, |
| "grad_norm": 0.5323778986930847, |
| "learning_rate": 0.000285773568445796, |
| "loss": 3.5456610107421875, |
| "step": 69100 |
| }, |
| { |
| "epoch": 0.14161927250906753, |
| "grad_norm": 0.7765032052993774, |
| "learning_rate": 0.000285753007246578, |
| "loss": 2.8608853149414064, |
| "step": 69150 |
| }, |
| { |
| "epoch": 0.14172167256149634, |
| "grad_norm": 1.0102488994598389, |
| "learning_rate": 0.0002857324319405033, |
| "loss": 3.7360980224609377, |
| "step": 69200 |
| }, |
| { |
| "epoch": 0.14182407261392518, |
| "grad_norm": 0.6676150560379028, |
| "learning_rate": 0.00028571184252971, |
| "loss": 3.7574533081054686, |
| "step": 69250 |
| }, |
| { |
| "epoch": 0.141926472666354, |
| "grad_norm": 0.8389192223548889, |
| "learning_rate": 0.00028569123901633773, |
| "loss": 3.7205816650390626, |
| "step": 69300 |
| }, |
| { |
| "epoch": 0.14202887271878284, |
| "grad_norm": 0.9630427956581116, |
| "learning_rate": 0.0002856706214025275, |
| "loss": 3.1625067138671876, |
| "step": 69350 |
| }, |
| { |
| "epoch": 0.14213127277121165, |
| "grad_norm": 0.8320639729499817, |
| "learning_rate": 0.0002856499896904217, |
| "loss": 2.9422607421875, |
| "step": 69400 |
| }, |
| { |
| "epoch": 0.1422336728236405, |
| "grad_norm": 0.9393151998519897, |
| "learning_rate": 0.0002856293438821644, |
| "loss": 3.5568783569335936, |
| "step": 69450 |
| }, |
| { |
| "epoch": 0.1423360728760693, |
| "grad_norm": 0.8972524404525757, |
| "learning_rate": 0.000285608683979901, |
| "loss": 3.5590420532226563, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.14243847292849815, |
| "grad_norm": 0.5622543096542358, |
| "learning_rate": 0.00028558800998577835, |
| "loss": 2.8899127197265626, |
| "step": 69550 |
| }, |
| { |
| "epoch": 0.14254087298092696, |
| "grad_norm": 0.8466945886611938, |
| "learning_rate": 0.00028556732190194485, |
| "loss": 3.2979135131835937, |
| "step": 69600 |
| }, |
| { |
| "epoch": 0.1426432730333558, |
| "grad_norm": 1.3375204801559448, |
| "learning_rate": 0.00028554661973055026, |
| "loss": 3.5246792602539063, |
| "step": 69650 |
| }, |
| { |
| "epoch": 0.14274567308578462, |
| "grad_norm": 0.7531492114067078, |
| "learning_rate": 0.00028552590347374586, |
| "loss": 3.3118746948242186, |
| "step": 69700 |
| }, |
| { |
| "epoch": 0.14284807313821346, |
| "grad_norm": 0.8651145100593567, |
| "learning_rate": 0.00028550517313368444, |
| "loss": 3.485458984375, |
| "step": 69750 |
| }, |
| { |
| "epoch": 0.14295047319064227, |
| "grad_norm": 0.625991940498352, |
| "learning_rate": 0.0002854844287125202, |
| "loss": 3.164065246582031, |
| "step": 69800 |
| }, |
| { |
| "epoch": 0.1430528732430711, |
| "grad_norm": 2.080458402633667, |
| "learning_rate": 0.0002854636702124088, |
| "loss": 3.0976217651367186, |
| "step": 69850 |
| }, |
| { |
| "epoch": 0.14315527329549993, |
| "grad_norm": 0.7514007687568665, |
| "learning_rate": 0.00028544289763550733, |
| "loss": 3.78799072265625, |
| "step": 69900 |
| }, |
| { |
| "epoch": 0.14325767334792874, |
| "grad_norm": 0.5652868151664734, |
| "learning_rate": 0.00028542211098397447, |
| "loss": 2.5083651733398438, |
| "step": 69950 |
| }, |
| { |
| "epoch": 0.14336007340035758, |
| "grad_norm": 0.7237803339958191, |
| "learning_rate": 0.0002854013102599702, |
| "loss": 3.534099426269531, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.1434624734527864, |
| "grad_norm": 0.8753382563591003, |
| "learning_rate": 0.00028538049546565603, |
| "loss": 4.047043762207031, |
| "step": 70050 |
| }, |
| { |
| "epoch": 0.14356487350521524, |
| "grad_norm": 0.8999593257904053, |
| "learning_rate": 0.000285359666603195, |
| "loss": 3.6831619262695314, |
| "step": 70100 |
| }, |
| { |
| "epoch": 0.14366727355764405, |
| "grad_norm": 0.7087032794952393, |
| "learning_rate": 0.00028533882367475156, |
| "loss": 2.866451416015625, |
| "step": 70150 |
| }, |
| { |
| "epoch": 0.1437696736100729, |
| "grad_norm": 0.6140325665473938, |
| "learning_rate": 0.0002853179666824916, |
| "loss": 1.8367611694335937, |
| "step": 70200 |
| }, |
| { |
| "epoch": 0.1438720736625017, |
| "grad_norm": 0.7460519671440125, |
| "learning_rate": 0.0002852970956285824, |
| "loss": 2.957001037597656, |
| "step": 70250 |
| }, |
| { |
| "epoch": 0.14397447371493055, |
| "grad_norm": 1.0516009330749512, |
| "learning_rate": 0.0002852762105151929, |
| "loss": 2.5348553466796875, |
| "step": 70300 |
| }, |
| { |
| "epoch": 0.14407687376735936, |
| "grad_norm": 0.5429277420043945, |
| "learning_rate": 0.0002852553113444934, |
| "loss": 3.53834228515625, |
| "step": 70350 |
| }, |
| { |
| "epoch": 0.1441792738197882, |
| "grad_norm": 0.8015134334564209, |
| "learning_rate": 0.0002852343981186556, |
| "loss": 3.64453857421875, |
| "step": 70400 |
| }, |
| { |
| "epoch": 0.14428167387221702, |
| "grad_norm": 0.7445142269134521, |
| "learning_rate": 0.00028521347083985266, |
| "loss": 3.6188226318359376, |
| "step": 70450 |
| }, |
| { |
| "epoch": 0.14438407392464586, |
| "grad_norm": 0.9419240355491638, |
| "learning_rate": 0.00028519252951025935, |
| "loss": 2.8771868896484376, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.14448647397707468, |
| "grad_norm": 0.8755508065223694, |
| "learning_rate": 0.0002851715741320517, |
| "loss": 3.6672409057617186, |
| "step": 70550 |
| }, |
| { |
| "epoch": 0.14458887402950352, |
| "grad_norm": 0.6970762014389038, |
| "learning_rate": 0.00028515060470740743, |
| "loss": 3.7528070068359374, |
| "step": 70600 |
| }, |
| { |
| "epoch": 0.14469127408193233, |
| "grad_norm": 0.5237036943435669, |
| "learning_rate": 0.0002851296212385055, |
| "loss": 3.318054504394531, |
| "step": 70650 |
| }, |
| { |
| "epoch": 0.14479367413436114, |
| "grad_norm": 0.7218162417411804, |
| "learning_rate": 0.0002851086237275264, |
| "loss": 1.9354142761230468, |
| "step": 70700 |
| }, |
| { |
| "epoch": 0.14489607418678999, |
| "grad_norm": 0.46474677324295044, |
| "learning_rate": 0.00028508761217665215, |
| "loss": 1.882958221435547, |
| "step": 70750 |
| }, |
| { |
| "epoch": 0.1449984742392188, |
| "grad_norm": 0.665745198726654, |
| "learning_rate": 0.0002850665865880662, |
| "loss": 1.9264730834960937, |
| "step": 70800 |
| }, |
| { |
| "epoch": 0.14510087429164764, |
| "grad_norm": 0.9114018082618713, |
| "learning_rate": 0.00028504554696395334, |
| "loss": 2.677998046875, |
| "step": 70850 |
| }, |
| { |
| "epoch": 0.14520327434407646, |
| "grad_norm": 0.729942798614502, |
| "learning_rate": 0.0002850244933065, |
| "loss": 3.4295562744140624, |
| "step": 70900 |
| }, |
| { |
| "epoch": 0.1453056743965053, |
| "grad_norm": 1.0335681438446045, |
| "learning_rate": 0.000285003425617894, |
| "loss": 3.524678039550781, |
| "step": 70950 |
| }, |
| { |
| "epoch": 0.1454080744489341, |
| "grad_norm": 1.325173258781433, |
| "learning_rate": 0.00028498234390032453, |
| "loss": 3.4061398315429687, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.14551047450136295, |
| "grad_norm": 0.7562994956970215, |
| "learning_rate": 0.00028496124815598233, |
| "loss": 3.4216473388671873, |
| "step": 71050 |
| }, |
| { |
| "epoch": 0.14561287455379177, |
| "grad_norm": 0.8231451511383057, |
| "learning_rate": 0.00028494013838705964, |
| "loss": 3.4331399536132814, |
| "step": 71100 |
| }, |
| { |
| "epoch": 0.1457152746062206, |
| "grad_norm": 0.9096212387084961, |
| "learning_rate": 0.00028491901459575, |
| "loss": 3.2372637939453126, |
| "step": 71150 |
| }, |
| { |
| "epoch": 0.14581767465864942, |
| "grad_norm": 0.8419906497001648, |
| "learning_rate": 0.00028489787678424855, |
| "loss": 3.490650329589844, |
| "step": 71200 |
| }, |
| { |
| "epoch": 0.14592007471107826, |
| "grad_norm": 0.9181749224662781, |
| "learning_rate": 0.00028487672495475187, |
| "loss": 3.988592224121094, |
| "step": 71250 |
| }, |
| { |
| "epoch": 0.14602247476350708, |
| "grad_norm": 0.7534500360488892, |
| "learning_rate": 0.0002848555591094579, |
| "loss": 3.256888427734375, |
| "step": 71300 |
| }, |
| { |
| "epoch": 0.14612487481593592, |
| "grad_norm": 1.1033469438552856, |
| "learning_rate": 0.00028483437925056615, |
| "loss": 3.3334320068359373, |
| "step": 71350 |
| }, |
| { |
| "epoch": 0.14622727486836473, |
| "grad_norm": 0.9356803894042969, |
| "learning_rate": 0.0002848131853802775, |
| "loss": 2.933785705566406, |
| "step": 71400 |
| }, |
| { |
| "epoch": 0.14632967492079357, |
| "grad_norm": 0.8622822165489197, |
| "learning_rate": 0.00028479197750079434, |
| "loss": 3.4190252685546874, |
| "step": 71450 |
| }, |
| { |
| "epoch": 0.1464320749732224, |
| "grad_norm": 0.7642265558242798, |
| "learning_rate": 0.0002847707556143205, |
| "loss": 3.637124328613281, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.1465344750256512, |
| "grad_norm": 0.8362610340118408, |
| "learning_rate": 0.0002847495197230613, |
| "loss": 3.78397705078125, |
| "step": 71550 |
| }, |
| { |
| "epoch": 0.14663687507808004, |
| "grad_norm": 0.7937034964561462, |
| "learning_rate": 0.0002847282698292234, |
| "loss": 3.7184579467773435, |
| "step": 71600 |
| }, |
| { |
| "epoch": 0.14673927513050886, |
| "grad_norm": 0.8799037933349609, |
| "learning_rate": 0.0002847070059350151, |
| "loss": 3.535165100097656, |
| "step": 71650 |
| }, |
| { |
| "epoch": 0.1468416751829377, |
| "grad_norm": 0.7818918824195862, |
| "learning_rate": 0.000284685728042646, |
| "loss": 3.6417041015625, |
| "step": 71700 |
| }, |
| { |
| "epoch": 0.1469440752353665, |
| "grad_norm": 0.84147709608078, |
| "learning_rate": 0.00028466443615432713, |
| "loss": 3.487315673828125, |
| "step": 71750 |
| }, |
| { |
| "epoch": 0.14704647528779535, |
| "grad_norm": 0.6987602710723877, |
| "learning_rate": 0.00028464313027227117, |
| "loss": 3.3947982788085938, |
| "step": 71800 |
| }, |
| { |
| "epoch": 0.14714887534022417, |
| "grad_norm": 0.7040350437164307, |
| "learning_rate": 0.0002846218103986921, |
| "loss": 3.48228271484375, |
| "step": 71850 |
| }, |
| { |
| "epoch": 0.147251275392653, |
| "grad_norm": 0.753700852394104, |
| "learning_rate": 0.0002846004765358053, |
| "loss": 2.8886663818359377, |
| "step": 71900 |
| }, |
| { |
| "epoch": 0.14735367544508182, |
| "grad_norm": 0.6964828968048096, |
| "learning_rate": 0.0002845791286858278, |
| "loss": 3.3485205078125, |
| "step": 71950 |
| }, |
| { |
| "epoch": 0.14745607549751066, |
| "grad_norm": 0.7957376837730408, |
| "learning_rate": 0.00028455776685097796, |
| "loss": 4.061175842285156, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.14755847554993948, |
| "grad_norm": 0.8718839883804321, |
| "learning_rate": 0.00028453639103347557, |
| "loss": 3.8466424560546875, |
| "step": 72050 |
| }, |
| { |
| "epoch": 0.14766087560236832, |
| "grad_norm": 0.6680410504341125, |
| "learning_rate": 0.00028451500123554194, |
| "loss": 3.4465017700195313, |
| "step": 72100 |
| }, |
| { |
| "epoch": 0.14776327565479713, |
| "grad_norm": 0.7632986903190613, |
| "learning_rate": 0.0002844935974593998, |
| "loss": 3.496392822265625, |
| "step": 72150 |
| }, |
| { |
| "epoch": 0.14786567570722597, |
| "grad_norm": 0.8088258504867554, |
| "learning_rate": 0.0002844721797072733, |
| "loss": 3.6753024291992187, |
| "step": 72200 |
| }, |
| { |
| "epoch": 0.1479680757596548, |
| "grad_norm": 0.7746132016181946, |
| "learning_rate": 0.0002844507479813881, |
| "loss": 3.4665756225585938, |
| "step": 72250 |
| }, |
| { |
| "epoch": 0.14807047581208363, |
| "grad_norm": 0.9574618339538574, |
| "learning_rate": 0.00028442930228397134, |
| "loss": 3.6266830444335936, |
| "step": 72300 |
| }, |
| { |
| "epoch": 0.14817287586451244, |
| "grad_norm": 3.6261954307556152, |
| "learning_rate": 0.0002844078426172515, |
| "loss": 3.3307794189453124, |
| "step": 72350 |
| }, |
| { |
| "epoch": 0.14827527591694126, |
| "grad_norm": 1.3732120990753174, |
| "learning_rate": 0.00028438636898345856, |
| "loss": 3.371138000488281, |
| "step": 72400 |
| }, |
| { |
| "epoch": 0.1483776759693701, |
| "grad_norm": 0.8364174962043762, |
| "learning_rate": 0.00028436488138482407, |
| "loss": 3.6167337036132814, |
| "step": 72450 |
| }, |
| { |
| "epoch": 0.1484800760217989, |
| "grad_norm": 0.7466509938240051, |
| "learning_rate": 0.0002843433798235808, |
| "loss": 3.7814892578125, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.14858247607422775, |
| "grad_norm": 0.8802339434623718, |
| "learning_rate": 0.00028432186430196315, |
| "loss": 3.364360656738281, |
| "step": 72550 |
| }, |
| { |
| "epoch": 0.14868487612665657, |
| "grad_norm": 0.6437531113624573, |
| "learning_rate": 0.00028430033482220693, |
| "loss": 3.211015625, |
| "step": 72600 |
| }, |
| { |
| "epoch": 0.1487872761790854, |
| "grad_norm": 0.7954172492027283, |
| "learning_rate": 0.0002842787913865494, |
| "loss": 3.1693716430664063, |
| "step": 72650 |
| }, |
| { |
| "epoch": 0.14888967623151422, |
| "grad_norm": 0.735313892364502, |
| "learning_rate": 0.0002842572339972292, |
| "loss": 3.4131680297851563, |
| "step": 72700 |
| }, |
| { |
| "epoch": 0.14899207628394306, |
| "grad_norm": 1.2493815422058105, |
| "learning_rate": 0.0002842356626564866, |
| "loss": 2.6316799926757812, |
| "step": 72750 |
| }, |
| { |
| "epoch": 0.14909447633637188, |
| "grad_norm": 0.506629228591919, |
| "learning_rate": 0.00028421407736656305, |
| "loss": 2.7435052490234373, |
| "step": 72800 |
| }, |
| { |
| "epoch": 0.14919687638880072, |
| "grad_norm": 0.4948784112930298, |
| "learning_rate": 0.0002841924781297017, |
| "loss": 1.9055368041992187, |
| "step": 72850 |
| }, |
| { |
| "epoch": 0.14929927644122953, |
| "grad_norm": 0.9006844162940979, |
| "learning_rate": 0.000284170864948147, |
| "loss": 2.6189675903320313, |
| "step": 72900 |
| }, |
| { |
| "epoch": 0.14940167649365838, |
| "grad_norm": 0.6539232134819031, |
| "learning_rate": 0.00028414923782414496, |
| "loss": 3.5976483154296877, |
| "step": 72950 |
| }, |
| { |
| "epoch": 0.1495040765460872, |
| "grad_norm": 0.6356167793273926, |
| "learning_rate": 0.0002841275967599429, |
| "loss": 3.402171936035156, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.14960647659851603, |
| "grad_norm": 1.5707745552062988, |
| "learning_rate": 0.00028410594175778964, |
| "loss": 3.7100360107421877, |
| "step": 73050 |
| }, |
| { |
| "epoch": 0.14970887665094484, |
| "grad_norm": 0.5473312139511108, |
| "learning_rate": 0.0002840842728199356, |
| "loss": 2.539022674560547, |
| "step": 73100 |
| }, |
| { |
| "epoch": 0.14981127670337369, |
| "grad_norm": 0.8895722031593323, |
| "learning_rate": 0.00028406258994863245, |
| "loss": 2.5107452392578127, |
| "step": 73150 |
| }, |
| { |
| "epoch": 0.1499136767558025, |
| "grad_norm": 0.5692305564880371, |
| "learning_rate": 0.00028404089314613333, |
| "loss": 1.6453628540039062, |
| "step": 73200 |
| }, |
| { |
| "epoch": 0.1500160768082313, |
| "grad_norm": 0.6427550315856934, |
| "learning_rate": 0.00028401918241469294, |
| "loss": 1.628760986328125, |
| "step": 73250 |
| }, |
| { |
| "epoch": 0.15011847686066015, |
| "grad_norm": 0.705071747303009, |
| "learning_rate": 0.0002839974577565674, |
| "loss": 3.1914212036132814, |
| "step": 73300 |
| }, |
| { |
| "epoch": 0.15022087691308897, |
| "grad_norm": 0.7019383907318115, |
| "learning_rate": 0.0002839757191740141, |
| "loss": 3.628504333496094, |
| "step": 73350 |
| }, |
| { |
| "epoch": 0.1503232769655178, |
| "grad_norm": 0.8977711200714111, |
| "learning_rate": 0.0002839539666692921, |
| "loss": 3.583207092285156, |
| "step": 73400 |
| }, |
| { |
| "epoch": 0.15042567701794662, |
| "grad_norm": 0.7372389435768127, |
| "learning_rate": 0.00028393220024466187, |
| "loss": 2.969400634765625, |
| "step": 73450 |
| }, |
| { |
| "epoch": 0.15052807707037547, |
| "grad_norm": 0.7931883931159973, |
| "learning_rate": 0.0002839104199023853, |
| "loss": 3.232490234375, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.15063047712280428, |
| "grad_norm": 0.6523383259773254, |
| "learning_rate": 0.0002838886256447256, |
| "loss": 3.325892028808594, |
| "step": 73550 |
| }, |
| { |
| "epoch": 0.15073287717523312, |
| "grad_norm": 0.6729732155799866, |
| "learning_rate": 0.00028386681747394755, |
| "loss": 3.335216064453125, |
| "step": 73600 |
| }, |
| { |
| "epoch": 0.15083527722766193, |
| "grad_norm": 0.818371057510376, |
| "learning_rate": 0.0002838449953923174, |
| "loss": 3.518477783203125, |
| "step": 73650 |
| }, |
| { |
| "epoch": 0.15093767728009078, |
| "grad_norm": 0.7028401494026184, |
| "learning_rate": 0.00028382315940210284, |
| "loss": 3.509742431640625, |
| "step": 73700 |
| }, |
| { |
| "epoch": 0.1510400773325196, |
| "grad_norm": 1.2517348527908325, |
| "learning_rate": 0.0002838013095055729, |
| "loss": 3.626214599609375, |
| "step": 73750 |
| }, |
| { |
| "epoch": 0.15114247738494843, |
| "grad_norm": 0.7776418328285217, |
| "learning_rate": 0.00028377944570499814, |
| "loss": 3.3807473754882813, |
| "step": 73800 |
| }, |
| { |
| "epoch": 0.15124487743737725, |
| "grad_norm": 1.246304988861084, |
| "learning_rate": 0.0002837575680026506, |
| "loss": 3.671220703125, |
| "step": 73850 |
| }, |
| { |
| "epoch": 0.1513472774898061, |
| "grad_norm": 0.8468489050865173, |
| "learning_rate": 0.00028373567640080366, |
| "loss": 3.7939553833007813, |
| "step": 73900 |
| }, |
| { |
| "epoch": 0.1514496775422349, |
| "grad_norm": 0.9071077108383179, |
| "learning_rate": 0.0002837137709017322, |
| "loss": 4.0460302734375, |
| "step": 73950 |
| }, |
| { |
| "epoch": 0.15155207759466374, |
| "grad_norm": 0.8705784678459167, |
| "learning_rate": 0.00028369185150771257, |
| "loss": 3.0467730712890626, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.15165447764709256, |
| "grad_norm": 1.1212836503982544, |
| "learning_rate": 0.00028366991822102256, |
| "loss": 3.2574063110351563, |
| "step": 74050 |
| }, |
| { |
| "epoch": 0.15175687769952137, |
| "grad_norm": 0.6991548538208008, |
| "learning_rate": 0.0002836479710439413, |
| "loss": 3.221210632324219, |
| "step": 74100 |
| }, |
| { |
| "epoch": 0.1518592777519502, |
| "grad_norm": 0.7652693390846252, |
| "learning_rate": 0.00028362600997874953, |
| "loss": 3.4262896728515626, |
| "step": 74150 |
| }, |
| { |
| "epoch": 0.15196167780437903, |
| "grad_norm": 0.5328712463378906, |
| "learning_rate": 0.00028360403502772927, |
| "loss": 2.2504594421386717, |
| "step": 74200 |
| }, |
| { |
| "epoch": 0.15206407785680787, |
| "grad_norm": 0.623674750328064, |
| "learning_rate": 0.00028358204619316414, |
| "loss": 1.6738412475585938, |
| "step": 74250 |
| }, |
| { |
| "epoch": 0.15216647790923668, |
| "grad_norm": 0.7511982321739197, |
| "learning_rate": 0.0002835600434773391, |
| "loss": 3.5196023559570313, |
| "step": 74300 |
| }, |
| { |
| "epoch": 0.15226887796166552, |
| "grad_norm": 0.7045626640319824, |
| "learning_rate": 0.0002835380268825405, |
| "loss": 3.4125076293945313, |
| "step": 74350 |
| }, |
| { |
| "epoch": 0.15237127801409434, |
| "grad_norm": 0.688127875328064, |
| "learning_rate": 0.00028351599641105634, |
| "loss": 3.532620544433594, |
| "step": 74400 |
| }, |
| { |
| "epoch": 0.15247367806652318, |
| "grad_norm": 0.7123726606369019, |
| "learning_rate": 0.0002834939520651758, |
| "loss": 3.450240478515625, |
| "step": 74450 |
| }, |
| { |
| "epoch": 0.152576078118952, |
| "grad_norm": 0.6914170980453491, |
| "learning_rate": 0.0002834718938471897, |
| "loss": 3.7383859252929685, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.15267847817138083, |
| "grad_norm": 1.8187841176986694, |
| "learning_rate": 0.0002834498217593902, |
| "loss": 3.352877197265625, |
| "step": 74550 |
| }, |
| { |
| "epoch": 0.15278087822380965, |
| "grad_norm": 0.5876966118812561, |
| "learning_rate": 0.00028342773580407104, |
| "loss": 3.5138931274414062, |
| "step": 74600 |
| }, |
| { |
| "epoch": 0.1528832782762385, |
| "grad_norm": 0.6575474143028259, |
| "learning_rate": 0.00028340563598352716, |
| "loss": 2.800203857421875, |
| "step": 74650 |
| }, |
| { |
| "epoch": 0.1529856783286673, |
| "grad_norm": 0.9087119102478027, |
| "learning_rate": 0.0002833835223000551, |
| "loss": 3.22402587890625, |
| "step": 74700 |
| }, |
| { |
| "epoch": 0.15308807838109614, |
| "grad_norm": 0.5269556641578674, |
| "learning_rate": 0.0002833613947559529, |
| "loss": 3.43998291015625, |
| "step": 74750 |
| }, |
| { |
| "epoch": 0.15319047843352496, |
| "grad_norm": 0.7771069407463074, |
| "learning_rate": 0.0002833392533535198, |
| "loss": 3.7308123779296873, |
| "step": 74800 |
| }, |
| { |
| "epoch": 0.1532928784859538, |
| "grad_norm": 0.9969513416290283, |
| "learning_rate": 0.00028331709809505687, |
| "loss": 3.7192803955078126, |
| "step": 74850 |
| }, |
| { |
| "epoch": 0.1533952785383826, |
| "grad_norm": 0.705575704574585, |
| "learning_rate": 0.00028329492898286623, |
| "loss": 3.504131164550781, |
| "step": 74900 |
| }, |
| { |
| "epoch": 0.15349767859081143, |
| "grad_norm": 0.5487853288650513, |
| "learning_rate": 0.0002832727460192516, |
| "loss": 2.8572216796875, |
| "step": 74950 |
| }, |
| { |
| "epoch": 0.15360007864324027, |
| "grad_norm": 0.7012720108032227, |
| "learning_rate": 0.00028325054920651813, |
| "loss": 2.238103485107422, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.15370247869566908, |
| "grad_norm": 0.7673011422157288, |
| "learning_rate": 0.00028322833854697247, |
| "loss": 3.7670169067382813, |
| "step": 75050 |
| }, |
| { |
| "epoch": 0.15380487874809792, |
| "grad_norm": 0.7762806415557861, |
| "learning_rate": 0.00028320611404292266, |
| "loss": 3.65732177734375, |
| "step": 75100 |
| }, |
| { |
| "epoch": 0.15390727880052674, |
| "grad_norm": 0.8351573348045349, |
| "learning_rate": 0.0002831838756966781, |
| "loss": 3.3902908325195313, |
| "step": 75150 |
| }, |
| { |
| "epoch": 0.15400967885295558, |
| "grad_norm": 0.9058986306190491, |
| "learning_rate": 0.00028316162351054976, |
| "loss": 3.2655902099609375, |
| "step": 75200 |
| }, |
| { |
| "epoch": 0.1541120789053844, |
| "grad_norm": 0.7718729376792908, |
| "learning_rate": 0.0002831393574868499, |
| "loss": 2.826809997558594, |
| "step": 75250 |
| }, |
| { |
| "epoch": 0.15421447895781323, |
| "grad_norm": 0.8767629861831665, |
| "learning_rate": 0.00028311707762789255, |
| "loss": 3.345711975097656, |
| "step": 75300 |
| }, |
| { |
| "epoch": 0.15431687901024205, |
| "grad_norm": 0.7267951369285583, |
| "learning_rate": 0.00028309478393599263, |
| "loss": 3.440138244628906, |
| "step": 75350 |
| }, |
| { |
| "epoch": 0.1544192790626709, |
| "grad_norm": 0.8214264512062073, |
| "learning_rate": 0.000283072476413467, |
| "loss": 3.795940246582031, |
| "step": 75400 |
| }, |
| { |
| "epoch": 0.1545216791150997, |
| "grad_norm": 0.7978084087371826, |
| "learning_rate": 0.0002830501550626337, |
| "loss": 3.481332092285156, |
| "step": 75450 |
| }, |
| { |
| "epoch": 0.15462407916752854, |
| "grad_norm": 0.6108945608139038, |
| "learning_rate": 0.0002830278198858122, |
| "loss": 3.0552932739257814, |
| "step": 75500 |
| }, |
| { |
| "epoch": 0.15472647921995736, |
| "grad_norm": 0.7742500901222229, |
| "learning_rate": 0.0002830054708853236, |
| "loss": 3.446549072265625, |
| "step": 75550 |
| }, |
| { |
| "epoch": 0.1548288792723862, |
| "grad_norm": 0.8176902532577515, |
| "learning_rate": 0.0002829831080634903, |
| "loss": 3.7375308227539064, |
| "step": 75600 |
| }, |
| { |
| "epoch": 0.154931279324815, |
| "grad_norm": 0.7569313645362854, |
| "learning_rate": 0.00028296073142263596, |
| "loss": 3.7137493896484375, |
| "step": 75650 |
| }, |
| { |
| "epoch": 0.15503367937724385, |
| "grad_norm": 0.4838169515132904, |
| "learning_rate": 0.00028293834096508613, |
| "loss": 3.6915240478515625, |
| "step": 75700 |
| }, |
| { |
| "epoch": 0.15513607942967267, |
| "grad_norm": 0.669226348400116, |
| "learning_rate": 0.0002829159366931673, |
| "loss": 3.220513916015625, |
| "step": 75750 |
| }, |
| { |
| "epoch": 0.15523847948210148, |
| "grad_norm": 0.8119651079177856, |
| "learning_rate": 0.0002828935186092078, |
| "loss": 3.58036865234375, |
| "step": 75800 |
| }, |
| { |
| "epoch": 0.15534087953453032, |
| "grad_norm": 0.5975949168205261, |
| "learning_rate": 0.00028287108671553706, |
| "loss": 3.3334951782226563, |
| "step": 75850 |
| }, |
| { |
| "epoch": 0.15544327958695914, |
| "grad_norm": 0.8672727942466736, |
| "learning_rate": 0.0002828486410144862, |
| "loss": 3.343040771484375, |
| "step": 75900 |
| }, |
| { |
| "epoch": 0.15554567963938798, |
| "grad_norm": 0.7106810212135315, |
| "learning_rate": 0.0002828261815083877, |
| "loss": 3.3205633544921875, |
| "step": 75950 |
| }, |
| { |
| "epoch": 0.1556480796918168, |
| "grad_norm": 0.6641435623168945, |
| "learning_rate": 0.0002828037081995754, |
| "loss": 3.215283203125, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.15575047974424563, |
| "grad_norm": 0.8929319977760315, |
| "learning_rate": 0.0002827812210903846, |
| "loss": 2.9793209838867187, |
| "step": 76050 |
| }, |
| { |
| "epoch": 0.15585287979667445, |
| "grad_norm": 0.9121986627578735, |
| "learning_rate": 0.0002827587201831522, |
| "loss": 3.9058187866210936, |
| "step": 76100 |
| }, |
| { |
| "epoch": 0.1559552798491033, |
| "grad_norm": 0.7762316465377808, |
| "learning_rate": 0.00028273620548021624, |
| "loss": 3.6440216064453126, |
| "step": 76150 |
| }, |
| { |
| "epoch": 0.1560576799015321, |
| "grad_norm": 0.9442132711410522, |
| "learning_rate": 0.0002827136769839164, |
| "loss": 3.2715243530273437, |
| "step": 76200 |
| }, |
| { |
| "epoch": 0.15616007995396095, |
| "grad_norm": 0.8481264710426331, |
| "learning_rate": 0.00028269113469659373, |
| "loss": 3.4502252197265624, |
| "step": 76250 |
| }, |
| { |
| "epoch": 0.15626248000638976, |
| "grad_norm": 0.48445141315460205, |
| "learning_rate": 0.00028266857862059076, |
| "loss": 2.9071063232421874, |
| "step": 76300 |
| }, |
| { |
| "epoch": 0.1563648800588186, |
| "grad_norm": 0.7879681587219238, |
| "learning_rate": 0.00028264600875825145, |
| "loss": 3.08685546875, |
| "step": 76350 |
| }, |
| { |
| "epoch": 0.15646728011124741, |
| "grad_norm": 0.6723935604095459, |
| "learning_rate": 0.00028262342511192106, |
| "loss": 3.23456298828125, |
| "step": 76400 |
| }, |
| { |
| "epoch": 0.15656968016367626, |
| "grad_norm": 0.8366503119468689, |
| "learning_rate": 0.0002826008276839465, |
| "loss": 3.12440185546875, |
| "step": 76450 |
| }, |
| { |
| "epoch": 0.15667208021610507, |
| "grad_norm": 0.6648255586624146, |
| "learning_rate": 0.00028257821647667585, |
| "loss": 3.6282342529296874, |
| "step": 76500 |
| }, |
| { |
| "epoch": 0.1567744802685339, |
| "grad_norm": 0.233867809176445, |
| "learning_rate": 0.00028255559149245894, |
| "loss": 2.4692172241210937, |
| "step": 76550 |
| }, |
| { |
| "epoch": 0.15687688032096272, |
| "grad_norm": 0.9552132487297058, |
| "learning_rate": 0.00028253295273364675, |
| "loss": 2.3723199462890623, |
| "step": 76600 |
| }, |
| { |
| "epoch": 0.15697928037339154, |
| "grad_norm": 0.9119518995285034, |
| "learning_rate": 0.00028251030020259177, |
| "loss": 3.5183111572265626, |
| "step": 76650 |
| }, |
| { |
| "epoch": 0.15708168042582038, |
| "grad_norm": 0.7538524866104126, |
| "learning_rate": 0.00028248763390164807, |
| "loss": 3.69493896484375, |
| "step": 76700 |
| }, |
| { |
| "epoch": 0.1571840804782492, |
| "grad_norm": 0.9746021628379822, |
| "learning_rate": 0.00028246495383317093, |
| "loss": 2.4459327697753905, |
| "step": 76750 |
| }, |
| { |
| "epoch": 0.15728648053067804, |
| "grad_norm": 1.0355682373046875, |
| "learning_rate": 0.0002824422599995172, |
| "loss": 3.1521530151367188, |
| "step": 76800 |
| }, |
| { |
| "epoch": 0.15738888058310685, |
| "grad_norm": 1.0927115678787231, |
| "learning_rate": 0.00028241955240304513, |
| "loss": 3.066300048828125, |
| "step": 76850 |
| }, |
| { |
| "epoch": 0.1574912806355357, |
| "grad_norm": 0.9498497247695923, |
| "learning_rate": 0.00028239683104611433, |
| "loss": 3.6030181884765624, |
| "step": 76900 |
| }, |
| { |
| "epoch": 0.1575936806879645, |
| "grad_norm": 0.6711775660514832, |
| "learning_rate": 0.00028237409593108605, |
| "loss": 2.9892807006835938, |
| "step": 76950 |
| }, |
| { |
| "epoch": 0.15769608074039335, |
| "grad_norm": 0.8505134582519531, |
| "learning_rate": 0.00028235134706032267, |
| "loss": 3.558472900390625, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.15779848079282216, |
| "grad_norm": 0.7711685299873352, |
| "learning_rate": 0.0002823285844361883, |
| "loss": 3.2466412353515626, |
| "step": 77050 |
| }, |
| { |
| "epoch": 0.157900880845251, |
| "grad_norm": 0.8435817360877991, |
| "learning_rate": 0.00028230580806104814, |
| "loss": 3.2454754638671877, |
| "step": 77100 |
| }, |
| { |
| "epoch": 0.15800328089767982, |
| "grad_norm": 0.7799451947212219, |
| "learning_rate": 0.00028228301793726916, |
| "loss": 3.4074356079101564, |
| "step": 77150 |
| }, |
| { |
| "epoch": 0.15810568095010866, |
| "grad_norm": 0.7728955149650574, |
| "learning_rate": 0.0002822602140672196, |
| "loss": 3.511580505371094, |
| "step": 77200 |
| }, |
| { |
| "epoch": 0.15820808100253747, |
| "grad_norm": 0.7063544988632202, |
| "learning_rate": 0.0002822373964532691, |
| "loss": 3.2738442993164063, |
| "step": 77250 |
| }, |
| { |
| "epoch": 0.1583104810549663, |
| "grad_norm": 0.7023544907569885, |
| "learning_rate": 0.00028221456509778875, |
| "loss": 3.7345950317382814, |
| "step": 77300 |
| }, |
| { |
| "epoch": 0.15841288110739513, |
| "grad_norm": 0.9115304946899414, |
| "learning_rate": 0.0002821917200031511, |
| "loss": 3.140256042480469, |
| "step": 77350 |
| }, |
| { |
| "epoch": 0.15851528115982397, |
| "grad_norm": 0.7438466548919678, |
| "learning_rate": 0.00028216886117173013, |
| "loss": 3.7709716796875, |
| "step": 77400 |
| }, |
| { |
| "epoch": 0.15861768121225278, |
| "grad_norm": 0.7658351063728333, |
| "learning_rate": 0.0002821459886059013, |
| "loss": 2.747354736328125, |
| "step": 77450 |
| }, |
| { |
| "epoch": 0.1587200812646816, |
| "grad_norm": 0.6408258676528931, |
| "learning_rate": 0.0002821231023080412, |
| "loss": 3.999375915527344, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.15882248131711044, |
| "grad_norm": 0.772813081741333, |
| "learning_rate": 0.0002821002022805283, |
| "loss": 3.42555419921875, |
| "step": 77550 |
| }, |
| { |
| "epoch": 0.15892488136953925, |
| "grad_norm": 0.950205385684967, |
| "learning_rate": 0.0002820772885257422, |
| "loss": 3.6866400146484377, |
| "step": 77600 |
| }, |
| { |
| "epoch": 0.1590272814219681, |
| "grad_norm": 0.5234514474868774, |
| "learning_rate": 0.000282054361046064, |
| "loss": 2.6636093139648436, |
| "step": 77650 |
| }, |
| { |
| "epoch": 0.1591296814743969, |
| "grad_norm": 0.7165791988372803, |
| "learning_rate": 0.0002820314198438761, |
| "loss": 3.308489990234375, |
| "step": 77700 |
| }, |
| { |
| "epoch": 0.15923208152682575, |
| "grad_norm": 0.7389218211174011, |
| "learning_rate": 0.00028200846492156266, |
| "loss": 3.701646728515625, |
| "step": 77750 |
| }, |
| { |
| "epoch": 0.15933448157925456, |
| "grad_norm": 0.6821298599243164, |
| "learning_rate": 0.0002819854962815089, |
| "loss": 3.6112545776367186, |
| "step": 77800 |
| }, |
| { |
| "epoch": 0.1594368816316834, |
| "grad_norm": 0.6218832731246948, |
| "learning_rate": 0.00028196251392610173, |
| "loss": 3.4739862060546876, |
| "step": 77850 |
| }, |
| { |
| "epoch": 0.15953928168411222, |
| "grad_norm": 0.7211093902587891, |
| "learning_rate": 0.00028193951785772923, |
| "loss": 3.646156921386719, |
| "step": 77900 |
| }, |
| { |
| "epoch": 0.15964168173654106, |
| "grad_norm": 0.9792724847793579, |
| "learning_rate": 0.00028191650807878125, |
| "loss": 2.4151596069335937, |
| "step": 77950 |
| }, |
| { |
| "epoch": 0.15974408178896987, |
| "grad_norm": 0.6804146766662598, |
| "learning_rate": 0.0002818934845916487, |
| "loss": 2.791448059082031, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.1598464818413987, |
| "grad_norm": 0.7963101863861084, |
| "learning_rate": 0.0002818704473987241, |
| "loss": 2.887415771484375, |
| "step": 78050 |
| }, |
| { |
| "epoch": 0.15994888189382753, |
| "grad_norm": 1.2759873867034912, |
| "learning_rate": 0.00028184739650240144, |
| "loss": 3.3000274658203126, |
| "step": 78100 |
| }, |
| { |
| "epoch": 0.16005128194625637, |
| "grad_norm": 0.9473127126693726, |
| "learning_rate": 0.0002818243319050761, |
| "loss": 3.280038146972656, |
| "step": 78150 |
| }, |
| { |
| "epoch": 0.16015368199868518, |
| "grad_norm": 1.025072693824768, |
| "learning_rate": 0.0002818012536091447, |
| "loss": 3.278867492675781, |
| "step": 78200 |
| }, |
| { |
| "epoch": 0.16025608205111402, |
| "grad_norm": 0.6705909967422485, |
| "learning_rate": 0.00028177816161700553, |
| "loss": 3.659829406738281, |
| "step": 78250 |
| }, |
| { |
| "epoch": 0.16035848210354284, |
| "grad_norm": 0.5486139059066772, |
| "learning_rate": 0.00028175505593105825, |
| "loss": 2.785064697265625, |
| "step": 78300 |
| }, |
| { |
| "epoch": 0.16046088215597165, |
| "grad_norm": 0.7707447409629822, |
| "learning_rate": 0.0002817319365537038, |
| "loss": 3.8123992919921874, |
| "step": 78350 |
| }, |
| { |
| "epoch": 0.1605632822084005, |
| "grad_norm": 0.8866334557533264, |
| "learning_rate": 0.0002817088034873448, |
| "loss": 3.45692626953125, |
| "step": 78400 |
| }, |
| { |
| "epoch": 0.1606656822608293, |
| "grad_norm": 0.6880883574485779, |
| "learning_rate": 0.0002816856567343849, |
| "loss": 3.526729736328125, |
| "step": 78450 |
| }, |
| { |
| "epoch": 0.16076808231325815, |
| "grad_norm": 0.6768396496772766, |
| "learning_rate": 0.00028166249629722956, |
| "loss": 3.4408908081054688, |
| "step": 78500 |
| }, |
| { |
| "epoch": 0.16087048236568696, |
| "grad_norm": 0.9719237685203552, |
| "learning_rate": 0.0002816393221782856, |
| "loss": 2.7149722290039064, |
| "step": 78550 |
| }, |
| { |
| "epoch": 0.1609728824181158, |
| "grad_norm": 0.9855000376701355, |
| "learning_rate": 0.000281616134379961, |
| "loss": 3.5189361572265625, |
| "step": 78600 |
| }, |
| { |
| "epoch": 0.16107528247054462, |
| "grad_norm": 0.8920623660087585, |
| "learning_rate": 0.0002815929329046654, |
| "loss": 3.5056884765625, |
| "step": 78650 |
| }, |
| { |
| "epoch": 0.16117768252297346, |
| "grad_norm": 1.0610677003860474, |
| "learning_rate": 0.0002815697177548098, |
| "loss": 3.097916259765625, |
| "step": 78700 |
| }, |
| { |
| "epoch": 0.16128008257540227, |
| "grad_norm": 0.8963422775268555, |
| "learning_rate": 0.0002815464889328066, |
| "loss": 3.181166076660156, |
| "step": 78750 |
| }, |
| { |
| "epoch": 0.16138248262783111, |
| "grad_norm": 0.7184786200523376, |
| "learning_rate": 0.00028152324644106964, |
| "loss": 3.4025540161132812, |
| "step": 78800 |
| }, |
| { |
| "epoch": 0.16148488268025993, |
| "grad_norm": 0.8972223401069641, |
| "learning_rate": 0.00028149999028201426, |
| "loss": 3.6795730590820312, |
| "step": 78850 |
| }, |
| { |
| "epoch": 0.16158728273268877, |
| "grad_norm": 0.7884389758110046, |
| "learning_rate": 0.000281476720458057, |
| "loss": 3.221437683105469, |
| "step": 78900 |
| }, |
| { |
| "epoch": 0.16168968278511758, |
| "grad_norm": 0.5186575651168823, |
| "learning_rate": 0.00028145343697161604, |
| "loss": 3.423157043457031, |
| "step": 78950 |
| }, |
| { |
| "epoch": 0.16179208283754642, |
| "grad_norm": 0.6838876605033875, |
| "learning_rate": 0.0002814301398251109, |
| "loss": 2.6037109375, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.16189448288997524, |
| "grad_norm": 0.7351782321929932, |
| "learning_rate": 0.0002814068290209625, |
| "loss": 3.258736877441406, |
| "step": 79050 |
| }, |
| { |
| "epoch": 0.16199688294240408, |
| "grad_norm": 0.8734768033027649, |
| "learning_rate": 0.00028138350456159315, |
| "loss": 3.6974835205078125, |
| "step": 79100 |
| }, |
| { |
| "epoch": 0.1620992829948329, |
| "grad_norm": 0.7983621954917908, |
| "learning_rate": 0.00028136016644942665, |
| "loss": 3.60543701171875, |
| "step": 79150 |
| }, |
| { |
| "epoch": 0.1622016830472617, |
| "grad_norm": 0.6577921509742737, |
| "learning_rate": 0.0002813368146868883, |
| "loss": 3.336464538574219, |
| "step": 79200 |
| }, |
| { |
| "epoch": 0.16230408309969055, |
| "grad_norm": 0.6782569289207458, |
| "learning_rate": 0.0002813134492764046, |
| "loss": 3.735032043457031, |
| "step": 79250 |
| }, |
| { |
| "epoch": 0.16240648315211936, |
| "grad_norm": 0.8127371072769165, |
| "learning_rate": 0.0002812900702204036, |
| "loss": 3.584259033203125, |
| "step": 79300 |
| }, |
| { |
| "epoch": 0.1625088832045482, |
| "grad_norm": 0.8227265477180481, |
| "learning_rate": 0.00028126667752131473, |
| "loss": 2.95750244140625, |
| "step": 79350 |
| }, |
| { |
| "epoch": 0.16261128325697702, |
| "grad_norm": 0.8162257671356201, |
| "learning_rate": 0.00028124327118156893, |
| "loss": 3.5253372192382812, |
| "step": 79400 |
| }, |
| { |
| "epoch": 0.16271368330940586, |
| "grad_norm": 0.7429577708244324, |
| "learning_rate": 0.0002812198512035984, |
| "loss": 3.1853790283203125, |
| "step": 79450 |
| }, |
| { |
| "epoch": 0.16281608336183467, |
| "grad_norm": 0.7135078310966492, |
| "learning_rate": 0.00028119641758983695, |
| "loss": 3.3338772583007814, |
| "step": 79500 |
| }, |
| { |
| "epoch": 0.16291848341426352, |
| "grad_norm": 0.8013560175895691, |
| "learning_rate": 0.00028117297034271953, |
| "loss": 3.761092224121094, |
| "step": 79550 |
| }, |
| { |
| "epoch": 0.16302088346669233, |
| "grad_norm": 0.678429126739502, |
| "learning_rate": 0.0002811495094646828, |
| "loss": 3.4881576538085937, |
| "step": 79600 |
| }, |
| { |
| "epoch": 0.16312328351912117, |
| "grad_norm": 0.9312568306922913, |
| "learning_rate": 0.0002811260349581647, |
| "loss": 3.203521728515625, |
| "step": 79650 |
| }, |
| { |
| "epoch": 0.16322568357154998, |
| "grad_norm": 0.8246340751647949, |
| "learning_rate": 0.0002811025468256046, |
| "loss": 3.5164459228515623, |
| "step": 79700 |
| }, |
| { |
| "epoch": 0.16332808362397883, |
| "grad_norm": 1.7668465375900269, |
| "learning_rate": 0.00028107904506944324, |
| "loss": 2.763003234863281, |
| "step": 79750 |
| }, |
| { |
| "epoch": 0.16343048367640764, |
| "grad_norm": 0.4166733920574188, |
| "learning_rate": 0.00028105552969212284, |
| "loss": 2.9914471435546877, |
| "step": 79800 |
| }, |
| { |
| "epoch": 0.16353288372883648, |
| "grad_norm": 0.6907941699028015, |
| "learning_rate": 0.0002810320006960871, |
| "loss": 3.1851446533203127, |
| "step": 79850 |
| }, |
| { |
| "epoch": 0.1636352837812653, |
| "grad_norm": 0.6729636192321777, |
| "learning_rate": 0.00028100845808378083, |
| "loss": 3.1257308959960937, |
| "step": 79900 |
| }, |
| { |
| "epoch": 0.16373768383369414, |
| "grad_norm": 0.7356630563735962, |
| "learning_rate": 0.0002809849018576507, |
| "loss": 3.1083673095703124, |
| "step": 79950 |
| }, |
| { |
| "epoch": 0.16384008388612295, |
| "grad_norm": 0.9909185767173767, |
| "learning_rate": 0.00028096133202014443, |
| "loss": 3.5541717529296877, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.16394248393855176, |
| "grad_norm": 0.4952726662158966, |
| "learning_rate": 0.00028093774857371146, |
| "loss": 2.9160995483398438, |
| "step": 80050 |
| }, |
| { |
| "epoch": 0.1640448839909806, |
| "grad_norm": 0.6858778595924377, |
| "learning_rate": 0.00028091415152080225, |
| "loss": 3.611160888671875, |
| "step": 80100 |
| }, |
| { |
| "epoch": 0.16414728404340942, |
| "grad_norm": 0.6998670101165771, |
| "learning_rate": 0.0002808905408638691, |
| "loss": 3.506941833496094, |
| "step": 80150 |
| }, |
| { |
| "epoch": 0.16424968409583826, |
| "grad_norm": 0.8609181642532349, |
| "learning_rate": 0.0002808669166053654, |
| "loss": 3.5502630615234376, |
| "step": 80200 |
| }, |
| { |
| "epoch": 0.16435208414826707, |
| "grad_norm": 0.7192445993423462, |
| "learning_rate": 0.00028084327874774615, |
| "loss": 3.56413330078125, |
| "step": 80250 |
| }, |
| { |
| "epoch": 0.16445448420069592, |
| "grad_norm": 0.7595858573913574, |
| "learning_rate": 0.0002808196272934676, |
| "loss": 3.339110107421875, |
| "step": 80300 |
| }, |
| { |
| "epoch": 0.16455688425312473, |
| "grad_norm": 0.7111859321594238, |
| "learning_rate": 0.0002807959622449877, |
| "loss": 3.051424560546875, |
| "step": 80350 |
| }, |
| { |
| "epoch": 0.16465928430555357, |
| "grad_norm": 0.6211318373680115, |
| "learning_rate": 0.00028077228360476537, |
| "loss": 3.5583587646484376, |
| "step": 80400 |
| }, |
| { |
| "epoch": 0.16476168435798239, |
| "grad_norm": 0.783703625202179, |
| "learning_rate": 0.00028074859137526136, |
| "loss": 2.740151062011719, |
| "step": 80450 |
| }, |
| { |
| "epoch": 0.16486408441041123, |
| "grad_norm": 0.30799928307533264, |
| "learning_rate": 0.0002807248855589376, |
| "loss": 2.932861633300781, |
| "step": 80500 |
| }, |
| { |
| "epoch": 0.16496648446284004, |
| "grad_norm": 0.9074276089668274, |
| "learning_rate": 0.0002807011661582575, |
| "loss": 2.6673263549804687, |
| "step": 80550 |
| }, |
| { |
| "epoch": 0.16506888451526888, |
| "grad_norm": 0.680355429649353, |
| "learning_rate": 0.00028067743317568587, |
| "loss": 3.8975335693359376, |
| "step": 80600 |
| }, |
| { |
| "epoch": 0.1651712845676977, |
| "grad_norm": 1.1440931558609009, |
| "learning_rate": 0.000280653686613689, |
| "loss": 3.4063519287109374, |
| "step": 80650 |
| }, |
| { |
| "epoch": 0.16527368462012654, |
| "grad_norm": 0.7195169925689697, |
| "learning_rate": 0.00028062992647473445, |
| "loss": 3.4463735961914064, |
| "step": 80700 |
| }, |
| { |
| "epoch": 0.16537608467255535, |
| "grad_norm": 2.4933488368988037, |
| "learning_rate": 0.0002806061527612913, |
| "loss": 2.4636448669433593, |
| "step": 80750 |
| }, |
| { |
| "epoch": 0.1654784847249842, |
| "grad_norm": 0.8146184086799622, |
| "learning_rate": 0.00028058236547582997, |
| "loss": 3.016216125488281, |
| "step": 80800 |
| }, |
| { |
| "epoch": 0.165580884777413, |
| "grad_norm": 0.7645831108093262, |
| "learning_rate": 0.0002805585646208224, |
| "loss": 3.7815518188476562, |
| "step": 80850 |
| }, |
| { |
| "epoch": 0.16568328482984182, |
| "grad_norm": 0.5321808457374573, |
| "learning_rate": 0.00028053475019874187, |
| "loss": 3.1232025146484377, |
| "step": 80900 |
| }, |
| { |
| "epoch": 0.16578568488227066, |
| "grad_norm": 0.5005676746368408, |
| "learning_rate": 0.000280510922212063, |
| "loss": 2.5462303161621094, |
| "step": 80950 |
| }, |
| { |
| "epoch": 0.16588808493469948, |
| "grad_norm": 0.5022799372673035, |
| "learning_rate": 0.00028048708066326193, |
| "loss": 2.064752502441406, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.16599048498712832, |
| "grad_norm": 0.40124621987342834, |
| "learning_rate": 0.0002804632255548162, |
| "loss": 3.146656799316406, |
| "step": 81050 |
| }, |
| { |
| "epoch": 0.16609288503955713, |
| "grad_norm": 0.45695436000823975, |
| "learning_rate": 0.00028043935688920466, |
| "loss": 1.9398663330078125, |
| "step": 81100 |
| }, |
| { |
| "epoch": 0.16619528509198597, |
| "grad_norm": 0.9878008365631104, |
| "learning_rate": 0.0002804154746689077, |
| "loss": 3.3690643310546875, |
| "step": 81150 |
| }, |
| { |
| "epoch": 0.1662976851444148, |
| "grad_norm": 0.9054487943649292, |
| "learning_rate": 0.000280391578896407, |
| "loss": 3.499583740234375, |
| "step": 81200 |
| }, |
| { |
| "epoch": 0.16640008519684363, |
| "grad_norm": 0.7078624367713928, |
| "learning_rate": 0.00028036766957418576, |
| "loss": 3.645855712890625, |
| "step": 81250 |
| }, |
| { |
| "epoch": 0.16650248524927244, |
| "grad_norm": 0.8132025003433228, |
| "learning_rate": 0.0002803437467047285, |
| "loss": 2.701116027832031, |
| "step": 81300 |
| }, |
| { |
| "epoch": 0.16660488530170128, |
| "grad_norm": 0.6939849257469177, |
| "learning_rate": 0.00028031981029052116, |
| "loss": 3.399428405761719, |
| "step": 81350 |
| }, |
| { |
| "epoch": 0.1667072853541301, |
| "grad_norm": 0.7499716877937317, |
| "learning_rate": 0.00028029586033405114, |
| "loss": 3.7939776611328124, |
| "step": 81400 |
| }, |
| { |
| "epoch": 0.16680968540655894, |
| "grad_norm": 0.6567860245704651, |
| "learning_rate": 0.00028027189683780716, |
| "loss": 3.5192059326171874, |
| "step": 81450 |
| }, |
| { |
| "epoch": 0.16691208545898775, |
| "grad_norm": 0.5446729063987732, |
| "learning_rate": 0.0002802479198042795, |
| "loss": 4.063154602050782, |
| "step": 81500 |
| }, |
| { |
| "epoch": 0.1670144855114166, |
| "grad_norm": 1.0988305807113647, |
| "learning_rate": 0.00028022392923595973, |
| "loss": 3.8179937744140626, |
| "step": 81550 |
| }, |
| { |
| "epoch": 0.1671168855638454, |
| "grad_norm": 0.7563129663467407, |
| "learning_rate": 0.00028019992513534075, |
| "loss": 3.4095263671875, |
| "step": 81600 |
| }, |
| { |
| "epoch": 0.16721928561627425, |
| "grad_norm": 0.8743115067481995, |
| "learning_rate": 0.0002801759075049171, |
| "loss": 3.422995300292969, |
| "step": 81650 |
| }, |
| { |
| "epoch": 0.16732168566870306, |
| "grad_norm": 0.7077412605285645, |
| "learning_rate": 0.0002801518763471844, |
| "loss": 3.8168359375, |
| "step": 81700 |
| }, |
| { |
| "epoch": 0.16742408572113188, |
| "grad_norm": 0.5643756985664368, |
| "learning_rate": 0.00028012783166464, |
| "loss": 3.531881103515625, |
| "step": 81750 |
| }, |
| { |
| "epoch": 0.16752648577356072, |
| "grad_norm": 0.5498968362808228, |
| "learning_rate": 0.0002801037734597825, |
| "loss": 3.502477722167969, |
| "step": 81800 |
| }, |
| { |
| "epoch": 0.16762888582598953, |
| "grad_norm": 0.6478745937347412, |
| "learning_rate": 0.00028007970173511194, |
| "loss": 3.3485955810546875, |
| "step": 81850 |
| }, |
| { |
| "epoch": 0.16773128587841837, |
| "grad_norm": 0.837211012840271, |
| "learning_rate": 0.0002800556164931297, |
| "loss": 2.738769836425781, |
| "step": 81900 |
| }, |
| { |
| "epoch": 0.1678336859308472, |
| "grad_norm": 0.8199461102485657, |
| "learning_rate": 0.0002800315177363386, |
| "loss": 3.374551086425781, |
| "step": 81950 |
| }, |
| { |
| "epoch": 0.16793608598327603, |
| "grad_norm": 0.6247820258140564, |
| "learning_rate": 0.00028000740546724293, |
| "loss": 3.48384521484375, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.16803848603570484, |
| "grad_norm": 0.6754735708236694, |
| "learning_rate": 0.0002799832796883483, |
| "loss": 3.0733773803710935, |
| "step": 82050 |
| }, |
| { |
| "epoch": 0.16814088608813368, |
| "grad_norm": 0.7133215069770813, |
| "learning_rate": 0.0002799591404021617, |
| "loss": 3.0067816162109375, |
| "step": 82100 |
| }, |
| { |
| "epoch": 0.1682432861405625, |
| "grad_norm": 0.7670831680297852, |
| "learning_rate": 0.0002799349876111918, |
| "loss": 3.1390863037109376, |
| "step": 82150 |
| }, |
| { |
| "epoch": 0.16834568619299134, |
| "grad_norm": 0.4578395485877991, |
| "learning_rate": 0.0002799108213179482, |
| "loss": 2.519589691162109, |
| "step": 82200 |
| }, |
| { |
| "epoch": 0.16844808624542015, |
| "grad_norm": 0.6592589020729065, |
| "learning_rate": 0.0002798866415249422, |
| "loss": 2.3550537109375, |
| "step": 82250 |
| }, |
| { |
| "epoch": 0.168550486297849, |
| "grad_norm": 0.3382033407688141, |
| "learning_rate": 0.0002798624482346866, |
| "loss": 3.0852642822265626, |
| "step": 82300 |
| }, |
| { |
| "epoch": 0.1686528863502778, |
| "grad_norm": 0.5380098819732666, |
| "learning_rate": 0.00027983824144969533, |
| "loss": 2.1543919372558595, |
| "step": 82350 |
| }, |
| { |
| "epoch": 0.16875528640270665, |
| "grad_norm": 0.7728050351142883, |
| "learning_rate": 0.0002798140211724839, |
| "loss": 2.7856259155273437, |
| "step": 82400 |
| }, |
| { |
| "epoch": 0.16885768645513546, |
| "grad_norm": 0.7540838718414307, |
| "learning_rate": 0.00027978978740556915, |
| "loss": 3.5116085815429687, |
| "step": 82450 |
| }, |
| { |
| "epoch": 0.1689600865075643, |
| "grad_norm": 1.1563106775283813, |
| "learning_rate": 0.0002797655401514693, |
| "loss": 3.811059875488281, |
| "step": 82500 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 488281, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.6106783358976e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|