{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9973783198449789, "eval_steps": 500, "global_step": 245000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.589376449584961, "learning_rate": 1.6000000000000003e-05, "loss": 1.777, "step": 10 }, { "epoch": 0.0, "grad_norm": 1.149321436882019, "learning_rate": 3.6e-05, "loss": 1.7363, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.9109302759170532, "learning_rate": 5.4000000000000005e-05, "loss": 1.519, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.3910934925079346, "learning_rate": 7.4e-05, "loss": 1.6348, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.8185428380966187, "learning_rate": 9.4e-05, "loss": 1.5175, "step": 50 }, { "epoch": 0.0, "grad_norm": 1.8231908082962036, "learning_rate": 0.00011399999999999999, "loss": 1.5218, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.2001290321350098, "learning_rate": 0.000134, "loss": 1.4374, "step": 70 }, { "epoch": 0.0, "grad_norm": 2.3446545600891113, "learning_rate": 0.000154, "loss": 1.5026, "step": 80 }, { "epoch": 0.0, "grad_norm": 0.8444089889526367, "learning_rate": 0.000174, "loss": 1.5993, "step": 90 }, { "epoch": 0.0, "grad_norm": 2.628640651702881, "learning_rate": 0.000194, "loss": 1.4912, "step": 100 }, { "epoch": 0.0, "grad_norm": 3.5933313369750977, "learning_rate": 0.0002, "loss": 1.5046, "step": 110 }, { "epoch": 0.0, "grad_norm": 1.1921671628952026, "learning_rate": 0.0002, "loss": 1.3653, "step": 120 }, { "epoch": 0.0, "grad_norm": 8.399609565734863, "learning_rate": 0.0002, "loss": 1.689, "step": 130 }, { "epoch": 0.0, "grad_norm": 1.3887488842010498, "learning_rate": 0.0002, "loss": 1.444, "step": 140 }, { "epoch": 0.0, "grad_norm": 0.9178386926651001, "learning_rate": 0.0002, "loss": 1.7654, "step": 150 }, { "epoch": 0.0, "grad_norm": 3.6707193851470947, "learning_rate": 0.0002, "loss": 1.707, "step": 160 }, { "epoch": 0.0, "grad_norm": 2.496619462966919, "learning_rate": 0.0002, "loss": 1.425, "step": 170 }, { "epoch": 0.0, "grad_norm": 4.617307186126709, "learning_rate": 0.0002, "loss": 1.6097, "step": 180 }, { "epoch": 0.0, "grad_norm": 1.321842074394226, "learning_rate": 0.0002, "loss": 1.4371, "step": 190 }, { "epoch": 0.0, "grad_norm": 2.0756170749664307, "learning_rate": 0.0002, "loss": 1.6344, "step": 200 }, { "epoch": 0.0, "grad_norm": 1.6241226196289062, "learning_rate": 0.0002, "loss": 1.6591, "step": 210 }, { "epoch": 0.0, "grad_norm": 1.9414540529251099, "learning_rate": 0.0002, "loss": 1.3501, "step": 220 }, { "epoch": 0.0, "grad_norm": 1.469902753829956, "learning_rate": 0.0002, "loss": 1.8602, "step": 230 }, { "epoch": 0.0, "grad_norm": 1.3209344148635864, "learning_rate": 0.0002, "loss": 1.4553, "step": 240 }, { "epoch": 0.0, "grad_norm": 0.9776040315628052, "learning_rate": 0.0002, "loss": 1.418, "step": 250 }, { "epoch": 0.0, "grad_norm": 1.5219510793685913, "learning_rate": 0.0002, "loss": 1.3733, "step": 260 }, { "epoch": 0.0, "grad_norm": 1.063873052597046, "learning_rate": 0.0002, "loss": 1.6955, "step": 270 }, { "epoch": 0.0, "grad_norm": 1.6686502695083618, "learning_rate": 0.0002, "loss": 1.5146, "step": 280 }, { "epoch": 0.0, "grad_norm": 1.575346827507019, "learning_rate": 0.0002, "loss": 1.2118, "step": 290 }, { "epoch": 0.0, "grad_norm": 1.7042474746704102, "learning_rate": 0.0002, "loss": 1.6291, "step": 300 }, { "epoch": 0.0, "grad_norm": 2.9513139724731445, "learning_rate": 0.0002, "loss": 1.6987, "step": 310 }, { "epoch": 0.0, "grad_norm": 2.114675998687744, "learning_rate": 0.0002, "loss": 1.6638, "step": 320 }, { "epoch": 0.0, "grad_norm": 2.097245454788208, "learning_rate": 0.0002, "loss": 1.6209, "step": 330 }, { "epoch": 0.0, "grad_norm": 0.7791861295700073, "learning_rate": 0.0002, "loss": 1.3781, "step": 340 }, { "epoch": 0.0, "grad_norm": 1.4525409936904907, "learning_rate": 0.0002, "loss": 1.5178, "step": 350 }, { "epoch": 0.0, "grad_norm": 1.2239762544631958, "learning_rate": 0.0002, "loss": 1.3836, "step": 360 }, { "epoch": 0.0, "grad_norm": 2.4900193214416504, "learning_rate": 0.0002, "loss": 1.2168, "step": 370 }, { "epoch": 0.0, "grad_norm": 1.3550528287887573, "learning_rate": 0.0002, "loss": 1.466, "step": 380 }, { "epoch": 0.0, "grad_norm": 1.3808491230010986, "learning_rate": 0.0002, "loss": 1.5827, "step": 390 }, { "epoch": 0.0, "grad_norm": 1.8640412092208862, "learning_rate": 0.0002, "loss": 1.6618, "step": 400 }, { "epoch": 0.0, "grad_norm": 1.4722106456756592, "learning_rate": 0.0002, "loss": 1.3958, "step": 410 }, { "epoch": 0.0, "grad_norm": 1.0778567790985107, "learning_rate": 0.0002, "loss": 1.722, "step": 420 }, { "epoch": 0.0, "grad_norm": 1.1710599660873413, "learning_rate": 0.0002, "loss": 1.4496, "step": 430 }, { "epoch": 0.0, "grad_norm": 2.5744879245758057, "learning_rate": 0.0002, "loss": 1.5801, "step": 440 }, { "epoch": 0.0, "grad_norm": 1.3434849977493286, "learning_rate": 0.0002, "loss": 1.3938, "step": 450 }, { "epoch": 0.0, "grad_norm": 1.7501955032348633, "learning_rate": 0.0002, "loss": 1.4331, "step": 460 }, { "epoch": 0.0, "grad_norm": 1.2190204858779907, "learning_rate": 0.0002, "loss": 1.6053, "step": 470 }, { "epoch": 0.0, "grad_norm": 0.8149442672729492, "learning_rate": 0.0002, "loss": 1.5, "step": 480 }, { "epoch": 0.0, "grad_norm": 1.5202057361602783, "learning_rate": 0.0002, "loss": 1.4946, "step": 490 }, { "epoch": 0.0, "grad_norm": 1.1657288074493408, "learning_rate": 0.0002, "loss": 1.4609, "step": 500 }, { "epoch": 0.0, "grad_norm": 0.9077476263046265, "learning_rate": 0.0002, "loss": 1.489, "step": 510 }, { "epoch": 0.0, "grad_norm": 1.2785530090332031, "learning_rate": 0.0002, "loss": 1.5392, "step": 520 }, { "epoch": 0.0, "grad_norm": 1.7118219137191772, "learning_rate": 0.0002, "loss": 1.4813, "step": 530 }, { "epoch": 0.0, "grad_norm": 0.9123232960700989, "learning_rate": 0.0002, "loss": 1.4512, "step": 540 }, { "epoch": 0.0, "grad_norm": 1.9979891777038574, "learning_rate": 0.0002, "loss": 1.5469, "step": 550 }, { "epoch": 0.0, "grad_norm": 1.6821433305740356, "learning_rate": 0.0002, "loss": 1.3714, "step": 560 }, { "epoch": 0.0, "grad_norm": 1.3975540399551392, "learning_rate": 0.0002, "loss": 1.3536, "step": 570 }, { "epoch": 0.0, "grad_norm": 2.0896730422973633, "learning_rate": 0.0002, "loss": 1.4185, "step": 580 }, { "epoch": 0.0, "grad_norm": 0.6683186292648315, "learning_rate": 0.0002, "loss": 1.5823, "step": 590 }, { "epoch": 0.0, "grad_norm": 1.4588614702224731, "learning_rate": 0.0002, "loss": 1.5492, "step": 600 }, { "epoch": 0.0, "grad_norm": 0.9020287990570068, "learning_rate": 0.0002, "loss": 1.3861, "step": 610 }, { "epoch": 0.0, "grad_norm": 1.4958571195602417, "learning_rate": 0.0002, "loss": 1.2042, "step": 620 }, { "epoch": 0.0, "grad_norm": 1.055851936340332, "learning_rate": 0.0002, "loss": 1.3475, "step": 630 }, { "epoch": 0.0, "grad_norm": 0.9734490513801575, "learning_rate": 0.0002, "loss": 1.2971, "step": 640 }, { "epoch": 0.0, "grad_norm": 1.5048621892929077, "learning_rate": 0.0002, "loss": 1.3869, "step": 650 }, { "epoch": 0.0, "grad_norm": 1.880979299545288, "learning_rate": 0.0002, "loss": 1.4463, "step": 660 }, { "epoch": 0.0, "grad_norm": 1.7626601457595825, "learning_rate": 0.0002, "loss": 1.5171, "step": 670 }, { "epoch": 0.0, "grad_norm": 1.5099576711654663, "learning_rate": 0.0002, "loss": 1.6468, "step": 680 }, { "epoch": 0.0, "grad_norm": 1.4935935735702515, "learning_rate": 0.0002, "loss": 1.4994, "step": 690 }, { "epoch": 0.0, "grad_norm": 1.1450934410095215, "learning_rate": 0.0002, "loss": 1.7447, "step": 700 }, { "epoch": 0.0, "grad_norm": 1.7508375644683838, "learning_rate": 0.0002, "loss": 1.4911, "step": 710 }, { "epoch": 0.0, "grad_norm": 1.6189720630645752, "learning_rate": 0.0002, "loss": 1.3287, "step": 720 }, { "epoch": 0.0, "grad_norm": 1.4210704565048218, "learning_rate": 0.0002, "loss": 1.6431, "step": 730 }, { "epoch": 0.0, "grad_norm": 1.722288727760315, "learning_rate": 0.0002, "loss": 1.5733, "step": 740 }, { "epoch": 0.0, "grad_norm": 1.1026442050933838, "learning_rate": 0.0002, "loss": 1.342, "step": 750 }, { "epoch": 0.0, "grad_norm": 1.4286895990371704, "learning_rate": 0.0002, "loss": 1.6235, "step": 760 }, { "epoch": 0.0, "grad_norm": 3.7663393020629883, "learning_rate": 0.0002, "loss": 1.4672, "step": 770 }, { "epoch": 0.0, "grad_norm": 2.1426870822906494, "learning_rate": 0.0002, "loss": 1.3531, "step": 780 }, { "epoch": 0.0, "grad_norm": 2.00779128074646, "learning_rate": 0.0002, "loss": 1.4249, "step": 790 }, { "epoch": 0.0, "grad_norm": 1.3968355655670166, "learning_rate": 0.0002, "loss": 1.4604, "step": 800 }, { "epoch": 0.0, "grad_norm": 1.6210627555847168, "learning_rate": 0.0002, "loss": 1.3682, "step": 810 }, { "epoch": 0.0, "grad_norm": 2.0366439819335938, "learning_rate": 0.0002, "loss": 1.4459, "step": 820 }, { "epoch": 0.0, "grad_norm": 2.028634548187256, "learning_rate": 0.0002, "loss": 1.5687, "step": 830 }, { "epoch": 0.0, "grad_norm": 2.311163902282715, "learning_rate": 0.0002, "loss": 1.5004, "step": 840 }, { "epoch": 0.0, "grad_norm": 1.767933964729309, "learning_rate": 0.0002, "loss": 1.5239, "step": 850 }, { "epoch": 0.0, "grad_norm": 0.9912189841270447, "learning_rate": 0.0002, "loss": 1.6502, "step": 860 }, { "epoch": 0.0, "grad_norm": 1.0725475549697876, "learning_rate": 0.0002, "loss": 1.2749, "step": 870 }, { "epoch": 0.0, "grad_norm": 1.9378693103790283, "learning_rate": 0.0002, "loss": 1.5629, "step": 880 }, { "epoch": 0.0, "grad_norm": 1.351197600364685, "learning_rate": 0.0002, "loss": 1.6127, "step": 890 }, { "epoch": 0.0, "grad_norm": 2.0754082202911377, "learning_rate": 0.0002, "loss": 1.432, "step": 900 }, { "epoch": 0.0, "grad_norm": 0.646776020526886, "learning_rate": 0.0002, "loss": 1.3646, "step": 910 }, { "epoch": 0.0, "grad_norm": 1.3977205753326416, "learning_rate": 0.0002, "loss": 1.4515, "step": 920 }, { "epoch": 0.0, "grad_norm": 1.2073358297348022, "learning_rate": 0.0002, "loss": 1.4996, "step": 930 }, { "epoch": 0.0, "grad_norm": 2.5377461910247803, "learning_rate": 0.0002, "loss": 1.166, "step": 940 }, { "epoch": 0.0, "grad_norm": 0.9897949695587158, "learning_rate": 0.0002, "loss": 1.3275, "step": 950 }, { "epoch": 0.0, "grad_norm": 0.66123366355896, "learning_rate": 0.0002, "loss": 1.4602, "step": 960 }, { "epoch": 0.0, "grad_norm": 1.007681131362915, "learning_rate": 0.0002, "loss": 1.2411, "step": 970 }, { "epoch": 0.0, "grad_norm": 0.8497790098190308, "learning_rate": 0.0002, "loss": 1.4757, "step": 980 }, { "epoch": 0.0, "grad_norm": 2.143144369125366, "learning_rate": 0.0002, "loss": 1.284, "step": 990 }, { "epoch": 0.0, "grad_norm": 0.8760665059089661, "learning_rate": 0.0002, "loss": 1.369, "step": 1000 }, { "epoch": 0.0, "grad_norm": 2.4156322479248047, "learning_rate": 0.0002, "loss": 1.5217, "step": 1010 }, { "epoch": 0.0, "grad_norm": 1.8571357727050781, "learning_rate": 0.0002, "loss": 1.1094, "step": 1020 }, { "epoch": 0.0, "grad_norm": 1.3299635648727417, "learning_rate": 0.0002, "loss": 1.5131, "step": 1030 }, { "epoch": 0.0, "grad_norm": 1.6106808185577393, "learning_rate": 0.0002, "loss": 1.7099, "step": 1040 }, { "epoch": 0.0, "grad_norm": 1.2647922039031982, "learning_rate": 0.0002, "loss": 1.5159, "step": 1050 }, { "epoch": 0.0, "grad_norm": 1.4886081218719482, "learning_rate": 0.0002, "loss": 1.4819, "step": 1060 }, { "epoch": 0.0, "grad_norm": 0.741792619228363, "learning_rate": 0.0002, "loss": 1.4647, "step": 1070 }, { "epoch": 0.0, "grad_norm": 1.80961012840271, "learning_rate": 0.0002, "loss": 1.4864, "step": 1080 }, { "epoch": 0.0, "grad_norm": 1.3303622007369995, "learning_rate": 0.0002, "loss": 1.6562, "step": 1090 }, { "epoch": 0.0, "grad_norm": 1.5424312353134155, "learning_rate": 0.0002, "loss": 1.4969, "step": 1100 }, { "epoch": 0.0, "grad_norm": 1.3558670282363892, "learning_rate": 0.0002, "loss": 1.6004, "step": 1110 }, { "epoch": 0.0, "grad_norm": 1.6301796436309814, "learning_rate": 0.0002, "loss": 1.2829, "step": 1120 }, { "epoch": 0.0, "grad_norm": 1.880791187286377, "learning_rate": 0.0002, "loss": 1.5011, "step": 1130 }, { "epoch": 0.0, "grad_norm": 1.2466365098953247, "learning_rate": 0.0002, "loss": 1.3614, "step": 1140 }, { "epoch": 0.0, "grad_norm": 2.2848308086395264, "learning_rate": 0.0002, "loss": 1.3612, "step": 1150 }, { "epoch": 0.0, "grad_norm": 4.2493767738342285, "learning_rate": 0.0002, "loss": 1.6622, "step": 1160 }, { "epoch": 0.0, "grad_norm": 2.218007802963257, "learning_rate": 0.0002, "loss": 1.4354, "step": 1170 }, { "epoch": 0.0, "grad_norm": 2.1784889698028564, "learning_rate": 0.0002, "loss": 1.5091, "step": 1180 }, { "epoch": 0.0, "grad_norm": 3.0463290214538574, "learning_rate": 0.0002, "loss": 1.4874, "step": 1190 }, { "epoch": 0.0, "grad_norm": 1.1924620866775513, "learning_rate": 0.0002, "loss": 1.3122, "step": 1200 }, { "epoch": 0.0, "grad_norm": 1.573365330696106, "learning_rate": 0.0002, "loss": 1.1113, "step": 1210 }, { "epoch": 0.0, "grad_norm": 1.8420658111572266, "learning_rate": 0.0002, "loss": 1.3445, "step": 1220 }, { "epoch": 0.01, "grad_norm": 1.4060885906219482, "learning_rate": 0.0002, "loss": 1.3697, "step": 1230 }, { "epoch": 0.01, "grad_norm": 0.582984983921051, "learning_rate": 0.0002, "loss": 1.3794, "step": 1240 }, { "epoch": 0.01, "grad_norm": 2.182332754135132, "learning_rate": 0.0002, "loss": 1.3588, "step": 1250 }, { "epoch": 0.01, "grad_norm": 2.2213656902313232, "learning_rate": 0.0002, "loss": 1.3707, "step": 1260 }, { "epoch": 0.01, "grad_norm": 1.8811941146850586, "learning_rate": 0.0002, "loss": 1.2271, "step": 1270 }, { "epoch": 0.01, "grad_norm": 5.178961277008057, "learning_rate": 0.0002, "loss": 1.4302, "step": 1280 }, { "epoch": 0.01, "grad_norm": 1.287880778312683, "learning_rate": 0.0002, "loss": 1.2615, "step": 1290 }, { "epoch": 0.01, "grad_norm": 2.516469717025757, "learning_rate": 0.0002, "loss": 1.4153, "step": 1300 }, { "epoch": 0.01, "grad_norm": 1.6935205459594727, "learning_rate": 0.0002, "loss": 1.598, "step": 1310 }, { "epoch": 0.01, "grad_norm": 1.0534515380859375, "learning_rate": 0.0002, "loss": 1.6028, "step": 1320 }, { "epoch": 0.01, "grad_norm": 1.2225326299667358, "learning_rate": 0.0002, "loss": 1.256, "step": 1330 }, { "epoch": 0.01, "grad_norm": 1.54330313205719, "learning_rate": 0.0002, "loss": 1.4904, "step": 1340 }, { "epoch": 0.01, "grad_norm": 0.913129448890686, "learning_rate": 0.0002, "loss": 1.2235, "step": 1350 }, { "epoch": 0.01, "grad_norm": 0.8094781637191772, "learning_rate": 0.0002, "loss": 1.6662, "step": 1360 }, { "epoch": 0.01, "grad_norm": 1.0636080503463745, "learning_rate": 0.0002, "loss": 1.5025, "step": 1370 }, { "epoch": 0.01, "grad_norm": 1.9710248708724976, "learning_rate": 0.0002, "loss": 1.4538, "step": 1380 }, { "epoch": 0.01, "grad_norm": 1.5366309881210327, "learning_rate": 0.0002, "loss": 1.2414, "step": 1390 }, { "epoch": 0.01, "grad_norm": 1.5726699829101562, "learning_rate": 0.0002, "loss": 1.6799, "step": 1400 }, { "epoch": 0.01, "grad_norm": 2.3889243602752686, "learning_rate": 0.0002, "loss": 1.5001, "step": 1410 }, { "epoch": 0.01, "grad_norm": 1.929504156112671, "learning_rate": 0.0002, "loss": 1.5763, "step": 1420 }, { "epoch": 0.01, "grad_norm": 1.4241243600845337, "learning_rate": 0.0002, "loss": 1.4849, "step": 1430 }, { "epoch": 0.01, "grad_norm": 1.4324612617492676, "learning_rate": 0.0002, "loss": 1.2418, "step": 1440 }, { "epoch": 0.01, "grad_norm": 2.7622690200805664, "learning_rate": 0.0002, "loss": 1.2798, "step": 1450 }, { "epoch": 0.01, "grad_norm": 1.1792781352996826, "learning_rate": 0.0002, "loss": 1.45, "step": 1460 }, { "epoch": 0.01, "grad_norm": 1.94503653049469, "learning_rate": 0.0002, "loss": 1.4409, "step": 1470 }, { "epoch": 0.01, "grad_norm": 2.4807915687561035, "learning_rate": 0.0002, "loss": 1.5117, "step": 1480 }, { "epoch": 0.01, "grad_norm": 1.8685152530670166, "learning_rate": 0.0002, "loss": 1.4939, "step": 1490 }, { "epoch": 0.01, "grad_norm": 2.3348734378814697, "learning_rate": 0.0002, "loss": 1.3579, "step": 1500 }, { "epoch": 0.01, "grad_norm": 2.220153570175171, "learning_rate": 0.0002, "loss": 1.5083, "step": 1510 }, { "epoch": 0.01, "grad_norm": 1.7537442445755005, "learning_rate": 0.0002, "loss": 1.4628, "step": 1520 }, { "epoch": 0.01, "grad_norm": 1.128240942955017, "learning_rate": 0.0002, "loss": 1.5573, "step": 1530 }, { "epoch": 0.01, "grad_norm": 1.4191685914993286, "learning_rate": 0.0002, "loss": 1.2812, "step": 1540 }, { "epoch": 0.01, "grad_norm": 2.054636240005493, "learning_rate": 0.0002, "loss": 1.7338, "step": 1550 }, { "epoch": 0.01, "grad_norm": 2.0800065994262695, "learning_rate": 0.0002, "loss": 1.558, "step": 1560 }, { "epoch": 0.01, "grad_norm": 1.8358025550842285, "learning_rate": 0.0002, "loss": 1.5691, "step": 1570 }, { "epoch": 0.01, "grad_norm": 1.7277332544326782, "learning_rate": 0.0002, "loss": 1.4561, "step": 1580 }, { "epoch": 0.01, "grad_norm": 0.976081132888794, "learning_rate": 0.0002, "loss": 1.4291, "step": 1590 }, { "epoch": 0.01, "grad_norm": 1.043621301651001, "learning_rate": 0.0002, "loss": 1.4766, "step": 1600 }, { "epoch": 0.01, "grad_norm": 1.0692806243896484, "learning_rate": 0.0002, "loss": 1.4894, "step": 1610 }, { "epoch": 0.01, "grad_norm": 1.310453176498413, "learning_rate": 0.0002, "loss": 1.3846, "step": 1620 }, { "epoch": 0.01, "grad_norm": 1.8618420362472534, "learning_rate": 0.0002, "loss": 1.3761, "step": 1630 }, { "epoch": 0.01, "grad_norm": 1.2714115381240845, "learning_rate": 0.0002, "loss": 1.4464, "step": 1640 }, { "epoch": 0.01, "grad_norm": 2.3289361000061035, "learning_rate": 0.0002, "loss": 1.1381, "step": 1650 }, { "epoch": 0.01, "grad_norm": 1.4412096738815308, "learning_rate": 0.0002, "loss": 1.3958, "step": 1660 }, { "epoch": 0.01, "grad_norm": 2.147345542907715, "learning_rate": 0.0002, "loss": 1.374, "step": 1670 }, { "epoch": 0.01, "grad_norm": 0.973152220249176, "learning_rate": 0.0002, "loss": 1.6481, "step": 1680 }, { "epoch": 0.01, "grad_norm": 0.8938317894935608, "learning_rate": 0.0002, "loss": 1.3461, "step": 1690 }, { "epoch": 0.01, "grad_norm": 1.0354856252670288, "learning_rate": 0.0002, "loss": 1.3808, "step": 1700 }, { "epoch": 0.01, "grad_norm": 1.4256852865219116, "learning_rate": 0.0002, "loss": 1.4144, "step": 1710 }, { "epoch": 0.01, "grad_norm": 2.049783229827881, "learning_rate": 0.0002, "loss": 1.3842, "step": 1720 }, { "epoch": 0.01, "grad_norm": 1.1235336065292358, "learning_rate": 0.0002, "loss": 1.2927, "step": 1730 }, { "epoch": 0.01, "grad_norm": 2.087367534637451, "learning_rate": 0.0002, "loss": 1.6645, "step": 1740 }, { "epoch": 0.01, "grad_norm": 2.567688226699829, "learning_rate": 0.0002, "loss": 1.4004, "step": 1750 }, { "epoch": 0.01, "grad_norm": 1.561964511871338, "learning_rate": 0.0002, "loss": 1.3989, "step": 1760 }, { "epoch": 0.01, "grad_norm": 1.0652399063110352, "learning_rate": 0.0002, "loss": 1.3743, "step": 1770 }, { "epoch": 0.01, "grad_norm": 0.9610545635223389, "learning_rate": 0.0002, "loss": 1.3778, "step": 1780 }, { "epoch": 0.01, "grad_norm": 1.8028895854949951, "learning_rate": 0.0002, "loss": 1.5468, "step": 1790 }, { "epoch": 0.01, "grad_norm": 1.8976826667785645, "learning_rate": 0.0002, "loss": 1.2487, "step": 1800 }, { "epoch": 0.01, "grad_norm": 1.1269726753234863, "learning_rate": 0.0002, "loss": 1.3676, "step": 1810 }, { "epoch": 0.01, "grad_norm": 1.1023149490356445, "learning_rate": 0.0002, "loss": 1.7443, "step": 1820 }, { "epoch": 0.01, "grad_norm": 2.3436059951782227, "learning_rate": 0.0002, "loss": 1.7106, "step": 1830 }, { "epoch": 0.01, "grad_norm": 2.542353391647339, "learning_rate": 0.0002, "loss": 1.4899, "step": 1840 }, { "epoch": 0.01, "grad_norm": 2.066040277481079, "learning_rate": 0.0002, "loss": 1.4712, "step": 1850 }, { "epoch": 0.01, "grad_norm": 1.6463730335235596, "learning_rate": 0.0002, "loss": 1.6491, "step": 1860 }, { "epoch": 0.01, "grad_norm": 1.6781314611434937, "learning_rate": 0.0002, "loss": 1.6578, "step": 1870 }, { "epoch": 0.01, "grad_norm": 2.426217555999756, "learning_rate": 0.0002, "loss": 1.5053, "step": 1880 }, { "epoch": 0.01, "grad_norm": 1.546567678451538, "learning_rate": 0.0002, "loss": 1.6975, "step": 1890 }, { "epoch": 0.01, "grad_norm": 1.4242192506790161, "learning_rate": 0.0002, "loss": 1.4491, "step": 1900 }, { "epoch": 0.01, "grad_norm": 2.1818065643310547, "learning_rate": 0.0002, "loss": 1.5301, "step": 1910 }, { "epoch": 0.01, "grad_norm": 0.9450239539146423, "learning_rate": 0.0002, "loss": 1.2692, "step": 1920 }, { "epoch": 0.01, "grad_norm": 0.9360896348953247, "learning_rate": 0.0002, "loss": 1.4231, "step": 1930 }, { "epoch": 0.01, "grad_norm": 1.0713781118392944, "learning_rate": 0.0002, "loss": 1.4824, "step": 1940 }, { "epoch": 0.01, "grad_norm": 1.2130794525146484, "learning_rate": 0.0002, "loss": 1.7109, "step": 1950 }, { "epoch": 0.01, "grad_norm": 1.315224289894104, "learning_rate": 0.0002, "loss": 1.5106, "step": 1960 }, { "epoch": 0.01, "grad_norm": 1.5737098455429077, "learning_rate": 0.0002, "loss": 1.3665, "step": 1970 }, { "epoch": 0.01, "grad_norm": 0.7626425623893738, "learning_rate": 0.0002, "loss": 1.5376, "step": 1980 }, { "epoch": 0.01, "grad_norm": 0.9248694181442261, "learning_rate": 0.0002, "loss": 1.3057, "step": 1990 }, { "epoch": 0.01, "grad_norm": 1.0387037992477417, "learning_rate": 0.0002, "loss": 1.425, "step": 2000 }, { "epoch": 0.01, "grad_norm": 1.2247133255004883, "learning_rate": 0.0002, "loss": 1.3325, "step": 2010 }, { "epoch": 0.01, "grad_norm": 1.1097569465637207, "learning_rate": 0.0002, "loss": 1.467, "step": 2020 }, { "epoch": 0.01, "grad_norm": 1.5594412088394165, "learning_rate": 0.0002, "loss": 1.5736, "step": 2030 }, { "epoch": 0.01, "grad_norm": 2.783676862716675, "learning_rate": 0.0002, "loss": 1.6586, "step": 2040 }, { "epoch": 0.01, "grad_norm": 1.9329684972763062, "learning_rate": 0.0002, "loss": 1.6003, "step": 2050 }, { "epoch": 0.01, "grad_norm": 1.3528674840927124, "learning_rate": 0.0002, "loss": 1.633, "step": 2060 }, { "epoch": 0.01, "grad_norm": 1.31196129322052, "learning_rate": 0.0002, "loss": 1.4794, "step": 2070 }, { "epoch": 0.01, "grad_norm": 2.402500629425049, "learning_rate": 0.0002, "loss": 1.5088, "step": 2080 }, { "epoch": 0.01, "grad_norm": 1.648729681968689, "learning_rate": 0.0002, "loss": 1.8165, "step": 2090 }, { "epoch": 0.01, "grad_norm": 2.115878105163574, "learning_rate": 0.0002, "loss": 1.591, "step": 2100 }, { "epoch": 0.01, "grad_norm": 1.285029649734497, "learning_rate": 0.0002, "loss": 1.3903, "step": 2110 }, { "epoch": 0.01, "grad_norm": 1.7052081823349, "learning_rate": 0.0002, "loss": 1.2914, "step": 2120 }, { "epoch": 0.01, "grad_norm": 2.2718091011047363, "learning_rate": 0.0002, "loss": 1.4064, "step": 2130 }, { "epoch": 0.01, "grad_norm": 2.302302598953247, "learning_rate": 0.0002, "loss": 1.3486, "step": 2140 }, { "epoch": 0.01, "grad_norm": 0.9521153569221497, "learning_rate": 0.0002, "loss": 1.3359, "step": 2150 }, { "epoch": 0.01, "grad_norm": 1.6532702445983887, "learning_rate": 0.0002, "loss": 1.5133, "step": 2160 }, { "epoch": 0.01, "grad_norm": 1.6376208066940308, "learning_rate": 0.0002, "loss": 1.4904, "step": 2170 }, { "epoch": 0.01, "grad_norm": 1.4355058670043945, "learning_rate": 0.0002, "loss": 1.4644, "step": 2180 }, { "epoch": 0.01, "grad_norm": 1.325963020324707, "learning_rate": 0.0002, "loss": 1.5141, "step": 2190 }, { "epoch": 0.01, "grad_norm": 1.5482227802276611, "learning_rate": 0.0002, "loss": 1.5414, "step": 2200 }, { "epoch": 0.01, "grad_norm": 1.761547327041626, "learning_rate": 0.0002, "loss": 1.3667, "step": 2210 }, { "epoch": 0.01, "grad_norm": 2.286831855773926, "learning_rate": 0.0002, "loss": 1.4121, "step": 2220 }, { "epoch": 0.01, "grad_norm": 1.991927146911621, "learning_rate": 0.0002, "loss": 1.4139, "step": 2230 }, { "epoch": 0.01, "grad_norm": 1.6264787912368774, "learning_rate": 0.0002, "loss": 1.6761, "step": 2240 }, { "epoch": 0.01, "grad_norm": 1.2189797163009644, "learning_rate": 0.0002, "loss": 1.6795, "step": 2250 }, { "epoch": 0.01, "grad_norm": 2.8594465255737305, "learning_rate": 0.0002, "loss": 1.2851, "step": 2260 }, { "epoch": 0.01, "grad_norm": 1.3625386953353882, "learning_rate": 0.0002, "loss": 1.3797, "step": 2270 }, { "epoch": 0.01, "grad_norm": 2.9144654273986816, "learning_rate": 0.0002, "loss": 1.4489, "step": 2280 }, { "epoch": 0.01, "grad_norm": 2.035429000854492, "learning_rate": 0.0002, "loss": 1.5335, "step": 2290 }, { "epoch": 0.01, "grad_norm": 1.0782172679901123, "learning_rate": 0.0002, "loss": 1.2988, "step": 2300 }, { "epoch": 0.01, "grad_norm": 1.7201775312423706, "learning_rate": 0.0002, "loss": 1.2911, "step": 2310 }, { "epoch": 0.01, "grad_norm": 1.213869571685791, "learning_rate": 0.0002, "loss": 1.4179, "step": 2320 }, { "epoch": 0.01, "grad_norm": 4.5290141105651855, "learning_rate": 0.0002, "loss": 1.3113, "step": 2330 }, { "epoch": 0.01, "grad_norm": 2.852376699447632, "learning_rate": 0.0002, "loss": 1.3771, "step": 2340 }, { "epoch": 0.01, "grad_norm": 4.796501636505127, "learning_rate": 0.0002, "loss": 1.7229, "step": 2350 }, { "epoch": 0.01, "grad_norm": 1.366723656654358, "learning_rate": 0.0002, "loss": 1.6255, "step": 2360 }, { "epoch": 0.01, "grad_norm": 1.583186149597168, "learning_rate": 0.0002, "loss": 1.65, "step": 2370 }, { "epoch": 0.01, "grad_norm": 1.9587935209274292, "learning_rate": 0.0002, "loss": 1.4866, "step": 2380 }, { "epoch": 0.01, "grad_norm": 1.6079210042953491, "learning_rate": 0.0002, "loss": 1.4312, "step": 2390 }, { "epoch": 0.01, "grad_norm": 1.7863242626190186, "learning_rate": 0.0002, "loss": 1.5615, "step": 2400 }, { "epoch": 0.01, "grad_norm": 1.302526831626892, "learning_rate": 0.0002, "loss": 1.1162, "step": 2410 }, { "epoch": 0.01, "grad_norm": 2.0517868995666504, "learning_rate": 0.0002, "loss": 1.4223, "step": 2420 }, { "epoch": 0.01, "grad_norm": 4.172087669372559, "learning_rate": 0.0002, "loss": 1.6023, "step": 2430 }, { "epoch": 0.01, "grad_norm": 2.599093198776245, "learning_rate": 0.0002, "loss": 1.6116, "step": 2440 }, { "epoch": 0.01, "grad_norm": 1.244591474533081, "learning_rate": 0.0002, "loss": 1.7212, "step": 2450 }, { "epoch": 0.01, "grad_norm": 2.6463654041290283, "learning_rate": 0.0002, "loss": 1.3109, "step": 2460 }, { "epoch": 0.01, "grad_norm": 1.9666030406951904, "learning_rate": 0.0002, "loss": 1.4587, "step": 2470 }, { "epoch": 0.01, "grad_norm": 1.1653075218200684, "learning_rate": 0.0002, "loss": 1.4593, "step": 2480 }, { "epoch": 0.01, "grad_norm": 2.0642759799957275, "learning_rate": 0.0002, "loss": 1.4863, "step": 2490 }, { "epoch": 0.01, "grad_norm": 1.4935320615768433, "learning_rate": 0.0002, "loss": 1.5473, "step": 2500 }, { "epoch": 0.01, "grad_norm": 3.1814987659454346, "learning_rate": 0.0002, "loss": 1.7573, "step": 2510 }, { "epoch": 0.01, "grad_norm": 2.1878743171691895, "learning_rate": 0.0002, "loss": 1.6289, "step": 2520 }, { "epoch": 0.01, "grad_norm": 3.1959917545318604, "learning_rate": 0.0002, "loss": 1.21, "step": 2530 }, { "epoch": 0.01, "grad_norm": 2.2993996143341064, "learning_rate": 0.0002, "loss": 1.7104, "step": 2540 }, { "epoch": 0.01, "grad_norm": 1.1968332529067993, "learning_rate": 0.0002, "loss": 1.4651, "step": 2550 }, { "epoch": 0.01, "grad_norm": 2.1751720905303955, "learning_rate": 0.0002, "loss": 1.4756, "step": 2560 }, { "epoch": 0.01, "grad_norm": 2.312274932861328, "learning_rate": 0.0002, "loss": 1.2744, "step": 2570 }, { "epoch": 0.01, "grad_norm": 1.0814118385314941, "learning_rate": 0.0002, "loss": 1.4595, "step": 2580 }, { "epoch": 0.01, "grad_norm": 2.3007423877716064, "learning_rate": 0.0002, "loss": 1.3541, "step": 2590 }, { "epoch": 0.01, "grad_norm": 1.1635894775390625, "learning_rate": 0.0002, "loss": 1.4753, "step": 2600 }, { "epoch": 0.01, "grad_norm": 1.5198445320129395, "learning_rate": 0.0002, "loss": 1.2556, "step": 2610 }, { "epoch": 0.01, "grad_norm": 1.3646886348724365, "learning_rate": 0.0002, "loss": 1.5229, "step": 2620 }, { "epoch": 0.01, "grad_norm": 1.2258124351501465, "learning_rate": 0.0002, "loss": 1.6887, "step": 2630 }, { "epoch": 0.01, "grad_norm": 1.377655029296875, "learning_rate": 0.0002, "loss": 1.4365, "step": 2640 }, { "epoch": 0.01, "grad_norm": 1.0924650430679321, "learning_rate": 0.0002, "loss": 1.371, "step": 2650 }, { "epoch": 0.01, "grad_norm": 1.9510098695755005, "learning_rate": 0.0002, "loss": 1.357, "step": 2660 }, { "epoch": 0.01, "grad_norm": 0.9686455726623535, "learning_rate": 0.0002, "loss": 1.5168, "step": 2670 }, { "epoch": 0.01, "grad_norm": 2.6925392150878906, "learning_rate": 0.0002, "loss": 1.5893, "step": 2680 }, { "epoch": 0.01, "grad_norm": 2.4536304473876953, "learning_rate": 0.0002, "loss": 1.6354, "step": 2690 }, { "epoch": 0.01, "grad_norm": 1.5758863687515259, "learning_rate": 0.0002, "loss": 1.548, "step": 2700 }, { "epoch": 0.01, "grad_norm": 1.158151626586914, "learning_rate": 0.0002, "loss": 1.4753, "step": 2710 }, { "epoch": 0.01, "grad_norm": 2.06003475189209, "learning_rate": 0.0002, "loss": 1.625, "step": 2720 }, { "epoch": 0.01, "grad_norm": 1.659198522567749, "learning_rate": 0.0002, "loss": 1.5136, "step": 2730 }, { "epoch": 0.01, "grad_norm": 2.667346239089966, "learning_rate": 0.0002, "loss": 1.3396, "step": 2740 }, { "epoch": 0.01, "grad_norm": 2.890037775039673, "learning_rate": 0.0002, "loss": 1.3673, "step": 2750 }, { "epoch": 0.01, "grad_norm": 1.1365361213684082, "learning_rate": 0.0002, "loss": 1.6113, "step": 2760 }, { "epoch": 0.01, "grad_norm": 1.8777906894683838, "learning_rate": 0.0002, "loss": 1.5148, "step": 2770 }, { "epoch": 0.01, "grad_norm": 1.444021224975586, "learning_rate": 0.0002, "loss": 1.6066, "step": 2780 }, { "epoch": 0.01, "grad_norm": 1.706199288368225, "learning_rate": 0.0002, "loss": 1.287, "step": 2790 }, { "epoch": 0.01, "grad_norm": 1.1679260730743408, "learning_rate": 0.0002, "loss": 1.4947, "step": 2800 }, { "epoch": 0.01, "grad_norm": 3.4862565994262695, "learning_rate": 0.0002, "loss": 1.5115, "step": 2810 }, { "epoch": 0.01, "grad_norm": 1.0051679611206055, "learning_rate": 0.0002, "loss": 1.5768, "step": 2820 }, { "epoch": 0.01, "grad_norm": 1.3081517219543457, "learning_rate": 0.0002, "loss": 1.4446, "step": 2830 }, { "epoch": 0.01, "grad_norm": 1.901951789855957, "learning_rate": 0.0002, "loss": 1.7023, "step": 2840 }, { "epoch": 0.01, "grad_norm": 2.0230605602264404, "learning_rate": 0.0002, "loss": 1.4959, "step": 2850 }, { "epoch": 0.01, "grad_norm": 3.524280548095703, "learning_rate": 0.0002, "loss": 1.3969, "step": 2860 }, { "epoch": 0.01, "grad_norm": 1.0255800485610962, "learning_rate": 0.0002, "loss": 1.4957, "step": 2870 }, { "epoch": 0.01, "grad_norm": 1.4753506183624268, "learning_rate": 0.0002, "loss": 1.56, "step": 2880 }, { "epoch": 0.01, "grad_norm": 1.0972057580947876, "learning_rate": 0.0002, "loss": 1.4796, "step": 2890 }, { "epoch": 0.01, "grad_norm": 1.4642239809036255, "learning_rate": 0.0002, "loss": 1.4658, "step": 2900 }, { "epoch": 0.01, "grad_norm": 1.312801480293274, "learning_rate": 0.0002, "loss": 1.44, "step": 2910 }, { "epoch": 0.01, "grad_norm": 1.2456297874450684, "learning_rate": 0.0002, "loss": 1.461, "step": 2920 }, { "epoch": 0.01, "grad_norm": 1.3437690734863281, "learning_rate": 0.0002, "loss": 1.478, "step": 2930 }, { "epoch": 0.01, "grad_norm": 1.9658869504928589, "learning_rate": 0.0002, "loss": 1.461, "step": 2940 }, { "epoch": 0.01, "grad_norm": 1.5944026708602905, "learning_rate": 0.0002, "loss": 1.5676, "step": 2950 }, { "epoch": 0.01, "grad_norm": 1.1189651489257812, "learning_rate": 0.0002, "loss": 1.7022, "step": 2960 }, { "epoch": 0.01, "grad_norm": 2.2801690101623535, "learning_rate": 0.0002, "loss": 1.6261, "step": 2970 }, { "epoch": 0.01, "grad_norm": 2.571725845336914, "learning_rate": 0.0002, "loss": 1.4764, "step": 2980 }, { "epoch": 0.01, "grad_norm": 1.51327645778656, "learning_rate": 0.0002, "loss": 1.365, "step": 2990 }, { "epoch": 0.01, "grad_norm": 1.8581318855285645, "learning_rate": 0.0002, "loss": 1.5682, "step": 3000 }, { "epoch": 0.01, "grad_norm": 1.889491081237793, "learning_rate": 0.0002, "loss": 1.54, "step": 3010 }, { "epoch": 0.01, "grad_norm": 2.3872663974761963, "learning_rate": 0.0002, "loss": 1.4517, "step": 3020 }, { "epoch": 0.01, "grad_norm": 2.27540922164917, "learning_rate": 0.0002, "loss": 1.4421, "step": 3030 }, { "epoch": 0.01, "grad_norm": 2.4882636070251465, "learning_rate": 0.0002, "loss": 1.4864, "step": 3040 }, { "epoch": 0.01, "grad_norm": 0.9985896944999695, "learning_rate": 0.0002, "loss": 1.3385, "step": 3050 }, { "epoch": 0.01, "grad_norm": 4.17658805847168, "learning_rate": 0.0002, "loss": 1.3348, "step": 3060 }, { "epoch": 0.01, "grad_norm": 1.6819359064102173, "learning_rate": 0.0002, "loss": 1.583, "step": 3070 }, { "epoch": 0.01, "grad_norm": 1.4815325736999512, "learning_rate": 0.0002, "loss": 1.5708, "step": 3080 }, { "epoch": 0.01, "grad_norm": 3.030972957611084, "learning_rate": 0.0002, "loss": 1.3897, "step": 3090 }, { "epoch": 0.01, "grad_norm": 1.523781418800354, "learning_rate": 0.0002, "loss": 1.5234, "step": 3100 }, { "epoch": 0.01, "grad_norm": 1.90911865234375, "learning_rate": 0.0002, "loss": 1.4178, "step": 3110 }, { "epoch": 0.01, "grad_norm": 0.9892047643661499, "learning_rate": 0.0002, "loss": 1.3946, "step": 3120 }, { "epoch": 0.01, "grad_norm": 2.998006820678711, "learning_rate": 0.0002, "loss": 1.3066, "step": 3130 }, { "epoch": 0.01, "grad_norm": 2.279416799545288, "learning_rate": 0.0002, "loss": 1.2717, "step": 3140 }, { "epoch": 0.01, "grad_norm": 2.591686248779297, "learning_rate": 0.0002, "loss": 1.3422, "step": 3150 }, { "epoch": 0.01, "grad_norm": 1.805487871170044, "learning_rate": 0.0002, "loss": 1.5156, "step": 3160 }, { "epoch": 0.01, "grad_norm": 2.1312081813812256, "learning_rate": 0.0002, "loss": 1.4785, "step": 3170 }, { "epoch": 0.01, "grad_norm": 2.9344465732574463, "learning_rate": 0.0002, "loss": 1.4991, "step": 3180 }, { "epoch": 0.01, "grad_norm": 3.629490375518799, "learning_rate": 0.0002, "loss": 1.4419, "step": 3190 }, { "epoch": 0.01, "grad_norm": 1.8151328563690186, "learning_rate": 0.0002, "loss": 1.5141, "step": 3200 }, { "epoch": 0.01, "grad_norm": 1.9774075746536255, "learning_rate": 0.0002, "loss": 1.36, "step": 3210 }, { "epoch": 0.01, "grad_norm": 1.722943663597107, "learning_rate": 0.0002, "loss": 1.3924, "step": 3220 }, { "epoch": 0.01, "grad_norm": 1.8671678304672241, "learning_rate": 0.0002, "loss": 1.5823, "step": 3230 }, { "epoch": 0.01, "grad_norm": 2.161663293838501, "learning_rate": 0.0002, "loss": 1.381, "step": 3240 }, { "epoch": 0.01, "grad_norm": 3.3129711151123047, "learning_rate": 0.0002, "loss": 1.5389, "step": 3250 }, { "epoch": 0.01, "grad_norm": 0.8698174953460693, "learning_rate": 0.0002, "loss": 1.5793, "step": 3260 }, { "epoch": 0.01, "grad_norm": 3.3204448223114014, "learning_rate": 0.0002, "loss": 1.4166, "step": 3270 }, { "epoch": 0.01, "grad_norm": 1.7231913805007935, "learning_rate": 0.0002, "loss": 1.4012, "step": 3280 }, { "epoch": 0.01, "grad_norm": 1.559320092201233, "learning_rate": 0.0002, "loss": 1.3185, "step": 3290 }, { "epoch": 0.01, "grad_norm": 2.076484203338623, "learning_rate": 0.0002, "loss": 1.6419, "step": 3300 }, { "epoch": 0.01, "grad_norm": 1.3201106786727905, "learning_rate": 0.0002, "loss": 1.2976, "step": 3310 }, { "epoch": 0.01, "grad_norm": 1.6204051971435547, "learning_rate": 0.0002, "loss": 1.5194, "step": 3320 }, { "epoch": 0.01, "grad_norm": 1.951922059059143, "learning_rate": 0.0002, "loss": 1.3322, "step": 3330 }, { "epoch": 0.01, "grad_norm": 2.4534664154052734, "learning_rate": 0.0002, "loss": 1.37, "step": 3340 }, { "epoch": 0.01, "grad_norm": 1.05072021484375, "learning_rate": 0.0002, "loss": 1.5052, "step": 3350 }, { "epoch": 0.01, "grad_norm": 1.628923773765564, "learning_rate": 0.0002, "loss": 1.6537, "step": 3360 }, { "epoch": 0.01, "grad_norm": 2.132843494415283, "learning_rate": 0.0002, "loss": 1.4266, "step": 3370 }, { "epoch": 0.01, "grad_norm": 2.0541625022888184, "learning_rate": 0.0002, "loss": 1.4845, "step": 3380 }, { "epoch": 0.01, "grad_norm": 1.121385097503662, "learning_rate": 0.0002, "loss": 1.1799, "step": 3390 }, { "epoch": 0.01, "grad_norm": 1.2966097593307495, "learning_rate": 0.0002, "loss": 1.3918, "step": 3400 }, { "epoch": 0.01, "grad_norm": 1.2932034730911255, "learning_rate": 0.0002, "loss": 1.4987, "step": 3410 }, { "epoch": 0.01, "grad_norm": 1.7998820543289185, "learning_rate": 0.0002, "loss": 1.5505, "step": 3420 }, { "epoch": 0.01, "grad_norm": 2.4562363624572754, "learning_rate": 0.0002, "loss": 1.3929, "step": 3430 }, { "epoch": 0.01, "grad_norm": 1.6272681951522827, "learning_rate": 0.0002, "loss": 1.7316, "step": 3440 }, { "epoch": 0.01, "grad_norm": 4.452996730804443, "learning_rate": 0.0002, "loss": 1.5859, "step": 3450 }, { "epoch": 0.01, "grad_norm": 1.616959810256958, "learning_rate": 0.0002, "loss": 1.488, "step": 3460 }, { "epoch": 0.01, "grad_norm": 1.4198147058486938, "learning_rate": 0.0002, "loss": 1.2464, "step": 3470 }, { "epoch": 0.01, "grad_norm": 2.766491651535034, "learning_rate": 0.0002, "loss": 1.3372, "step": 3480 }, { "epoch": 0.01, "grad_norm": 2.331312894821167, "learning_rate": 0.0002, "loss": 1.5555, "step": 3490 }, { "epoch": 0.01, "grad_norm": 2.652804374694824, "learning_rate": 0.0002, "loss": 1.5046, "step": 3500 }, { "epoch": 0.01, "grad_norm": 1.7266465425491333, "learning_rate": 0.0002, "loss": 1.5567, "step": 3510 }, { "epoch": 0.01, "grad_norm": 3.474001169204712, "learning_rate": 0.0002, "loss": 1.2181, "step": 3520 }, { "epoch": 0.01, "grad_norm": 2.1394519805908203, "learning_rate": 0.0002, "loss": 1.4087, "step": 3530 }, { "epoch": 0.01, "grad_norm": 2.038754940032959, "learning_rate": 0.0002, "loss": 1.4722, "step": 3540 }, { "epoch": 0.01, "grad_norm": 1.6730581521987915, "learning_rate": 0.0002, "loss": 1.301, "step": 3550 }, { "epoch": 0.01, "grad_norm": 1.8325802087783813, "learning_rate": 0.0002, "loss": 1.3638, "step": 3560 }, { "epoch": 0.01, "grad_norm": 1.8193095922470093, "learning_rate": 0.0002, "loss": 1.5059, "step": 3570 }, { "epoch": 0.01, "grad_norm": 1.230997085571289, "learning_rate": 0.0002, "loss": 1.5953, "step": 3580 }, { "epoch": 0.01, "grad_norm": 0.8044302463531494, "learning_rate": 0.0002, "loss": 1.2335, "step": 3590 }, { "epoch": 0.01, "grad_norm": 1.899245023727417, "learning_rate": 0.0002, "loss": 1.5507, "step": 3600 }, { "epoch": 0.01, "grad_norm": 3.7805113792419434, "learning_rate": 0.0002, "loss": 1.5757, "step": 3610 }, { "epoch": 0.01, "grad_norm": 1.3325307369232178, "learning_rate": 0.0002, "loss": 1.3673, "step": 3620 }, { "epoch": 0.01, "grad_norm": 2.7180113792419434, "learning_rate": 0.0002, "loss": 1.4974, "step": 3630 }, { "epoch": 0.01, "grad_norm": 2.549239158630371, "learning_rate": 0.0002, "loss": 1.8687, "step": 3640 }, { "epoch": 0.01, "grad_norm": 2.928926706314087, "learning_rate": 0.0002, "loss": 1.4974, "step": 3650 }, { "epoch": 0.01, "grad_norm": 1.7045044898986816, "learning_rate": 0.0002, "loss": 1.4554, "step": 3660 }, { "epoch": 0.01, "grad_norm": 1.8304955959320068, "learning_rate": 0.0002, "loss": 1.2548, "step": 3670 }, { "epoch": 0.01, "grad_norm": 1.162506341934204, "learning_rate": 0.0002, "loss": 1.65, "step": 3680 }, { "epoch": 0.02, "grad_norm": 1.8491357564926147, "learning_rate": 0.0002, "loss": 1.6297, "step": 3690 }, { "epoch": 0.02, "grad_norm": 7.0602593421936035, "learning_rate": 0.0002, "loss": 1.6033, "step": 3700 }, { "epoch": 0.02, "grad_norm": 2.382282018661499, "learning_rate": 0.0002, "loss": 1.4587, "step": 3710 }, { "epoch": 0.02, "grad_norm": 0.8784319758415222, "learning_rate": 0.0002, "loss": 1.4073, "step": 3720 }, { "epoch": 0.02, "grad_norm": 1.055025577545166, "learning_rate": 0.0002, "loss": 1.4006, "step": 3730 }, { "epoch": 0.02, "grad_norm": 2.0446479320526123, "learning_rate": 0.0002, "loss": 1.33, "step": 3740 }, { "epoch": 0.02, "grad_norm": 1.2168147563934326, "learning_rate": 0.0002, "loss": 1.4453, "step": 3750 }, { "epoch": 0.02, "grad_norm": 1.1343603134155273, "learning_rate": 0.0002, "loss": 1.4049, "step": 3760 }, { "epoch": 0.02, "grad_norm": 1.727087378501892, "learning_rate": 0.0002, "loss": 1.6674, "step": 3770 }, { "epoch": 0.02, "grad_norm": 1.6251883506774902, "learning_rate": 0.0002, "loss": 1.4629, "step": 3780 }, { "epoch": 0.02, "grad_norm": 1.6518908739089966, "learning_rate": 0.0002, "loss": 1.4216, "step": 3790 }, { "epoch": 0.02, "grad_norm": 1.2526757717132568, "learning_rate": 0.0002, "loss": 1.5006, "step": 3800 }, { "epoch": 0.02, "grad_norm": 1.2282580137252808, "learning_rate": 0.0002, "loss": 1.502, "step": 3810 }, { "epoch": 0.02, "grad_norm": 1.940980315208435, "learning_rate": 0.0002, "loss": 1.7393, "step": 3820 }, { "epoch": 0.02, "grad_norm": 2.6348321437835693, "learning_rate": 0.0002, "loss": 1.3481, "step": 3830 }, { "epoch": 0.02, "grad_norm": 0.6545029282569885, "learning_rate": 0.0002, "loss": 1.3179, "step": 3840 }, { "epoch": 0.02, "grad_norm": 1.9354933500289917, "learning_rate": 0.0002, "loss": 1.4702, "step": 3850 }, { "epoch": 0.02, "grad_norm": 1.9089199304580688, "learning_rate": 0.0002, "loss": 1.5385, "step": 3860 }, { "epoch": 0.02, "grad_norm": 1.5357296466827393, "learning_rate": 0.0002, "loss": 1.625, "step": 3870 }, { "epoch": 0.02, "grad_norm": 1.5253922939300537, "learning_rate": 0.0002, "loss": 1.2586, "step": 3880 }, { "epoch": 0.02, "grad_norm": 1.8816895484924316, "learning_rate": 0.0002, "loss": 1.585, "step": 3890 }, { "epoch": 0.02, "grad_norm": 2.098456382751465, "learning_rate": 0.0002, "loss": 1.2573, "step": 3900 }, { "epoch": 0.02, "grad_norm": 2.813429832458496, "learning_rate": 0.0002, "loss": 1.3572, "step": 3910 }, { "epoch": 0.02, "grad_norm": 1.7141144275665283, "learning_rate": 0.0002, "loss": 1.4232, "step": 3920 }, { "epoch": 0.02, "grad_norm": 1.5040993690490723, "learning_rate": 0.0002, "loss": 1.6041, "step": 3930 }, { "epoch": 0.02, "grad_norm": 3.9464242458343506, "learning_rate": 0.0002, "loss": 1.7099, "step": 3940 }, { "epoch": 0.02, "grad_norm": 2.5177812576293945, "learning_rate": 0.0002, "loss": 1.406, "step": 3950 }, { "epoch": 0.02, "grad_norm": 1.447244644165039, "learning_rate": 0.0002, "loss": 1.4479, "step": 3960 }, { "epoch": 0.02, "grad_norm": 3.261845111846924, "learning_rate": 0.0002, "loss": 1.4147, "step": 3970 }, { "epoch": 0.02, "grad_norm": 1.2140867710113525, "learning_rate": 0.0002, "loss": 1.3579, "step": 3980 }, { "epoch": 0.02, "grad_norm": 1.7440218925476074, "learning_rate": 0.0002, "loss": 1.5552, "step": 3990 }, { "epoch": 0.02, "grad_norm": 1.4754027128219604, "learning_rate": 0.0002, "loss": 1.649, "step": 4000 }, { "epoch": 0.02, "grad_norm": 2.036698579788208, "learning_rate": 0.0002, "loss": 1.4775, "step": 4010 }, { "epoch": 0.02, "grad_norm": 1.4059628248214722, "learning_rate": 0.0002, "loss": 1.5401, "step": 4020 }, { "epoch": 0.02, "grad_norm": 1.0339330434799194, "learning_rate": 0.0002, "loss": 1.3522, "step": 4030 }, { "epoch": 0.02, "grad_norm": 2.240767240524292, "learning_rate": 0.0002, "loss": 1.6474, "step": 4040 }, { "epoch": 0.02, "grad_norm": 2.3955187797546387, "learning_rate": 0.0002, "loss": 1.5375, "step": 4050 }, { "epoch": 0.02, "grad_norm": 3.288022756576538, "learning_rate": 0.0002, "loss": 1.2496, "step": 4060 }, { "epoch": 0.02, "grad_norm": 1.9553142786026, "learning_rate": 0.0002, "loss": 1.777, "step": 4070 }, { "epoch": 0.02, "grad_norm": 0.9128702282905579, "learning_rate": 0.0002, "loss": 1.3176, "step": 4080 }, { "epoch": 0.02, "grad_norm": 2.389540672302246, "learning_rate": 0.0002, "loss": 1.803, "step": 4090 }, { "epoch": 0.02, "grad_norm": 2.2321815490722656, "learning_rate": 0.0002, "loss": 1.294, "step": 4100 }, { "epoch": 0.02, "grad_norm": 1.7091177701950073, "learning_rate": 0.0002, "loss": 1.5074, "step": 4110 }, { "epoch": 0.02, "grad_norm": 2.6222808361053467, "learning_rate": 0.0002, "loss": 1.6764, "step": 4120 }, { "epoch": 0.02, "grad_norm": 1.921373724937439, "learning_rate": 0.0002, "loss": 1.5948, "step": 4130 }, { "epoch": 0.02, "grad_norm": 1.4412789344787598, "learning_rate": 0.0002, "loss": 1.3432, "step": 4140 }, { "epoch": 0.02, "grad_norm": 5.218766689300537, "learning_rate": 0.0002, "loss": 1.4362, "step": 4150 }, { "epoch": 0.02, "grad_norm": 1.5714985132217407, "learning_rate": 0.0002, "loss": 1.3565, "step": 4160 }, { "epoch": 0.02, "grad_norm": 1.7275433540344238, "learning_rate": 0.0002, "loss": 1.3042, "step": 4170 }, { "epoch": 0.02, "grad_norm": 1.2498929500579834, "learning_rate": 0.0002, "loss": 1.2245, "step": 4180 }, { "epoch": 0.02, "grad_norm": 2.5174193382263184, "learning_rate": 0.0002, "loss": 1.4891, "step": 4190 }, { "epoch": 0.02, "grad_norm": 2.0389020442962646, "learning_rate": 0.0002, "loss": 1.4531, "step": 4200 }, { "epoch": 0.02, "grad_norm": 1.803632140159607, "learning_rate": 0.0002, "loss": 1.6657, "step": 4210 }, { "epoch": 0.02, "grad_norm": 2.0227162837982178, "learning_rate": 0.0002, "loss": 1.5007, "step": 4220 }, { "epoch": 0.02, "grad_norm": 1.7084730863571167, "learning_rate": 0.0002, "loss": 1.3624, "step": 4230 }, { "epoch": 0.02, "grad_norm": 2.1109089851379395, "learning_rate": 0.0002, "loss": 1.371, "step": 4240 }, { "epoch": 0.02, "grad_norm": 2.9875266551971436, "learning_rate": 0.0002, "loss": 1.5402, "step": 4250 }, { "epoch": 0.02, "grad_norm": 1.2295817136764526, "learning_rate": 0.0002, "loss": 1.3749, "step": 4260 }, { "epoch": 0.02, "grad_norm": 1.8105697631835938, "learning_rate": 0.0002, "loss": 1.4469, "step": 4270 }, { "epoch": 0.02, "grad_norm": 2.8642690181732178, "learning_rate": 0.0002, "loss": 1.3601, "step": 4280 }, { "epoch": 0.02, "grad_norm": 2.1495773792266846, "learning_rate": 0.0002, "loss": 1.6598, "step": 4290 }, { "epoch": 0.02, "grad_norm": 1.8136775493621826, "learning_rate": 0.0002, "loss": 1.4668, "step": 4300 }, { "epoch": 0.02, "grad_norm": 1.6787856817245483, "learning_rate": 0.0002, "loss": 1.5255, "step": 4310 }, { "epoch": 0.02, "grad_norm": 2.8019282817840576, "learning_rate": 0.0002, "loss": 1.3523, "step": 4320 }, { "epoch": 0.02, "grad_norm": 1.4048575162887573, "learning_rate": 0.0002, "loss": 1.4844, "step": 4330 }, { "epoch": 0.02, "grad_norm": 3.301051616668701, "learning_rate": 0.0002, "loss": 1.5767, "step": 4340 }, { "epoch": 0.02, "grad_norm": 0.9946045279502869, "learning_rate": 0.0002, "loss": 1.4519, "step": 4350 }, { "epoch": 0.02, "grad_norm": 1.645694375038147, "learning_rate": 0.0002, "loss": 1.4278, "step": 4360 }, { "epoch": 0.02, "grad_norm": 1.5837746858596802, "learning_rate": 0.0002, "loss": 1.3992, "step": 4370 }, { "epoch": 0.02, "grad_norm": 2.164364814758301, "learning_rate": 0.0002, "loss": 1.4853, "step": 4380 }, { "epoch": 0.02, "grad_norm": 1.9654324054718018, "learning_rate": 0.0002, "loss": 1.3968, "step": 4390 }, { "epoch": 0.02, "grad_norm": 1.500498652458191, "learning_rate": 0.0002, "loss": 1.9741, "step": 4400 }, { "epoch": 0.02, "grad_norm": 1.7744848728179932, "learning_rate": 0.0002, "loss": 1.3446, "step": 4410 }, { "epoch": 0.02, "grad_norm": 2.0537919998168945, "learning_rate": 0.0002, "loss": 1.5538, "step": 4420 }, { "epoch": 0.02, "grad_norm": 2.8792989253997803, "learning_rate": 0.0002, "loss": 1.2576, "step": 4430 }, { "epoch": 0.02, "grad_norm": 2.046884298324585, "learning_rate": 0.0002, "loss": 1.6526, "step": 4440 }, { "epoch": 0.02, "grad_norm": 1.5159200429916382, "learning_rate": 0.0002, "loss": 1.3056, "step": 4450 }, { "epoch": 0.02, "grad_norm": 4.057891845703125, "learning_rate": 0.0002, "loss": 1.5421, "step": 4460 }, { "epoch": 0.02, "grad_norm": 2.0374948978424072, "learning_rate": 0.0002, "loss": 1.3253, "step": 4470 }, { "epoch": 0.02, "grad_norm": 2.7143142223358154, "learning_rate": 0.0002, "loss": 1.3678, "step": 4480 }, { "epoch": 0.02, "grad_norm": 1.2295711040496826, "learning_rate": 0.0002, "loss": 1.5948, "step": 4490 }, { "epoch": 0.02, "grad_norm": 5.688213348388672, "learning_rate": 0.0002, "loss": 1.4016, "step": 4500 }, { "epoch": 0.02, "grad_norm": 2.001575469970703, "learning_rate": 0.0002, "loss": 1.3075, "step": 4510 }, { "epoch": 0.02, "grad_norm": 4.5063395500183105, "learning_rate": 0.0002, "loss": 1.5218, "step": 4520 }, { "epoch": 0.02, "grad_norm": 1.1385338306427002, "learning_rate": 0.0002, "loss": 1.4481, "step": 4530 }, { "epoch": 0.02, "grad_norm": 1.3758723735809326, "learning_rate": 0.0002, "loss": 1.463, "step": 4540 }, { "epoch": 0.02, "grad_norm": 1.7803267240524292, "learning_rate": 0.0002, "loss": 1.4652, "step": 4550 }, { "epoch": 0.02, "grad_norm": 2.814828872680664, "learning_rate": 0.0002, "loss": 1.6019, "step": 4560 }, { "epoch": 0.02, "grad_norm": 2.362867593765259, "learning_rate": 0.0002, "loss": 1.5313, "step": 4570 }, { "epoch": 0.02, "grad_norm": 1.793631911277771, "learning_rate": 0.0002, "loss": 1.7477, "step": 4580 }, { "epoch": 0.02, "grad_norm": 2.7016189098358154, "learning_rate": 0.0002, "loss": 1.2822, "step": 4590 }, { "epoch": 0.02, "grad_norm": 2.326237916946411, "learning_rate": 0.0002, "loss": 1.2643, "step": 4600 }, { "epoch": 0.02, "grad_norm": 3.419785499572754, "learning_rate": 0.0002, "loss": 1.175, "step": 4610 }, { "epoch": 0.02, "grad_norm": 1.5114496946334839, "learning_rate": 0.0002, "loss": 1.5619, "step": 4620 }, { "epoch": 0.02, "grad_norm": 1.5788434743881226, "learning_rate": 0.0002, "loss": 1.4682, "step": 4630 }, { "epoch": 0.02, "grad_norm": 2.286411762237549, "learning_rate": 0.0002, "loss": 1.4605, "step": 4640 }, { "epoch": 0.02, "grad_norm": 2.1366095542907715, "learning_rate": 0.0002, "loss": 1.5961, "step": 4650 }, { "epoch": 0.02, "grad_norm": 1.677075982093811, "learning_rate": 0.0002, "loss": 1.5384, "step": 4660 }, { "epoch": 0.02, "grad_norm": 1.1716564893722534, "learning_rate": 0.0002, "loss": 1.5573, "step": 4670 }, { "epoch": 0.02, "grad_norm": 2.291321039199829, "learning_rate": 0.0002, "loss": 1.5027, "step": 4680 }, { "epoch": 0.02, "grad_norm": 3.0376081466674805, "learning_rate": 0.0002, "loss": 1.3246, "step": 4690 }, { "epoch": 0.02, "grad_norm": 0.8095433115959167, "learning_rate": 0.0002, "loss": 1.3711, "step": 4700 }, { "epoch": 0.02, "grad_norm": 1.4861657619476318, "learning_rate": 0.0002, "loss": 1.6095, "step": 4710 }, { "epoch": 0.02, "grad_norm": 1.7577202320098877, "learning_rate": 0.0002, "loss": 1.2351, "step": 4720 }, { "epoch": 0.02, "grad_norm": 2.1339240074157715, "learning_rate": 0.0002, "loss": 1.3191, "step": 4730 }, { "epoch": 0.02, "grad_norm": 1.4511398077011108, "learning_rate": 0.0002, "loss": 1.5076, "step": 4740 }, { "epoch": 0.02, "grad_norm": 2.3744795322418213, "learning_rate": 0.0002, "loss": 1.5136, "step": 4750 }, { "epoch": 0.02, "grad_norm": 1.8578964471817017, "learning_rate": 0.0002, "loss": 1.6047, "step": 4760 }, { "epoch": 0.02, "grad_norm": 2.423591136932373, "learning_rate": 0.0002, "loss": 1.4402, "step": 4770 }, { "epoch": 0.02, "grad_norm": 2.2411272525787354, "learning_rate": 0.0002, "loss": 1.4178, "step": 4780 }, { "epoch": 0.02, "grad_norm": 0.8498905897140503, "learning_rate": 0.0002, "loss": 1.4931, "step": 4790 }, { "epoch": 0.02, "grad_norm": 2.2757842540740967, "learning_rate": 0.0002, "loss": 1.285, "step": 4800 }, { "epoch": 0.02, "grad_norm": 1.611397385597229, "learning_rate": 0.0002, "loss": 1.4121, "step": 4810 }, { "epoch": 0.02, "grad_norm": 1.6526505947113037, "learning_rate": 0.0002, "loss": 1.4551, "step": 4820 }, { "epoch": 0.02, "grad_norm": 1.5237524509429932, "learning_rate": 0.0002, "loss": 1.752, "step": 4830 }, { "epoch": 0.02, "grad_norm": 1.6694109439849854, "learning_rate": 0.0002, "loss": 1.3163, "step": 4840 }, { "epoch": 0.02, "grad_norm": 1.2885265350341797, "learning_rate": 0.0002, "loss": 1.4148, "step": 4850 }, { "epoch": 0.02, "grad_norm": 1.5598951578140259, "learning_rate": 0.0002, "loss": 1.4635, "step": 4860 }, { "epoch": 0.02, "grad_norm": 3.247941493988037, "learning_rate": 0.0002, "loss": 1.7576, "step": 4870 }, { "epoch": 0.02, "grad_norm": 1.6904298067092896, "learning_rate": 0.0002, "loss": 1.5465, "step": 4880 }, { "epoch": 0.02, "grad_norm": 2.211230754852295, "learning_rate": 0.0002, "loss": 1.5461, "step": 4890 }, { "epoch": 0.02, "grad_norm": 1.4877630472183228, "learning_rate": 0.0002, "loss": 1.3066, "step": 4900 }, { "epoch": 0.02, "grad_norm": 2.6459383964538574, "learning_rate": 0.0002, "loss": 1.4415, "step": 4910 }, { "epoch": 0.02, "grad_norm": 3.117734909057617, "learning_rate": 0.0002, "loss": 1.5839, "step": 4920 }, { "epoch": 0.02, "grad_norm": 1.5885357856750488, "learning_rate": 0.0002, "loss": 1.5053, "step": 4930 }, { "epoch": 0.02, "grad_norm": 2.7542388439178467, "learning_rate": 0.0002, "loss": 1.4553, "step": 4940 }, { "epoch": 0.02, "grad_norm": 2.650062084197998, "learning_rate": 0.0002, "loss": 1.653, "step": 4950 }, { "epoch": 0.02, "grad_norm": 1.830851674079895, "learning_rate": 0.0002, "loss": 1.4107, "step": 4960 }, { "epoch": 0.02, "grad_norm": 1.3347156047821045, "learning_rate": 0.0002, "loss": 1.4527, "step": 4970 }, { "epoch": 0.02, "grad_norm": 2.2195956707000732, "learning_rate": 0.0002, "loss": 1.4101, "step": 4980 }, { "epoch": 0.02, "grad_norm": 5.392250061035156, "learning_rate": 0.0002, "loss": 1.5458, "step": 4990 }, { "epoch": 0.02, "grad_norm": 1.5412298440933228, "learning_rate": 0.0002, "loss": 1.5183, "step": 5000 }, { "epoch": 0.02, "grad_norm": 3.1859960556030273, "learning_rate": 0.0002, "loss": 1.2861, "step": 5010 }, { "epoch": 0.02, "grad_norm": 2.3503944873809814, "learning_rate": 0.0002, "loss": 1.5574, "step": 5020 }, { "epoch": 0.02, "grad_norm": 2.0846574306488037, "learning_rate": 0.0002, "loss": 1.3864, "step": 5030 }, { "epoch": 0.02, "grad_norm": 1.474351167678833, "learning_rate": 0.0002, "loss": 1.4218, "step": 5040 }, { "epoch": 0.02, "grad_norm": 2.10864520072937, "learning_rate": 0.0002, "loss": 1.3193, "step": 5050 }, { "epoch": 0.02, "grad_norm": 2.0224852561950684, "learning_rate": 0.0002, "loss": 1.3212, "step": 5060 }, { "epoch": 0.02, "grad_norm": 1.0523300170898438, "learning_rate": 0.0002, "loss": 1.5041, "step": 5070 }, { "epoch": 0.02, "grad_norm": 2.0369372367858887, "learning_rate": 0.0002, "loss": 1.6834, "step": 5080 }, { "epoch": 0.02, "grad_norm": 1.9140881299972534, "learning_rate": 0.0002, "loss": 1.572, "step": 5090 }, { "epoch": 0.02, "grad_norm": 1.9214980602264404, "learning_rate": 0.0002, "loss": 1.3266, "step": 5100 }, { "epoch": 0.02, "grad_norm": 0.7563208341598511, "learning_rate": 0.0002, "loss": 1.3071, "step": 5110 }, { "epoch": 0.02, "grad_norm": 1.0874344110488892, "learning_rate": 0.0002, "loss": 1.4457, "step": 5120 }, { "epoch": 0.02, "grad_norm": 1.245445728302002, "learning_rate": 0.0002, "loss": 1.572, "step": 5130 }, { "epoch": 0.02, "grad_norm": 1.5960768461227417, "learning_rate": 0.0002, "loss": 1.3821, "step": 5140 }, { "epoch": 0.02, "grad_norm": 0.8120943903923035, "learning_rate": 0.0002, "loss": 1.4008, "step": 5150 }, { "epoch": 0.02, "grad_norm": 3.0680229663848877, "learning_rate": 0.0002, "loss": 1.3662, "step": 5160 }, { "epoch": 0.02, "grad_norm": 0.943189799785614, "learning_rate": 0.0002, "loss": 1.1208, "step": 5170 }, { "epoch": 0.02, "grad_norm": 2.7593064308166504, "learning_rate": 0.0002, "loss": 1.2095, "step": 5180 }, { "epoch": 0.02, "grad_norm": 2.5266363620758057, "learning_rate": 0.0002, "loss": 1.4886, "step": 5190 }, { "epoch": 0.02, "grad_norm": 1.9075592756271362, "learning_rate": 0.0002, "loss": 1.5656, "step": 5200 }, { "epoch": 0.02, "grad_norm": 1.6977505683898926, "learning_rate": 0.0002, "loss": 1.4905, "step": 5210 }, { "epoch": 0.02, "grad_norm": 1.341680645942688, "learning_rate": 0.0002, "loss": 1.4423, "step": 5220 }, { "epoch": 0.02, "grad_norm": 6.307382583618164, "learning_rate": 0.0002, "loss": 1.3768, "step": 5230 }, { "epoch": 0.02, "grad_norm": 1.8845576047897339, "learning_rate": 0.0002, "loss": 1.4532, "step": 5240 }, { "epoch": 0.02, "grad_norm": 2.268080949783325, "learning_rate": 0.0002, "loss": 1.241, "step": 5250 }, { "epoch": 0.02, "grad_norm": 2.3958635330200195, "learning_rate": 0.0002, "loss": 1.3381, "step": 5260 }, { "epoch": 0.02, "grad_norm": 2.3364169597625732, "learning_rate": 0.0002, "loss": 1.2595, "step": 5270 }, { "epoch": 0.02, "grad_norm": 1.8330961465835571, "learning_rate": 0.0002, "loss": 1.4532, "step": 5280 }, { "epoch": 0.02, "grad_norm": 2.3653934001922607, "learning_rate": 0.0002, "loss": 1.6858, "step": 5290 }, { "epoch": 0.02, "grad_norm": 3.034287929534912, "learning_rate": 0.0002, "loss": 1.5868, "step": 5300 }, { "epoch": 0.02, "grad_norm": 2.6200761795043945, "learning_rate": 0.0002, "loss": 1.3268, "step": 5310 }, { "epoch": 0.02, "grad_norm": 1.83577299118042, "learning_rate": 0.0002, "loss": 1.5636, "step": 5320 }, { "epoch": 0.02, "grad_norm": 1.7755309343338013, "learning_rate": 0.0002, "loss": 1.5925, "step": 5330 }, { "epoch": 0.02, "grad_norm": 2.353520393371582, "learning_rate": 0.0002, "loss": 1.2194, "step": 5340 }, { "epoch": 0.02, "grad_norm": 2.5982866287231445, "learning_rate": 0.0002, "loss": 1.4909, "step": 5350 }, { "epoch": 0.02, "grad_norm": 3.5171477794647217, "learning_rate": 0.0002, "loss": 1.4224, "step": 5360 }, { "epoch": 0.02, "grad_norm": 1.2178151607513428, "learning_rate": 0.0002, "loss": 1.4091, "step": 5370 }, { "epoch": 0.02, "grad_norm": 1.8130680322647095, "learning_rate": 0.0002, "loss": 1.633, "step": 5380 }, { "epoch": 0.02, "grad_norm": 3.1245245933532715, "learning_rate": 0.0002, "loss": 1.458, "step": 5390 }, { "epoch": 0.02, "grad_norm": 3.4017343521118164, "learning_rate": 0.0002, "loss": 1.5442, "step": 5400 }, { "epoch": 0.02, "grad_norm": 2.666928291320801, "learning_rate": 0.0002, "loss": 1.4658, "step": 5410 }, { "epoch": 0.02, "grad_norm": 2.3548436164855957, "learning_rate": 0.0002, "loss": 1.2646, "step": 5420 }, { "epoch": 0.02, "grad_norm": 3.1714022159576416, "learning_rate": 0.0002, "loss": 1.3789, "step": 5430 }, { "epoch": 0.02, "grad_norm": 3.420097589492798, "learning_rate": 0.0002, "loss": 1.6742, "step": 5440 }, { "epoch": 0.02, "grad_norm": 1.4659534692764282, "learning_rate": 0.0002, "loss": 1.3098, "step": 5450 }, { "epoch": 0.02, "grad_norm": 3.3147194385528564, "learning_rate": 0.0002, "loss": 1.5343, "step": 5460 }, { "epoch": 0.02, "grad_norm": 2.378434658050537, "learning_rate": 0.0002, "loss": 1.6956, "step": 5470 }, { "epoch": 0.02, "grad_norm": 2.215756416320801, "learning_rate": 0.0002, "loss": 1.5946, "step": 5480 }, { "epoch": 0.02, "grad_norm": 1.898219108581543, "learning_rate": 0.0002, "loss": 1.4423, "step": 5490 }, { "epoch": 0.02, "grad_norm": 1.8882572650909424, "learning_rate": 0.0002, "loss": 1.3072, "step": 5500 }, { "epoch": 0.02, "grad_norm": 1.8098652362823486, "learning_rate": 0.0002, "loss": 1.4343, "step": 5510 }, { "epoch": 0.02, "grad_norm": 2.1160221099853516, "learning_rate": 0.0002, "loss": 1.2477, "step": 5520 }, { "epoch": 0.02, "grad_norm": 1.3238621950149536, "learning_rate": 0.0002, "loss": 1.7233, "step": 5530 }, { "epoch": 0.02, "grad_norm": 3.452319622039795, "learning_rate": 0.0002, "loss": 1.4072, "step": 5540 }, { "epoch": 0.02, "grad_norm": 2.5927276611328125, "learning_rate": 0.0002, "loss": 1.4225, "step": 5550 }, { "epoch": 0.02, "grad_norm": 2.149977922439575, "learning_rate": 0.0002, "loss": 1.5539, "step": 5560 }, { "epoch": 0.02, "grad_norm": 2.3991901874542236, "learning_rate": 0.0002, "loss": 1.4328, "step": 5570 }, { "epoch": 0.02, "grad_norm": 1.2652732133865356, "learning_rate": 0.0002, "loss": 1.4655, "step": 5580 }, { "epoch": 0.02, "grad_norm": 2.124457597732544, "learning_rate": 0.0002, "loss": 1.3962, "step": 5590 }, { "epoch": 0.02, "grad_norm": 2.1508629322052, "learning_rate": 0.0002, "loss": 1.4386, "step": 5600 }, { "epoch": 0.02, "grad_norm": 2.5079100131988525, "learning_rate": 0.0002, "loss": 1.4704, "step": 5610 }, { "epoch": 0.02, "grad_norm": 2.5126919746398926, "learning_rate": 0.0002, "loss": 1.4608, "step": 5620 }, { "epoch": 0.02, "grad_norm": 1.9253414869308472, "learning_rate": 0.0002, "loss": 1.5227, "step": 5630 }, { "epoch": 0.02, "grad_norm": 2.259361505508423, "learning_rate": 0.0002, "loss": 1.4061, "step": 5640 }, { "epoch": 0.02, "grad_norm": 2.7666821479797363, "learning_rate": 0.0002, "loss": 1.3744, "step": 5650 }, { "epoch": 0.02, "grad_norm": 2.007599115371704, "learning_rate": 0.0002, "loss": 1.6547, "step": 5660 }, { "epoch": 0.02, "grad_norm": 2.878804922103882, "learning_rate": 0.0002, "loss": 1.5172, "step": 5670 }, { "epoch": 0.02, "grad_norm": 1.2835594415664673, "learning_rate": 0.0002, "loss": 1.5272, "step": 5680 }, { "epoch": 0.02, "grad_norm": 3.050595760345459, "learning_rate": 0.0002, "loss": 1.3918, "step": 5690 }, { "epoch": 0.02, "grad_norm": 2.2935404777526855, "learning_rate": 0.0002, "loss": 1.4666, "step": 5700 }, { "epoch": 0.02, "grad_norm": 1.4360989332199097, "learning_rate": 0.0002, "loss": 1.5142, "step": 5710 }, { "epoch": 0.02, "grad_norm": 1.6009156703948975, "learning_rate": 0.0002, "loss": 1.5194, "step": 5720 }, { "epoch": 0.02, "grad_norm": 4.426061153411865, "learning_rate": 0.0002, "loss": 1.3446, "step": 5730 }, { "epoch": 0.02, "grad_norm": 2.1125872135162354, "learning_rate": 0.0002, "loss": 1.3795, "step": 5740 }, { "epoch": 0.02, "grad_norm": 2.8455095291137695, "learning_rate": 0.0002, "loss": 1.4497, "step": 5750 }, { "epoch": 0.02, "grad_norm": 1.4728127717971802, "learning_rate": 0.0002, "loss": 1.4234, "step": 5760 }, { "epoch": 0.02, "grad_norm": 2.291569709777832, "learning_rate": 0.0002, "loss": 1.4188, "step": 5770 }, { "epoch": 0.02, "grad_norm": 2.484872817993164, "learning_rate": 0.0002, "loss": 1.4539, "step": 5780 }, { "epoch": 0.02, "grad_norm": 1.0069770812988281, "learning_rate": 0.0002, "loss": 1.7105, "step": 5790 }, { "epoch": 0.02, "grad_norm": 1.61063814163208, "learning_rate": 0.0002, "loss": 1.2883, "step": 5800 }, { "epoch": 0.02, "grad_norm": 1.9941885471343994, "learning_rate": 0.0002, "loss": 1.6583, "step": 5810 }, { "epoch": 0.02, "grad_norm": 2.176799774169922, "learning_rate": 0.0002, "loss": 1.6518, "step": 5820 }, { "epoch": 0.02, "grad_norm": 2.7112438678741455, "learning_rate": 0.0002, "loss": 1.5786, "step": 5830 }, { "epoch": 0.02, "grad_norm": 2.021254539489746, "learning_rate": 0.0002, "loss": 1.5062, "step": 5840 }, { "epoch": 0.02, "grad_norm": 2.756404161453247, "learning_rate": 0.0002, "loss": 1.5379, "step": 5850 }, { "epoch": 0.02, "grad_norm": 2.3705666065216064, "learning_rate": 0.0002, "loss": 1.7886, "step": 5860 }, { "epoch": 0.02, "grad_norm": 1.2316961288452148, "learning_rate": 0.0002, "loss": 1.6543, "step": 5870 }, { "epoch": 0.02, "grad_norm": 2.7069594860076904, "learning_rate": 0.0002, "loss": 1.2872, "step": 5880 }, { "epoch": 0.02, "grad_norm": 2.68432879447937, "learning_rate": 0.0002, "loss": 1.2556, "step": 5890 }, { "epoch": 0.02, "grad_norm": 1.4712070226669312, "learning_rate": 0.0002, "loss": 1.5752, "step": 5900 }, { "epoch": 0.02, "grad_norm": 1.6727226972579956, "learning_rate": 0.0002, "loss": 1.3554, "step": 5910 }, { "epoch": 0.02, "grad_norm": 1.6589205265045166, "learning_rate": 0.0002, "loss": 1.5178, "step": 5920 }, { "epoch": 0.02, "grad_norm": 2.742879867553711, "learning_rate": 0.0002, "loss": 1.6026, "step": 5930 }, { "epoch": 0.02, "grad_norm": 2.019139051437378, "learning_rate": 0.0002, "loss": 1.28, "step": 5940 }, { "epoch": 0.02, "grad_norm": 1.9394197463989258, "learning_rate": 0.0002, "loss": 1.3359, "step": 5950 }, { "epoch": 0.02, "grad_norm": 1.564385175704956, "learning_rate": 0.0002, "loss": 1.4835, "step": 5960 }, { "epoch": 0.02, "grad_norm": 1.9986765384674072, "learning_rate": 0.0002, "loss": 1.4196, "step": 5970 }, { "epoch": 0.02, "grad_norm": 1.0516208410263062, "learning_rate": 0.0002, "loss": 1.4822, "step": 5980 }, { "epoch": 0.02, "grad_norm": 1.6204100847244263, "learning_rate": 0.0002, "loss": 1.224, "step": 5990 }, { "epoch": 0.02, "grad_norm": 2.45428204536438, "learning_rate": 0.0002, "loss": 1.709, "step": 6000 }, { "epoch": 0.02, "grad_norm": 2.525660753250122, "learning_rate": 0.0002, "loss": 1.4738, "step": 6010 }, { "epoch": 0.02, "grad_norm": 2.2215919494628906, "learning_rate": 0.0002, "loss": 1.4409, "step": 6020 }, { "epoch": 0.02, "grad_norm": 2.100590467453003, "learning_rate": 0.0002, "loss": 1.5492, "step": 6030 }, { "epoch": 0.02, "grad_norm": 1.030824065208435, "learning_rate": 0.0002, "loss": 1.4696, "step": 6040 }, { "epoch": 0.02, "grad_norm": 2.024529218673706, "learning_rate": 0.0002, "loss": 1.3306, "step": 6050 }, { "epoch": 0.02, "grad_norm": 3.4388670921325684, "learning_rate": 0.0002, "loss": 1.516, "step": 6060 }, { "epoch": 0.02, "grad_norm": 2.093074083328247, "learning_rate": 0.0002, "loss": 1.6914, "step": 6070 }, { "epoch": 0.02, "grad_norm": 1.6650023460388184, "learning_rate": 0.0002, "loss": 1.3428, "step": 6080 }, { "epoch": 0.02, "grad_norm": 1.0002425909042358, "learning_rate": 0.0002, "loss": 1.4295, "step": 6090 }, { "epoch": 0.02, "grad_norm": 2.5335757732391357, "learning_rate": 0.0002, "loss": 1.4878, "step": 6100 }, { "epoch": 0.02, "grad_norm": 1.1671799421310425, "learning_rate": 0.0002, "loss": 1.46, "step": 6110 }, { "epoch": 0.02, "grad_norm": 1.6275402307510376, "learning_rate": 0.0002, "loss": 1.3411, "step": 6120 }, { "epoch": 0.02, "grad_norm": 1.2435277700424194, "learning_rate": 0.0002, "loss": 1.3834, "step": 6130 }, { "epoch": 0.02, "grad_norm": 1.3694039583206177, "learning_rate": 0.0002, "loss": 1.4491, "step": 6140 }, { "epoch": 0.03, "grad_norm": 5.044478893280029, "learning_rate": 0.0002, "loss": 1.3612, "step": 6150 }, { "epoch": 0.03, "grad_norm": 2.802211284637451, "learning_rate": 0.0002, "loss": 1.3764, "step": 6160 }, { "epoch": 0.03, "grad_norm": 1.232427954673767, "learning_rate": 0.0002, "loss": 1.4777, "step": 6170 }, { "epoch": 0.03, "grad_norm": 1.3317430019378662, "learning_rate": 0.0002, "loss": 1.6362, "step": 6180 }, { "epoch": 0.03, "grad_norm": 2.087475299835205, "learning_rate": 0.0002, "loss": 1.3712, "step": 6190 }, { "epoch": 0.03, "grad_norm": 2.087576389312744, "learning_rate": 0.0002, "loss": 1.5892, "step": 6200 }, { "epoch": 0.03, "grad_norm": 2.076953411102295, "learning_rate": 0.0002, "loss": 1.4966, "step": 6210 }, { "epoch": 0.03, "grad_norm": 2.358738899230957, "learning_rate": 0.0002, "loss": 1.396, "step": 6220 }, { "epoch": 0.03, "grad_norm": 2.792039632797241, "learning_rate": 0.0002, "loss": 1.4311, "step": 6230 }, { "epoch": 0.03, "grad_norm": 1.591100811958313, "learning_rate": 0.0002, "loss": 1.459, "step": 6240 }, { "epoch": 0.03, "grad_norm": 1.7752562761306763, "learning_rate": 0.0002, "loss": 1.3407, "step": 6250 }, { "epoch": 0.03, "grad_norm": 2.151242733001709, "learning_rate": 0.0002, "loss": 1.576, "step": 6260 }, { "epoch": 0.03, "grad_norm": 1.698040246963501, "learning_rate": 0.0002, "loss": 1.6572, "step": 6270 }, { "epoch": 0.03, "grad_norm": 1.84919011592865, "learning_rate": 0.0002, "loss": 1.9046, "step": 6280 }, { "epoch": 0.03, "grad_norm": 2.629206895828247, "learning_rate": 0.0002, "loss": 1.5068, "step": 6290 }, { "epoch": 0.03, "grad_norm": 1.8951278924942017, "learning_rate": 0.0002, "loss": 1.6079, "step": 6300 }, { "epoch": 0.03, "grad_norm": 0.9351910948753357, "learning_rate": 0.0002, "loss": 1.6587, "step": 6310 }, { "epoch": 0.03, "grad_norm": 2.0446789264678955, "learning_rate": 0.0002, "loss": 1.6485, "step": 6320 }, { "epoch": 0.03, "grad_norm": 3.493908166885376, "learning_rate": 0.0002, "loss": 1.5605, "step": 6330 }, { "epoch": 0.03, "grad_norm": 1.7522810697555542, "learning_rate": 0.0002, "loss": 1.606, "step": 6340 }, { "epoch": 0.03, "grad_norm": 1.5873836278915405, "learning_rate": 0.0002, "loss": 1.5405, "step": 6350 }, { "epoch": 0.03, "grad_norm": 1.7795326709747314, "learning_rate": 0.0002, "loss": 1.2983, "step": 6360 }, { "epoch": 0.03, "grad_norm": 1.5147981643676758, "learning_rate": 0.0002, "loss": 1.5495, "step": 6370 }, { "epoch": 0.03, "grad_norm": 2.136378288269043, "learning_rate": 0.0002, "loss": 1.5879, "step": 6380 }, { "epoch": 0.03, "grad_norm": 2.463564395904541, "learning_rate": 0.0002, "loss": 1.5162, "step": 6390 }, { "epoch": 0.03, "grad_norm": 2.1059184074401855, "learning_rate": 0.0002, "loss": 1.5067, "step": 6400 }, { "epoch": 0.03, "grad_norm": 1.378782033920288, "learning_rate": 0.0002, "loss": 1.5175, "step": 6410 }, { "epoch": 0.03, "grad_norm": 1.630387783050537, "learning_rate": 0.0002, "loss": 1.3656, "step": 6420 }, { "epoch": 0.03, "grad_norm": 2.031229257583618, "learning_rate": 0.0002, "loss": 1.3661, "step": 6430 }, { "epoch": 0.03, "grad_norm": 2.67181396484375, "learning_rate": 0.0002, "loss": 1.368, "step": 6440 }, { "epoch": 0.03, "grad_norm": 1.7481715679168701, "learning_rate": 0.0002, "loss": 1.5492, "step": 6450 }, { "epoch": 0.03, "grad_norm": 2.5627405643463135, "learning_rate": 0.0002, "loss": 1.4232, "step": 6460 }, { "epoch": 0.03, "grad_norm": 1.4607422351837158, "learning_rate": 0.0002, "loss": 1.3668, "step": 6470 }, { "epoch": 0.03, "grad_norm": 1.047680377960205, "learning_rate": 0.0002, "loss": 1.4234, "step": 6480 }, { "epoch": 0.03, "grad_norm": 2.161345958709717, "learning_rate": 0.0002, "loss": 1.4759, "step": 6490 }, { "epoch": 0.03, "grad_norm": 1.5795981884002686, "learning_rate": 0.0002, "loss": 1.3631, "step": 6500 }, { "epoch": 0.03, "grad_norm": 2.8286476135253906, "learning_rate": 0.0002, "loss": 1.3634, "step": 6510 }, { "epoch": 0.03, "grad_norm": 1.4919159412384033, "learning_rate": 0.0002, "loss": 1.6688, "step": 6520 }, { "epoch": 0.03, "grad_norm": 3.939310312271118, "learning_rate": 0.0002, "loss": 1.2548, "step": 6530 }, { "epoch": 0.03, "grad_norm": 2.7942464351654053, "learning_rate": 0.0002, "loss": 1.4912, "step": 6540 }, { "epoch": 0.03, "grad_norm": 3.1298062801361084, "learning_rate": 0.0002, "loss": 1.3692, "step": 6550 }, { "epoch": 0.03, "grad_norm": 2.1913225650787354, "learning_rate": 0.0002, "loss": 1.3852, "step": 6560 }, { "epoch": 0.03, "grad_norm": 1.2599784135818481, "learning_rate": 0.0002, "loss": 1.4739, "step": 6570 }, { "epoch": 0.03, "grad_norm": 1.9151824712753296, "learning_rate": 0.0002, "loss": 1.6342, "step": 6580 }, { "epoch": 0.03, "grad_norm": 1.5189236402511597, "learning_rate": 0.0002, "loss": 1.5724, "step": 6590 }, { "epoch": 0.03, "grad_norm": 4.138619899749756, "learning_rate": 0.0002, "loss": 1.5754, "step": 6600 }, { "epoch": 0.03, "grad_norm": 1.187021255493164, "learning_rate": 0.0002, "loss": 1.4372, "step": 6610 }, { "epoch": 0.03, "grad_norm": 3.0855581760406494, "learning_rate": 0.0002, "loss": 1.3865, "step": 6620 }, { "epoch": 0.03, "grad_norm": 1.6004631519317627, "learning_rate": 0.0002, "loss": 1.4242, "step": 6630 }, { "epoch": 0.03, "grad_norm": 4.328795909881592, "learning_rate": 0.0002, "loss": 1.4789, "step": 6640 }, { "epoch": 0.03, "grad_norm": 1.6815425157546997, "learning_rate": 0.0002, "loss": 1.4197, "step": 6650 }, { "epoch": 0.03, "grad_norm": 3.164928913116455, "learning_rate": 0.0002, "loss": 1.3123, "step": 6660 }, { "epoch": 0.03, "grad_norm": 1.4927771091461182, "learning_rate": 0.0002, "loss": 1.6346, "step": 6670 }, { "epoch": 0.03, "grad_norm": 1.8386131525039673, "learning_rate": 0.0002, "loss": 1.4881, "step": 6680 }, { "epoch": 0.03, "grad_norm": 3.0461878776550293, "learning_rate": 0.0002, "loss": 1.5258, "step": 6690 }, { "epoch": 0.03, "grad_norm": 1.4647200107574463, "learning_rate": 0.0002, "loss": 1.6144, "step": 6700 }, { "epoch": 0.03, "grad_norm": 2.3131022453308105, "learning_rate": 0.0002, "loss": 1.4192, "step": 6710 }, { "epoch": 0.03, "grad_norm": 1.9447567462921143, "learning_rate": 0.0002, "loss": 1.334, "step": 6720 }, { "epoch": 0.03, "grad_norm": 2.782794713973999, "learning_rate": 0.0002, "loss": 1.2065, "step": 6730 }, { "epoch": 0.03, "grad_norm": 2.416659355163574, "learning_rate": 0.0002, "loss": 1.532, "step": 6740 }, { "epoch": 0.03, "grad_norm": 2.3938450813293457, "learning_rate": 0.0002, "loss": 1.4685, "step": 6750 }, { "epoch": 0.03, "grad_norm": 1.6116114854812622, "learning_rate": 0.0002, "loss": 1.5222, "step": 6760 }, { "epoch": 0.03, "grad_norm": 1.7817723751068115, "learning_rate": 0.0002, "loss": 1.2982, "step": 6770 }, { "epoch": 0.03, "grad_norm": 3.516310453414917, "learning_rate": 0.0002, "loss": 1.4886, "step": 6780 }, { "epoch": 0.03, "grad_norm": 1.9711170196533203, "learning_rate": 0.0002, "loss": 1.5906, "step": 6790 }, { "epoch": 0.03, "grad_norm": 2.1479814052581787, "learning_rate": 0.0002, "loss": 1.5246, "step": 6800 }, { "epoch": 0.03, "grad_norm": 2.0428731441497803, "learning_rate": 0.0002, "loss": 1.5125, "step": 6810 }, { "epoch": 0.03, "grad_norm": 1.8732916116714478, "learning_rate": 0.0002, "loss": 1.3148, "step": 6820 }, { "epoch": 0.03, "grad_norm": 1.7431749105453491, "learning_rate": 0.0002, "loss": 1.3617, "step": 6830 }, { "epoch": 0.03, "grad_norm": 1.9835073947906494, "learning_rate": 0.0002, "loss": 1.2695, "step": 6840 }, { "epoch": 0.03, "grad_norm": 1.943679690361023, "learning_rate": 0.0002, "loss": 1.4001, "step": 6850 }, { "epoch": 0.03, "grad_norm": 1.833566665649414, "learning_rate": 0.0002, "loss": 1.3851, "step": 6860 }, { "epoch": 0.03, "grad_norm": 2.9364609718322754, "learning_rate": 0.0002, "loss": 1.3697, "step": 6870 }, { "epoch": 0.03, "grad_norm": 3.392754554748535, "learning_rate": 0.0002, "loss": 1.5085, "step": 6880 }, { "epoch": 0.03, "grad_norm": 1.563408374786377, "learning_rate": 0.0002, "loss": 1.5033, "step": 6890 }, { "epoch": 0.03, "grad_norm": 2.9272842407226562, "learning_rate": 0.0002, "loss": 1.4681, "step": 6900 }, { "epoch": 0.03, "grad_norm": 2.8433003425598145, "learning_rate": 0.0002, "loss": 1.6869, "step": 6910 }, { "epoch": 0.03, "grad_norm": 2.6814963817596436, "learning_rate": 0.0002, "loss": 1.5496, "step": 6920 }, { "epoch": 0.03, "grad_norm": 3.4982035160064697, "learning_rate": 0.0002, "loss": 1.3157, "step": 6930 }, { "epoch": 0.03, "grad_norm": 2.015127658843994, "learning_rate": 0.0002, "loss": 1.3284, "step": 6940 }, { "epoch": 0.03, "grad_norm": 1.688003659248352, "learning_rate": 0.0002, "loss": 1.517, "step": 6950 }, { "epoch": 0.03, "grad_norm": 2.0096094608306885, "learning_rate": 0.0002, "loss": 1.5708, "step": 6960 }, { "epoch": 0.03, "grad_norm": 3.5806844234466553, "learning_rate": 0.0002, "loss": 1.7589, "step": 6970 }, { "epoch": 0.03, "grad_norm": 2.5612547397613525, "learning_rate": 0.0002, "loss": 1.4217, "step": 6980 }, { "epoch": 0.03, "grad_norm": 1.9584002494812012, "learning_rate": 0.0002, "loss": 1.3739, "step": 6990 }, { "epoch": 0.03, "grad_norm": 1.8669718503952026, "learning_rate": 0.0002, "loss": 1.4848, "step": 7000 }, { "epoch": 0.03, "grad_norm": 2.245661735534668, "learning_rate": 0.0002, "loss": 1.5845, "step": 7010 }, { "epoch": 0.03, "grad_norm": 2.1773505210876465, "learning_rate": 0.0002, "loss": 1.6585, "step": 7020 }, { "epoch": 0.03, "grad_norm": 1.6792418956756592, "learning_rate": 0.0002, "loss": 1.5193, "step": 7030 }, { "epoch": 0.03, "grad_norm": 2.744091272354126, "learning_rate": 0.0002, "loss": 1.5219, "step": 7040 }, { "epoch": 0.03, "grad_norm": 1.5455691814422607, "learning_rate": 0.0002, "loss": 1.524, "step": 7050 }, { "epoch": 0.03, "grad_norm": 1.9714967012405396, "learning_rate": 0.0002, "loss": 1.4738, "step": 7060 }, { "epoch": 0.03, "grad_norm": 1.6202677488327026, "learning_rate": 0.0002, "loss": 1.4657, "step": 7070 }, { "epoch": 0.03, "grad_norm": 1.0998156070709229, "learning_rate": 0.0002, "loss": 1.5102, "step": 7080 }, { "epoch": 0.03, "grad_norm": 2.6180241107940674, "learning_rate": 0.0002, "loss": 1.45, "step": 7090 }, { "epoch": 0.03, "grad_norm": 2.4060463905334473, "learning_rate": 0.0002, "loss": 1.49, "step": 7100 }, { "epoch": 0.03, "grad_norm": 2.8635990619659424, "learning_rate": 0.0002, "loss": 1.7457, "step": 7110 }, { "epoch": 0.03, "grad_norm": 1.7686954736709595, "learning_rate": 0.0002, "loss": 1.4729, "step": 7120 }, { "epoch": 0.03, "grad_norm": 2.690091371536255, "learning_rate": 0.0002, "loss": 1.2948, "step": 7130 }, { "epoch": 0.03, "grad_norm": 3.2614479064941406, "learning_rate": 0.0002, "loss": 1.2996, "step": 7140 }, { "epoch": 0.03, "grad_norm": 2.593601942062378, "learning_rate": 0.0002, "loss": 1.5496, "step": 7150 }, { "epoch": 0.03, "grad_norm": 2.258699655532837, "learning_rate": 0.0002, "loss": 1.4209, "step": 7160 }, { "epoch": 0.03, "grad_norm": 2.160553455352783, "learning_rate": 0.0002, "loss": 1.6017, "step": 7170 }, { "epoch": 0.03, "grad_norm": 4.023937702178955, "learning_rate": 0.0002, "loss": 1.5019, "step": 7180 }, { "epoch": 0.03, "grad_norm": 1.9569917917251587, "learning_rate": 0.0002, "loss": 1.6492, "step": 7190 }, { "epoch": 0.03, "grad_norm": 1.9132297039031982, "learning_rate": 0.0002, "loss": 1.5536, "step": 7200 }, { "epoch": 0.03, "grad_norm": 2.095108985900879, "learning_rate": 0.0002, "loss": 1.464, "step": 7210 }, { "epoch": 0.03, "grad_norm": 3.72226881980896, "learning_rate": 0.0002, "loss": 1.3493, "step": 7220 }, { "epoch": 0.03, "grad_norm": 1.3911701440811157, "learning_rate": 0.0002, "loss": 1.3813, "step": 7230 }, { "epoch": 0.03, "grad_norm": 3.566089391708374, "learning_rate": 0.0002, "loss": 1.4534, "step": 7240 }, { "epoch": 0.03, "grad_norm": 2.0315113067626953, "learning_rate": 0.0002, "loss": 1.4385, "step": 7250 }, { "epoch": 0.03, "grad_norm": 0.9933320879936218, "learning_rate": 0.0002, "loss": 1.2703, "step": 7260 }, { "epoch": 0.03, "grad_norm": 2.372129440307617, "learning_rate": 0.0002, "loss": 1.4312, "step": 7270 }, { "epoch": 0.03, "grad_norm": 1.9197068214416504, "learning_rate": 0.0002, "loss": 1.5714, "step": 7280 }, { "epoch": 0.03, "grad_norm": 2.017265558242798, "learning_rate": 0.0002, "loss": 1.3747, "step": 7290 }, { "epoch": 0.03, "grad_norm": 2.3808460235595703, "learning_rate": 0.0002, "loss": 1.671, "step": 7300 }, { "epoch": 0.03, "grad_norm": 2.110639810562134, "learning_rate": 0.0002, "loss": 1.4868, "step": 7310 }, { "epoch": 0.03, "grad_norm": 1.858498215675354, "learning_rate": 0.0002, "loss": 1.5261, "step": 7320 }, { "epoch": 0.03, "grad_norm": 2.286231756210327, "learning_rate": 0.0002, "loss": 1.2593, "step": 7330 }, { "epoch": 0.03, "grad_norm": 2.6206212043762207, "learning_rate": 0.0002, "loss": 1.6359, "step": 7340 }, { "epoch": 0.03, "grad_norm": 2.575974464416504, "learning_rate": 0.0002, "loss": 1.7826, "step": 7350 }, { "epoch": 0.03, "grad_norm": 2.0349574089050293, "learning_rate": 0.0002, "loss": 1.3867, "step": 7360 }, { "epoch": 0.03, "grad_norm": 1.8271406888961792, "learning_rate": 0.0002, "loss": 1.5767, "step": 7370 }, { "epoch": 0.03, "grad_norm": 2.701258897781372, "learning_rate": 0.0002, "loss": 1.4637, "step": 7380 }, { "epoch": 0.03, "grad_norm": 1.4367636442184448, "learning_rate": 0.0002, "loss": 1.7273, "step": 7390 }, { "epoch": 0.03, "grad_norm": 2.8828911781311035, "learning_rate": 0.0002, "loss": 1.5351, "step": 7400 }, { "epoch": 0.03, "grad_norm": 1.1568763256072998, "learning_rate": 0.0002, "loss": 1.4692, "step": 7410 }, { "epoch": 0.03, "grad_norm": 2.265610456466675, "learning_rate": 0.0002, "loss": 1.259, "step": 7420 }, { "epoch": 0.03, "grad_norm": 1.7286001443862915, "learning_rate": 0.0002, "loss": 1.6157, "step": 7430 }, { "epoch": 0.03, "grad_norm": 2.737251043319702, "learning_rate": 0.0002, "loss": 1.679, "step": 7440 }, { "epoch": 0.03, "grad_norm": 4.436177730560303, "learning_rate": 0.0002, "loss": 1.8326, "step": 7450 }, { "epoch": 0.03, "grad_norm": 1.8627389669418335, "learning_rate": 0.0002, "loss": 1.513, "step": 7460 }, { "epoch": 0.03, "grad_norm": 3.5262715816497803, "learning_rate": 0.0002, "loss": 1.3618, "step": 7470 }, { "epoch": 0.03, "grad_norm": 1.7772070169448853, "learning_rate": 0.0002, "loss": 1.6979, "step": 7480 }, { "epoch": 0.03, "grad_norm": 2.518167495727539, "learning_rate": 0.0002, "loss": 1.5054, "step": 7490 }, { "epoch": 0.03, "grad_norm": 2.140885829925537, "learning_rate": 0.0002, "loss": 1.5651, "step": 7500 }, { "epoch": 0.03, "grad_norm": 3.0500457286834717, "learning_rate": 0.0002, "loss": 1.2958, "step": 7510 }, { "epoch": 0.03, "grad_norm": 1.6070023775100708, "learning_rate": 0.0002, "loss": 1.2575, "step": 7520 }, { "epoch": 0.03, "grad_norm": 2.2046730518341064, "learning_rate": 0.0002, "loss": 1.4124, "step": 7530 }, { "epoch": 0.03, "grad_norm": 3.9345946311950684, "learning_rate": 0.0002, "loss": 1.7162, "step": 7540 }, { "epoch": 0.03, "grad_norm": 2.396435499191284, "learning_rate": 0.0002, "loss": 1.5682, "step": 7550 }, { "epoch": 0.03, "grad_norm": 3.5719194412231445, "learning_rate": 0.0002, "loss": 1.435, "step": 7560 }, { "epoch": 0.03, "grad_norm": 1.154420256614685, "learning_rate": 0.0002, "loss": 1.5553, "step": 7570 }, { "epoch": 0.03, "grad_norm": 3.6857495307922363, "learning_rate": 0.0002, "loss": 1.5356, "step": 7580 }, { "epoch": 0.03, "grad_norm": 0.7779808044433594, "learning_rate": 0.0002, "loss": 1.5375, "step": 7590 }, { "epoch": 0.03, "grad_norm": 1.38425874710083, "learning_rate": 0.0002, "loss": 1.4818, "step": 7600 }, { "epoch": 0.03, "grad_norm": 3.16524076461792, "learning_rate": 0.0002, "loss": 1.343, "step": 7610 }, { "epoch": 0.03, "grad_norm": 1.945223093032837, "learning_rate": 0.0002, "loss": 1.6943, "step": 7620 }, { "epoch": 0.03, "grad_norm": 1.9650940895080566, "learning_rate": 0.0002, "loss": 1.5946, "step": 7630 }, { "epoch": 0.03, "grad_norm": 4.145795822143555, "learning_rate": 0.0002, "loss": 1.3693, "step": 7640 }, { "epoch": 0.03, "grad_norm": 1.9153331518173218, "learning_rate": 0.0002, "loss": 1.3027, "step": 7650 }, { "epoch": 0.03, "grad_norm": 3.134053945541382, "learning_rate": 0.0002, "loss": 1.4434, "step": 7660 }, { "epoch": 0.03, "grad_norm": 2.1781742572784424, "learning_rate": 0.0002, "loss": 1.6125, "step": 7670 }, { "epoch": 0.03, "grad_norm": 3.6190197467803955, "learning_rate": 0.0002, "loss": 1.5754, "step": 7680 }, { "epoch": 0.03, "grad_norm": 1.304452896118164, "learning_rate": 0.0002, "loss": 1.4387, "step": 7690 }, { "epoch": 0.03, "grad_norm": 2.0923118591308594, "learning_rate": 0.0002, "loss": 1.5147, "step": 7700 }, { "epoch": 0.03, "grad_norm": 2.483900308609009, "learning_rate": 0.0002, "loss": 1.2662, "step": 7710 }, { "epoch": 0.03, "grad_norm": 1.1088978052139282, "learning_rate": 0.0002, "loss": 1.361, "step": 7720 }, { "epoch": 0.03, "grad_norm": 2.493356704711914, "learning_rate": 0.0002, "loss": 1.6416, "step": 7730 }, { "epoch": 0.03, "grad_norm": 1.9285528659820557, "learning_rate": 0.0002, "loss": 1.3874, "step": 7740 }, { "epoch": 0.03, "grad_norm": 3.862790584564209, "learning_rate": 0.0002, "loss": 1.4578, "step": 7750 }, { "epoch": 0.03, "grad_norm": 2.6288437843322754, "learning_rate": 0.0002, "loss": 1.4603, "step": 7760 }, { "epoch": 0.03, "grad_norm": 3.1699905395507812, "learning_rate": 0.0002, "loss": 1.3294, "step": 7770 }, { "epoch": 0.03, "grad_norm": 1.7477073669433594, "learning_rate": 0.0002, "loss": 1.6082, "step": 7780 }, { "epoch": 0.03, "grad_norm": 1.898258090019226, "learning_rate": 0.0002, "loss": 1.5309, "step": 7790 }, { "epoch": 0.03, "grad_norm": 1.9159409999847412, "learning_rate": 0.0002, "loss": 1.4367, "step": 7800 }, { "epoch": 0.03, "grad_norm": 1.462538242340088, "learning_rate": 0.0002, "loss": 1.6914, "step": 7810 }, { "epoch": 0.03, "grad_norm": 3.1840784549713135, "learning_rate": 0.0002, "loss": 1.5239, "step": 7820 }, { "epoch": 0.03, "grad_norm": 2.2235822677612305, "learning_rate": 0.0002, "loss": 1.6292, "step": 7830 }, { "epoch": 0.03, "grad_norm": 2.494419813156128, "learning_rate": 0.0002, "loss": 1.432, "step": 7840 }, { "epoch": 0.03, "grad_norm": 1.3516007661819458, "learning_rate": 0.0002, "loss": 1.6564, "step": 7850 }, { "epoch": 0.03, "grad_norm": 2.7921183109283447, "learning_rate": 0.0002, "loss": 1.3592, "step": 7860 }, { "epoch": 0.03, "grad_norm": 1.4526376724243164, "learning_rate": 0.0002, "loss": 1.7454, "step": 7870 }, { "epoch": 0.03, "grad_norm": 1.5744378566741943, "learning_rate": 0.0002, "loss": 1.6315, "step": 7880 }, { "epoch": 0.03, "grad_norm": 2.559884548187256, "learning_rate": 0.0002, "loss": 1.3783, "step": 7890 }, { "epoch": 0.03, "grad_norm": 0.8809816837310791, "learning_rate": 0.0002, "loss": 1.4147, "step": 7900 }, { "epoch": 0.03, "grad_norm": 2.094362497329712, "learning_rate": 0.0002, "loss": 1.4058, "step": 7910 }, { "epoch": 0.03, "grad_norm": 2.9641854763031006, "learning_rate": 0.0002, "loss": 1.649, "step": 7920 }, { "epoch": 0.03, "grad_norm": 2.2631683349609375, "learning_rate": 0.0002, "loss": 1.561, "step": 7930 }, { "epoch": 0.03, "grad_norm": 1.604196548461914, "learning_rate": 0.0002, "loss": 1.4867, "step": 7940 }, { "epoch": 0.03, "grad_norm": 2.517463445663452, "learning_rate": 0.0002, "loss": 1.1611, "step": 7950 }, { "epoch": 0.03, "grad_norm": 2.8635005950927734, "learning_rate": 0.0002, "loss": 1.7848, "step": 7960 }, { "epoch": 0.03, "grad_norm": 2.3463542461395264, "learning_rate": 0.0002, "loss": 1.5169, "step": 7970 }, { "epoch": 0.03, "grad_norm": 3.968959093093872, "learning_rate": 0.0002, "loss": 1.6241, "step": 7980 }, { "epoch": 0.03, "grad_norm": 2.1093666553497314, "learning_rate": 0.0002, "loss": 1.6001, "step": 7990 }, { "epoch": 0.03, "grad_norm": 1.2288600206375122, "learning_rate": 0.0002, "loss": 1.5742, "step": 8000 }, { "epoch": 0.03, "grad_norm": 2.7395567893981934, "learning_rate": 0.0002, "loss": 1.5018, "step": 8010 }, { "epoch": 0.03, "grad_norm": 2.667539596557617, "learning_rate": 0.0002, "loss": 1.4155, "step": 8020 }, { "epoch": 0.03, "grad_norm": 2.203382730484009, "learning_rate": 0.0002, "loss": 1.546, "step": 8030 }, { "epoch": 0.03, "grad_norm": 2.665142774581909, "learning_rate": 0.0002, "loss": 1.594, "step": 8040 }, { "epoch": 0.03, "grad_norm": 1.4381822347640991, "learning_rate": 0.0002, "loss": 1.5246, "step": 8050 }, { "epoch": 0.03, "grad_norm": 2.033236026763916, "learning_rate": 0.0002, "loss": 1.3808, "step": 8060 }, { "epoch": 0.03, "grad_norm": 2.8416035175323486, "learning_rate": 0.0002, "loss": 1.6278, "step": 8070 }, { "epoch": 0.03, "grad_norm": 2.2717461585998535, "learning_rate": 0.0002, "loss": 1.6368, "step": 8080 }, { "epoch": 0.03, "grad_norm": 1.6822937726974487, "learning_rate": 0.0002, "loss": 1.8042, "step": 8090 }, { "epoch": 0.03, "grad_norm": 2.2757809162139893, "learning_rate": 0.0002, "loss": 1.3632, "step": 8100 }, { "epoch": 0.03, "grad_norm": 2.5641632080078125, "learning_rate": 0.0002, "loss": 1.3666, "step": 8110 }, { "epoch": 0.03, "grad_norm": 3.034785270690918, "learning_rate": 0.0002, "loss": 1.3044, "step": 8120 }, { "epoch": 0.03, "grad_norm": 1.7490886449813843, "learning_rate": 0.0002, "loss": 1.3738, "step": 8130 }, { "epoch": 0.03, "grad_norm": 1.9364845752716064, "learning_rate": 0.0002, "loss": 1.3621, "step": 8140 }, { "epoch": 0.03, "grad_norm": 2.104452610015869, "learning_rate": 0.0002, "loss": 1.6767, "step": 8150 }, { "epoch": 0.03, "grad_norm": 1.9614759683609009, "learning_rate": 0.0002, "loss": 1.5056, "step": 8160 }, { "epoch": 0.03, "grad_norm": 1.840954303741455, "learning_rate": 0.0002, "loss": 1.627, "step": 8170 }, { "epoch": 0.03, "grad_norm": 3.7783827781677246, "learning_rate": 0.0002, "loss": 1.6658, "step": 8180 }, { "epoch": 0.03, "grad_norm": 1.141282081604004, "learning_rate": 0.0002, "loss": 1.6485, "step": 8190 }, { "epoch": 0.03, "grad_norm": 3.315969228744507, "learning_rate": 0.0002, "loss": 1.4744, "step": 8200 }, { "epoch": 0.03, "grad_norm": 1.2877119779586792, "learning_rate": 0.0002, "loss": 1.6829, "step": 8210 }, { "epoch": 0.03, "grad_norm": 4.281208515167236, "learning_rate": 0.0002, "loss": 1.4176, "step": 8220 }, { "epoch": 0.03, "grad_norm": 2.234999656677246, "learning_rate": 0.0002, "loss": 1.4157, "step": 8230 }, { "epoch": 0.03, "grad_norm": 2.7089924812316895, "learning_rate": 0.0002, "loss": 1.5155, "step": 8240 }, { "epoch": 0.03, "grad_norm": 2.311685800552368, "learning_rate": 0.0002, "loss": 1.4363, "step": 8250 }, { "epoch": 0.03, "grad_norm": 2.488600969314575, "learning_rate": 0.0002, "loss": 1.5047, "step": 8260 }, { "epoch": 0.03, "grad_norm": 1.373652696609497, "learning_rate": 0.0002, "loss": 1.3931, "step": 8270 }, { "epoch": 0.03, "grad_norm": 2.8195712566375732, "learning_rate": 0.0002, "loss": 1.6171, "step": 8280 }, { "epoch": 0.03, "grad_norm": 2.549273729324341, "learning_rate": 0.0002, "loss": 1.2737, "step": 8290 }, { "epoch": 0.03, "grad_norm": 1.5742788314819336, "learning_rate": 0.0002, "loss": 1.4043, "step": 8300 }, { "epoch": 0.03, "grad_norm": 2.5004942417144775, "learning_rate": 0.0002, "loss": 1.6627, "step": 8310 }, { "epoch": 0.03, "grad_norm": 1.145879864692688, "learning_rate": 0.0002, "loss": 1.6745, "step": 8320 }, { "epoch": 0.03, "grad_norm": 2.9033162593841553, "learning_rate": 0.0002, "loss": 1.4686, "step": 8330 }, { "epoch": 0.03, "grad_norm": 2.5100314617156982, "learning_rate": 0.0002, "loss": 1.0918, "step": 8340 }, { "epoch": 0.03, "grad_norm": 2.0803890228271484, "learning_rate": 0.0002, "loss": 1.4504, "step": 8350 }, { "epoch": 0.03, "grad_norm": 2.3879268169403076, "learning_rate": 0.0002, "loss": 1.6581, "step": 8360 }, { "epoch": 0.03, "grad_norm": 2.1629672050476074, "learning_rate": 0.0002, "loss": 1.4707, "step": 8370 }, { "epoch": 0.03, "grad_norm": 2.096961259841919, "learning_rate": 0.0002, "loss": 1.6281, "step": 8380 }, { "epoch": 0.03, "grad_norm": 2.53523850440979, "learning_rate": 0.0002, "loss": 1.3341, "step": 8390 }, { "epoch": 0.03, "grad_norm": 2.577096700668335, "learning_rate": 0.0002, "loss": 1.6072, "step": 8400 }, { "epoch": 0.03, "grad_norm": 2.29491925239563, "learning_rate": 0.0002, "loss": 1.4487, "step": 8410 }, { "epoch": 0.03, "grad_norm": 1.2818350791931152, "learning_rate": 0.0002, "loss": 1.5634, "step": 8420 }, { "epoch": 0.03, "grad_norm": 1.684656023979187, "learning_rate": 0.0002, "loss": 1.5255, "step": 8430 }, { "epoch": 0.03, "grad_norm": 2.4209766387939453, "learning_rate": 0.0002, "loss": 1.4623, "step": 8440 }, { "epoch": 0.03, "grad_norm": 2.714721918106079, "learning_rate": 0.0002, "loss": 1.5035, "step": 8450 }, { "epoch": 0.03, "grad_norm": 2.6516129970550537, "learning_rate": 0.0002, "loss": 1.6526, "step": 8460 }, { "epoch": 0.03, "grad_norm": 1.9107693433761597, "learning_rate": 0.0002, "loss": 1.6077, "step": 8470 }, { "epoch": 0.03, "grad_norm": 1.675378441810608, "learning_rate": 0.0002, "loss": 1.4569, "step": 8480 }, { "epoch": 0.03, "grad_norm": 1.5161449909210205, "learning_rate": 0.0002, "loss": 1.3121, "step": 8490 }, { "epoch": 0.03, "grad_norm": 2.8807923793792725, "learning_rate": 0.0002, "loss": 1.5103, "step": 8500 }, { "epoch": 0.03, "grad_norm": 2.0420048236846924, "learning_rate": 0.0002, "loss": 1.4143, "step": 8510 }, { "epoch": 0.03, "grad_norm": 2.8521578311920166, "learning_rate": 0.0002, "loss": 1.6713, "step": 8520 }, { "epoch": 0.03, "grad_norm": 1.6955260038375854, "learning_rate": 0.0002, "loss": 1.5151, "step": 8530 }, { "epoch": 0.03, "grad_norm": 2.172991991043091, "learning_rate": 0.0002, "loss": 1.6779, "step": 8540 }, { "epoch": 0.03, "grad_norm": 2.1868200302124023, "learning_rate": 0.0002, "loss": 1.2294, "step": 8550 }, { "epoch": 0.03, "grad_norm": 2.040480613708496, "learning_rate": 0.0002, "loss": 1.6027, "step": 8560 }, { "epoch": 0.03, "grad_norm": 4.582374095916748, "learning_rate": 0.0002, "loss": 1.6903, "step": 8570 }, { "epoch": 0.03, "grad_norm": 2.434680461883545, "learning_rate": 0.0002, "loss": 1.4721, "step": 8580 }, { "epoch": 0.03, "grad_norm": 2.010171890258789, "learning_rate": 0.0002, "loss": 1.4817, "step": 8590 }, { "epoch": 0.04, "grad_norm": 3.3142685890197754, "learning_rate": 0.0002, "loss": 1.4002, "step": 8600 }, { "epoch": 0.04, "grad_norm": 2.504002571105957, "learning_rate": 0.0002, "loss": 1.5702, "step": 8610 }, { "epoch": 0.04, "grad_norm": 3.0412392616271973, "learning_rate": 0.0002, "loss": 1.5282, "step": 8620 }, { "epoch": 0.04, "grad_norm": 1.7234206199645996, "learning_rate": 0.0002, "loss": 1.5982, "step": 8630 }, { "epoch": 0.04, "grad_norm": 2.2017600536346436, "learning_rate": 0.0002, "loss": 1.1656, "step": 8640 }, { "epoch": 0.04, "grad_norm": 2.375318765640259, "learning_rate": 0.0002, "loss": 1.7292, "step": 8650 }, { "epoch": 0.04, "grad_norm": 2.852872610092163, "learning_rate": 0.0002, "loss": 1.2841, "step": 8660 }, { "epoch": 0.04, "grad_norm": 1.6619428396224976, "learning_rate": 0.0002, "loss": 1.4966, "step": 8670 }, { "epoch": 0.04, "grad_norm": 2.434842348098755, "learning_rate": 0.0002, "loss": 1.6472, "step": 8680 }, { "epoch": 0.04, "grad_norm": 3.3770740032196045, "learning_rate": 0.0002, "loss": 1.4468, "step": 8690 }, { "epoch": 0.04, "grad_norm": 2.617666482925415, "learning_rate": 0.0002, "loss": 1.4659, "step": 8700 }, { "epoch": 0.04, "grad_norm": 2.8038082122802734, "learning_rate": 0.0002, "loss": 1.4438, "step": 8710 }, { "epoch": 0.04, "grad_norm": 1.0472348928451538, "learning_rate": 0.0002, "loss": 1.599, "step": 8720 }, { "epoch": 0.04, "grad_norm": 2.575782299041748, "learning_rate": 0.0002, "loss": 1.2006, "step": 8730 }, { "epoch": 0.04, "grad_norm": 3.5544443130493164, "learning_rate": 0.0002, "loss": 1.4356, "step": 8740 }, { "epoch": 0.04, "grad_norm": 2.0749611854553223, "learning_rate": 0.0002, "loss": 1.4611, "step": 8750 }, { "epoch": 0.04, "grad_norm": 1.95218825340271, "learning_rate": 0.0002, "loss": 1.3819, "step": 8760 }, { "epoch": 0.04, "grad_norm": 1.5037180185317993, "learning_rate": 0.0002, "loss": 1.4629, "step": 8770 }, { "epoch": 0.04, "grad_norm": 1.5092358589172363, "learning_rate": 0.0002, "loss": 1.6568, "step": 8780 }, { "epoch": 0.04, "grad_norm": 2.3440051078796387, "learning_rate": 0.0002, "loss": 1.2744, "step": 8790 }, { "epoch": 0.04, "grad_norm": 2.44008207321167, "learning_rate": 0.0002, "loss": 1.7025, "step": 8800 }, { "epoch": 0.04, "grad_norm": 2.107292652130127, "learning_rate": 0.0002, "loss": 1.4946, "step": 8810 }, { "epoch": 0.04, "grad_norm": 1.3421411514282227, "learning_rate": 0.0002, "loss": 1.4804, "step": 8820 }, { "epoch": 0.04, "grad_norm": 1.2419626712799072, "learning_rate": 0.0002, "loss": 1.5096, "step": 8830 }, { "epoch": 0.04, "grad_norm": 2.472987174987793, "learning_rate": 0.0002, "loss": 1.715, "step": 8840 }, { "epoch": 0.04, "grad_norm": 1.8042207956314087, "learning_rate": 0.0002, "loss": 1.5836, "step": 8850 }, { "epoch": 0.04, "grad_norm": 1.3530335426330566, "learning_rate": 0.0002, "loss": 1.5668, "step": 8860 }, { "epoch": 0.04, "grad_norm": 1.811501383781433, "learning_rate": 0.0002, "loss": 1.469, "step": 8870 }, { "epoch": 0.04, "grad_norm": 0.640588104724884, "learning_rate": 0.0002, "loss": 1.5066, "step": 8880 }, { "epoch": 0.04, "grad_norm": 3.302619218826294, "learning_rate": 0.0002, "loss": 1.3602, "step": 8890 }, { "epoch": 0.04, "grad_norm": 1.70187246799469, "learning_rate": 0.0002, "loss": 1.5234, "step": 8900 }, { "epoch": 0.04, "grad_norm": 2.531583547592163, "learning_rate": 0.0002, "loss": 1.529, "step": 8910 }, { "epoch": 0.04, "grad_norm": 1.9994226694107056, "learning_rate": 0.0002, "loss": 1.4338, "step": 8920 }, { "epoch": 0.04, "grad_norm": 2.6550629138946533, "learning_rate": 0.0002, "loss": 1.2551, "step": 8930 }, { "epoch": 0.04, "grad_norm": 2.0423574447631836, "learning_rate": 0.0002, "loss": 1.5489, "step": 8940 }, { "epoch": 0.04, "grad_norm": 2.280535936355591, "learning_rate": 0.0002, "loss": 1.6398, "step": 8950 }, { "epoch": 0.04, "grad_norm": 2.0616676807403564, "learning_rate": 0.0002, "loss": 1.6291, "step": 8960 }, { "epoch": 0.04, "grad_norm": 2.286020517349243, "learning_rate": 0.0002, "loss": 1.7186, "step": 8970 }, { "epoch": 0.04, "grad_norm": 2.8370234966278076, "learning_rate": 0.0002, "loss": 1.3507, "step": 8980 }, { "epoch": 0.04, "grad_norm": 1.7390313148498535, "learning_rate": 0.0002, "loss": 1.5841, "step": 8990 }, { "epoch": 0.04, "grad_norm": 1.9247982501983643, "learning_rate": 0.0002, "loss": 1.4037, "step": 9000 }, { "epoch": 0.04, "grad_norm": 1.8252466917037964, "learning_rate": 0.0002, "loss": 1.5878, "step": 9010 }, { "epoch": 0.04, "grad_norm": 2.1486916542053223, "learning_rate": 0.0002, "loss": 1.6451, "step": 9020 }, { "epoch": 0.04, "grad_norm": 2.4366111755371094, "learning_rate": 0.0002, "loss": 1.6577, "step": 9030 }, { "epoch": 0.04, "grad_norm": 2.163590908050537, "learning_rate": 0.0002, "loss": 1.5718, "step": 9040 }, { "epoch": 0.04, "grad_norm": 2.35859751701355, "learning_rate": 0.0002, "loss": 1.6796, "step": 9050 }, { "epoch": 0.04, "grad_norm": 1.9160187244415283, "learning_rate": 0.0002, "loss": 1.3861, "step": 9060 }, { "epoch": 0.04, "grad_norm": 3.1791484355926514, "learning_rate": 0.0002, "loss": 1.4197, "step": 9070 }, { "epoch": 0.04, "grad_norm": 2.5261693000793457, "learning_rate": 0.0002, "loss": 1.4219, "step": 9080 }, { "epoch": 0.04, "grad_norm": 1.7544994354248047, "learning_rate": 0.0002, "loss": 1.484, "step": 9090 }, { "epoch": 0.04, "grad_norm": 6.958573341369629, "learning_rate": 0.0002, "loss": 1.6725, "step": 9100 }, { "epoch": 0.04, "grad_norm": 2.2099661827087402, "learning_rate": 0.0002, "loss": 1.2423, "step": 9110 }, { "epoch": 0.04, "grad_norm": 2.733203887939453, "learning_rate": 0.0002, "loss": 1.5408, "step": 9120 }, { "epoch": 0.04, "grad_norm": 1.7101913690567017, "learning_rate": 0.0002, "loss": 1.5737, "step": 9130 }, { "epoch": 0.04, "grad_norm": 1.9901279211044312, "learning_rate": 0.0002, "loss": 1.4771, "step": 9140 }, { "epoch": 0.04, "grad_norm": 1.3482441902160645, "learning_rate": 0.0002, "loss": 1.5977, "step": 9150 }, { "epoch": 0.04, "grad_norm": 1.5688316822052002, "learning_rate": 0.0002, "loss": 1.318, "step": 9160 }, { "epoch": 0.04, "grad_norm": 1.9875363111495972, "learning_rate": 0.0002, "loss": 1.373, "step": 9170 }, { "epoch": 0.04, "grad_norm": 1.3701061010360718, "learning_rate": 0.0002, "loss": 1.697, "step": 9180 }, { "epoch": 0.04, "grad_norm": 2.0173275470733643, "learning_rate": 0.0002, "loss": 1.2989, "step": 9190 }, { "epoch": 0.04, "grad_norm": 2.2299139499664307, "learning_rate": 0.0002, "loss": 1.3661, "step": 9200 }, { "epoch": 0.04, "grad_norm": 1.6160924434661865, "learning_rate": 0.0002, "loss": 1.2699, "step": 9210 }, { "epoch": 0.04, "grad_norm": 3.994410276412964, "learning_rate": 0.0002, "loss": 1.4801, "step": 9220 }, { "epoch": 0.04, "grad_norm": 1.927617073059082, "learning_rate": 0.0002, "loss": 1.5189, "step": 9230 }, { "epoch": 0.04, "grad_norm": 2.1041600704193115, "learning_rate": 0.0002, "loss": 1.5791, "step": 9240 }, { "epoch": 0.04, "grad_norm": 2.8898520469665527, "learning_rate": 0.0002, "loss": 1.4934, "step": 9250 }, { "epoch": 0.04, "grad_norm": 4.153921127319336, "learning_rate": 0.0002, "loss": 1.6638, "step": 9260 }, { "epoch": 0.04, "grad_norm": 1.9454998970031738, "learning_rate": 0.0002, "loss": 1.4235, "step": 9270 }, { "epoch": 0.04, "grad_norm": 2.861973762512207, "learning_rate": 0.0002, "loss": 1.5341, "step": 9280 }, { "epoch": 0.04, "grad_norm": 6.263934135437012, "learning_rate": 0.0002, "loss": 1.3967, "step": 9290 }, { "epoch": 0.04, "grad_norm": 2.406322956085205, "learning_rate": 0.0002, "loss": 1.605, "step": 9300 }, { "epoch": 0.04, "grad_norm": 4.154240131378174, "learning_rate": 0.0002, "loss": 1.3341, "step": 9310 }, { "epoch": 0.04, "grad_norm": 1.9010679721832275, "learning_rate": 0.0002, "loss": 1.5088, "step": 9320 }, { "epoch": 0.04, "grad_norm": 3.185878038406372, "learning_rate": 0.0002, "loss": 1.5608, "step": 9330 }, { "epoch": 0.04, "grad_norm": 3.0581703186035156, "learning_rate": 0.0002, "loss": 1.5438, "step": 9340 }, { "epoch": 0.04, "grad_norm": 2.709169864654541, "learning_rate": 0.0002, "loss": 1.424, "step": 9350 }, { "epoch": 0.04, "grad_norm": 1.7780500650405884, "learning_rate": 0.0002, "loss": 1.3725, "step": 9360 }, { "epoch": 0.04, "grad_norm": 3.6188547611236572, "learning_rate": 0.0002, "loss": 1.4067, "step": 9370 }, { "epoch": 0.04, "grad_norm": 1.421235203742981, "learning_rate": 0.0002, "loss": 1.5405, "step": 9380 }, { "epoch": 0.04, "grad_norm": 1.7477631568908691, "learning_rate": 0.0002, "loss": 1.5725, "step": 9390 }, { "epoch": 0.04, "grad_norm": 2.976655960083008, "learning_rate": 0.0002, "loss": 1.2413, "step": 9400 }, { "epoch": 0.04, "grad_norm": 2.146394729614258, "learning_rate": 0.0002, "loss": 1.5004, "step": 9410 }, { "epoch": 0.04, "grad_norm": 1.380875825881958, "learning_rate": 0.0002, "loss": 1.5016, "step": 9420 }, { "epoch": 0.04, "grad_norm": 2.232473134994507, "learning_rate": 0.0002, "loss": 1.2433, "step": 9430 }, { "epoch": 0.04, "grad_norm": 2.344813108444214, "learning_rate": 0.0002, "loss": 1.3076, "step": 9440 }, { "epoch": 0.04, "grad_norm": 2.1861815452575684, "learning_rate": 0.0002, "loss": 1.5661, "step": 9450 }, { "epoch": 0.04, "grad_norm": 1.5195194482803345, "learning_rate": 0.0002, "loss": 1.5702, "step": 9460 }, { "epoch": 0.04, "grad_norm": 2.4760591983795166, "learning_rate": 0.0002, "loss": 1.176, "step": 9470 }, { "epoch": 0.04, "grad_norm": 2.191601514816284, "learning_rate": 0.0002, "loss": 1.468, "step": 9480 }, { "epoch": 0.04, "grad_norm": 2.204343795776367, "learning_rate": 0.0002, "loss": 1.7225, "step": 9490 }, { "epoch": 0.04, "grad_norm": 2.054029703140259, "learning_rate": 0.0002, "loss": 1.4516, "step": 9500 }, { "epoch": 0.04, "grad_norm": 3.388739585876465, "learning_rate": 0.0002, "loss": 1.3118, "step": 9510 }, { "epoch": 0.04, "grad_norm": 3.6222074031829834, "learning_rate": 0.0002, "loss": 1.7605, "step": 9520 }, { "epoch": 0.04, "grad_norm": 3.0071871280670166, "learning_rate": 0.0002, "loss": 1.6159, "step": 9530 }, { "epoch": 0.04, "grad_norm": 2.9839935302734375, "learning_rate": 0.0002, "loss": 1.3029, "step": 9540 }, { "epoch": 0.04, "grad_norm": 2.6663055419921875, "learning_rate": 0.0002, "loss": 1.3406, "step": 9550 }, { "epoch": 0.04, "grad_norm": 1.866543173789978, "learning_rate": 0.0002, "loss": 1.7184, "step": 9560 }, { "epoch": 0.04, "grad_norm": 1.5483835935592651, "learning_rate": 0.0002, "loss": 1.5868, "step": 9570 }, { "epoch": 0.04, "grad_norm": 2.717207670211792, "learning_rate": 0.0002, "loss": 1.4164, "step": 9580 }, { "epoch": 0.04, "grad_norm": 1.4116331338882446, "learning_rate": 0.0002, "loss": 1.6029, "step": 9590 }, { "epoch": 0.04, "grad_norm": 1.5197316408157349, "learning_rate": 0.0002, "loss": 1.6099, "step": 9600 }, { "epoch": 0.04, "grad_norm": 2.425053119659424, "learning_rate": 0.0002, "loss": 1.5524, "step": 9610 }, { "epoch": 0.04, "grad_norm": 4.0569047927856445, "learning_rate": 0.0002, "loss": 1.5491, "step": 9620 }, { "epoch": 0.04, "grad_norm": 3.654546022415161, "learning_rate": 0.0002, "loss": 1.5997, "step": 9630 }, { "epoch": 0.04, "grad_norm": 3.356492280960083, "learning_rate": 0.0002, "loss": 1.727, "step": 9640 }, { "epoch": 0.04, "grad_norm": 1.541933298110962, "learning_rate": 0.0002, "loss": 1.3646, "step": 9650 }, { "epoch": 0.04, "grad_norm": 3.0479490756988525, "learning_rate": 0.0002, "loss": 1.3044, "step": 9660 }, { "epoch": 0.04, "grad_norm": 2.163189649581909, "learning_rate": 0.0002, "loss": 1.2729, "step": 9670 }, { "epoch": 0.04, "grad_norm": 2.558377265930176, "learning_rate": 0.0002, "loss": 1.5186, "step": 9680 }, { "epoch": 0.04, "grad_norm": 2.0095067024230957, "learning_rate": 0.0002, "loss": 1.2986, "step": 9690 }, { "epoch": 0.04, "grad_norm": 1.452296257019043, "learning_rate": 0.0002, "loss": 1.2854, "step": 9700 }, { "epoch": 0.04, "grad_norm": 1.7544686794281006, "learning_rate": 0.0002, "loss": 1.608, "step": 9710 }, { "epoch": 0.04, "grad_norm": 3.6297078132629395, "learning_rate": 0.0002, "loss": 1.6141, "step": 9720 }, { "epoch": 0.04, "grad_norm": 2.1381561756134033, "learning_rate": 0.0002, "loss": 1.388, "step": 9730 }, { "epoch": 0.04, "grad_norm": 1.739957571029663, "learning_rate": 0.0002, "loss": 1.3013, "step": 9740 }, { "epoch": 0.04, "grad_norm": 1.6483803987503052, "learning_rate": 0.0002, "loss": 1.7471, "step": 9750 }, { "epoch": 0.04, "grad_norm": 3.202650785446167, "learning_rate": 0.0002, "loss": 1.5489, "step": 9760 }, { "epoch": 0.04, "grad_norm": 1.9149564504623413, "learning_rate": 0.0002, "loss": 1.4204, "step": 9770 }, { "epoch": 0.04, "grad_norm": 1.4379650354385376, "learning_rate": 0.0002, "loss": 1.7234, "step": 9780 }, { "epoch": 0.04, "grad_norm": 2.5064618587493896, "learning_rate": 0.0002, "loss": 1.4735, "step": 9790 }, { "epoch": 0.04, "grad_norm": 3.574941873550415, "learning_rate": 0.0002, "loss": 1.5288, "step": 9800 }, { "epoch": 0.04, "grad_norm": 2.5288143157958984, "learning_rate": 0.0002, "loss": 1.3287, "step": 9810 }, { "epoch": 0.04, "grad_norm": 2.7682929039001465, "learning_rate": 0.0002, "loss": 1.409, "step": 9820 }, { "epoch": 0.04, "grad_norm": 3.806698799133301, "learning_rate": 0.0002, "loss": 1.8559, "step": 9830 }, { "epoch": 0.04, "grad_norm": 2.2512664794921875, "learning_rate": 0.0002, "loss": 1.4684, "step": 9840 }, { "epoch": 0.04, "grad_norm": 1.6088216304779053, "learning_rate": 0.0002, "loss": 1.4779, "step": 9850 }, { "epoch": 0.04, "grad_norm": 2.0424270629882812, "learning_rate": 0.0002, "loss": 1.5806, "step": 9860 }, { "epoch": 0.04, "grad_norm": 5.026051044464111, "learning_rate": 0.0002, "loss": 1.514, "step": 9870 }, { "epoch": 0.04, "grad_norm": 2.103221893310547, "learning_rate": 0.0002, "loss": 1.4094, "step": 9880 }, { "epoch": 0.04, "grad_norm": 5.502892971038818, "learning_rate": 0.0002, "loss": 1.359, "step": 9890 }, { "epoch": 0.04, "grad_norm": 1.8271530866622925, "learning_rate": 0.0002, "loss": 1.3619, "step": 9900 }, { "epoch": 0.04, "grad_norm": 1.3533602952957153, "learning_rate": 0.0002, "loss": 1.4644, "step": 9910 }, { "epoch": 0.04, "grad_norm": 2.6266567707061768, "learning_rate": 0.0002, "loss": 1.4567, "step": 9920 }, { "epoch": 0.04, "grad_norm": 1.6078795194625854, "learning_rate": 0.0002, "loss": 1.4917, "step": 9930 }, { "epoch": 0.04, "grad_norm": 2.6393001079559326, "learning_rate": 0.0002, "loss": 1.6625, "step": 9940 }, { "epoch": 0.04, "grad_norm": 1.798059344291687, "learning_rate": 0.0002, "loss": 1.4274, "step": 9950 }, { "epoch": 0.04, "grad_norm": 1.5622259378433228, "learning_rate": 0.0002, "loss": 1.8045, "step": 9960 }, { "epoch": 0.04, "grad_norm": 2.147831916809082, "learning_rate": 0.0002, "loss": 1.532, "step": 9970 }, { "epoch": 0.04, "grad_norm": 1.8744089603424072, "learning_rate": 0.0002, "loss": 1.3692, "step": 9980 }, { "epoch": 0.04, "grad_norm": 1.7500641345977783, "learning_rate": 0.0002, "loss": 1.4423, "step": 9990 }, { "epoch": 0.04, "grad_norm": 1.7468748092651367, "learning_rate": 0.0002, "loss": 1.4265, "step": 10000 }, { "epoch": 0.04, "grad_norm": 2.510979175567627, "learning_rate": 0.0002, "loss": 1.5098, "step": 10010 }, { "epoch": 0.04, "grad_norm": 3.1777942180633545, "learning_rate": 0.0002, "loss": 1.6246, "step": 10020 }, { "epoch": 0.04, "grad_norm": 4.326938629150391, "learning_rate": 0.0002, "loss": 1.3719, "step": 10030 }, { "epoch": 0.04, "grad_norm": 1.5160945653915405, "learning_rate": 0.0002, "loss": 1.5542, "step": 10040 }, { "epoch": 0.04, "grad_norm": 1.9833611249923706, "learning_rate": 0.0002, "loss": 1.3501, "step": 10050 }, { "epoch": 0.04, "grad_norm": 2.608032703399658, "learning_rate": 0.0002, "loss": 1.6279, "step": 10060 }, { "epoch": 0.04, "grad_norm": 1.9517425298690796, "learning_rate": 0.0002, "loss": 1.4667, "step": 10070 }, { "epoch": 0.04, "grad_norm": 2.6290266513824463, "learning_rate": 0.0002, "loss": 1.5703, "step": 10080 }, { "epoch": 0.04, "grad_norm": 2.295994281768799, "learning_rate": 0.0002, "loss": 1.5761, "step": 10090 }, { "epoch": 0.04, "grad_norm": 2.9883697032928467, "learning_rate": 0.0002, "loss": 1.3417, "step": 10100 }, { "epoch": 0.04, "grad_norm": 2.5551838874816895, "learning_rate": 0.0002, "loss": 1.3978, "step": 10110 }, { "epoch": 0.04, "grad_norm": 2.2913262844085693, "learning_rate": 0.0002, "loss": 1.5589, "step": 10120 }, { "epoch": 0.04, "grad_norm": 5.924326419830322, "learning_rate": 0.0002, "loss": 1.5101, "step": 10130 }, { "epoch": 0.04, "grad_norm": 5.040271282196045, "learning_rate": 0.0002, "loss": 1.4617, "step": 10140 }, { "epoch": 0.04, "grad_norm": 2.8267102241516113, "learning_rate": 0.0002, "loss": 1.4153, "step": 10150 }, { "epoch": 0.04, "grad_norm": 1.9938791990280151, "learning_rate": 0.0002, "loss": 1.5282, "step": 10160 }, { "epoch": 0.04, "grad_norm": 1.4544986486434937, "learning_rate": 0.0002, "loss": 1.413, "step": 10170 }, { "epoch": 0.04, "grad_norm": 1.4264335632324219, "learning_rate": 0.0002, "loss": 1.3419, "step": 10180 }, { "epoch": 0.04, "grad_norm": 1.414275884628296, "learning_rate": 0.0002, "loss": 1.6333, "step": 10190 }, { "epoch": 0.04, "grad_norm": 1.7165502309799194, "learning_rate": 0.0002, "loss": 1.4672, "step": 10200 }, { "epoch": 0.04, "grad_norm": 1.3620363473892212, "learning_rate": 0.0002, "loss": 1.519, "step": 10210 }, { "epoch": 0.04, "grad_norm": 2.2293219566345215, "learning_rate": 0.0002, "loss": 1.5962, "step": 10220 }, { "epoch": 0.04, "grad_norm": 1.8290833234786987, "learning_rate": 0.0002, "loss": 1.5504, "step": 10230 }, { "epoch": 0.04, "grad_norm": 2.5602290630340576, "learning_rate": 0.0002, "loss": 1.3161, "step": 10240 }, { "epoch": 0.04, "grad_norm": 2.729640245437622, "learning_rate": 0.0002, "loss": 1.6121, "step": 10250 }, { "epoch": 0.04, "grad_norm": 2.6101460456848145, "learning_rate": 0.0002, "loss": 1.5954, "step": 10260 }, { "epoch": 0.04, "grad_norm": 1.937704086303711, "learning_rate": 0.0002, "loss": 1.4409, "step": 10270 }, { "epoch": 0.04, "grad_norm": 1.9774819612503052, "learning_rate": 0.0002, "loss": 1.4809, "step": 10280 }, { "epoch": 0.04, "grad_norm": 1.655508041381836, "learning_rate": 0.0002, "loss": 1.174, "step": 10290 }, { "epoch": 0.04, "grad_norm": 0.974080502986908, "learning_rate": 0.0002, "loss": 1.6599, "step": 10300 }, { "epoch": 0.04, "grad_norm": 1.601430892944336, "learning_rate": 0.0002, "loss": 1.2902, "step": 10310 }, { "epoch": 0.04, "grad_norm": 2.4249191284179688, "learning_rate": 0.0002, "loss": 1.3584, "step": 10320 }, { "epoch": 0.04, "grad_norm": 2.393831491470337, "learning_rate": 0.0002, "loss": 1.3079, "step": 10330 }, { "epoch": 0.04, "grad_norm": 2.6059916019439697, "learning_rate": 0.0002, "loss": 1.5193, "step": 10340 }, { "epoch": 0.04, "grad_norm": 2.351746082305908, "learning_rate": 0.0002, "loss": 1.5522, "step": 10350 }, { "epoch": 0.04, "grad_norm": 1.9017951488494873, "learning_rate": 0.0002, "loss": 1.68, "step": 10360 }, { "epoch": 0.04, "grad_norm": 2.102987289428711, "learning_rate": 0.0002, "loss": 1.491, "step": 10370 }, { "epoch": 0.04, "grad_norm": 1.8690834045410156, "learning_rate": 0.0002, "loss": 1.3253, "step": 10380 }, { "epoch": 0.04, "grad_norm": 1.289420485496521, "learning_rate": 0.0002, "loss": 1.3491, "step": 10390 }, { "epoch": 0.04, "grad_norm": 2.12929105758667, "learning_rate": 0.0002, "loss": 1.4564, "step": 10400 }, { "epoch": 0.04, "grad_norm": 1.734675645828247, "learning_rate": 0.0002, "loss": 1.4616, "step": 10410 }, { "epoch": 0.04, "grad_norm": 3.0055181980133057, "learning_rate": 0.0002, "loss": 1.5626, "step": 10420 }, { "epoch": 0.04, "grad_norm": 2.704716205596924, "learning_rate": 0.0002, "loss": 1.5201, "step": 10430 }, { "epoch": 0.04, "grad_norm": 1.6757135391235352, "learning_rate": 0.0002, "loss": 1.7263, "step": 10440 }, { "epoch": 0.04, "grad_norm": 2.5231552124023438, "learning_rate": 0.0002, "loss": 1.6591, "step": 10450 }, { "epoch": 0.04, "grad_norm": 3.847229242324829, "learning_rate": 0.0002, "loss": 1.4583, "step": 10460 }, { "epoch": 0.04, "grad_norm": 5.027800559997559, "learning_rate": 0.0002, "loss": 1.3525, "step": 10470 }, { "epoch": 0.04, "grad_norm": 2.7662501335144043, "learning_rate": 0.0002, "loss": 1.3803, "step": 10480 }, { "epoch": 0.04, "grad_norm": 6.627991199493408, "learning_rate": 0.0002, "loss": 1.4035, "step": 10490 }, { "epoch": 0.04, "grad_norm": 3.6827170848846436, "learning_rate": 0.0002, "loss": 1.374, "step": 10500 }, { "epoch": 0.04, "grad_norm": 3.2306199073791504, "learning_rate": 0.0002, "loss": 1.6404, "step": 10510 }, { "epoch": 0.04, "grad_norm": 2.344848155975342, "learning_rate": 0.0002, "loss": 1.5727, "step": 10520 }, { "epoch": 0.04, "grad_norm": 1.332263708114624, "learning_rate": 0.0002, "loss": 1.5175, "step": 10530 }, { "epoch": 0.04, "grad_norm": 2.0251519680023193, "learning_rate": 0.0002, "loss": 1.4298, "step": 10540 }, { "epoch": 0.04, "grad_norm": 4.969330787658691, "learning_rate": 0.0002, "loss": 1.4877, "step": 10550 }, { "epoch": 0.04, "grad_norm": 1.0357476472854614, "learning_rate": 0.0002, "loss": 1.5273, "step": 10560 }, { "epoch": 0.04, "grad_norm": 1.5685274600982666, "learning_rate": 0.0002, "loss": 1.3869, "step": 10570 }, { "epoch": 0.04, "grad_norm": 1.9355400800704956, "learning_rate": 0.0002, "loss": 1.5163, "step": 10580 }, { "epoch": 0.04, "grad_norm": 5.838099479675293, "learning_rate": 0.0002, "loss": 1.7477, "step": 10590 }, { "epoch": 0.04, "grad_norm": 2.3616695404052734, "learning_rate": 0.0002, "loss": 1.3375, "step": 10600 }, { "epoch": 0.04, "grad_norm": 2.524017810821533, "learning_rate": 0.0002, "loss": 1.5801, "step": 10610 }, { "epoch": 0.04, "grad_norm": 4.311459064483643, "learning_rate": 0.0002, "loss": 1.6042, "step": 10620 }, { "epoch": 0.04, "grad_norm": 3.8719394207000732, "learning_rate": 0.0002, "loss": 1.3737, "step": 10630 }, { "epoch": 0.04, "grad_norm": 3.6623716354370117, "learning_rate": 0.0002, "loss": 1.4965, "step": 10640 }, { "epoch": 0.04, "grad_norm": 2.2239303588867188, "learning_rate": 0.0002, "loss": 1.5064, "step": 10650 }, { "epoch": 0.04, "grad_norm": 1.8010504245758057, "learning_rate": 0.0002, "loss": 1.4152, "step": 10660 }, { "epoch": 0.04, "grad_norm": 2.1300582885742188, "learning_rate": 0.0002, "loss": 1.4518, "step": 10670 }, { "epoch": 0.04, "grad_norm": 1.296439290046692, "learning_rate": 0.0002, "loss": 1.5465, "step": 10680 }, { "epoch": 0.04, "grad_norm": 2.1948704719543457, "learning_rate": 0.0002, "loss": 1.5866, "step": 10690 }, { "epoch": 0.04, "grad_norm": 0.8526033163070679, "learning_rate": 0.0002, "loss": 1.7373, "step": 10700 }, { "epoch": 0.04, "grad_norm": 2.8582730293273926, "learning_rate": 0.0002, "loss": 1.5725, "step": 10710 }, { "epoch": 0.04, "grad_norm": 3.7736663818359375, "learning_rate": 0.0002, "loss": 1.502, "step": 10720 }, { "epoch": 0.04, "grad_norm": 2.800734519958496, "learning_rate": 0.0002, "loss": 1.4864, "step": 10730 }, { "epoch": 0.04, "grad_norm": 2.2678401470184326, "learning_rate": 0.0002, "loss": 1.4931, "step": 10740 }, { "epoch": 0.04, "grad_norm": 2.0854079723358154, "learning_rate": 0.0002, "loss": 1.6373, "step": 10750 }, { "epoch": 0.04, "grad_norm": 2.0455338954925537, "learning_rate": 0.0002, "loss": 1.6451, "step": 10760 }, { "epoch": 0.04, "grad_norm": 2.884866237640381, "learning_rate": 0.0002, "loss": 1.5295, "step": 10770 }, { "epoch": 0.04, "grad_norm": 3.3238112926483154, "learning_rate": 0.0002, "loss": 1.4764, "step": 10780 }, { "epoch": 0.04, "grad_norm": 2.2513654232025146, "learning_rate": 0.0002, "loss": 1.5734, "step": 10790 }, { "epoch": 0.04, "grad_norm": 1.9655472040176392, "learning_rate": 0.0002, "loss": 1.3751, "step": 10800 }, { "epoch": 0.04, "grad_norm": 1.4972126483917236, "learning_rate": 0.0002, "loss": 1.2372, "step": 10810 }, { "epoch": 0.04, "grad_norm": 2.207970142364502, "learning_rate": 0.0002, "loss": 1.4521, "step": 10820 }, { "epoch": 0.04, "grad_norm": 3.09212589263916, "learning_rate": 0.0002, "loss": 1.3911, "step": 10830 }, { "epoch": 0.04, "grad_norm": 1.4520213603973389, "learning_rate": 0.0002, "loss": 1.5371, "step": 10840 }, { "epoch": 0.04, "grad_norm": 2.301799774169922, "learning_rate": 0.0002, "loss": 1.5244, "step": 10850 }, { "epoch": 0.04, "grad_norm": 2.394136905670166, "learning_rate": 0.0002, "loss": 1.5169, "step": 10860 }, { "epoch": 0.04, "grad_norm": 1.8320896625518799, "learning_rate": 0.0002, "loss": 1.7286, "step": 10870 }, { "epoch": 0.04, "grad_norm": 2.68621563911438, "learning_rate": 0.0002, "loss": 1.5138, "step": 10880 }, { "epoch": 0.04, "grad_norm": 2.8740596771240234, "learning_rate": 0.0002, "loss": 1.5204, "step": 10890 }, { "epoch": 0.04, "grad_norm": 2.1964375972747803, "learning_rate": 0.0002, "loss": 1.554, "step": 10900 }, { "epoch": 0.04, "grad_norm": 2.271799325942993, "learning_rate": 0.0002, "loss": 1.4675, "step": 10910 }, { "epoch": 0.04, "grad_norm": 2.6335554122924805, "learning_rate": 0.0002, "loss": 1.3646, "step": 10920 }, { "epoch": 0.04, "grad_norm": 2.877833843231201, "learning_rate": 0.0002, "loss": 1.3154, "step": 10930 }, { "epoch": 0.04, "grad_norm": 3.0377161502838135, "learning_rate": 0.0002, "loss": 1.3505, "step": 10940 }, { "epoch": 0.04, "grad_norm": 1.8202954530715942, "learning_rate": 0.0002, "loss": 1.529, "step": 10950 }, { "epoch": 0.04, "grad_norm": 1.6337590217590332, "learning_rate": 0.0002, "loss": 1.5391, "step": 10960 }, { "epoch": 0.04, "grad_norm": 2.756498098373413, "learning_rate": 0.0002, "loss": 1.6629, "step": 10970 }, { "epoch": 0.04, "grad_norm": 3.0724730491638184, "learning_rate": 0.0002, "loss": 1.5406, "step": 10980 }, { "epoch": 0.04, "grad_norm": 1.6531891822814941, "learning_rate": 0.0002, "loss": 1.2066, "step": 10990 }, { "epoch": 0.04, "grad_norm": 1.447715163230896, "learning_rate": 0.0002, "loss": 1.0133, "step": 11000 }, { "epoch": 0.04, "grad_norm": 2.77742600440979, "learning_rate": 0.0002, "loss": 1.1923, "step": 11010 }, { "epoch": 0.04, "grad_norm": 1.810593605041504, "learning_rate": 0.0002, "loss": 1.4563, "step": 11020 }, { "epoch": 0.04, "grad_norm": 2.3262619972229004, "learning_rate": 0.0002, "loss": 1.5912, "step": 11030 }, { "epoch": 0.04, "grad_norm": 2.162961721420288, "learning_rate": 0.0002, "loss": 1.3866, "step": 11040 }, { "epoch": 0.04, "grad_norm": 1.0494539737701416, "learning_rate": 0.0002, "loss": 1.5927, "step": 11050 }, { "epoch": 0.05, "grad_norm": 1.2355360984802246, "learning_rate": 0.0002, "loss": 1.5277, "step": 11060 }, { "epoch": 0.05, "grad_norm": 1.9509824514389038, "learning_rate": 0.0002, "loss": 1.5931, "step": 11070 }, { "epoch": 0.05, "grad_norm": 2.6511220932006836, "learning_rate": 0.0002, "loss": 1.7524, "step": 11080 }, { "epoch": 0.05, "grad_norm": 3.7089943885803223, "learning_rate": 0.0002, "loss": 1.7531, "step": 11090 }, { "epoch": 0.05, "grad_norm": 2.746441125869751, "learning_rate": 0.0002, "loss": 1.5111, "step": 11100 }, { "epoch": 0.05, "grad_norm": 1.2044302225112915, "learning_rate": 0.0002, "loss": 1.4216, "step": 11110 }, { "epoch": 0.05, "grad_norm": 1.507944107055664, "learning_rate": 0.0002, "loss": 1.4297, "step": 11120 }, { "epoch": 0.05, "grad_norm": 2.243171215057373, "learning_rate": 0.0002, "loss": 1.659, "step": 11130 }, { "epoch": 0.05, "grad_norm": 2.441171407699585, "learning_rate": 0.0002, "loss": 1.6268, "step": 11140 }, { "epoch": 0.05, "grad_norm": 5.163371562957764, "learning_rate": 0.0002, "loss": 1.4867, "step": 11150 }, { "epoch": 0.05, "grad_norm": 1.6589463949203491, "learning_rate": 0.0002, "loss": 1.5882, "step": 11160 }, { "epoch": 0.05, "grad_norm": 2.7173140048980713, "learning_rate": 0.0002, "loss": 1.5195, "step": 11170 }, { "epoch": 0.05, "grad_norm": 1.6784995794296265, "learning_rate": 0.0002, "loss": 1.4661, "step": 11180 }, { "epoch": 0.05, "grad_norm": 1.335965871810913, "learning_rate": 0.0002, "loss": 1.311, "step": 11190 }, { "epoch": 0.05, "grad_norm": 1.9160479307174683, "learning_rate": 0.0002, "loss": 1.658, "step": 11200 }, { "epoch": 0.05, "grad_norm": 2.7417726516723633, "learning_rate": 0.0002, "loss": 1.4986, "step": 11210 }, { "epoch": 0.05, "grad_norm": 1.9751862287521362, "learning_rate": 0.0002, "loss": 1.4853, "step": 11220 }, { "epoch": 0.05, "grad_norm": 2.057513952255249, "learning_rate": 0.0002, "loss": 1.378, "step": 11230 }, { "epoch": 0.05, "grad_norm": 2.17313551902771, "learning_rate": 0.0002, "loss": 1.6113, "step": 11240 }, { "epoch": 0.05, "grad_norm": 1.8268778324127197, "learning_rate": 0.0002, "loss": 1.6669, "step": 11250 }, { "epoch": 0.05, "grad_norm": 2.9265947341918945, "learning_rate": 0.0002, "loss": 1.7486, "step": 11260 }, { "epoch": 0.05, "grad_norm": 2.417320966720581, "learning_rate": 0.0002, "loss": 1.6536, "step": 11270 }, { "epoch": 0.05, "grad_norm": 1.8227659463882446, "learning_rate": 0.0002, "loss": 1.1781, "step": 11280 }, { "epoch": 0.05, "grad_norm": 1.8350491523742676, "learning_rate": 0.0002, "loss": 1.4288, "step": 11290 }, { "epoch": 0.05, "grad_norm": 2.507216691970825, "learning_rate": 0.0002, "loss": 1.5559, "step": 11300 }, { "epoch": 0.05, "grad_norm": 1.571454405784607, "learning_rate": 0.0002, "loss": 1.4842, "step": 11310 }, { "epoch": 0.05, "grad_norm": 3.9473659992218018, "learning_rate": 0.0002, "loss": 1.5347, "step": 11320 }, { "epoch": 0.05, "grad_norm": 3.77838397026062, "learning_rate": 0.0002, "loss": 1.4127, "step": 11330 }, { "epoch": 0.05, "grad_norm": 2.867417097091675, "learning_rate": 0.0002, "loss": 1.4114, "step": 11340 }, { "epoch": 0.05, "grad_norm": 2.2573728561401367, "learning_rate": 0.0002, "loss": 1.6295, "step": 11350 }, { "epoch": 0.05, "grad_norm": 3.1470425128936768, "learning_rate": 0.0002, "loss": 1.5217, "step": 11360 }, { "epoch": 0.05, "grad_norm": 2.005643129348755, "learning_rate": 0.0002, "loss": 1.5908, "step": 11370 }, { "epoch": 0.05, "grad_norm": 3.224104881286621, "learning_rate": 0.0002, "loss": 1.7079, "step": 11380 }, { "epoch": 0.05, "grad_norm": 2.380202531814575, "learning_rate": 0.0002, "loss": 1.6843, "step": 11390 }, { "epoch": 0.05, "grad_norm": 3.553837776184082, "learning_rate": 0.0002, "loss": 1.6159, "step": 11400 }, { "epoch": 0.05, "grad_norm": 2.519763946533203, "learning_rate": 0.0002, "loss": 1.4521, "step": 11410 }, { "epoch": 0.05, "grad_norm": 1.8581777811050415, "learning_rate": 0.0002, "loss": 1.566, "step": 11420 }, { "epoch": 0.05, "grad_norm": 2.212538957595825, "learning_rate": 0.0002, "loss": 1.5657, "step": 11430 }, { "epoch": 0.05, "grad_norm": 1.8733242750167847, "learning_rate": 0.0002, "loss": 1.6738, "step": 11440 }, { "epoch": 0.05, "grad_norm": 2.1914021968841553, "learning_rate": 0.0002, "loss": 1.4415, "step": 11450 }, { "epoch": 0.05, "grad_norm": 1.3208856582641602, "learning_rate": 0.0002, "loss": 1.1934, "step": 11460 }, { "epoch": 0.05, "grad_norm": 1.1816093921661377, "learning_rate": 0.0002, "loss": 1.5221, "step": 11470 }, { "epoch": 0.05, "grad_norm": 2.718468189239502, "learning_rate": 0.0002, "loss": 1.5997, "step": 11480 }, { "epoch": 0.05, "grad_norm": 2.1151044368743896, "learning_rate": 0.0002, "loss": 1.5541, "step": 11490 }, { "epoch": 0.05, "grad_norm": 1.9726210832595825, "learning_rate": 0.0002, "loss": 1.4098, "step": 11500 }, { "epoch": 0.05, "grad_norm": 1.5261651277542114, "learning_rate": 0.0002, "loss": 1.5831, "step": 11510 }, { "epoch": 0.05, "grad_norm": 1.4768158197402954, "learning_rate": 0.0002, "loss": 1.6033, "step": 11520 }, { "epoch": 0.05, "grad_norm": 2.694697141647339, "learning_rate": 0.0002, "loss": 1.4881, "step": 11530 }, { "epoch": 0.05, "grad_norm": 3.660682439804077, "learning_rate": 0.0002, "loss": 1.6258, "step": 11540 }, { "epoch": 0.05, "grad_norm": 1.678511381149292, "learning_rate": 0.0002, "loss": 1.4908, "step": 11550 }, { "epoch": 0.05, "grad_norm": 2.7471611499786377, "learning_rate": 0.0002, "loss": 1.5887, "step": 11560 }, { "epoch": 0.05, "grad_norm": 2.940774917602539, "learning_rate": 0.0002, "loss": 1.4863, "step": 11570 }, { "epoch": 0.05, "grad_norm": 2.1168856620788574, "learning_rate": 0.0002, "loss": 1.6226, "step": 11580 }, { "epoch": 0.05, "grad_norm": 2.0024027824401855, "learning_rate": 0.0002, "loss": 1.3893, "step": 11590 }, { "epoch": 0.05, "grad_norm": 2.512274742126465, "learning_rate": 0.0002, "loss": 1.6749, "step": 11600 }, { "epoch": 0.05, "grad_norm": 1.9915728569030762, "learning_rate": 0.0002, "loss": 1.4745, "step": 11610 }, { "epoch": 0.05, "grad_norm": 0.9832770824432373, "learning_rate": 0.0002, "loss": 1.4083, "step": 11620 }, { "epoch": 0.05, "grad_norm": 2.185338020324707, "learning_rate": 0.0002, "loss": 1.4357, "step": 11630 }, { "epoch": 0.05, "grad_norm": 2.6672377586364746, "learning_rate": 0.0002, "loss": 1.4133, "step": 11640 }, { "epoch": 0.05, "grad_norm": 2.944570302963257, "learning_rate": 0.0002, "loss": 1.6117, "step": 11650 }, { "epoch": 0.05, "grad_norm": 4.762887954711914, "learning_rate": 0.0002, "loss": 1.2563, "step": 11660 }, { "epoch": 0.05, "grad_norm": 2.663079023361206, "learning_rate": 0.0002, "loss": 1.4115, "step": 11670 }, { "epoch": 0.05, "grad_norm": 3.0015015602111816, "learning_rate": 0.0002, "loss": 1.4162, "step": 11680 }, { "epoch": 0.05, "grad_norm": 2.396171808242798, "learning_rate": 0.0002, "loss": 1.4558, "step": 11690 }, { "epoch": 0.05, "grad_norm": 1.7560505867004395, "learning_rate": 0.0002, "loss": 1.3592, "step": 11700 }, { "epoch": 0.05, "grad_norm": 4.635378837585449, "learning_rate": 0.0002, "loss": 1.5278, "step": 11710 }, { "epoch": 0.05, "grad_norm": 1.5887041091918945, "learning_rate": 0.0002, "loss": 1.5545, "step": 11720 }, { "epoch": 0.05, "grad_norm": 2.7149689197540283, "learning_rate": 0.0002, "loss": 1.5713, "step": 11730 }, { "epoch": 0.05, "grad_norm": 1.4947538375854492, "learning_rate": 0.0002, "loss": 1.4413, "step": 11740 }, { "epoch": 0.05, "grad_norm": 2.5944738388061523, "learning_rate": 0.0002, "loss": 1.4357, "step": 11750 }, { "epoch": 0.05, "grad_norm": 4.133978843688965, "learning_rate": 0.0002, "loss": 1.5933, "step": 11760 }, { "epoch": 0.05, "grad_norm": 1.4633005857467651, "learning_rate": 0.0002, "loss": 1.6954, "step": 11770 }, { "epoch": 0.05, "grad_norm": 2.3126778602600098, "learning_rate": 0.0002, "loss": 1.5596, "step": 11780 }, { "epoch": 0.05, "grad_norm": 2.5342612266540527, "learning_rate": 0.0002, "loss": 1.6418, "step": 11790 }, { "epoch": 0.05, "grad_norm": 3.3423168659210205, "learning_rate": 0.0002, "loss": 1.3049, "step": 11800 }, { "epoch": 0.05, "grad_norm": 2.786278486251831, "learning_rate": 0.0002, "loss": 1.6015, "step": 11810 }, { "epoch": 0.05, "grad_norm": 3.1910557746887207, "learning_rate": 0.0002, "loss": 1.7088, "step": 11820 }, { "epoch": 0.05, "grad_norm": 4.3060712814331055, "learning_rate": 0.0002, "loss": 1.3265, "step": 11830 }, { "epoch": 0.05, "grad_norm": 5.461009979248047, "learning_rate": 0.0002, "loss": 1.5117, "step": 11840 }, { "epoch": 0.05, "grad_norm": 1.760895848274231, "learning_rate": 0.0002, "loss": 1.4925, "step": 11850 }, { "epoch": 0.05, "grad_norm": 2.0398292541503906, "learning_rate": 0.0002, "loss": 1.4139, "step": 11860 }, { "epoch": 0.05, "grad_norm": 3.450397253036499, "learning_rate": 0.0002, "loss": 1.471, "step": 11870 }, { "epoch": 0.05, "grad_norm": 2.641833782196045, "learning_rate": 0.0002, "loss": 1.3813, "step": 11880 }, { "epoch": 0.05, "grad_norm": 2.4364418983459473, "learning_rate": 0.0002, "loss": 1.4434, "step": 11890 }, { "epoch": 0.05, "grad_norm": 2.4235455989837646, "learning_rate": 0.0002, "loss": 1.5651, "step": 11900 }, { "epoch": 0.05, "grad_norm": 1.9672844409942627, "learning_rate": 0.0002, "loss": 1.5779, "step": 11910 }, { "epoch": 0.05, "grad_norm": 1.913709044456482, "learning_rate": 0.0002, "loss": 1.3541, "step": 11920 }, { "epoch": 0.05, "grad_norm": 3.2269437313079834, "learning_rate": 0.0002, "loss": 1.6159, "step": 11930 }, { "epoch": 0.05, "grad_norm": 2.355872869491577, "learning_rate": 0.0002, "loss": 1.5156, "step": 11940 }, { "epoch": 0.05, "grad_norm": 1.5621236562728882, "learning_rate": 0.0002, "loss": 1.4017, "step": 11950 }, { "epoch": 0.05, "grad_norm": 1.757209062576294, "learning_rate": 0.0002, "loss": 1.7183, "step": 11960 }, { "epoch": 0.05, "grad_norm": 2.4067585468292236, "learning_rate": 0.0002, "loss": 1.5141, "step": 11970 }, { "epoch": 0.05, "grad_norm": 1.9901301860809326, "learning_rate": 0.0002, "loss": 1.3664, "step": 11980 }, { "epoch": 0.05, "grad_norm": 1.8144941329956055, "learning_rate": 0.0002, "loss": 1.5519, "step": 11990 }, { "epoch": 0.05, "grad_norm": 2.8013811111450195, "learning_rate": 0.0002, "loss": 1.646, "step": 12000 }, { "epoch": 0.05, "grad_norm": 2.4654316902160645, "learning_rate": 0.0002, "loss": 1.5895, "step": 12010 }, { "epoch": 0.05, "grad_norm": 3.305109739303589, "learning_rate": 0.0002, "loss": 1.5396, "step": 12020 }, { "epoch": 0.05, "grad_norm": 3.617060422897339, "learning_rate": 0.0002, "loss": 1.4869, "step": 12030 }, { "epoch": 0.05, "grad_norm": 1.9786045551300049, "learning_rate": 0.0002, "loss": 1.4256, "step": 12040 }, { "epoch": 0.05, "grad_norm": 2.861557722091675, "learning_rate": 0.0002, "loss": 1.5701, "step": 12050 }, { "epoch": 0.05, "grad_norm": 2.717815399169922, "learning_rate": 0.0002, "loss": 1.5773, "step": 12060 }, { "epoch": 0.05, "grad_norm": 1.5914254188537598, "learning_rate": 0.0002, "loss": 1.539, "step": 12070 }, { "epoch": 0.05, "grad_norm": 1.5610898733139038, "learning_rate": 0.0002, "loss": 1.755, "step": 12080 }, { "epoch": 0.05, "grad_norm": 2.625190496444702, "learning_rate": 0.0002, "loss": 1.5593, "step": 12090 }, { "epoch": 0.05, "grad_norm": 3.285456895828247, "learning_rate": 0.0002, "loss": 1.5615, "step": 12100 }, { "epoch": 0.05, "grad_norm": 2.3390791416168213, "learning_rate": 0.0002, "loss": 1.4053, "step": 12110 }, { "epoch": 0.05, "grad_norm": 1.4436551332473755, "learning_rate": 0.0002, "loss": 1.5758, "step": 12120 }, { "epoch": 0.05, "grad_norm": 1.8974682092666626, "learning_rate": 0.0002, "loss": 1.6636, "step": 12130 }, { "epoch": 0.05, "grad_norm": 2.0435781478881836, "learning_rate": 0.0002, "loss": 1.4744, "step": 12140 }, { "epoch": 0.05, "grad_norm": 3.1840403079986572, "learning_rate": 0.0002, "loss": 1.4026, "step": 12150 }, { "epoch": 0.05, "grad_norm": 3.8844058513641357, "learning_rate": 0.0002, "loss": 1.4576, "step": 12160 }, { "epoch": 0.05, "grad_norm": 5.03693962097168, "learning_rate": 0.0002, "loss": 1.4423, "step": 12170 }, { "epoch": 0.05, "grad_norm": 1.896777629852295, "learning_rate": 0.0002, "loss": 1.3152, "step": 12180 }, { "epoch": 0.05, "grad_norm": 1.980075478553772, "learning_rate": 0.0002, "loss": 1.6685, "step": 12190 }, { "epoch": 0.05, "grad_norm": 2.673046112060547, "learning_rate": 0.0002, "loss": 1.4205, "step": 12200 }, { "epoch": 0.05, "grad_norm": 3.413900375366211, "learning_rate": 0.0002, "loss": 1.5843, "step": 12210 }, { "epoch": 0.05, "grad_norm": 2.4403717517852783, "learning_rate": 0.0002, "loss": 1.45, "step": 12220 }, { "epoch": 0.05, "grad_norm": 3.8979074954986572, "learning_rate": 0.0002, "loss": 1.5255, "step": 12230 }, { "epoch": 0.05, "grad_norm": 2.662895441055298, "learning_rate": 0.0002, "loss": 1.2447, "step": 12240 }, { "epoch": 0.05, "grad_norm": 2.3295223712921143, "learning_rate": 0.0002, "loss": 1.7468, "step": 12250 }, { "epoch": 0.05, "grad_norm": 2.127802848815918, "learning_rate": 0.0002, "loss": 1.502, "step": 12260 }, { "epoch": 0.05, "grad_norm": 1.015741229057312, "learning_rate": 0.0002, "loss": 1.3787, "step": 12270 }, { "epoch": 0.05, "grad_norm": 2.3471806049346924, "learning_rate": 0.0002, "loss": 1.5766, "step": 12280 }, { "epoch": 0.05, "grad_norm": 1.7059611082077026, "learning_rate": 0.0002, "loss": 1.3717, "step": 12290 }, { "epoch": 0.05, "grad_norm": 2.7707438468933105, "learning_rate": 0.0002, "loss": 1.3278, "step": 12300 }, { "epoch": 0.05, "grad_norm": 2.5210978984832764, "learning_rate": 0.0002, "loss": 1.2591, "step": 12310 }, { "epoch": 0.05, "grad_norm": 2.7936208248138428, "learning_rate": 0.0002, "loss": 1.5387, "step": 12320 }, { "epoch": 0.05, "grad_norm": 3.178338050842285, "learning_rate": 0.0002, "loss": 1.5131, "step": 12330 }, { "epoch": 0.05, "grad_norm": 2.145134210586548, "learning_rate": 0.0002, "loss": 1.6204, "step": 12340 }, { "epoch": 0.05, "grad_norm": 1.0811946392059326, "learning_rate": 0.0002, "loss": 1.5111, "step": 12350 }, { "epoch": 0.05, "grad_norm": 2.2416553497314453, "learning_rate": 0.0002, "loss": 1.6966, "step": 12360 }, { "epoch": 0.05, "grad_norm": 2.9884517192840576, "learning_rate": 0.0002, "loss": 1.3913, "step": 12370 }, { "epoch": 0.05, "grad_norm": 1.7220945358276367, "learning_rate": 0.0002, "loss": 1.4806, "step": 12380 }, { "epoch": 0.05, "grad_norm": 2.3411877155303955, "learning_rate": 0.0002, "loss": 1.5434, "step": 12390 }, { "epoch": 0.05, "grad_norm": 1.6678677797317505, "learning_rate": 0.0002, "loss": 1.6481, "step": 12400 }, { "epoch": 0.05, "grad_norm": 2.911093235015869, "learning_rate": 0.0002, "loss": 1.6442, "step": 12410 }, { "epoch": 0.05, "grad_norm": 3.147317409515381, "learning_rate": 0.0002, "loss": 1.3084, "step": 12420 }, { "epoch": 0.05, "grad_norm": 2.375945568084717, "learning_rate": 0.0002, "loss": 1.3955, "step": 12430 }, { "epoch": 0.05, "grad_norm": 2.4865498542785645, "learning_rate": 0.0002, "loss": 1.3724, "step": 12440 }, { "epoch": 0.05, "grad_norm": 1.3380862474441528, "learning_rate": 0.0002, "loss": 1.4467, "step": 12450 }, { "epoch": 0.05, "grad_norm": 2.576388120651245, "learning_rate": 0.0002, "loss": 1.3205, "step": 12460 }, { "epoch": 0.05, "grad_norm": 3.9563231468200684, "learning_rate": 0.0002, "loss": 1.7504, "step": 12470 }, { "epoch": 0.05, "grad_norm": 2.7601165771484375, "learning_rate": 0.0002, "loss": 1.3423, "step": 12480 }, { "epoch": 0.05, "grad_norm": 2.6606462001800537, "learning_rate": 0.0002, "loss": 1.5153, "step": 12490 }, { "epoch": 0.05, "grad_norm": 3.4109950065612793, "learning_rate": 0.0002, "loss": 1.3473, "step": 12500 }, { "epoch": 0.05, "grad_norm": 1.4249670505523682, "learning_rate": 0.0002, "loss": 1.568, "step": 12510 }, { "epoch": 0.05, "grad_norm": 2.6350479125976562, "learning_rate": 0.0002, "loss": 1.4022, "step": 12520 }, { "epoch": 0.05, "grad_norm": 2.3093392848968506, "learning_rate": 0.0002, "loss": 1.4105, "step": 12530 }, { "epoch": 0.05, "grad_norm": 2.2910423278808594, "learning_rate": 0.0002, "loss": 1.4499, "step": 12540 }, { "epoch": 0.05, "grad_norm": 2.1575872898101807, "learning_rate": 0.0002, "loss": 1.4806, "step": 12550 }, { "epoch": 0.05, "grad_norm": 2.840871572494507, "learning_rate": 0.0002, "loss": 1.7184, "step": 12560 }, { "epoch": 0.05, "grad_norm": 2.77730393409729, "learning_rate": 0.0002, "loss": 1.5212, "step": 12570 }, { "epoch": 0.05, "grad_norm": 2.0363104343414307, "learning_rate": 0.0002, "loss": 1.5401, "step": 12580 }, { "epoch": 0.05, "grad_norm": 0.737808883190155, "learning_rate": 0.0002, "loss": 1.4246, "step": 12590 }, { "epoch": 0.05, "grad_norm": 2.0807909965515137, "learning_rate": 0.0002, "loss": 1.4173, "step": 12600 }, { "epoch": 0.05, "grad_norm": 1.3380087614059448, "learning_rate": 0.0002, "loss": 1.4773, "step": 12610 }, { "epoch": 0.05, "grad_norm": 3.4506592750549316, "learning_rate": 0.0002, "loss": 1.2388, "step": 12620 }, { "epoch": 0.05, "grad_norm": 3.05733060836792, "learning_rate": 0.0002, "loss": 1.2592, "step": 12630 }, { "epoch": 0.05, "grad_norm": 2.8035011291503906, "learning_rate": 0.0002, "loss": 1.4551, "step": 12640 }, { "epoch": 0.05, "grad_norm": 2.143364906311035, "learning_rate": 0.0002, "loss": 1.4808, "step": 12650 }, { "epoch": 0.05, "grad_norm": 2.318676710128784, "learning_rate": 0.0002, "loss": 1.6396, "step": 12660 }, { "epoch": 0.05, "grad_norm": 3.0842974185943604, "learning_rate": 0.0002, "loss": 1.5033, "step": 12670 }, { "epoch": 0.05, "grad_norm": 2.287768840789795, "learning_rate": 0.0002, "loss": 1.4699, "step": 12680 }, { "epoch": 0.05, "grad_norm": 1.9555660486221313, "learning_rate": 0.0002, "loss": 1.4724, "step": 12690 }, { "epoch": 0.05, "grad_norm": 1.977107048034668, "learning_rate": 0.0002, "loss": 1.4335, "step": 12700 }, { "epoch": 0.05, "grad_norm": 2.9799745082855225, "learning_rate": 0.0002, "loss": 1.2722, "step": 12710 }, { "epoch": 0.05, "grad_norm": 2.642850637435913, "learning_rate": 0.0002, "loss": 1.3955, "step": 12720 }, { "epoch": 0.05, "grad_norm": 2.281667709350586, "learning_rate": 0.0002, "loss": 1.5828, "step": 12730 }, { "epoch": 0.05, "grad_norm": 2.1411523818969727, "learning_rate": 0.0002, "loss": 1.4438, "step": 12740 }, { "epoch": 0.05, "grad_norm": 2.5626065731048584, "learning_rate": 0.0002, "loss": 1.5056, "step": 12750 }, { "epoch": 0.05, "grad_norm": 3.1733696460723877, "learning_rate": 0.0002, "loss": 1.2976, "step": 12760 }, { "epoch": 0.05, "grad_norm": 2.6811435222625732, "learning_rate": 0.0002, "loss": 1.3328, "step": 12770 }, { "epoch": 0.05, "grad_norm": 3.5871856212615967, "learning_rate": 0.0002, "loss": 1.5257, "step": 12780 }, { "epoch": 0.05, "grad_norm": 2.27850604057312, "learning_rate": 0.0002, "loss": 1.349, "step": 12790 }, { "epoch": 0.05, "grad_norm": 2.0845065116882324, "learning_rate": 0.0002, "loss": 1.7267, "step": 12800 }, { "epoch": 0.05, "grad_norm": 2.372403144836426, "learning_rate": 0.0002, "loss": 1.414, "step": 12810 }, { "epoch": 0.05, "grad_norm": 2.0217883586883545, "learning_rate": 0.0002, "loss": 1.5269, "step": 12820 }, { "epoch": 0.05, "grad_norm": 2.2953386306762695, "learning_rate": 0.0002, "loss": 1.3997, "step": 12830 }, { "epoch": 0.05, "grad_norm": 1.774943232536316, "learning_rate": 0.0002, "loss": 1.6069, "step": 12840 }, { "epoch": 0.05, "grad_norm": 1.6370368003845215, "learning_rate": 0.0002, "loss": 1.5391, "step": 12850 }, { "epoch": 0.05, "grad_norm": 3.576094627380371, "learning_rate": 0.0002, "loss": 1.8123, "step": 12860 }, { "epoch": 0.05, "grad_norm": 1.9301096200942993, "learning_rate": 0.0002, "loss": 1.3591, "step": 12870 }, { "epoch": 0.05, "grad_norm": 2.4153244495391846, "learning_rate": 0.0002, "loss": 1.4494, "step": 12880 }, { "epoch": 0.05, "grad_norm": 2.142298698425293, "learning_rate": 0.0002, "loss": 1.3398, "step": 12890 }, { "epoch": 0.05, "grad_norm": 2.3717033863067627, "learning_rate": 0.0002, "loss": 1.5548, "step": 12900 }, { "epoch": 0.05, "grad_norm": 1.8222994804382324, "learning_rate": 0.0002, "loss": 1.5637, "step": 12910 }, { "epoch": 0.05, "grad_norm": 1.9324641227722168, "learning_rate": 0.0002, "loss": 1.4855, "step": 12920 }, { "epoch": 0.05, "grad_norm": 1.9104112386703491, "learning_rate": 0.0002, "loss": 1.6691, "step": 12930 }, { "epoch": 0.05, "grad_norm": 1.842777967453003, "learning_rate": 0.0002, "loss": 1.3365, "step": 12940 }, { "epoch": 0.05, "grad_norm": 2.5748934745788574, "learning_rate": 0.0002, "loss": 1.5378, "step": 12950 }, { "epoch": 0.05, "grad_norm": 8.052715301513672, "learning_rate": 0.0002, "loss": 1.3626, "step": 12960 }, { "epoch": 0.05, "grad_norm": 4.895645618438721, "learning_rate": 0.0002, "loss": 1.1865, "step": 12970 }, { "epoch": 0.05, "grad_norm": 3.752565622329712, "learning_rate": 0.0002, "loss": 1.5763, "step": 12980 }, { "epoch": 0.05, "grad_norm": 2.315669298171997, "learning_rate": 0.0002, "loss": 1.5275, "step": 12990 }, { "epoch": 0.05, "grad_norm": 3.1068577766418457, "learning_rate": 0.0002, "loss": 1.4822, "step": 13000 }, { "epoch": 0.05, "grad_norm": 2.5025601387023926, "learning_rate": 0.0002, "loss": 1.3889, "step": 13010 }, { "epoch": 0.05, "grad_norm": 1.8430736064910889, "learning_rate": 0.0002, "loss": 1.5402, "step": 13020 }, { "epoch": 0.05, "grad_norm": 4.520298957824707, "learning_rate": 0.0002, "loss": 1.3541, "step": 13030 }, { "epoch": 0.05, "grad_norm": 4.4615864753723145, "learning_rate": 0.0002, "loss": 1.61, "step": 13040 }, { "epoch": 0.05, "grad_norm": 2.7028567790985107, "learning_rate": 0.0002, "loss": 1.3381, "step": 13050 }, { "epoch": 0.05, "grad_norm": 2.4872047901153564, "learning_rate": 0.0002, "loss": 1.3692, "step": 13060 }, { "epoch": 0.05, "grad_norm": 1.389922857284546, "learning_rate": 0.0002, "loss": 1.5844, "step": 13070 }, { "epoch": 0.05, "grad_norm": 7.344569683074951, "learning_rate": 0.0002, "loss": 1.4111, "step": 13080 }, { "epoch": 0.05, "grad_norm": 4.216423988342285, "learning_rate": 0.0002, "loss": 1.3921, "step": 13090 }, { "epoch": 0.05, "grad_norm": 2.6008083820343018, "learning_rate": 0.0002, "loss": 1.3978, "step": 13100 }, { "epoch": 0.05, "grad_norm": 1.6156994104385376, "learning_rate": 0.0002, "loss": 1.3403, "step": 13110 }, { "epoch": 0.05, "grad_norm": 2.0894150733947754, "learning_rate": 0.0002, "loss": 1.4076, "step": 13120 }, { "epoch": 0.05, "grad_norm": 3.9197299480438232, "learning_rate": 0.0002, "loss": 1.5188, "step": 13130 }, { "epoch": 0.05, "grad_norm": 2.8065922260284424, "learning_rate": 0.0002, "loss": 1.7332, "step": 13140 }, { "epoch": 0.05, "grad_norm": 2.3392419815063477, "learning_rate": 0.0002, "loss": 1.4588, "step": 13150 }, { "epoch": 0.05, "grad_norm": 1.3630917072296143, "learning_rate": 0.0002, "loss": 1.5617, "step": 13160 }, { "epoch": 0.05, "grad_norm": 2.2936253547668457, "learning_rate": 0.0002, "loss": 1.6493, "step": 13170 }, { "epoch": 0.05, "grad_norm": 2.4574482440948486, "learning_rate": 0.0002, "loss": 1.4564, "step": 13180 }, { "epoch": 0.05, "grad_norm": 1.6017999649047852, "learning_rate": 0.0002, "loss": 1.6044, "step": 13190 }, { "epoch": 0.05, "grad_norm": 4.069362640380859, "learning_rate": 0.0002, "loss": 1.4784, "step": 13200 }, { "epoch": 0.05, "grad_norm": 3.527803421020508, "learning_rate": 0.0002, "loss": 1.4187, "step": 13210 }, { "epoch": 0.05, "grad_norm": 1.4399003982543945, "learning_rate": 0.0002, "loss": 1.7946, "step": 13220 }, { "epoch": 0.05, "grad_norm": 1.725510597229004, "learning_rate": 0.0002, "loss": 1.5858, "step": 13230 }, { "epoch": 0.05, "grad_norm": 2.5649046897888184, "learning_rate": 0.0002, "loss": 1.636, "step": 13240 }, { "epoch": 0.05, "grad_norm": 3.0763111114501953, "learning_rate": 0.0002, "loss": 1.0959, "step": 13250 }, { "epoch": 0.05, "grad_norm": 1.9642081260681152, "learning_rate": 0.0002, "loss": 1.506, "step": 13260 }, { "epoch": 0.05, "grad_norm": 1.7233045101165771, "learning_rate": 0.0002, "loss": 1.3199, "step": 13270 }, { "epoch": 0.05, "grad_norm": 3.3774096965789795, "learning_rate": 0.0002, "loss": 1.5425, "step": 13280 }, { "epoch": 0.05, "grad_norm": 2.641364812850952, "learning_rate": 0.0002, "loss": 1.3038, "step": 13290 }, { "epoch": 0.05, "grad_norm": 2.810990571975708, "learning_rate": 0.0002, "loss": 1.5684, "step": 13300 }, { "epoch": 0.05, "grad_norm": 2.334082841873169, "learning_rate": 0.0002, "loss": 1.5987, "step": 13310 }, { "epoch": 0.05, "grad_norm": 2.322411060333252, "learning_rate": 0.0002, "loss": 1.55, "step": 13320 }, { "epoch": 0.05, "grad_norm": 3.2807016372680664, "learning_rate": 0.0002, "loss": 1.5715, "step": 13330 }, { "epoch": 0.05, "grad_norm": 1.6837371587753296, "learning_rate": 0.0002, "loss": 1.4338, "step": 13340 }, { "epoch": 0.05, "grad_norm": 1.8861652612686157, "learning_rate": 0.0002, "loss": 1.5652, "step": 13350 }, { "epoch": 0.05, "grad_norm": 3.5783355236053467, "learning_rate": 0.0002, "loss": 1.618, "step": 13360 }, { "epoch": 0.05, "grad_norm": 2.2970011234283447, "learning_rate": 0.0002, "loss": 1.6943, "step": 13370 }, { "epoch": 0.05, "grad_norm": 2.2545549869537354, "learning_rate": 0.0002, "loss": 1.5774, "step": 13380 }, { "epoch": 0.05, "grad_norm": 2.0612571239471436, "learning_rate": 0.0002, "loss": 1.5786, "step": 13390 }, { "epoch": 0.05, "grad_norm": 4.627199649810791, "learning_rate": 0.0002, "loss": 1.5129, "step": 13400 }, { "epoch": 0.05, "grad_norm": 2.087303876876831, "learning_rate": 0.0002, "loss": 1.6134, "step": 13410 }, { "epoch": 0.05, "grad_norm": 1.6917705535888672, "learning_rate": 0.0002, "loss": 1.3086, "step": 13420 }, { "epoch": 0.05, "grad_norm": 9.20538330078125, "learning_rate": 0.0002, "loss": 1.4102, "step": 13430 }, { "epoch": 0.05, "grad_norm": 2.9402689933776855, "learning_rate": 0.0002, "loss": 1.6175, "step": 13440 }, { "epoch": 0.05, "grad_norm": 1.818021535873413, "learning_rate": 0.0002, "loss": 1.439, "step": 13450 }, { "epoch": 0.05, "grad_norm": 2.1232593059539795, "learning_rate": 0.0002, "loss": 1.6524, "step": 13460 }, { "epoch": 0.05, "grad_norm": 2.8523855209350586, "learning_rate": 0.0002, "loss": 1.544, "step": 13470 }, { "epoch": 0.05, "grad_norm": 2.2183289527893066, "learning_rate": 0.0002, "loss": 1.5307, "step": 13480 }, { "epoch": 0.05, "grad_norm": 2.468364953994751, "learning_rate": 0.0002, "loss": 1.6325, "step": 13490 }, { "epoch": 0.05, "grad_norm": 2.0278167724609375, "learning_rate": 0.0002, "loss": 1.2963, "step": 13500 }, { "epoch": 0.05, "grad_norm": 1.9513875246047974, "learning_rate": 0.0002, "loss": 1.3436, "step": 13510 }, { "epoch": 0.06, "grad_norm": 2.4587512016296387, "learning_rate": 0.0002, "loss": 1.4948, "step": 13520 }, { "epoch": 0.06, "grad_norm": 2.1419622898101807, "learning_rate": 0.0002, "loss": 1.4532, "step": 13530 }, { "epoch": 0.06, "grad_norm": 2.139202117919922, "learning_rate": 0.0002, "loss": 1.1888, "step": 13540 }, { "epoch": 0.06, "grad_norm": 1.631632924079895, "learning_rate": 0.0002, "loss": 1.3888, "step": 13550 }, { "epoch": 0.06, "grad_norm": 2.141676902770996, "learning_rate": 0.0002, "loss": 1.7877, "step": 13560 }, { "epoch": 0.06, "grad_norm": 3.47462797164917, "learning_rate": 0.0002, "loss": 1.396, "step": 13570 }, { "epoch": 0.06, "grad_norm": 2.410607099533081, "learning_rate": 0.0002, "loss": 1.6335, "step": 13580 }, { "epoch": 0.06, "grad_norm": 3.0616073608398438, "learning_rate": 0.0002, "loss": 1.466, "step": 13590 }, { "epoch": 0.06, "grad_norm": 3.1438283920288086, "learning_rate": 0.0002, "loss": 1.4336, "step": 13600 }, { "epoch": 0.06, "grad_norm": 3.0383880138397217, "learning_rate": 0.0002, "loss": 1.466, "step": 13610 }, { "epoch": 0.06, "grad_norm": 3.897578001022339, "learning_rate": 0.0002, "loss": 1.6239, "step": 13620 }, { "epoch": 0.06, "grad_norm": 2.1789731979370117, "learning_rate": 0.0002, "loss": 1.1775, "step": 13630 }, { "epoch": 0.06, "grad_norm": 1.4128843545913696, "learning_rate": 0.0002, "loss": 1.3601, "step": 13640 }, { "epoch": 0.06, "grad_norm": 2.2503883838653564, "learning_rate": 0.0002, "loss": 1.7741, "step": 13650 }, { "epoch": 0.06, "grad_norm": 1.91132652759552, "learning_rate": 0.0002, "loss": 1.2129, "step": 13660 }, { "epoch": 0.06, "grad_norm": 1.4614183902740479, "learning_rate": 0.0002, "loss": 1.5362, "step": 13670 }, { "epoch": 0.06, "grad_norm": 2.382657527923584, "learning_rate": 0.0002, "loss": 1.4818, "step": 13680 }, { "epoch": 0.06, "grad_norm": 1.8020012378692627, "learning_rate": 0.0002, "loss": 1.7218, "step": 13690 }, { "epoch": 0.06, "grad_norm": 2.7673497200012207, "learning_rate": 0.0002, "loss": 1.3573, "step": 13700 }, { "epoch": 0.06, "grad_norm": 3.339601993560791, "learning_rate": 0.0002, "loss": 1.7758, "step": 13710 }, { "epoch": 0.06, "grad_norm": 2.143822193145752, "learning_rate": 0.0002, "loss": 1.3996, "step": 13720 }, { "epoch": 0.06, "grad_norm": 1.5548444986343384, "learning_rate": 0.0002, "loss": 1.5244, "step": 13730 }, { "epoch": 0.06, "grad_norm": 2.8328633308410645, "learning_rate": 0.0002, "loss": 1.4088, "step": 13740 }, { "epoch": 0.06, "grad_norm": 3.356036424636841, "learning_rate": 0.0002, "loss": 1.4347, "step": 13750 }, { "epoch": 0.06, "grad_norm": 2.2646734714508057, "learning_rate": 0.0002, "loss": 1.3547, "step": 13760 }, { "epoch": 0.06, "grad_norm": 2.2983036041259766, "learning_rate": 0.0002, "loss": 1.3609, "step": 13770 }, { "epoch": 0.06, "grad_norm": 3.0939998626708984, "learning_rate": 0.0002, "loss": 1.4704, "step": 13780 }, { "epoch": 0.06, "grad_norm": 1.6911048889160156, "learning_rate": 0.0002, "loss": 1.6688, "step": 13790 }, { "epoch": 0.06, "grad_norm": 6.645343780517578, "learning_rate": 0.0002, "loss": 1.3714, "step": 13800 }, { "epoch": 0.06, "grad_norm": 2.325322151184082, "learning_rate": 0.0002, "loss": 1.6233, "step": 13810 }, { "epoch": 0.06, "grad_norm": 1.2938079833984375, "learning_rate": 0.0002, "loss": 1.7618, "step": 13820 }, { "epoch": 0.06, "grad_norm": 3.6791915893554688, "learning_rate": 0.0002, "loss": 1.7786, "step": 13830 }, { "epoch": 0.06, "grad_norm": 2.645019292831421, "learning_rate": 0.0002, "loss": 1.3342, "step": 13840 }, { "epoch": 0.06, "grad_norm": 3.0650224685668945, "learning_rate": 0.0002, "loss": 1.34, "step": 13850 }, { "epoch": 0.06, "grad_norm": 1.9584311246871948, "learning_rate": 0.0002, "loss": 1.4314, "step": 13860 }, { "epoch": 0.06, "grad_norm": 1.7227412462234497, "learning_rate": 0.0002, "loss": 1.6025, "step": 13870 }, { "epoch": 0.06, "grad_norm": 2.362330913543701, "learning_rate": 0.0002, "loss": 1.6105, "step": 13880 }, { "epoch": 0.06, "grad_norm": 1.215280532836914, "learning_rate": 0.0002, "loss": 1.4412, "step": 13890 }, { "epoch": 0.06, "grad_norm": 2.64626407623291, "learning_rate": 0.0002, "loss": 1.6282, "step": 13900 }, { "epoch": 0.06, "grad_norm": 2.066565752029419, "learning_rate": 0.0002, "loss": 1.6818, "step": 13910 }, { "epoch": 0.06, "grad_norm": 1.6384437084197998, "learning_rate": 0.0002, "loss": 1.6259, "step": 13920 }, { "epoch": 0.06, "grad_norm": 2.593418598175049, "learning_rate": 0.0002, "loss": 1.5547, "step": 13930 }, { "epoch": 0.06, "grad_norm": 2.4121460914611816, "learning_rate": 0.0002, "loss": 1.2931, "step": 13940 }, { "epoch": 0.06, "grad_norm": 1.9080333709716797, "learning_rate": 0.0002, "loss": 1.4214, "step": 13950 }, { "epoch": 0.06, "grad_norm": 3.136892557144165, "learning_rate": 0.0002, "loss": 1.6551, "step": 13960 }, { "epoch": 0.06, "grad_norm": 1.6046448945999146, "learning_rate": 0.0002, "loss": 1.5585, "step": 13970 }, { "epoch": 0.06, "grad_norm": 1.6188585758209229, "learning_rate": 0.0002, "loss": 1.4609, "step": 13980 }, { "epoch": 0.06, "grad_norm": 1.4741960763931274, "learning_rate": 0.0002, "loss": 1.6084, "step": 13990 }, { "epoch": 0.06, "grad_norm": 2.2537519931793213, "learning_rate": 0.0002, "loss": 1.4246, "step": 14000 }, { "epoch": 0.06, "grad_norm": 1.7846534252166748, "learning_rate": 0.0002, "loss": 1.4712, "step": 14010 }, { "epoch": 0.06, "grad_norm": 2.1802475452423096, "learning_rate": 0.0002, "loss": 1.3273, "step": 14020 }, { "epoch": 0.06, "grad_norm": 3.8917479515075684, "learning_rate": 0.0002, "loss": 1.4448, "step": 14030 }, { "epoch": 0.06, "grad_norm": 4.401137828826904, "learning_rate": 0.0002, "loss": 1.5108, "step": 14040 }, { "epoch": 0.06, "grad_norm": 1.9029589891433716, "learning_rate": 0.0002, "loss": 1.3395, "step": 14050 }, { "epoch": 0.06, "grad_norm": 2.2426581382751465, "learning_rate": 0.0002, "loss": 1.4976, "step": 14060 }, { "epoch": 0.06, "grad_norm": 2.592045783996582, "learning_rate": 0.0002, "loss": 1.3677, "step": 14070 }, { "epoch": 0.06, "grad_norm": 2.3121516704559326, "learning_rate": 0.0002, "loss": 1.582, "step": 14080 }, { "epoch": 0.06, "grad_norm": 4.0786943435668945, "learning_rate": 0.0002, "loss": 1.6579, "step": 14090 }, { "epoch": 0.06, "grad_norm": 2.631723642349243, "learning_rate": 0.0002, "loss": 1.3962, "step": 14100 }, { "epoch": 0.06, "grad_norm": 3.0756709575653076, "learning_rate": 0.0002, "loss": 1.5551, "step": 14110 }, { "epoch": 0.06, "grad_norm": 3.2943546772003174, "learning_rate": 0.0002, "loss": 1.5768, "step": 14120 }, { "epoch": 0.06, "grad_norm": 4.415708541870117, "learning_rate": 0.0002, "loss": 1.4811, "step": 14130 }, { "epoch": 0.06, "grad_norm": 1.9878580570220947, "learning_rate": 0.0002, "loss": 1.6576, "step": 14140 }, { "epoch": 0.06, "grad_norm": 13.154341697692871, "learning_rate": 0.0002, "loss": 1.6624, "step": 14150 }, { "epoch": 0.06, "grad_norm": 2.2977538108825684, "learning_rate": 0.0002, "loss": 1.4789, "step": 14160 }, { "epoch": 0.06, "grad_norm": 3.2061350345611572, "learning_rate": 0.0002, "loss": 1.4857, "step": 14170 }, { "epoch": 0.06, "grad_norm": 2.054908514022827, "learning_rate": 0.0002, "loss": 1.4347, "step": 14180 }, { "epoch": 0.06, "grad_norm": 2.18485164642334, "learning_rate": 0.0002, "loss": 1.5035, "step": 14190 }, { "epoch": 0.06, "grad_norm": 2.292954444885254, "learning_rate": 0.0002, "loss": 1.4616, "step": 14200 }, { "epoch": 0.06, "grad_norm": 2.677495241165161, "learning_rate": 0.0002, "loss": 1.6317, "step": 14210 }, { "epoch": 0.06, "grad_norm": 2.480175018310547, "learning_rate": 0.0002, "loss": 1.6372, "step": 14220 }, { "epoch": 0.06, "grad_norm": 3.0484721660614014, "learning_rate": 0.0002, "loss": 1.5602, "step": 14230 }, { "epoch": 0.06, "grad_norm": 2.547044038772583, "learning_rate": 0.0002, "loss": 1.3433, "step": 14240 }, { "epoch": 0.06, "grad_norm": 2.4177279472351074, "learning_rate": 0.0002, "loss": 1.5738, "step": 14250 }, { "epoch": 0.06, "grad_norm": 3.297773838043213, "learning_rate": 0.0002, "loss": 1.4696, "step": 14260 }, { "epoch": 0.06, "grad_norm": 2.212527275085449, "learning_rate": 0.0002, "loss": 1.4309, "step": 14270 }, { "epoch": 0.06, "grad_norm": 3.200590133666992, "learning_rate": 0.0002, "loss": 1.6102, "step": 14280 }, { "epoch": 0.06, "grad_norm": 2.7100017070770264, "learning_rate": 0.0002, "loss": 1.3942, "step": 14290 }, { "epoch": 0.06, "grad_norm": 3.3315577507019043, "learning_rate": 0.0002, "loss": 1.447, "step": 14300 }, { "epoch": 0.06, "grad_norm": 1.8341234922409058, "learning_rate": 0.0002, "loss": 1.6457, "step": 14310 }, { "epoch": 0.06, "grad_norm": 3.7765302658081055, "learning_rate": 0.0002, "loss": 1.3691, "step": 14320 }, { "epoch": 0.06, "grad_norm": 4.862399101257324, "learning_rate": 0.0002, "loss": 1.6957, "step": 14330 }, { "epoch": 0.06, "grad_norm": 2.637957811355591, "learning_rate": 0.0002, "loss": 1.3409, "step": 14340 }, { "epoch": 0.06, "grad_norm": 1.9434045553207397, "learning_rate": 0.0002, "loss": 1.6394, "step": 14350 }, { "epoch": 0.06, "grad_norm": 2.2863032817840576, "learning_rate": 0.0002, "loss": 1.7227, "step": 14360 }, { "epoch": 0.06, "grad_norm": 1.894768238067627, "learning_rate": 0.0002, "loss": 1.6434, "step": 14370 }, { "epoch": 0.06, "grad_norm": 3.5913662910461426, "learning_rate": 0.0002, "loss": 1.7149, "step": 14380 }, { "epoch": 0.06, "grad_norm": 1.2909668684005737, "learning_rate": 0.0002, "loss": 1.4266, "step": 14390 }, { "epoch": 0.06, "grad_norm": 2.0327179431915283, "learning_rate": 0.0002, "loss": 1.4845, "step": 14400 }, { "epoch": 0.06, "grad_norm": 2.0423827171325684, "learning_rate": 0.0002, "loss": 1.4863, "step": 14410 }, { "epoch": 0.06, "grad_norm": 3.245973587036133, "learning_rate": 0.0002, "loss": 1.4722, "step": 14420 }, { "epoch": 0.06, "grad_norm": 3.812004327774048, "learning_rate": 0.0002, "loss": 1.4518, "step": 14430 }, { "epoch": 0.06, "grad_norm": 2.840451955795288, "learning_rate": 0.0002, "loss": 1.3562, "step": 14440 }, { "epoch": 0.06, "grad_norm": 3.3365023136138916, "learning_rate": 0.0002, "loss": 1.4202, "step": 14450 }, { "epoch": 0.06, "grad_norm": 2.02469801902771, "learning_rate": 0.0002, "loss": 1.6, "step": 14460 }, { "epoch": 0.06, "grad_norm": 3.5689280033111572, "learning_rate": 0.0002, "loss": 1.4203, "step": 14470 }, { "epoch": 0.06, "grad_norm": 4.820521354675293, "learning_rate": 0.0002, "loss": 1.5004, "step": 14480 }, { "epoch": 0.06, "grad_norm": 2.620676279067993, "learning_rate": 0.0002, "loss": 1.3671, "step": 14490 }, { "epoch": 0.06, "grad_norm": 2.4588518142700195, "learning_rate": 0.0002, "loss": 1.5198, "step": 14500 }, { "epoch": 0.06, "grad_norm": 3.0810680389404297, "learning_rate": 0.0002, "loss": 1.5443, "step": 14510 }, { "epoch": 0.06, "grad_norm": 2.890225648880005, "learning_rate": 0.0002, "loss": 1.5694, "step": 14520 }, { "epoch": 0.06, "grad_norm": 2.803964376449585, "learning_rate": 0.0002, "loss": 1.5104, "step": 14530 }, { "epoch": 0.06, "grad_norm": 2.0993869304656982, "learning_rate": 0.0002, "loss": 1.4238, "step": 14540 }, { "epoch": 0.06, "grad_norm": 3.6521825790405273, "learning_rate": 0.0002, "loss": 1.6894, "step": 14550 }, { "epoch": 0.06, "grad_norm": 3.12386417388916, "learning_rate": 0.0002, "loss": 1.5743, "step": 14560 }, { "epoch": 0.06, "grad_norm": 2.334963798522949, "learning_rate": 0.0002, "loss": 1.422, "step": 14570 }, { "epoch": 0.06, "grad_norm": 2.180354356765747, "learning_rate": 0.0002, "loss": 1.5837, "step": 14580 }, { "epoch": 0.06, "grad_norm": 1.775795817375183, "learning_rate": 0.0002, "loss": 1.5392, "step": 14590 }, { "epoch": 0.06, "grad_norm": 2.428835868835449, "learning_rate": 0.0002, "loss": 1.5849, "step": 14600 }, { "epoch": 0.06, "grad_norm": 3.308579444885254, "learning_rate": 0.0002, "loss": 1.5625, "step": 14610 }, { "epoch": 0.06, "grad_norm": 1.254639744758606, "learning_rate": 0.0002, "loss": 1.3408, "step": 14620 }, { "epoch": 0.06, "grad_norm": 2.071617364883423, "learning_rate": 0.0002, "loss": 1.7254, "step": 14630 }, { "epoch": 0.06, "grad_norm": 1.8536956310272217, "learning_rate": 0.0002, "loss": 1.5434, "step": 14640 }, { "epoch": 0.06, "grad_norm": 3.366196870803833, "learning_rate": 0.0002, "loss": 1.5891, "step": 14650 }, { "epoch": 0.06, "grad_norm": 1.893955111503601, "learning_rate": 0.0002, "loss": 1.6477, "step": 14660 }, { "epoch": 0.06, "grad_norm": 2.3141732215881348, "learning_rate": 0.0002, "loss": 1.6629, "step": 14670 }, { "epoch": 0.06, "grad_norm": 1.8695708513259888, "learning_rate": 0.0002, "loss": 1.5613, "step": 14680 }, { "epoch": 0.06, "grad_norm": 2.625892400741577, "learning_rate": 0.0002, "loss": 1.492, "step": 14690 }, { "epoch": 0.06, "grad_norm": 2.7318382263183594, "learning_rate": 0.0002, "loss": 1.4244, "step": 14700 }, { "epoch": 0.06, "grad_norm": 3.119800329208374, "learning_rate": 0.0002, "loss": 1.5485, "step": 14710 }, { "epoch": 0.06, "grad_norm": 1.969766616821289, "learning_rate": 0.0002, "loss": 1.6111, "step": 14720 }, { "epoch": 0.06, "grad_norm": 3.219896078109741, "learning_rate": 0.0002, "loss": 1.5327, "step": 14730 }, { "epoch": 0.06, "grad_norm": 2.3823537826538086, "learning_rate": 0.0002, "loss": 1.6587, "step": 14740 }, { "epoch": 0.06, "grad_norm": 1.7632097005844116, "learning_rate": 0.0002, "loss": 1.5505, "step": 14750 }, { "epoch": 0.06, "grad_norm": 1.253278136253357, "learning_rate": 0.0002, "loss": 1.4275, "step": 14760 }, { "epoch": 0.06, "grad_norm": 3.2250359058380127, "learning_rate": 0.0002, "loss": 1.4828, "step": 14770 }, { "epoch": 0.06, "grad_norm": 1.8711284399032593, "learning_rate": 0.0002, "loss": 1.4563, "step": 14780 }, { "epoch": 0.06, "grad_norm": 2.151470899581909, "learning_rate": 0.0002, "loss": 1.4923, "step": 14790 }, { "epoch": 0.06, "grad_norm": 1.7852730751037598, "learning_rate": 0.0002, "loss": 1.369, "step": 14800 }, { "epoch": 0.06, "grad_norm": 1.4918668270111084, "learning_rate": 0.0002, "loss": 1.2756, "step": 14810 }, { "epoch": 0.06, "grad_norm": 2.39117169380188, "learning_rate": 0.0002, "loss": 1.4224, "step": 14820 }, { "epoch": 0.06, "grad_norm": 2.3889076709747314, "learning_rate": 0.0002, "loss": 1.6147, "step": 14830 }, { "epoch": 0.06, "grad_norm": 1.7733474969863892, "learning_rate": 0.0002, "loss": 1.4541, "step": 14840 }, { "epoch": 0.06, "grad_norm": 2.9489643573760986, "learning_rate": 0.0002, "loss": 1.7623, "step": 14850 }, { "epoch": 0.06, "grad_norm": 3.24725604057312, "learning_rate": 0.0002, "loss": 1.6043, "step": 14860 }, { "epoch": 0.06, "grad_norm": 2.64306902885437, "learning_rate": 0.0002, "loss": 1.4879, "step": 14870 }, { "epoch": 0.06, "grad_norm": 2.876760244369507, "learning_rate": 0.0002, "loss": 1.6613, "step": 14880 }, { "epoch": 0.06, "grad_norm": 4.67704439163208, "learning_rate": 0.0002, "loss": 1.5805, "step": 14890 }, { "epoch": 0.06, "grad_norm": 2.7421956062316895, "learning_rate": 0.0002, "loss": 1.5781, "step": 14900 }, { "epoch": 0.06, "grad_norm": 1.8739968538284302, "learning_rate": 0.0002, "loss": 1.5341, "step": 14910 }, { "epoch": 0.06, "grad_norm": 2.749786615371704, "learning_rate": 0.0002, "loss": 1.3897, "step": 14920 }, { "epoch": 0.06, "grad_norm": 2.6351468563079834, "learning_rate": 0.0002, "loss": 1.3, "step": 14930 }, { "epoch": 0.06, "grad_norm": 2.9854612350463867, "learning_rate": 0.0002, "loss": 1.2122, "step": 14940 }, { "epoch": 0.06, "grad_norm": 2.588635206222534, "learning_rate": 0.0002, "loss": 1.6751, "step": 14950 }, { "epoch": 0.06, "grad_norm": 2.6352288722991943, "learning_rate": 0.0002, "loss": 1.4095, "step": 14960 }, { "epoch": 0.06, "grad_norm": 2.243826150894165, "learning_rate": 0.0002, "loss": 1.6023, "step": 14970 }, { "epoch": 0.06, "grad_norm": 3.2040107250213623, "learning_rate": 0.0002, "loss": 1.5294, "step": 14980 }, { "epoch": 0.06, "grad_norm": 3.2604732513427734, "learning_rate": 0.0002, "loss": 1.6659, "step": 14990 }, { "epoch": 0.06, "grad_norm": 2.767345905303955, "learning_rate": 0.0002, "loss": 1.4956, "step": 15000 }, { "epoch": 0.06, "grad_norm": 1.7129725217819214, "learning_rate": 0.0002, "loss": 1.665, "step": 15010 }, { "epoch": 0.06, "grad_norm": 1.9997265338897705, "learning_rate": 0.0002, "loss": 1.4009, "step": 15020 }, { "epoch": 0.06, "grad_norm": 2.093994379043579, "learning_rate": 0.0002, "loss": 1.2995, "step": 15030 }, { "epoch": 0.06, "grad_norm": 2.373432159423828, "learning_rate": 0.0002, "loss": 1.6358, "step": 15040 }, { "epoch": 0.06, "grad_norm": 3.5151174068450928, "learning_rate": 0.0002, "loss": 1.4727, "step": 15050 }, { "epoch": 0.06, "grad_norm": 3.2480647563934326, "learning_rate": 0.0002, "loss": 1.5434, "step": 15060 }, { "epoch": 0.06, "grad_norm": 2.080599784851074, "learning_rate": 0.0002, "loss": 1.5869, "step": 15070 }, { "epoch": 0.06, "grad_norm": 3.6360745429992676, "learning_rate": 0.0002, "loss": 1.4894, "step": 15080 }, { "epoch": 0.06, "grad_norm": 2.9621846675872803, "learning_rate": 0.0002, "loss": 1.7279, "step": 15090 }, { "epoch": 0.06, "grad_norm": 2.4492969512939453, "learning_rate": 0.0002, "loss": 1.4839, "step": 15100 }, { "epoch": 0.06, "grad_norm": 5.054863929748535, "learning_rate": 0.0002, "loss": 1.8192, "step": 15110 }, { "epoch": 0.06, "grad_norm": 1.9881037473678589, "learning_rate": 0.0002, "loss": 1.618, "step": 15120 }, { "epoch": 0.06, "grad_norm": 1.903610348701477, "learning_rate": 0.0002, "loss": 1.5619, "step": 15130 }, { "epoch": 0.06, "grad_norm": 1.6852997541427612, "learning_rate": 0.0002, "loss": 1.4896, "step": 15140 }, { "epoch": 0.06, "grad_norm": 2.0626564025878906, "learning_rate": 0.0002, "loss": 1.402, "step": 15150 }, { "epoch": 0.06, "grad_norm": 2.2431769371032715, "learning_rate": 0.0002, "loss": 1.4859, "step": 15160 }, { "epoch": 0.06, "grad_norm": 2.2500293254852295, "learning_rate": 0.0002, "loss": 1.4434, "step": 15170 }, { "epoch": 0.06, "grad_norm": 2.498338222503662, "learning_rate": 0.0002, "loss": 1.6345, "step": 15180 }, { "epoch": 0.06, "grad_norm": 3.5596303939819336, "learning_rate": 0.0002, "loss": 1.5553, "step": 15190 }, { "epoch": 0.06, "grad_norm": 1.695564866065979, "learning_rate": 0.0002, "loss": 1.4278, "step": 15200 }, { "epoch": 0.06, "grad_norm": 2.549098253250122, "learning_rate": 0.0002, "loss": 1.39, "step": 15210 }, { "epoch": 0.06, "grad_norm": 2.248055934906006, "learning_rate": 0.0002, "loss": 1.3453, "step": 15220 }, { "epoch": 0.06, "grad_norm": 2.5204341411590576, "learning_rate": 0.0002, "loss": 1.3049, "step": 15230 }, { "epoch": 0.06, "grad_norm": 2.130812168121338, "learning_rate": 0.0002, "loss": 1.5704, "step": 15240 }, { "epoch": 0.06, "grad_norm": 2.9856367111206055, "learning_rate": 0.0002, "loss": 1.8903, "step": 15250 }, { "epoch": 0.06, "grad_norm": 1.9935299158096313, "learning_rate": 0.0002, "loss": 1.5216, "step": 15260 }, { "epoch": 0.06, "grad_norm": 3.552006959915161, "learning_rate": 0.0002, "loss": 1.3204, "step": 15270 }, { "epoch": 0.06, "grad_norm": 2.2369370460510254, "learning_rate": 0.0002, "loss": 1.3567, "step": 15280 }, { "epoch": 0.06, "grad_norm": 2.111288547515869, "learning_rate": 0.0002, "loss": 1.6567, "step": 15290 }, { "epoch": 0.06, "grad_norm": 1.7375365495681763, "learning_rate": 0.0002, "loss": 1.3141, "step": 15300 }, { "epoch": 0.06, "grad_norm": 2.2135233879089355, "learning_rate": 0.0002, "loss": 1.4591, "step": 15310 }, { "epoch": 0.06, "grad_norm": 3.1015665531158447, "learning_rate": 0.0002, "loss": 1.7097, "step": 15320 }, { "epoch": 0.06, "grad_norm": 3.3735368251800537, "learning_rate": 0.0002, "loss": 1.6648, "step": 15330 }, { "epoch": 0.06, "grad_norm": 2.0800204277038574, "learning_rate": 0.0002, "loss": 1.2534, "step": 15340 }, { "epoch": 0.06, "grad_norm": 1.9286389350891113, "learning_rate": 0.0002, "loss": 1.2898, "step": 15350 }, { "epoch": 0.06, "grad_norm": 2.449716091156006, "learning_rate": 0.0002, "loss": 1.702, "step": 15360 }, { "epoch": 0.06, "grad_norm": 2.5334055423736572, "learning_rate": 0.0002, "loss": 1.4143, "step": 15370 }, { "epoch": 0.06, "grad_norm": 1.9982925653457642, "learning_rate": 0.0002, "loss": 1.6114, "step": 15380 }, { "epoch": 0.06, "grad_norm": 1.8462787866592407, "learning_rate": 0.0002, "loss": 1.5329, "step": 15390 }, { "epoch": 0.06, "grad_norm": 2.208853244781494, "learning_rate": 0.0002, "loss": 1.4659, "step": 15400 }, { "epoch": 0.06, "grad_norm": 3.5735363960266113, "learning_rate": 0.0002, "loss": 1.4623, "step": 15410 }, { "epoch": 0.06, "grad_norm": 3.103959798812866, "learning_rate": 0.0002, "loss": 1.6446, "step": 15420 }, { "epoch": 0.06, "grad_norm": 2.4990735054016113, "learning_rate": 0.0002, "loss": 1.4249, "step": 15430 }, { "epoch": 0.06, "grad_norm": 2.961481809616089, "learning_rate": 0.0002, "loss": 1.4784, "step": 15440 }, { "epoch": 0.06, "grad_norm": 2.6449053287506104, "learning_rate": 0.0002, "loss": 1.3948, "step": 15450 }, { "epoch": 0.06, "grad_norm": 3.01859974861145, "learning_rate": 0.0002, "loss": 1.5577, "step": 15460 }, { "epoch": 0.06, "grad_norm": 1.8644800186157227, "learning_rate": 0.0002, "loss": 1.3467, "step": 15470 }, { "epoch": 0.06, "grad_norm": 3.526700019836426, "learning_rate": 0.0002, "loss": 1.5659, "step": 15480 }, { "epoch": 0.06, "grad_norm": 2.0201988220214844, "learning_rate": 0.0002, "loss": 1.5889, "step": 15490 }, { "epoch": 0.06, "grad_norm": 2.4507126808166504, "learning_rate": 0.0002, "loss": 1.5097, "step": 15500 }, { "epoch": 0.06, "grad_norm": 1.5720025300979614, "learning_rate": 0.0002, "loss": 1.248, "step": 15510 }, { "epoch": 0.06, "grad_norm": 4.436056137084961, "learning_rate": 0.0002, "loss": 1.6951, "step": 15520 }, { "epoch": 0.06, "grad_norm": 2.4229910373687744, "learning_rate": 0.0002, "loss": 1.5231, "step": 15530 }, { "epoch": 0.06, "grad_norm": 2.176337242126465, "learning_rate": 0.0002, "loss": 1.363, "step": 15540 }, { "epoch": 0.06, "grad_norm": 3.4314839839935303, "learning_rate": 0.0002, "loss": 1.5503, "step": 15550 }, { "epoch": 0.06, "grad_norm": 2.3823282718658447, "learning_rate": 0.0002, "loss": 1.5571, "step": 15560 }, { "epoch": 0.06, "grad_norm": 4.155631065368652, "learning_rate": 0.0002, "loss": 1.6072, "step": 15570 }, { "epoch": 0.06, "grad_norm": 2.3668065071105957, "learning_rate": 0.0002, "loss": 1.4887, "step": 15580 }, { "epoch": 0.06, "grad_norm": 1.2746326923370361, "learning_rate": 0.0002, "loss": 1.587, "step": 15590 }, { "epoch": 0.06, "grad_norm": 1.9012266397476196, "learning_rate": 0.0002, "loss": 1.4393, "step": 15600 }, { "epoch": 0.06, "grad_norm": 1.7418338060379028, "learning_rate": 0.0002, "loss": 1.6533, "step": 15610 }, { "epoch": 0.06, "grad_norm": 2.451042890548706, "learning_rate": 0.0002, "loss": 1.4486, "step": 15620 }, { "epoch": 0.06, "grad_norm": 2.2347571849823, "learning_rate": 0.0002, "loss": 1.7601, "step": 15630 }, { "epoch": 0.06, "grad_norm": 1.8207886219024658, "learning_rate": 0.0002, "loss": 1.6064, "step": 15640 }, { "epoch": 0.06, "grad_norm": 3.937354564666748, "learning_rate": 0.0002, "loss": 1.3827, "step": 15650 }, { "epoch": 0.06, "grad_norm": 2.2860772609710693, "learning_rate": 0.0002, "loss": 1.436, "step": 15660 }, { "epoch": 0.06, "grad_norm": 3.96006441116333, "learning_rate": 0.0002, "loss": 1.6804, "step": 15670 }, { "epoch": 0.06, "grad_norm": 2.0836658477783203, "learning_rate": 0.0002, "loss": 1.4747, "step": 15680 }, { "epoch": 0.06, "grad_norm": 2.5822412967681885, "learning_rate": 0.0002, "loss": 1.4219, "step": 15690 }, { "epoch": 0.06, "grad_norm": 2.1448721885681152, "learning_rate": 0.0002, "loss": 1.381, "step": 15700 }, { "epoch": 0.06, "grad_norm": 3.088535785675049, "learning_rate": 0.0002, "loss": 1.464, "step": 15710 }, { "epoch": 0.06, "grad_norm": 1.7941774129867554, "learning_rate": 0.0002, "loss": 1.3913, "step": 15720 }, { "epoch": 0.06, "grad_norm": 4.408509254455566, "learning_rate": 0.0002, "loss": 1.5678, "step": 15730 }, { "epoch": 0.06, "grad_norm": 2.4366040229797363, "learning_rate": 0.0002, "loss": 1.4489, "step": 15740 }, { "epoch": 0.06, "grad_norm": 1.9171148538589478, "learning_rate": 0.0002, "loss": 1.4139, "step": 15750 }, { "epoch": 0.06, "grad_norm": 3.195218801498413, "learning_rate": 0.0002, "loss": 1.4487, "step": 15760 }, { "epoch": 0.06, "grad_norm": 2.873710870742798, "learning_rate": 0.0002, "loss": 1.6393, "step": 15770 }, { "epoch": 0.06, "grad_norm": 2.3600094318389893, "learning_rate": 0.0002, "loss": 1.3942, "step": 15780 }, { "epoch": 0.06, "grad_norm": 1.940338134765625, "learning_rate": 0.0002, "loss": 1.5551, "step": 15790 }, { "epoch": 0.06, "grad_norm": 3.0677289962768555, "learning_rate": 0.0002, "loss": 1.5509, "step": 15800 }, { "epoch": 0.06, "grad_norm": 1.9370156526565552, "learning_rate": 0.0002, "loss": 1.4761, "step": 15810 }, { "epoch": 0.06, "grad_norm": 2.165083885192871, "learning_rate": 0.0002, "loss": 1.2902, "step": 15820 }, { "epoch": 0.06, "grad_norm": 1.8504325151443481, "learning_rate": 0.0002, "loss": 1.3411, "step": 15830 }, { "epoch": 0.06, "grad_norm": 1.6289781332015991, "learning_rate": 0.0002, "loss": 1.5639, "step": 15840 }, { "epoch": 0.06, "grad_norm": 2.8182148933410645, "learning_rate": 0.0002, "loss": 1.5096, "step": 15850 }, { "epoch": 0.06, "grad_norm": 1.544786810874939, "learning_rate": 0.0002, "loss": 1.4641, "step": 15860 }, { "epoch": 0.06, "grad_norm": 6.842846870422363, "learning_rate": 0.0002, "loss": 1.6052, "step": 15870 }, { "epoch": 0.06, "grad_norm": 3.8773159980773926, "learning_rate": 0.0002, "loss": 1.4977, "step": 15880 }, { "epoch": 0.06, "grad_norm": 1.9370803833007812, "learning_rate": 0.0002, "loss": 1.5825, "step": 15890 }, { "epoch": 0.06, "grad_norm": 5.96719217300415, "learning_rate": 0.0002, "loss": 1.6369, "step": 15900 }, { "epoch": 0.06, "grad_norm": 2.1458797454833984, "learning_rate": 0.0002, "loss": 1.6403, "step": 15910 }, { "epoch": 0.06, "grad_norm": 2.93300199508667, "learning_rate": 0.0002, "loss": 1.5475, "step": 15920 }, { "epoch": 0.06, "grad_norm": 2.0605628490448, "learning_rate": 0.0002, "loss": 1.5603, "step": 15930 }, { "epoch": 0.06, "grad_norm": 2.6742656230926514, "learning_rate": 0.0002, "loss": 1.6246, "step": 15940 }, { "epoch": 0.06, "grad_norm": 3.1313092708587646, "learning_rate": 0.0002, "loss": 1.5168, "step": 15950 }, { "epoch": 0.06, "grad_norm": 4.331511974334717, "learning_rate": 0.0002, "loss": 1.4279, "step": 15960 }, { "epoch": 0.07, "grad_norm": 2.7668936252593994, "learning_rate": 0.0002, "loss": 1.4949, "step": 15970 }, { "epoch": 0.07, "grad_norm": 2.318352222442627, "learning_rate": 0.0002, "loss": 1.2269, "step": 15980 }, { "epoch": 0.07, "grad_norm": 3.7862956523895264, "learning_rate": 0.0002, "loss": 1.5565, "step": 15990 }, { "epoch": 0.07, "grad_norm": 2.102426290512085, "learning_rate": 0.0002, "loss": 1.3715, "step": 16000 }, { "epoch": 0.07, "grad_norm": 2.645273208618164, "learning_rate": 0.0002, "loss": 1.6875, "step": 16010 }, { "epoch": 0.07, "grad_norm": 2.5313150882720947, "learning_rate": 0.0002, "loss": 1.2705, "step": 16020 }, { "epoch": 0.07, "grad_norm": 4.142549514770508, "learning_rate": 0.0002, "loss": 1.6136, "step": 16030 }, { "epoch": 0.07, "grad_norm": 3.0588505268096924, "learning_rate": 0.0002, "loss": 1.4008, "step": 16040 }, { "epoch": 0.07, "grad_norm": 3.614771842956543, "learning_rate": 0.0002, "loss": 1.4978, "step": 16050 }, { "epoch": 0.07, "grad_norm": 2.1302266120910645, "learning_rate": 0.0002, "loss": 1.6029, "step": 16060 }, { "epoch": 0.07, "grad_norm": 2.1277008056640625, "learning_rate": 0.0002, "loss": 1.5201, "step": 16070 }, { "epoch": 0.07, "grad_norm": 2.213343858718872, "learning_rate": 0.0002, "loss": 1.5046, "step": 16080 }, { "epoch": 0.07, "grad_norm": 1.8283053636550903, "learning_rate": 0.0002, "loss": 1.5765, "step": 16090 }, { "epoch": 0.07, "grad_norm": 2.0042619705200195, "learning_rate": 0.0002, "loss": 1.6839, "step": 16100 }, { "epoch": 0.07, "grad_norm": 2.621737241744995, "learning_rate": 0.0002, "loss": 1.783, "step": 16110 }, { "epoch": 0.07, "grad_norm": 2.871393918991089, "learning_rate": 0.0002, "loss": 1.3855, "step": 16120 }, { "epoch": 0.07, "grad_norm": 4.354872703552246, "learning_rate": 0.0002, "loss": 1.556, "step": 16130 }, { "epoch": 0.07, "grad_norm": 5.499149799346924, "learning_rate": 0.0002, "loss": 1.461, "step": 16140 }, { "epoch": 0.07, "grad_norm": 1.8422507047653198, "learning_rate": 0.0002, "loss": 1.4595, "step": 16150 }, { "epoch": 0.07, "grad_norm": 1.3165950775146484, "learning_rate": 0.0002, "loss": 1.3729, "step": 16160 }, { "epoch": 0.07, "grad_norm": 1.1344366073608398, "learning_rate": 0.0002, "loss": 1.6631, "step": 16170 }, { "epoch": 0.07, "grad_norm": 2.6210415363311768, "learning_rate": 0.0002, "loss": 1.5951, "step": 16180 }, { "epoch": 0.07, "grad_norm": 2.7843034267425537, "learning_rate": 0.0002, "loss": 1.5642, "step": 16190 }, { "epoch": 0.07, "grad_norm": 2.045693874359131, "learning_rate": 0.0002, "loss": 1.2932, "step": 16200 }, { "epoch": 0.07, "grad_norm": 2.634047508239746, "learning_rate": 0.0002, "loss": 1.602, "step": 16210 }, { "epoch": 0.07, "grad_norm": 4.099408149719238, "learning_rate": 0.0002, "loss": 1.5168, "step": 16220 }, { "epoch": 0.07, "grad_norm": 2.2350270748138428, "learning_rate": 0.0002, "loss": 1.4282, "step": 16230 }, { "epoch": 0.07, "grad_norm": 3.7219901084899902, "learning_rate": 0.0002, "loss": 1.4477, "step": 16240 }, { "epoch": 0.07, "grad_norm": 1.4492441415786743, "learning_rate": 0.0002, "loss": 1.607, "step": 16250 }, { "epoch": 0.07, "grad_norm": 2.261380672454834, "learning_rate": 0.0002, "loss": 1.3543, "step": 16260 }, { "epoch": 0.07, "grad_norm": 2.5306966304779053, "learning_rate": 0.0002, "loss": 1.8233, "step": 16270 }, { "epoch": 0.07, "grad_norm": 3.527097463607788, "learning_rate": 0.0002, "loss": 1.7346, "step": 16280 }, { "epoch": 0.07, "grad_norm": 2.1880557537078857, "learning_rate": 0.0002, "loss": 1.7324, "step": 16290 }, { "epoch": 0.07, "grad_norm": 3.239741086959839, "learning_rate": 0.0002, "loss": 1.5136, "step": 16300 }, { "epoch": 0.07, "grad_norm": 2.8151652812957764, "learning_rate": 0.0002, "loss": 1.5277, "step": 16310 }, { "epoch": 0.07, "grad_norm": 3.4414749145507812, "learning_rate": 0.0002, "loss": 1.6889, "step": 16320 }, { "epoch": 0.07, "grad_norm": 3.0405032634735107, "learning_rate": 0.0002, "loss": 1.4591, "step": 16330 }, { "epoch": 0.07, "grad_norm": 3.189396858215332, "learning_rate": 0.0002, "loss": 1.64, "step": 16340 }, { "epoch": 0.07, "grad_norm": 1.8027597665786743, "learning_rate": 0.0002, "loss": 1.4124, "step": 16350 }, { "epoch": 0.07, "grad_norm": 1.5699474811553955, "learning_rate": 0.0002, "loss": 1.4428, "step": 16360 }, { "epoch": 0.07, "grad_norm": 2.8853776454925537, "learning_rate": 0.0002, "loss": 1.7399, "step": 16370 }, { "epoch": 0.07, "grad_norm": 2.1074647903442383, "learning_rate": 0.0002, "loss": 1.5764, "step": 16380 }, { "epoch": 0.07, "grad_norm": 2.079133987426758, "learning_rate": 0.0002, "loss": 1.3704, "step": 16390 }, { "epoch": 0.07, "grad_norm": 1.6228352785110474, "learning_rate": 0.0002, "loss": 1.4367, "step": 16400 }, { "epoch": 0.07, "grad_norm": 2.8225808143615723, "learning_rate": 0.0002, "loss": 1.5411, "step": 16410 }, { "epoch": 0.07, "grad_norm": 2.204834222793579, "learning_rate": 0.0002, "loss": 1.5111, "step": 16420 }, { "epoch": 0.07, "grad_norm": 1.8771271705627441, "learning_rate": 0.0002, "loss": 1.6084, "step": 16430 }, { "epoch": 0.07, "grad_norm": 2.464444398880005, "learning_rate": 0.0002, "loss": 1.5406, "step": 16440 }, { "epoch": 0.07, "grad_norm": 2.519049882888794, "learning_rate": 0.0002, "loss": 1.7349, "step": 16450 }, { "epoch": 0.07, "grad_norm": 1.3391666412353516, "learning_rate": 0.0002, "loss": 1.4423, "step": 16460 }, { "epoch": 0.07, "grad_norm": 1.9829685688018799, "learning_rate": 0.0002, "loss": 1.7244, "step": 16470 }, { "epoch": 0.07, "grad_norm": 3.093388319015503, "learning_rate": 0.0002, "loss": 1.3953, "step": 16480 }, { "epoch": 0.07, "grad_norm": 3.923902988433838, "learning_rate": 0.0002, "loss": 1.5743, "step": 16490 }, { "epoch": 0.07, "grad_norm": 4.005373477935791, "learning_rate": 0.0002, "loss": 1.3045, "step": 16500 }, { "epoch": 0.07, "grad_norm": 1.8947213888168335, "learning_rate": 0.0002, "loss": 1.5503, "step": 16510 }, { "epoch": 0.07, "grad_norm": 1.9035135507583618, "learning_rate": 0.0002, "loss": 1.5673, "step": 16520 }, { "epoch": 0.07, "grad_norm": 2.6828866004943848, "learning_rate": 0.0002, "loss": 1.3943, "step": 16530 }, { "epoch": 0.07, "grad_norm": 1.5945796966552734, "learning_rate": 0.0002, "loss": 1.6886, "step": 16540 }, { "epoch": 0.07, "grad_norm": 1.9752012491226196, "learning_rate": 0.0002, "loss": 1.5986, "step": 16550 }, { "epoch": 0.07, "grad_norm": 7.558920383453369, "learning_rate": 0.0002, "loss": 1.2399, "step": 16560 }, { "epoch": 0.07, "grad_norm": 1.9098950624465942, "learning_rate": 0.0002, "loss": 1.4339, "step": 16570 }, { "epoch": 0.07, "grad_norm": 1.5878316164016724, "learning_rate": 0.0002, "loss": 1.3494, "step": 16580 }, { "epoch": 0.07, "grad_norm": 4.907020092010498, "learning_rate": 0.0002, "loss": 1.4522, "step": 16590 }, { "epoch": 0.07, "grad_norm": 3.2639882564544678, "learning_rate": 0.0002, "loss": 1.253, "step": 16600 }, { "epoch": 0.07, "grad_norm": 3.452446937561035, "learning_rate": 0.0002, "loss": 1.4242, "step": 16610 }, { "epoch": 0.07, "grad_norm": 1.873043179512024, "learning_rate": 0.0002, "loss": 1.7039, "step": 16620 }, { "epoch": 0.07, "grad_norm": 1.8205375671386719, "learning_rate": 0.0002, "loss": 1.3207, "step": 16630 }, { "epoch": 0.07, "grad_norm": 1.9322974681854248, "learning_rate": 0.0002, "loss": 1.2818, "step": 16640 }, { "epoch": 0.07, "grad_norm": 2.063448190689087, "learning_rate": 0.0002, "loss": 1.3433, "step": 16650 }, { "epoch": 0.07, "grad_norm": 2.9421141147613525, "learning_rate": 0.0002, "loss": 1.4483, "step": 16660 }, { "epoch": 0.07, "grad_norm": 1.5054194927215576, "learning_rate": 0.0002, "loss": 1.5311, "step": 16670 }, { "epoch": 0.07, "grad_norm": 2.8956685066223145, "learning_rate": 0.0002, "loss": 1.5722, "step": 16680 }, { "epoch": 0.07, "grad_norm": 3.0655293464660645, "learning_rate": 0.0002, "loss": 1.2618, "step": 16690 }, { "epoch": 0.07, "grad_norm": 3.3379204273223877, "learning_rate": 0.0002, "loss": 1.5606, "step": 16700 }, { "epoch": 0.07, "grad_norm": 1.6339889764785767, "learning_rate": 0.0002, "loss": 1.4728, "step": 16710 }, { "epoch": 0.07, "grad_norm": 2.6504132747650146, "learning_rate": 0.0002, "loss": 1.7241, "step": 16720 }, { "epoch": 0.07, "grad_norm": 2.843687057495117, "learning_rate": 0.0002, "loss": 1.4788, "step": 16730 }, { "epoch": 0.07, "grad_norm": 2.8069005012512207, "learning_rate": 0.0002, "loss": 1.3857, "step": 16740 }, { "epoch": 0.07, "grad_norm": 1.35329270362854, "learning_rate": 0.0002, "loss": 1.4073, "step": 16750 }, { "epoch": 0.07, "grad_norm": 1.476354956626892, "learning_rate": 0.0002, "loss": 1.1701, "step": 16760 }, { "epoch": 0.07, "grad_norm": 1.991111159324646, "learning_rate": 0.0002, "loss": 1.376, "step": 16770 }, { "epoch": 0.07, "grad_norm": 1.7778503894805908, "learning_rate": 0.0002, "loss": 1.6418, "step": 16780 }, { "epoch": 0.07, "grad_norm": 1.4698892831802368, "learning_rate": 0.0002, "loss": 1.5718, "step": 16790 }, { "epoch": 0.07, "grad_norm": 3.156111717224121, "learning_rate": 0.0002, "loss": 1.441, "step": 16800 }, { "epoch": 0.07, "grad_norm": 1.75564444065094, "learning_rate": 0.0002, "loss": 1.3586, "step": 16810 }, { "epoch": 0.07, "grad_norm": 2.4802587032318115, "learning_rate": 0.0002, "loss": 1.6072, "step": 16820 }, { "epoch": 0.07, "grad_norm": 1.8455830812454224, "learning_rate": 0.0002, "loss": 1.4485, "step": 16830 }, { "epoch": 0.07, "grad_norm": 1.745429277420044, "learning_rate": 0.0002, "loss": 1.6678, "step": 16840 }, { "epoch": 0.07, "grad_norm": 2.8564858436584473, "learning_rate": 0.0002, "loss": 1.6744, "step": 16850 }, { "epoch": 0.07, "grad_norm": 9.103362083435059, "learning_rate": 0.0002, "loss": 1.669, "step": 16860 }, { "epoch": 0.07, "grad_norm": 2.652200937271118, "learning_rate": 0.0002, "loss": 1.4433, "step": 16870 }, { "epoch": 0.07, "grad_norm": 2.137446641921997, "learning_rate": 0.0002, "loss": 1.5923, "step": 16880 }, { "epoch": 0.07, "grad_norm": 2.8438355922698975, "learning_rate": 0.0002, "loss": 1.6183, "step": 16890 }, { "epoch": 0.07, "grad_norm": 2.898047924041748, "learning_rate": 0.0002, "loss": 1.4719, "step": 16900 }, { "epoch": 0.07, "grad_norm": 3.02872633934021, "learning_rate": 0.0002, "loss": 1.7087, "step": 16910 }, { "epoch": 0.07, "grad_norm": 3.23569393157959, "learning_rate": 0.0002, "loss": 1.4435, "step": 16920 }, { "epoch": 0.07, "grad_norm": 1.3745118379592896, "learning_rate": 0.0002, "loss": 1.6749, "step": 16930 }, { "epoch": 0.07, "grad_norm": 4.307079792022705, "learning_rate": 0.0002, "loss": 1.5636, "step": 16940 }, { "epoch": 0.07, "grad_norm": 2.315232515335083, "learning_rate": 0.0002, "loss": 1.6111, "step": 16950 }, { "epoch": 0.07, "grad_norm": 2.639495849609375, "learning_rate": 0.0002, "loss": 1.415, "step": 16960 }, { "epoch": 0.07, "grad_norm": 2.9394710063934326, "learning_rate": 0.0002, "loss": 1.7066, "step": 16970 }, { "epoch": 0.07, "grad_norm": 2.0465798377990723, "learning_rate": 0.0002, "loss": 1.4788, "step": 16980 }, { "epoch": 0.07, "grad_norm": 1.8238857984542847, "learning_rate": 0.0002, "loss": 1.3503, "step": 16990 }, { "epoch": 0.07, "grad_norm": 1.5390675067901611, "learning_rate": 0.0002, "loss": 1.5617, "step": 17000 }, { "epoch": 0.07, "grad_norm": 2.295893430709839, "learning_rate": 0.0002, "loss": 1.4955, "step": 17010 }, { "epoch": 0.07, "grad_norm": 2.7939329147338867, "learning_rate": 0.0002, "loss": 1.6157, "step": 17020 }, { "epoch": 0.07, "grad_norm": 2.6559650897979736, "learning_rate": 0.0002, "loss": 1.3769, "step": 17030 }, { "epoch": 0.07, "grad_norm": 3.4646918773651123, "learning_rate": 0.0002, "loss": 1.6163, "step": 17040 }, { "epoch": 0.07, "grad_norm": 2.138394832611084, "learning_rate": 0.0002, "loss": 1.4349, "step": 17050 }, { "epoch": 0.07, "grad_norm": 2.722541093826294, "learning_rate": 0.0002, "loss": 1.2929, "step": 17060 }, { "epoch": 0.07, "grad_norm": 2.306802988052368, "learning_rate": 0.0002, "loss": 1.5824, "step": 17070 }, { "epoch": 0.07, "grad_norm": 2.3348217010498047, "learning_rate": 0.0002, "loss": 1.4772, "step": 17080 }, { "epoch": 0.07, "grad_norm": 1.805189609527588, "learning_rate": 0.0002, "loss": 1.4627, "step": 17090 }, { "epoch": 0.07, "grad_norm": 4.0611891746521, "learning_rate": 0.0002, "loss": 1.5466, "step": 17100 }, { "epoch": 0.07, "grad_norm": 2.8784611225128174, "learning_rate": 0.0002, "loss": 1.3346, "step": 17110 }, { "epoch": 0.07, "grad_norm": 1.1930443048477173, "learning_rate": 0.0002, "loss": 1.8064, "step": 17120 }, { "epoch": 0.07, "grad_norm": 3.5400726795196533, "learning_rate": 0.0002, "loss": 1.3032, "step": 17130 }, { "epoch": 0.07, "grad_norm": 1.9231880903244019, "learning_rate": 0.0002, "loss": 1.5626, "step": 17140 }, { "epoch": 0.07, "grad_norm": 1.9971811771392822, "learning_rate": 0.0002, "loss": 1.4669, "step": 17150 }, { "epoch": 0.07, "grad_norm": 2.821762800216675, "learning_rate": 0.0002, "loss": 1.4135, "step": 17160 }, { "epoch": 0.07, "grad_norm": 2.824815273284912, "learning_rate": 0.0002, "loss": 1.5348, "step": 17170 }, { "epoch": 0.07, "grad_norm": 4.458291053771973, "learning_rate": 0.0002, "loss": 1.7581, "step": 17180 }, { "epoch": 0.07, "grad_norm": 1.7697877883911133, "learning_rate": 0.0002, "loss": 1.2429, "step": 17190 }, { "epoch": 0.07, "grad_norm": 4.269013404846191, "learning_rate": 0.0002, "loss": 1.4156, "step": 17200 }, { "epoch": 0.07, "grad_norm": 3.0093331336975098, "learning_rate": 0.0002, "loss": 1.6349, "step": 17210 }, { "epoch": 0.07, "grad_norm": 1.5078431367874146, "learning_rate": 0.0002, "loss": 1.7143, "step": 17220 }, { "epoch": 0.07, "grad_norm": 2.899205446243286, "learning_rate": 0.0002, "loss": 1.4214, "step": 17230 }, { "epoch": 0.07, "grad_norm": 5.133578777313232, "learning_rate": 0.0002, "loss": 1.4362, "step": 17240 }, { "epoch": 0.07, "grad_norm": 2.7691497802734375, "learning_rate": 0.0002, "loss": 1.518, "step": 17250 }, { "epoch": 0.07, "grad_norm": 3.432594060897827, "learning_rate": 0.0002, "loss": 1.4753, "step": 17260 }, { "epoch": 0.07, "grad_norm": 1.6956180334091187, "learning_rate": 0.0002, "loss": 1.6342, "step": 17270 }, { "epoch": 0.07, "grad_norm": 2.5227863788604736, "learning_rate": 0.0002, "loss": 1.5203, "step": 17280 }, { "epoch": 0.07, "grad_norm": 2.687877893447876, "learning_rate": 0.0002, "loss": 1.5017, "step": 17290 }, { "epoch": 0.07, "grad_norm": 3.6832215785980225, "learning_rate": 0.0002, "loss": 1.4778, "step": 17300 }, { "epoch": 0.07, "grad_norm": 2.128577470779419, "learning_rate": 0.0002, "loss": 1.5164, "step": 17310 }, { "epoch": 0.07, "grad_norm": 2.4290153980255127, "learning_rate": 0.0002, "loss": 1.6009, "step": 17320 }, { "epoch": 0.07, "grad_norm": 2.7406156063079834, "learning_rate": 0.0002, "loss": 1.5111, "step": 17330 }, { "epoch": 0.07, "grad_norm": 3.025700092315674, "learning_rate": 0.0002, "loss": 1.6167, "step": 17340 }, { "epoch": 0.07, "grad_norm": 2.1621406078338623, "learning_rate": 0.0002, "loss": 1.6134, "step": 17350 }, { "epoch": 0.07, "grad_norm": 2.6437416076660156, "learning_rate": 0.0002, "loss": 1.4456, "step": 17360 }, { "epoch": 0.07, "grad_norm": 1.9849531650543213, "learning_rate": 0.0002, "loss": 1.3335, "step": 17370 }, { "epoch": 0.07, "grad_norm": 2.8071815967559814, "learning_rate": 0.0002, "loss": 1.4095, "step": 17380 }, { "epoch": 0.07, "grad_norm": 2.315274715423584, "learning_rate": 0.0002, "loss": 1.7019, "step": 17390 }, { "epoch": 0.07, "grad_norm": 1.965279221534729, "learning_rate": 0.0002, "loss": 1.6586, "step": 17400 }, { "epoch": 0.07, "grad_norm": 2.5031673908233643, "learning_rate": 0.0002, "loss": 1.49, "step": 17410 }, { "epoch": 0.07, "grad_norm": 2.093740463256836, "learning_rate": 0.0002, "loss": 1.4045, "step": 17420 }, { "epoch": 0.07, "grad_norm": 2.4551351070404053, "learning_rate": 0.0002, "loss": 1.6691, "step": 17430 }, { "epoch": 0.07, "grad_norm": 2.1525423526763916, "learning_rate": 0.0002, "loss": 1.5898, "step": 17440 }, { "epoch": 0.07, "grad_norm": 1.7029722929000854, "learning_rate": 0.0002, "loss": 1.5223, "step": 17450 }, { "epoch": 0.07, "grad_norm": 4.359130382537842, "learning_rate": 0.0002, "loss": 1.3719, "step": 17460 }, { "epoch": 0.07, "grad_norm": 2.778907299041748, "learning_rate": 0.0002, "loss": 1.564, "step": 17470 }, { "epoch": 0.07, "grad_norm": 3.351936101913452, "learning_rate": 0.0002, "loss": 1.5415, "step": 17480 }, { "epoch": 0.07, "grad_norm": 2.420229196548462, "learning_rate": 0.0002, "loss": 1.4144, "step": 17490 }, { "epoch": 0.07, "grad_norm": 2.3388731479644775, "learning_rate": 0.0002, "loss": 1.6322, "step": 17500 }, { "epoch": 0.07, "grad_norm": 2.883765459060669, "learning_rate": 0.0002, "loss": 1.5905, "step": 17510 }, { "epoch": 0.07, "grad_norm": 3.5920825004577637, "learning_rate": 0.0002, "loss": 1.3445, "step": 17520 }, { "epoch": 0.07, "grad_norm": 2.495828866958618, "learning_rate": 0.0002, "loss": 1.3799, "step": 17530 }, { "epoch": 0.07, "grad_norm": 1.6943329572677612, "learning_rate": 0.0002, "loss": 1.484, "step": 17540 }, { "epoch": 0.07, "grad_norm": 3.083441734313965, "learning_rate": 0.0002, "loss": 1.6866, "step": 17550 }, { "epoch": 0.07, "grad_norm": 2.170057535171509, "learning_rate": 0.0002, "loss": 1.4322, "step": 17560 }, { "epoch": 0.07, "grad_norm": 2.536923408508301, "learning_rate": 0.0002, "loss": 1.5904, "step": 17570 }, { "epoch": 0.07, "grad_norm": 5.919551372528076, "learning_rate": 0.0002, "loss": 1.6733, "step": 17580 }, { "epoch": 0.07, "grad_norm": 1.70661199092865, "learning_rate": 0.0002, "loss": 1.3702, "step": 17590 }, { "epoch": 0.07, "grad_norm": 3.066768169403076, "learning_rate": 0.0002, "loss": 1.5059, "step": 17600 }, { "epoch": 0.07, "grad_norm": 3.671416997909546, "learning_rate": 0.0002, "loss": 1.6319, "step": 17610 }, { "epoch": 0.07, "grad_norm": 3.816124439239502, "learning_rate": 0.0002, "loss": 1.4607, "step": 17620 }, { "epoch": 0.07, "grad_norm": 3.6020498275756836, "learning_rate": 0.0002, "loss": 1.6689, "step": 17630 }, { "epoch": 0.07, "grad_norm": 2.314796209335327, "learning_rate": 0.0002, "loss": 1.697, "step": 17640 }, { "epoch": 0.07, "grad_norm": 3.0026955604553223, "learning_rate": 0.0002, "loss": 1.5473, "step": 17650 }, { "epoch": 0.07, "grad_norm": 2.742788791656494, "learning_rate": 0.0002, "loss": 1.5557, "step": 17660 }, { "epoch": 0.07, "grad_norm": 1.3615120649337769, "learning_rate": 0.0002, "loss": 1.8889, "step": 17670 }, { "epoch": 0.07, "grad_norm": 2.2956719398498535, "learning_rate": 0.0002, "loss": 1.4515, "step": 17680 }, { "epoch": 0.07, "grad_norm": 2.6158177852630615, "learning_rate": 0.0002, "loss": 1.4589, "step": 17690 }, { "epoch": 0.07, "grad_norm": 2.4566757678985596, "learning_rate": 0.0002, "loss": 1.6493, "step": 17700 }, { "epoch": 0.07, "grad_norm": 2.2356324195861816, "learning_rate": 0.0002, "loss": 1.4958, "step": 17710 }, { "epoch": 0.07, "grad_norm": 2.080371856689453, "learning_rate": 0.0002, "loss": 1.5039, "step": 17720 }, { "epoch": 0.07, "grad_norm": 3.3084983825683594, "learning_rate": 0.0002, "loss": 1.2117, "step": 17730 }, { "epoch": 0.07, "grad_norm": 3.128554105758667, "learning_rate": 0.0002, "loss": 1.4076, "step": 17740 }, { "epoch": 0.07, "grad_norm": 3.8920319080352783, "learning_rate": 0.0002, "loss": 1.5797, "step": 17750 }, { "epoch": 0.07, "grad_norm": 2.605613946914673, "learning_rate": 0.0002, "loss": 1.2272, "step": 17760 }, { "epoch": 0.07, "grad_norm": 1.3277255296707153, "learning_rate": 0.0002, "loss": 1.551, "step": 17770 }, { "epoch": 0.07, "grad_norm": 3.625258684158325, "learning_rate": 0.0002, "loss": 1.5927, "step": 17780 }, { "epoch": 0.07, "grad_norm": 1.7717891931533813, "learning_rate": 0.0002, "loss": 1.7913, "step": 17790 }, { "epoch": 0.07, "grad_norm": 3.4901375770568848, "learning_rate": 0.0002, "loss": 1.5062, "step": 17800 }, { "epoch": 0.07, "grad_norm": 1.3852989673614502, "learning_rate": 0.0002, "loss": 1.5023, "step": 17810 }, { "epoch": 0.07, "grad_norm": 3.5959129333496094, "learning_rate": 0.0002, "loss": 1.3527, "step": 17820 }, { "epoch": 0.07, "grad_norm": 2.140979051589966, "learning_rate": 0.0002, "loss": 1.6648, "step": 17830 }, { "epoch": 0.07, "grad_norm": 3.6911988258361816, "learning_rate": 0.0002, "loss": 1.6962, "step": 17840 }, { "epoch": 0.07, "grad_norm": 2.2891769409179688, "learning_rate": 0.0002, "loss": 1.4668, "step": 17850 }, { "epoch": 0.07, "grad_norm": 2.249213933944702, "learning_rate": 0.0002, "loss": 1.4033, "step": 17860 }, { "epoch": 0.07, "grad_norm": 2.1262519359588623, "learning_rate": 0.0002, "loss": 1.4244, "step": 17870 }, { "epoch": 0.07, "grad_norm": 2.1751177310943604, "learning_rate": 0.0002, "loss": 1.4439, "step": 17880 }, { "epoch": 0.07, "grad_norm": 1.8801313638687134, "learning_rate": 0.0002, "loss": 1.7994, "step": 17890 }, { "epoch": 0.07, "grad_norm": 2.8481833934783936, "learning_rate": 0.0002, "loss": 1.7196, "step": 17900 }, { "epoch": 0.07, "grad_norm": 1.373915195465088, "learning_rate": 0.0002, "loss": 1.479, "step": 17910 }, { "epoch": 0.07, "grad_norm": 1.4752509593963623, "learning_rate": 0.0002, "loss": 1.4987, "step": 17920 }, { "epoch": 0.07, "grad_norm": 3.1871883869171143, "learning_rate": 0.0002, "loss": 1.2186, "step": 17930 }, { "epoch": 0.07, "grad_norm": 3.146282196044922, "learning_rate": 0.0002, "loss": 1.5754, "step": 17940 }, { "epoch": 0.07, "grad_norm": 2.2268922328948975, "learning_rate": 0.0002, "loss": 1.3844, "step": 17950 }, { "epoch": 0.07, "grad_norm": 4.373378276824951, "learning_rate": 0.0002, "loss": 1.5342, "step": 17960 }, { "epoch": 0.07, "grad_norm": 2.686307668685913, "learning_rate": 0.0002, "loss": 1.422, "step": 17970 }, { "epoch": 0.07, "grad_norm": 2.4025375843048096, "learning_rate": 0.0002, "loss": 1.4012, "step": 17980 }, { "epoch": 0.07, "grad_norm": 1.683312177658081, "learning_rate": 0.0002, "loss": 1.6584, "step": 17990 }, { "epoch": 0.07, "grad_norm": 4.228415489196777, "learning_rate": 0.0002, "loss": 1.4005, "step": 18000 }, { "epoch": 0.07, "grad_norm": 1.9318197965621948, "learning_rate": 0.0002, "loss": 1.7439, "step": 18010 }, { "epoch": 0.07, "grad_norm": 2.174100875854492, "learning_rate": 0.0002, "loss": 1.4346, "step": 18020 }, { "epoch": 0.07, "grad_norm": 2.392470598220825, "learning_rate": 0.0002, "loss": 1.4153, "step": 18030 }, { "epoch": 0.07, "grad_norm": 2.4115982055664062, "learning_rate": 0.0002, "loss": 1.4973, "step": 18040 }, { "epoch": 0.07, "grad_norm": 1.547080159187317, "learning_rate": 0.0002, "loss": 1.4737, "step": 18050 }, { "epoch": 0.07, "grad_norm": 2.2358827590942383, "learning_rate": 0.0002, "loss": 1.326, "step": 18060 }, { "epoch": 0.07, "grad_norm": 2.885221481323242, "learning_rate": 0.0002, "loss": 1.2804, "step": 18070 }, { "epoch": 0.07, "grad_norm": 3.0335195064544678, "learning_rate": 0.0002, "loss": 1.3658, "step": 18080 }, { "epoch": 0.07, "grad_norm": 1.19554603099823, "learning_rate": 0.0002, "loss": 1.5568, "step": 18090 }, { "epoch": 0.07, "grad_norm": 3.2933313846588135, "learning_rate": 0.0002, "loss": 1.4521, "step": 18100 }, { "epoch": 0.07, "grad_norm": 1.4881560802459717, "learning_rate": 0.0002, "loss": 1.4094, "step": 18110 }, { "epoch": 0.07, "grad_norm": 3.606672763824463, "learning_rate": 0.0002, "loss": 1.3685, "step": 18120 }, { "epoch": 0.07, "grad_norm": 2.318424701690674, "learning_rate": 0.0002, "loss": 1.5973, "step": 18130 }, { "epoch": 0.07, "grad_norm": 2.1774511337280273, "learning_rate": 0.0002, "loss": 1.7509, "step": 18140 }, { "epoch": 0.07, "grad_norm": 2.092482566833496, "learning_rate": 0.0002, "loss": 1.4991, "step": 18150 }, { "epoch": 0.07, "grad_norm": 3.0839128494262695, "learning_rate": 0.0002, "loss": 1.563, "step": 18160 }, { "epoch": 0.07, "grad_norm": 3.0611300468444824, "learning_rate": 0.0002, "loss": 1.4793, "step": 18170 }, { "epoch": 0.07, "grad_norm": 1.6822774410247803, "learning_rate": 0.0002, "loss": 1.4119, "step": 18180 }, { "epoch": 0.07, "grad_norm": 2.683307409286499, "learning_rate": 0.0002, "loss": 1.5112, "step": 18190 }, { "epoch": 0.07, "grad_norm": 2.988372325897217, "learning_rate": 0.0002, "loss": 1.4882, "step": 18200 }, { "epoch": 0.07, "grad_norm": 1.987959384918213, "learning_rate": 0.0002, "loss": 1.5416, "step": 18210 }, { "epoch": 0.07, "grad_norm": 2.842446804046631, "learning_rate": 0.0002, "loss": 1.7274, "step": 18220 }, { "epoch": 0.07, "grad_norm": 2.6179697513580322, "learning_rate": 0.0002, "loss": 1.4017, "step": 18230 }, { "epoch": 0.07, "grad_norm": 1.9987601041793823, "learning_rate": 0.0002, "loss": 1.3876, "step": 18240 }, { "epoch": 0.07, "grad_norm": 2.825605630874634, "learning_rate": 0.0002, "loss": 1.7127, "step": 18250 }, { "epoch": 0.07, "grad_norm": 2.27286696434021, "learning_rate": 0.0002, "loss": 1.5476, "step": 18260 }, { "epoch": 0.07, "grad_norm": 1.9839746952056885, "learning_rate": 0.0002, "loss": 1.718, "step": 18270 }, { "epoch": 0.07, "grad_norm": 2.854069232940674, "learning_rate": 0.0002, "loss": 1.3889, "step": 18280 }, { "epoch": 0.07, "grad_norm": 1.9887094497680664, "learning_rate": 0.0002, "loss": 1.4706, "step": 18290 }, { "epoch": 0.07, "grad_norm": 2.747380495071411, "learning_rate": 0.0002, "loss": 1.3007, "step": 18300 }, { "epoch": 0.07, "grad_norm": 3.5056350231170654, "learning_rate": 0.0002, "loss": 1.5006, "step": 18310 }, { "epoch": 0.07, "grad_norm": 2.304567813873291, "learning_rate": 0.0002, "loss": 1.5757, "step": 18320 }, { "epoch": 0.07, "grad_norm": 1.630389928817749, "learning_rate": 0.0002, "loss": 1.4448, "step": 18330 }, { "epoch": 0.07, "grad_norm": 2.329542875289917, "learning_rate": 0.0002, "loss": 1.5963, "step": 18340 }, { "epoch": 0.07, "grad_norm": 1.4112093448638916, "learning_rate": 0.0002, "loss": 1.619, "step": 18350 }, { "epoch": 0.07, "grad_norm": 2.682809352874756, "learning_rate": 0.0002, "loss": 1.5153, "step": 18360 }, { "epoch": 0.07, "grad_norm": 3.459752321243286, "learning_rate": 0.0002, "loss": 1.5611, "step": 18370 }, { "epoch": 0.07, "grad_norm": 4.131134986877441, "learning_rate": 0.0002, "loss": 1.4972, "step": 18380 }, { "epoch": 0.07, "grad_norm": 3.0186517238616943, "learning_rate": 0.0002, "loss": 1.5301, "step": 18390 }, { "epoch": 0.07, "grad_norm": 4.190934181213379, "learning_rate": 0.0002, "loss": 1.528, "step": 18400 }, { "epoch": 0.07, "grad_norm": 3.4033329486846924, "learning_rate": 0.0002, "loss": 1.3255, "step": 18410 }, { "epoch": 0.07, "grad_norm": 1.3935093879699707, "learning_rate": 0.0002, "loss": 1.5305, "step": 18420 }, { "epoch": 0.08, "grad_norm": 3.9908745288848877, "learning_rate": 0.0002, "loss": 1.3628, "step": 18430 }, { "epoch": 0.08, "grad_norm": 1.7829481363296509, "learning_rate": 0.0002, "loss": 1.5704, "step": 18440 }, { "epoch": 0.08, "grad_norm": 1.9123656749725342, "learning_rate": 0.0002, "loss": 1.7079, "step": 18450 }, { "epoch": 0.08, "grad_norm": 3.331716299057007, "learning_rate": 0.0002, "loss": 1.7626, "step": 18460 }, { "epoch": 0.08, "grad_norm": 2.394967555999756, "learning_rate": 0.0002, "loss": 1.5408, "step": 18470 }, { "epoch": 0.08, "grad_norm": 3.645315170288086, "learning_rate": 0.0002, "loss": 1.338, "step": 18480 }, { "epoch": 0.08, "grad_norm": 1.4535808563232422, "learning_rate": 0.0002, "loss": 1.5438, "step": 18490 }, { "epoch": 0.08, "grad_norm": 1.7184559106826782, "learning_rate": 0.0002, "loss": 1.6258, "step": 18500 }, { "epoch": 0.08, "grad_norm": 2.1978952884674072, "learning_rate": 0.0002, "loss": 1.4802, "step": 18510 }, { "epoch": 0.08, "grad_norm": 2.67720103263855, "learning_rate": 0.0002, "loss": 1.6948, "step": 18520 }, { "epoch": 0.08, "grad_norm": 1.849210262298584, "learning_rate": 0.0002, "loss": 1.6003, "step": 18530 }, { "epoch": 0.08, "grad_norm": 3.6462204456329346, "learning_rate": 0.0002, "loss": 1.3991, "step": 18540 }, { "epoch": 0.08, "grad_norm": 2.221848964691162, "learning_rate": 0.0002, "loss": 1.4946, "step": 18550 }, { "epoch": 0.08, "grad_norm": 1.965873122215271, "learning_rate": 0.0002, "loss": 1.6203, "step": 18560 }, { "epoch": 0.08, "grad_norm": 2.0354092121124268, "learning_rate": 0.0002, "loss": 1.5191, "step": 18570 }, { "epoch": 0.08, "grad_norm": 2.3718252182006836, "learning_rate": 0.0002, "loss": 1.6357, "step": 18580 }, { "epoch": 0.08, "grad_norm": 2.8587253093719482, "learning_rate": 0.0002, "loss": 1.4833, "step": 18590 }, { "epoch": 0.08, "grad_norm": 2.4546804428100586, "learning_rate": 0.0002, "loss": 1.2712, "step": 18600 }, { "epoch": 0.08, "grad_norm": 1.4562889337539673, "learning_rate": 0.0002, "loss": 1.5118, "step": 18610 }, { "epoch": 0.08, "grad_norm": 3.4138295650482178, "learning_rate": 0.0002, "loss": 1.5616, "step": 18620 }, { "epoch": 0.08, "grad_norm": 3.0898187160491943, "learning_rate": 0.0002, "loss": 1.4203, "step": 18630 }, { "epoch": 0.08, "grad_norm": 3.4998974800109863, "learning_rate": 0.0002, "loss": 1.5358, "step": 18640 }, { "epoch": 0.08, "grad_norm": 2.6160595417022705, "learning_rate": 0.0002, "loss": 1.7313, "step": 18650 }, { "epoch": 0.08, "grad_norm": 4.045777320861816, "learning_rate": 0.0002, "loss": 1.4432, "step": 18660 }, { "epoch": 0.08, "grad_norm": 1.303579568862915, "learning_rate": 0.0002, "loss": 1.3317, "step": 18670 }, { "epoch": 0.08, "grad_norm": 1.9725898504257202, "learning_rate": 0.0002, "loss": 1.6581, "step": 18680 }, { "epoch": 0.08, "grad_norm": 3.2772529125213623, "learning_rate": 0.0002, "loss": 1.6941, "step": 18690 }, { "epoch": 0.08, "grad_norm": 2.415905237197876, "learning_rate": 0.0002, "loss": 1.4723, "step": 18700 }, { "epoch": 0.08, "grad_norm": 3.608281135559082, "learning_rate": 0.0002, "loss": 1.4474, "step": 18710 }, { "epoch": 0.08, "grad_norm": 2.6064393520355225, "learning_rate": 0.0002, "loss": 1.5784, "step": 18720 }, { "epoch": 0.08, "grad_norm": 2.4592297077178955, "learning_rate": 0.0002, "loss": 1.439, "step": 18730 }, { "epoch": 0.08, "grad_norm": 2.226635456085205, "learning_rate": 0.0002, "loss": 1.5012, "step": 18740 }, { "epoch": 0.08, "grad_norm": 1.8649885654449463, "learning_rate": 0.0002, "loss": 1.3685, "step": 18750 }, { "epoch": 0.08, "grad_norm": 3.8508903980255127, "learning_rate": 0.0002, "loss": 1.542, "step": 18760 }, { "epoch": 0.08, "grad_norm": 2.8102524280548096, "learning_rate": 0.0002, "loss": 1.5149, "step": 18770 }, { "epoch": 0.08, "grad_norm": 1.9894517660140991, "learning_rate": 0.0002, "loss": 1.4813, "step": 18780 }, { "epoch": 0.08, "grad_norm": 2.3441836833953857, "learning_rate": 0.0002, "loss": 1.3642, "step": 18790 }, { "epoch": 0.08, "grad_norm": 1.956203818321228, "learning_rate": 0.0002, "loss": 1.4481, "step": 18800 }, { "epoch": 0.08, "grad_norm": 2.4076926708221436, "learning_rate": 0.0002, "loss": 1.1842, "step": 18810 }, { "epoch": 0.08, "grad_norm": 3.1323349475860596, "learning_rate": 0.0002, "loss": 1.5561, "step": 18820 }, { "epoch": 0.08, "grad_norm": 2.312711000442505, "learning_rate": 0.0002, "loss": 1.2397, "step": 18830 }, { "epoch": 0.08, "grad_norm": 1.9604530334472656, "learning_rate": 0.0002, "loss": 1.4662, "step": 18840 }, { "epoch": 0.08, "grad_norm": 2.8293745517730713, "learning_rate": 0.0002, "loss": 1.6785, "step": 18850 }, { "epoch": 0.08, "grad_norm": 2.215510845184326, "learning_rate": 0.0002, "loss": 1.7656, "step": 18860 }, { "epoch": 0.08, "grad_norm": 2.58843731880188, "learning_rate": 0.0002, "loss": 1.5651, "step": 18870 }, { "epoch": 0.08, "grad_norm": 2.2052242755889893, "learning_rate": 0.0002, "loss": 1.5971, "step": 18880 }, { "epoch": 0.08, "grad_norm": 2.555577039718628, "learning_rate": 0.0002, "loss": 1.5591, "step": 18890 }, { "epoch": 0.08, "grad_norm": 2.8598859310150146, "learning_rate": 0.0002, "loss": 1.4856, "step": 18900 }, { "epoch": 0.08, "grad_norm": 3.227782726287842, "learning_rate": 0.0002, "loss": 1.5004, "step": 18910 }, { "epoch": 0.08, "grad_norm": 1.8585950136184692, "learning_rate": 0.0002, "loss": 1.367, "step": 18920 }, { "epoch": 0.08, "grad_norm": 2.929931402206421, "learning_rate": 0.0002, "loss": 1.5284, "step": 18930 }, { "epoch": 0.08, "grad_norm": 3.0832266807556152, "learning_rate": 0.0002, "loss": 1.54, "step": 18940 }, { "epoch": 0.08, "grad_norm": 3.789797782897949, "learning_rate": 0.0002, "loss": 1.5098, "step": 18950 }, { "epoch": 0.08, "grad_norm": 2.1860287189483643, "learning_rate": 0.0002, "loss": 1.6333, "step": 18960 }, { "epoch": 0.08, "grad_norm": 2.628929853439331, "learning_rate": 0.0002, "loss": 1.4702, "step": 18970 }, { "epoch": 0.08, "grad_norm": 2.3510847091674805, "learning_rate": 0.0002, "loss": 1.8011, "step": 18980 }, { "epoch": 0.08, "grad_norm": 2.009354829788208, "learning_rate": 0.0002, "loss": 1.444, "step": 18990 }, { "epoch": 0.08, "grad_norm": 2.2743570804595947, "learning_rate": 0.0002, "loss": 1.5376, "step": 19000 }, { "epoch": 0.08, "grad_norm": 3.320021152496338, "learning_rate": 0.0002, "loss": 1.3488, "step": 19010 }, { "epoch": 0.08, "grad_norm": 2.837242603302002, "learning_rate": 0.0002, "loss": 1.8208, "step": 19020 }, { "epoch": 0.08, "grad_norm": 2.641460657119751, "learning_rate": 0.0002, "loss": 1.5568, "step": 19030 }, { "epoch": 0.08, "grad_norm": 1.4663853645324707, "learning_rate": 0.0002, "loss": 1.4127, "step": 19040 }, { "epoch": 0.08, "grad_norm": 3.613635778427124, "learning_rate": 0.0002, "loss": 1.3696, "step": 19050 }, { "epoch": 0.08, "grad_norm": 2.391834020614624, "learning_rate": 0.0002, "loss": 1.5007, "step": 19060 }, { "epoch": 0.08, "grad_norm": 2.318747043609619, "learning_rate": 0.0002, "loss": 1.3884, "step": 19070 }, { "epoch": 0.08, "grad_norm": 3.5582451820373535, "learning_rate": 0.0002, "loss": 1.6227, "step": 19080 }, { "epoch": 0.08, "grad_norm": 2.5929806232452393, "learning_rate": 0.0002, "loss": 1.5506, "step": 19090 }, { "epoch": 0.08, "grad_norm": 2.76924991607666, "learning_rate": 0.0002, "loss": 1.4996, "step": 19100 }, { "epoch": 0.08, "grad_norm": 3.299473285675049, "learning_rate": 0.0002, "loss": 1.5576, "step": 19110 }, { "epoch": 0.08, "grad_norm": 2.7138023376464844, "learning_rate": 0.0002, "loss": 1.5233, "step": 19120 }, { "epoch": 0.08, "grad_norm": 3.810305118560791, "learning_rate": 0.0002, "loss": 1.7133, "step": 19130 }, { "epoch": 0.08, "grad_norm": 2.8977341651916504, "learning_rate": 0.0002, "loss": 1.4015, "step": 19140 }, { "epoch": 0.08, "grad_norm": 2.4802310466766357, "learning_rate": 0.0002, "loss": 1.5556, "step": 19150 }, { "epoch": 0.08, "grad_norm": 2.8874614238739014, "learning_rate": 0.0002, "loss": 1.5758, "step": 19160 }, { "epoch": 0.08, "grad_norm": 3.133655548095703, "learning_rate": 0.0002, "loss": 1.3975, "step": 19170 }, { "epoch": 0.08, "grad_norm": 2.084477186203003, "learning_rate": 0.0002, "loss": 1.3014, "step": 19180 }, { "epoch": 0.08, "grad_norm": 2.0461010932922363, "learning_rate": 0.0002, "loss": 1.5598, "step": 19190 }, { "epoch": 0.08, "grad_norm": 3.7054593563079834, "learning_rate": 0.0002, "loss": 1.4398, "step": 19200 }, { "epoch": 0.08, "grad_norm": 2.625788450241089, "learning_rate": 0.0002, "loss": 1.3828, "step": 19210 }, { "epoch": 0.08, "grad_norm": 3.6133198738098145, "learning_rate": 0.0002, "loss": 1.5405, "step": 19220 }, { "epoch": 0.08, "grad_norm": 2.1493401527404785, "learning_rate": 0.0002, "loss": 1.6791, "step": 19230 }, { "epoch": 0.08, "grad_norm": 2.1153736114501953, "learning_rate": 0.0002, "loss": 1.5592, "step": 19240 }, { "epoch": 0.08, "grad_norm": 3.752476930618286, "learning_rate": 0.0002, "loss": 1.6746, "step": 19250 }, { "epoch": 0.08, "grad_norm": 2.5095467567443848, "learning_rate": 0.0002, "loss": 1.6724, "step": 19260 }, { "epoch": 0.08, "grad_norm": 3.014160633087158, "learning_rate": 0.0002, "loss": 1.6892, "step": 19270 }, { "epoch": 0.08, "grad_norm": 2.1600117683410645, "learning_rate": 0.0002, "loss": 1.3809, "step": 19280 }, { "epoch": 0.08, "grad_norm": 1.961382269859314, "learning_rate": 0.0002, "loss": 1.4983, "step": 19290 }, { "epoch": 0.08, "grad_norm": 2.698371171951294, "learning_rate": 0.0002, "loss": 1.4017, "step": 19300 }, { "epoch": 0.08, "grad_norm": 4.177449703216553, "learning_rate": 0.0002, "loss": 1.6319, "step": 19310 }, { "epoch": 0.08, "grad_norm": 2.097092390060425, "learning_rate": 0.0002, "loss": 1.711, "step": 19320 }, { "epoch": 0.08, "grad_norm": 1.4732941389083862, "learning_rate": 0.0002, "loss": 1.5308, "step": 19330 }, { "epoch": 0.08, "grad_norm": 3.123778820037842, "learning_rate": 0.0002, "loss": 1.3179, "step": 19340 }, { "epoch": 0.08, "grad_norm": 3.2439863681793213, "learning_rate": 0.0002, "loss": 1.4289, "step": 19350 }, { "epoch": 0.08, "grad_norm": 2.1778743267059326, "learning_rate": 0.0002, "loss": 1.4675, "step": 19360 }, { "epoch": 0.08, "grad_norm": 2.807208299636841, "learning_rate": 0.0002, "loss": 1.5908, "step": 19370 }, { "epoch": 0.08, "grad_norm": 3.0393178462982178, "learning_rate": 0.0002, "loss": 1.3503, "step": 19380 }, { "epoch": 0.08, "grad_norm": 1.970576286315918, "learning_rate": 0.0002, "loss": 1.7525, "step": 19390 }, { "epoch": 0.08, "grad_norm": 3.333981513977051, "learning_rate": 0.0002, "loss": 1.5456, "step": 19400 }, { "epoch": 0.08, "grad_norm": 2.708038330078125, "learning_rate": 0.0002, "loss": 1.795, "step": 19410 }, { "epoch": 0.08, "grad_norm": 2.6744019985198975, "learning_rate": 0.0002, "loss": 1.6846, "step": 19420 }, { "epoch": 0.08, "grad_norm": 2.9006667137145996, "learning_rate": 0.0002, "loss": 1.5313, "step": 19430 }, { "epoch": 0.08, "grad_norm": 3.2287373542785645, "learning_rate": 0.0002, "loss": 1.9248, "step": 19440 }, { "epoch": 0.08, "grad_norm": 2.5279340744018555, "learning_rate": 0.0002, "loss": 1.3537, "step": 19450 }, { "epoch": 0.08, "grad_norm": 1.5974133014678955, "learning_rate": 0.0002, "loss": 1.508, "step": 19460 }, { "epoch": 0.08, "grad_norm": 3.6041128635406494, "learning_rate": 0.0002, "loss": 1.3323, "step": 19470 }, { "epoch": 0.08, "grad_norm": 2.4763588905334473, "learning_rate": 0.0002, "loss": 1.4496, "step": 19480 }, { "epoch": 0.08, "grad_norm": 1.7559212446212769, "learning_rate": 0.0002, "loss": 1.7592, "step": 19490 }, { "epoch": 0.08, "grad_norm": 2.3031294345855713, "learning_rate": 0.0002, "loss": 1.5629, "step": 19500 }, { "epoch": 0.08, "grad_norm": 4.1698317527771, "learning_rate": 0.0002, "loss": 1.3729, "step": 19510 }, { "epoch": 0.08, "grad_norm": 2.126417398452759, "learning_rate": 0.0002, "loss": 1.4095, "step": 19520 }, { "epoch": 0.08, "grad_norm": 2.5388450622558594, "learning_rate": 0.0002, "loss": 1.5862, "step": 19530 }, { "epoch": 0.08, "grad_norm": 1.982663869857788, "learning_rate": 0.0002, "loss": 1.761, "step": 19540 }, { "epoch": 0.08, "grad_norm": 4.210536479949951, "learning_rate": 0.0002, "loss": 1.7381, "step": 19550 }, { "epoch": 0.08, "grad_norm": 2.532033681869507, "learning_rate": 0.0002, "loss": 1.5758, "step": 19560 }, { "epoch": 0.08, "grad_norm": 2.125444173812866, "learning_rate": 0.0002, "loss": 1.6816, "step": 19570 }, { "epoch": 0.08, "grad_norm": 3.486839532852173, "learning_rate": 0.0002, "loss": 1.5067, "step": 19580 }, { "epoch": 0.08, "grad_norm": 3.091294527053833, "learning_rate": 0.0002, "loss": 1.4577, "step": 19590 }, { "epoch": 0.08, "grad_norm": 1.6923528909683228, "learning_rate": 0.0002, "loss": 1.6647, "step": 19600 }, { "epoch": 0.08, "grad_norm": 1.7170891761779785, "learning_rate": 0.0002, "loss": 1.7424, "step": 19610 }, { "epoch": 0.08, "grad_norm": 2.7562339305877686, "learning_rate": 0.0002, "loss": 1.4628, "step": 19620 }, { "epoch": 0.08, "grad_norm": 2.594419002532959, "learning_rate": 0.0002, "loss": 1.6241, "step": 19630 }, { "epoch": 0.08, "grad_norm": 3.101525068283081, "learning_rate": 0.0002, "loss": 1.6204, "step": 19640 }, { "epoch": 0.08, "grad_norm": 2.3003437519073486, "learning_rate": 0.0002, "loss": 1.6344, "step": 19650 }, { "epoch": 0.08, "grad_norm": 3.1206963062286377, "learning_rate": 0.0002, "loss": 1.8168, "step": 19660 }, { "epoch": 0.08, "grad_norm": 1.9720275402069092, "learning_rate": 0.0002, "loss": 1.4601, "step": 19670 }, { "epoch": 0.08, "grad_norm": 3.2656972408294678, "learning_rate": 0.0002, "loss": 1.559, "step": 19680 }, { "epoch": 0.08, "grad_norm": 1.3445274829864502, "learning_rate": 0.0002, "loss": 1.3806, "step": 19690 }, { "epoch": 0.08, "grad_norm": 2.2486348152160645, "learning_rate": 0.0002, "loss": 1.4468, "step": 19700 }, { "epoch": 0.08, "grad_norm": 1.7580506801605225, "learning_rate": 0.0002, "loss": 1.3136, "step": 19710 }, { "epoch": 0.08, "grad_norm": 2.2664566040039062, "learning_rate": 0.0002, "loss": 1.2545, "step": 19720 }, { "epoch": 0.08, "grad_norm": 2.821418523788452, "learning_rate": 0.0002, "loss": 1.7327, "step": 19730 }, { "epoch": 0.08, "grad_norm": 4.129728317260742, "learning_rate": 0.0002, "loss": 1.3794, "step": 19740 }, { "epoch": 0.08, "grad_norm": 2.565004825592041, "learning_rate": 0.0002, "loss": 1.5507, "step": 19750 }, { "epoch": 0.08, "grad_norm": 2.8486554622650146, "learning_rate": 0.0002, "loss": 1.6744, "step": 19760 }, { "epoch": 0.08, "grad_norm": 2.166261672973633, "learning_rate": 0.0002, "loss": 1.5366, "step": 19770 }, { "epoch": 0.08, "grad_norm": 2.7594943046569824, "learning_rate": 0.0002, "loss": 1.6546, "step": 19780 }, { "epoch": 0.08, "grad_norm": 2.2406179904937744, "learning_rate": 0.0002, "loss": 1.6059, "step": 19790 }, { "epoch": 0.08, "grad_norm": 2.9985108375549316, "learning_rate": 0.0002, "loss": 1.4403, "step": 19800 }, { "epoch": 0.08, "grad_norm": 2.348949670791626, "learning_rate": 0.0002, "loss": 1.6066, "step": 19810 }, { "epoch": 0.08, "grad_norm": 4.219382286071777, "learning_rate": 0.0002, "loss": 1.4642, "step": 19820 }, { "epoch": 0.08, "grad_norm": 3.3581490516662598, "learning_rate": 0.0002, "loss": 1.6428, "step": 19830 }, { "epoch": 0.08, "grad_norm": 2.20892071723938, "learning_rate": 0.0002, "loss": 1.6773, "step": 19840 }, { "epoch": 0.08, "grad_norm": 3.5226032733917236, "learning_rate": 0.0002, "loss": 1.5607, "step": 19850 }, { "epoch": 0.08, "grad_norm": 2.2070350646972656, "learning_rate": 0.0002, "loss": 1.5813, "step": 19860 }, { "epoch": 0.08, "grad_norm": 2.2469732761383057, "learning_rate": 0.0002, "loss": 1.6378, "step": 19870 }, { "epoch": 0.08, "grad_norm": 2.3770322799682617, "learning_rate": 0.0002, "loss": 1.5132, "step": 19880 }, { "epoch": 0.08, "grad_norm": 3.024841070175171, "learning_rate": 0.0002, "loss": 1.6502, "step": 19890 }, { "epoch": 0.08, "grad_norm": 1.59449303150177, "learning_rate": 0.0002, "loss": 1.708, "step": 19900 }, { "epoch": 0.08, "grad_norm": 1.6110081672668457, "learning_rate": 0.0002, "loss": 1.6787, "step": 19910 }, { "epoch": 0.08, "grad_norm": 1.8666945695877075, "learning_rate": 0.0002, "loss": 1.5002, "step": 19920 }, { "epoch": 0.08, "grad_norm": 2.681276798248291, "learning_rate": 0.0002, "loss": 1.5481, "step": 19930 }, { "epoch": 0.08, "grad_norm": 3.588979721069336, "learning_rate": 0.0002, "loss": 1.5246, "step": 19940 }, { "epoch": 0.08, "grad_norm": 2.8706719875335693, "learning_rate": 0.0002, "loss": 1.6007, "step": 19950 }, { "epoch": 0.08, "grad_norm": 3.1459317207336426, "learning_rate": 0.0002, "loss": 1.4465, "step": 19960 }, { "epoch": 0.08, "grad_norm": 1.8522253036499023, "learning_rate": 0.0002, "loss": 1.466, "step": 19970 }, { "epoch": 0.08, "grad_norm": 3.1580326557159424, "learning_rate": 0.0002, "loss": 1.5984, "step": 19980 }, { "epoch": 0.08, "grad_norm": 4.080891132354736, "learning_rate": 0.0002, "loss": 1.5608, "step": 19990 }, { "epoch": 0.08, "grad_norm": 1.495326280593872, "learning_rate": 0.0002, "loss": 1.5264, "step": 20000 }, { "epoch": 0.08, "grad_norm": 2.35280442237854, "learning_rate": 0.0002, "loss": 1.6901, "step": 20010 }, { "epoch": 0.08, "grad_norm": 1.4182275533676147, "learning_rate": 0.0002, "loss": 1.566, "step": 20020 }, { "epoch": 0.08, "grad_norm": 2.4263689517974854, "learning_rate": 0.0002, "loss": 1.441, "step": 20030 }, { "epoch": 0.08, "grad_norm": 1.5669450759887695, "learning_rate": 0.0002, "loss": 1.4384, "step": 20040 }, { "epoch": 0.08, "grad_norm": 2.8028371334075928, "learning_rate": 0.0002, "loss": 1.2791, "step": 20050 }, { "epoch": 0.08, "grad_norm": 2.137122392654419, "learning_rate": 0.0002, "loss": 1.4881, "step": 20060 }, { "epoch": 0.08, "grad_norm": 1.6393847465515137, "learning_rate": 0.0002, "loss": 1.3354, "step": 20070 }, { "epoch": 0.08, "grad_norm": 2.557347059249878, "learning_rate": 0.0002, "loss": 1.3492, "step": 20080 }, { "epoch": 0.08, "grad_norm": 2.332458972930908, "learning_rate": 0.0002, "loss": 1.5443, "step": 20090 }, { "epoch": 0.08, "grad_norm": 4.434239864349365, "learning_rate": 0.0002, "loss": 1.501, "step": 20100 }, { "epoch": 0.08, "grad_norm": 3.702242374420166, "learning_rate": 0.0002, "loss": 1.602, "step": 20110 }, { "epoch": 0.08, "grad_norm": 2.354989528656006, "learning_rate": 0.0002, "loss": 1.4979, "step": 20120 }, { "epoch": 0.08, "grad_norm": 2.639601469039917, "learning_rate": 0.0002, "loss": 1.0833, "step": 20130 }, { "epoch": 0.08, "grad_norm": 3.060662269592285, "learning_rate": 0.0002, "loss": 1.5621, "step": 20140 }, { "epoch": 0.08, "grad_norm": 3.0110530853271484, "learning_rate": 0.0002, "loss": 1.619, "step": 20150 }, { "epoch": 0.08, "grad_norm": 4.40569543838501, "learning_rate": 0.0002, "loss": 1.4842, "step": 20160 }, { "epoch": 0.08, "grad_norm": 2.501140594482422, "learning_rate": 0.0002, "loss": 1.3475, "step": 20170 }, { "epoch": 0.08, "grad_norm": 2.174081563949585, "learning_rate": 0.0002, "loss": 1.5812, "step": 20180 }, { "epoch": 0.08, "grad_norm": 2.0183908939361572, "learning_rate": 0.0002, "loss": 1.6288, "step": 20190 }, { "epoch": 0.08, "grad_norm": 2.799220085144043, "learning_rate": 0.0002, "loss": 1.3326, "step": 20200 }, { "epoch": 0.08, "grad_norm": 2.7096595764160156, "learning_rate": 0.0002, "loss": 1.4257, "step": 20210 }, { "epoch": 0.08, "grad_norm": 4.2525224685668945, "learning_rate": 0.0002, "loss": 1.5859, "step": 20220 }, { "epoch": 0.08, "grad_norm": 3.2170286178588867, "learning_rate": 0.0002, "loss": 1.5281, "step": 20230 }, { "epoch": 0.08, "grad_norm": 3.1276040077209473, "learning_rate": 0.0002, "loss": 1.5674, "step": 20240 }, { "epoch": 0.08, "grad_norm": 3.611945867538452, "learning_rate": 0.0002, "loss": 1.6653, "step": 20250 }, { "epoch": 0.08, "grad_norm": 2.925816059112549, "learning_rate": 0.0002, "loss": 1.428, "step": 20260 }, { "epoch": 0.08, "grad_norm": 3.281264305114746, "learning_rate": 0.0002, "loss": 1.3426, "step": 20270 }, { "epoch": 0.08, "grad_norm": 2.5161972045898438, "learning_rate": 0.0002, "loss": 1.3733, "step": 20280 }, { "epoch": 0.08, "grad_norm": 3.177931308746338, "learning_rate": 0.0002, "loss": 1.634, "step": 20290 }, { "epoch": 0.08, "grad_norm": 2.1616616249084473, "learning_rate": 0.0002, "loss": 1.2521, "step": 20300 }, { "epoch": 0.08, "grad_norm": 2.6312873363494873, "learning_rate": 0.0002, "loss": 1.3892, "step": 20310 }, { "epoch": 0.08, "grad_norm": 2.6951470375061035, "learning_rate": 0.0002, "loss": 1.5337, "step": 20320 }, { "epoch": 0.08, "grad_norm": 2.1044418811798096, "learning_rate": 0.0002, "loss": 1.4939, "step": 20330 }, { "epoch": 0.08, "grad_norm": 3.231621026992798, "learning_rate": 0.0002, "loss": 1.5929, "step": 20340 }, { "epoch": 0.08, "grad_norm": 3.492968797683716, "learning_rate": 0.0002, "loss": 1.6075, "step": 20350 }, { "epoch": 0.08, "grad_norm": 3.4546265602111816, "learning_rate": 0.0002, "loss": 1.6192, "step": 20360 }, { "epoch": 0.08, "grad_norm": 4.372480392456055, "learning_rate": 0.0002, "loss": 1.5822, "step": 20370 }, { "epoch": 0.08, "grad_norm": 1.9497356414794922, "learning_rate": 0.0002, "loss": 1.5961, "step": 20380 }, { "epoch": 0.08, "grad_norm": 1.9981340169906616, "learning_rate": 0.0002, "loss": 1.708, "step": 20390 }, { "epoch": 0.08, "grad_norm": 3.2291388511657715, "learning_rate": 0.0002, "loss": 1.6525, "step": 20400 }, { "epoch": 0.08, "grad_norm": 2.1867358684539795, "learning_rate": 0.0002, "loss": 1.4301, "step": 20410 }, { "epoch": 0.08, "grad_norm": 4.5304412841796875, "learning_rate": 0.0002, "loss": 1.5934, "step": 20420 }, { "epoch": 0.08, "grad_norm": 2.8474085330963135, "learning_rate": 0.0002, "loss": 1.7431, "step": 20430 }, { "epoch": 0.08, "grad_norm": 2.4116172790527344, "learning_rate": 0.0002, "loss": 1.6861, "step": 20440 }, { "epoch": 0.08, "grad_norm": 2.816406011581421, "learning_rate": 0.0002, "loss": 1.6954, "step": 20450 }, { "epoch": 0.08, "grad_norm": 3.7495949268341064, "learning_rate": 0.0002, "loss": 1.4205, "step": 20460 }, { "epoch": 0.08, "grad_norm": 2.1087911128997803, "learning_rate": 0.0002, "loss": 1.5772, "step": 20470 }, { "epoch": 0.08, "grad_norm": 2.4270660877227783, "learning_rate": 0.0002, "loss": 1.3565, "step": 20480 }, { "epoch": 0.08, "grad_norm": 1.8177647590637207, "learning_rate": 0.0002, "loss": 1.5809, "step": 20490 }, { "epoch": 0.08, "grad_norm": 2.0080275535583496, "learning_rate": 0.0002, "loss": 1.4661, "step": 20500 }, { "epoch": 0.08, "grad_norm": 2.5295910835266113, "learning_rate": 0.0002, "loss": 1.4103, "step": 20510 }, { "epoch": 0.08, "grad_norm": 2.4448461532592773, "learning_rate": 0.0002, "loss": 1.491, "step": 20520 }, { "epoch": 0.08, "grad_norm": 1.8285744190216064, "learning_rate": 0.0002, "loss": 1.4743, "step": 20530 }, { "epoch": 0.08, "grad_norm": 2.2105393409729004, "learning_rate": 0.0002, "loss": 1.5231, "step": 20540 }, { "epoch": 0.08, "grad_norm": 2.441657304763794, "learning_rate": 0.0002, "loss": 1.6045, "step": 20550 }, { "epoch": 0.08, "grad_norm": 3.024348258972168, "learning_rate": 0.0002, "loss": 1.6426, "step": 20560 }, { "epoch": 0.08, "grad_norm": 2.7351582050323486, "learning_rate": 0.0002, "loss": 1.5524, "step": 20570 }, { "epoch": 0.08, "grad_norm": 1.5487263202667236, "learning_rate": 0.0002, "loss": 1.8214, "step": 20580 }, { "epoch": 0.08, "grad_norm": 2.2704665660858154, "learning_rate": 0.0002, "loss": 1.6396, "step": 20590 }, { "epoch": 0.08, "grad_norm": 2.344797134399414, "learning_rate": 0.0002, "loss": 1.5647, "step": 20600 }, { "epoch": 0.08, "grad_norm": 2.859088897705078, "learning_rate": 0.0002, "loss": 1.3369, "step": 20610 }, { "epoch": 0.08, "grad_norm": 1.7964578866958618, "learning_rate": 0.0002, "loss": 1.5509, "step": 20620 }, { "epoch": 0.08, "grad_norm": 3.5163536071777344, "learning_rate": 0.0002, "loss": 1.556, "step": 20630 }, { "epoch": 0.08, "grad_norm": 3.417954444885254, "learning_rate": 0.0002, "loss": 1.4592, "step": 20640 }, { "epoch": 0.08, "grad_norm": 4.057839870452881, "learning_rate": 0.0002, "loss": 1.5862, "step": 20650 }, { "epoch": 0.08, "grad_norm": 3.009749174118042, "learning_rate": 0.0002, "loss": 1.4434, "step": 20660 }, { "epoch": 0.08, "grad_norm": 2.0098156929016113, "learning_rate": 0.0002, "loss": 1.4145, "step": 20670 }, { "epoch": 0.08, "grad_norm": 1.9579684734344482, "learning_rate": 0.0002, "loss": 1.4985, "step": 20680 }, { "epoch": 0.08, "grad_norm": 3.219374895095825, "learning_rate": 0.0002, "loss": 1.6395, "step": 20690 }, { "epoch": 0.08, "grad_norm": 3.151789665222168, "learning_rate": 0.0002, "loss": 1.6237, "step": 20700 }, { "epoch": 0.08, "grad_norm": 1.679736852645874, "learning_rate": 0.0002, "loss": 1.5945, "step": 20710 }, { "epoch": 0.08, "grad_norm": 2.7416634559631348, "learning_rate": 0.0002, "loss": 1.466, "step": 20720 }, { "epoch": 0.08, "grad_norm": 1.5389399528503418, "learning_rate": 0.0002, "loss": 1.5268, "step": 20730 }, { "epoch": 0.08, "grad_norm": 3.7668166160583496, "learning_rate": 0.0002, "loss": 1.4076, "step": 20740 }, { "epoch": 0.08, "grad_norm": 2.8062775135040283, "learning_rate": 0.0002, "loss": 1.5201, "step": 20750 }, { "epoch": 0.08, "grad_norm": 1.9682716131210327, "learning_rate": 0.0002, "loss": 1.5128, "step": 20760 }, { "epoch": 0.08, "grad_norm": 2.957693576812744, "learning_rate": 0.0002, "loss": 1.5974, "step": 20770 }, { "epoch": 0.08, "grad_norm": 3.497673511505127, "learning_rate": 0.0002, "loss": 1.6242, "step": 20780 }, { "epoch": 0.08, "grad_norm": 2.879254102706909, "learning_rate": 0.0002, "loss": 1.4364, "step": 20790 }, { "epoch": 0.08, "grad_norm": 2.3963420391082764, "learning_rate": 0.0002, "loss": 1.7348, "step": 20800 }, { "epoch": 0.08, "grad_norm": 4.119241714477539, "learning_rate": 0.0002, "loss": 1.6016, "step": 20810 }, { "epoch": 0.08, "grad_norm": 2.8672714233398438, "learning_rate": 0.0002, "loss": 1.5695, "step": 20820 }, { "epoch": 0.08, "grad_norm": 3.4502999782562256, "learning_rate": 0.0002, "loss": 1.5152, "step": 20830 }, { "epoch": 0.08, "grad_norm": 1.6936084032058716, "learning_rate": 0.0002, "loss": 1.4613, "step": 20840 }, { "epoch": 0.08, "grad_norm": 3.240034818649292, "learning_rate": 0.0002, "loss": 1.2728, "step": 20850 }, { "epoch": 0.08, "grad_norm": 2.1016886234283447, "learning_rate": 0.0002, "loss": 1.4538, "step": 20860 }, { "epoch": 0.08, "grad_norm": 1.887285828590393, "learning_rate": 0.0002, "loss": 1.5576, "step": 20870 }, { "epoch": 0.09, "grad_norm": 2.3200836181640625, "learning_rate": 0.0002, "loss": 1.5641, "step": 20880 }, { "epoch": 0.09, "grad_norm": 2.295924663543701, "learning_rate": 0.0002, "loss": 1.402, "step": 20890 }, { "epoch": 0.09, "grad_norm": 1.42788827419281, "learning_rate": 0.0002, "loss": 1.4235, "step": 20900 }, { "epoch": 0.09, "grad_norm": 3.1019787788391113, "learning_rate": 0.0002, "loss": 1.5782, "step": 20910 }, { "epoch": 0.09, "grad_norm": 3.2448296546936035, "learning_rate": 0.0002, "loss": 1.3784, "step": 20920 }, { "epoch": 0.09, "grad_norm": 1.0034680366516113, "learning_rate": 0.0002, "loss": 1.3278, "step": 20930 }, { "epoch": 0.09, "grad_norm": 3.555109739303589, "learning_rate": 0.0002, "loss": 1.4373, "step": 20940 }, { "epoch": 0.09, "grad_norm": 3.0464320182800293, "learning_rate": 0.0002, "loss": 1.5359, "step": 20950 }, { "epoch": 0.09, "grad_norm": 2.8370563983917236, "learning_rate": 0.0002, "loss": 1.7603, "step": 20960 }, { "epoch": 0.09, "grad_norm": 3.100341796875, "learning_rate": 0.0002, "loss": 1.6253, "step": 20970 }, { "epoch": 0.09, "grad_norm": 2.0944790840148926, "learning_rate": 0.0002, "loss": 1.5768, "step": 20980 }, { "epoch": 0.09, "grad_norm": 2.0887973308563232, "learning_rate": 0.0002, "loss": 1.3599, "step": 20990 }, { "epoch": 0.09, "grad_norm": 3.2522614002227783, "learning_rate": 0.0002, "loss": 1.3251, "step": 21000 }, { "epoch": 0.09, "grad_norm": 3.947356939315796, "learning_rate": 0.0002, "loss": 1.5363, "step": 21010 }, { "epoch": 0.09, "grad_norm": 2.585414171218872, "learning_rate": 0.0002, "loss": 1.4833, "step": 21020 }, { "epoch": 0.09, "grad_norm": 2.2844738960266113, "learning_rate": 0.0002, "loss": 1.4605, "step": 21030 }, { "epoch": 0.09, "grad_norm": 3.700784921646118, "learning_rate": 0.0002, "loss": 1.5932, "step": 21040 }, { "epoch": 0.09, "grad_norm": 2.0960137844085693, "learning_rate": 0.0002, "loss": 1.3543, "step": 21050 }, { "epoch": 0.09, "grad_norm": 3.5662992000579834, "learning_rate": 0.0002, "loss": 1.2183, "step": 21060 }, { "epoch": 0.09, "grad_norm": 1.7961903810501099, "learning_rate": 0.0002, "loss": 1.6581, "step": 21070 }, { "epoch": 0.09, "grad_norm": 3.740849256515503, "learning_rate": 0.0002, "loss": 1.7261, "step": 21080 }, { "epoch": 0.09, "grad_norm": 3.0442214012145996, "learning_rate": 0.0002, "loss": 1.347, "step": 21090 }, { "epoch": 0.09, "grad_norm": 2.6670961380004883, "learning_rate": 0.0002, "loss": 1.4805, "step": 21100 }, { "epoch": 0.09, "grad_norm": 2.0388131141662598, "learning_rate": 0.0002, "loss": 1.5799, "step": 21110 }, { "epoch": 0.09, "grad_norm": 2.050585985183716, "learning_rate": 0.0002, "loss": 1.4146, "step": 21120 }, { "epoch": 0.09, "grad_norm": 3.6752517223358154, "learning_rate": 0.0002, "loss": 1.1872, "step": 21130 }, { "epoch": 0.09, "grad_norm": 2.330439329147339, "learning_rate": 0.0002, "loss": 1.5321, "step": 21140 }, { "epoch": 0.09, "grad_norm": 2.5847272872924805, "learning_rate": 0.0002, "loss": 1.511, "step": 21150 }, { "epoch": 0.09, "grad_norm": 2.0053324699401855, "learning_rate": 0.0002, "loss": 1.4586, "step": 21160 }, { "epoch": 0.09, "grad_norm": 1.679966688156128, "learning_rate": 0.0002, "loss": 1.4632, "step": 21170 }, { "epoch": 0.09, "grad_norm": 1.7359073162078857, "learning_rate": 0.0002, "loss": 1.5903, "step": 21180 }, { "epoch": 0.09, "grad_norm": 2.581209421157837, "learning_rate": 0.0002, "loss": 1.325, "step": 21190 }, { "epoch": 0.09, "grad_norm": 3.5984420776367188, "learning_rate": 0.0002, "loss": 1.5352, "step": 21200 }, { "epoch": 0.09, "grad_norm": 2.524233102798462, "learning_rate": 0.0002, "loss": 1.4417, "step": 21210 }, { "epoch": 0.09, "grad_norm": 3.2444841861724854, "learning_rate": 0.0002, "loss": 1.5986, "step": 21220 }, { "epoch": 0.09, "grad_norm": 2.0444600582122803, "learning_rate": 0.0002, "loss": 1.5358, "step": 21230 }, { "epoch": 0.09, "grad_norm": 4.122199058532715, "learning_rate": 0.0002, "loss": 1.4526, "step": 21240 }, { "epoch": 0.09, "grad_norm": 2.377103090286255, "learning_rate": 0.0002, "loss": 1.7444, "step": 21250 }, { "epoch": 0.09, "grad_norm": 1.877272129058838, "learning_rate": 0.0002, "loss": 1.2683, "step": 21260 }, { "epoch": 0.09, "grad_norm": 1.761972427368164, "learning_rate": 0.0002, "loss": 1.4445, "step": 21270 }, { "epoch": 0.09, "grad_norm": 2.450269937515259, "learning_rate": 0.0002, "loss": 1.4662, "step": 21280 }, { "epoch": 0.09, "grad_norm": 2.899463176727295, "learning_rate": 0.0002, "loss": 1.7665, "step": 21290 }, { "epoch": 0.09, "grad_norm": 2.7675092220306396, "learning_rate": 0.0002, "loss": 1.6117, "step": 21300 }, { "epoch": 0.09, "grad_norm": 1.920156478881836, "learning_rate": 0.0002, "loss": 1.4594, "step": 21310 }, { "epoch": 0.09, "grad_norm": 3.6285994052886963, "learning_rate": 0.0002, "loss": 1.5315, "step": 21320 }, { "epoch": 0.09, "grad_norm": 4.685689449310303, "learning_rate": 0.0002, "loss": 1.8249, "step": 21330 }, { "epoch": 0.09, "grad_norm": 2.5909087657928467, "learning_rate": 0.0002, "loss": 1.647, "step": 21340 }, { "epoch": 0.09, "grad_norm": 2.067603826522827, "learning_rate": 0.0002, "loss": 1.5418, "step": 21350 }, { "epoch": 0.09, "grad_norm": 2.1298646926879883, "learning_rate": 0.0002, "loss": 1.5449, "step": 21360 }, { "epoch": 0.09, "grad_norm": 1.8161571025848389, "learning_rate": 0.0002, "loss": 1.3416, "step": 21370 }, { "epoch": 0.09, "grad_norm": 1.9408620595932007, "learning_rate": 0.0002, "loss": 1.2211, "step": 21380 }, { "epoch": 0.09, "grad_norm": 2.45986008644104, "learning_rate": 0.0002, "loss": 1.4904, "step": 21390 }, { "epoch": 0.09, "grad_norm": 2.611638069152832, "learning_rate": 0.0002, "loss": 1.5136, "step": 21400 }, { "epoch": 0.09, "grad_norm": 3.074429750442505, "learning_rate": 0.0002, "loss": 1.667, "step": 21410 }, { "epoch": 0.09, "grad_norm": 2.4365861415863037, "learning_rate": 0.0002, "loss": 1.5659, "step": 21420 }, { "epoch": 0.09, "grad_norm": 2.2357661724090576, "learning_rate": 0.0002, "loss": 1.3014, "step": 21430 }, { "epoch": 0.09, "grad_norm": 1.7818875312805176, "learning_rate": 0.0002, "loss": 1.3184, "step": 21440 }, { "epoch": 0.09, "grad_norm": 3.1236214637756348, "learning_rate": 0.0002, "loss": 1.544, "step": 21450 }, { "epoch": 0.09, "grad_norm": 2.003364086151123, "learning_rate": 0.0002, "loss": 1.6213, "step": 21460 }, { "epoch": 0.09, "grad_norm": 2.223785161972046, "learning_rate": 0.0002, "loss": 1.3445, "step": 21470 }, { "epoch": 0.09, "grad_norm": 2.8523457050323486, "learning_rate": 0.0002, "loss": 1.366, "step": 21480 }, { "epoch": 0.09, "grad_norm": 2.1524741649627686, "learning_rate": 0.0002, "loss": 1.3678, "step": 21490 }, { "epoch": 0.09, "grad_norm": 2.4504830837249756, "learning_rate": 0.0002, "loss": 1.5763, "step": 21500 }, { "epoch": 0.09, "grad_norm": 2.339215040206909, "learning_rate": 0.0002, "loss": 1.4468, "step": 21510 }, { "epoch": 0.09, "grad_norm": 2.1418850421905518, "learning_rate": 0.0002, "loss": 1.8327, "step": 21520 }, { "epoch": 0.09, "grad_norm": 2.0039026737213135, "learning_rate": 0.0002, "loss": 1.6623, "step": 21530 }, { "epoch": 0.09, "grad_norm": 3.689967393875122, "learning_rate": 0.0002, "loss": 1.7973, "step": 21540 }, { "epoch": 0.09, "grad_norm": 2.563551664352417, "learning_rate": 0.0002, "loss": 1.5878, "step": 21550 }, { "epoch": 0.09, "grad_norm": 2.257265090942383, "learning_rate": 0.0002, "loss": 1.5873, "step": 21560 }, { "epoch": 0.09, "grad_norm": 1.830036997795105, "learning_rate": 0.0002, "loss": 1.5054, "step": 21570 }, { "epoch": 0.09, "grad_norm": 3.1557908058166504, "learning_rate": 0.0002, "loss": 1.4209, "step": 21580 }, { "epoch": 0.09, "grad_norm": 2.710627317428589, "learning_rate": 0.0002, "loss": 1.6344, "step": 21590 }, { "epoch": 0.09, "grad_norm": 1.8035508394241333, "learning_rate": 0.0002, "loss": 1.5315, "step": 21600 }, { "epoch": 0.09, "grad_norm": 1.996025562286377, "learning_rate": 0.0002, "loss": 1.6818, "step": 21610 }, { "epoch": 0.09, "grad_norm": 1.9488850831985474, "learning_rate": 0.0002, "loss": 1.5451, "step": 21620 }, { "epoch": 0.09, "grad_norm": 4.077146053314209, "learning_rate": 0.0002, "loss": 1.5427, "step": 21630 }, { "epoch": 0.09, "grad_norm": 1.880171298980713, "learning_rate": 0.0002, "loss": 1.6125, "step": 21640 }, { "epoch": 0.09, "grad_norm": 2.206751585006714, "learning_rate": 0.0002, "loss": 1.4269, "step": 21650 }, { "epoch": 0.09, "grad_norm": 2.0289411544799805, "learning_rate": 0.0002, "loss": 1.6254, "step": 21660 }, { "epoch": 0.09, "grad_norm": 2.528268575668335, "learning_rate": 0.0002, "loss": 1.5362, "step": 21670 }, { "epoch": 0.09, "grad_norm": 1.6276005506515503, "learning_rate": 0.0002, "loss": 1.5525, "step": 21680 }, { "epoch": 0.09, "grad_norm": 2.424776077270508, "learning_rate": 0.0002, "loss": 1.3949, "step": 21690 }, { "epoch": 0.09, "grad_norm": 2.37109112739563, "learning_rate": 0.0002, "loss": 1.4837, "step": 21700 }, { "epoch": 0.09, "grad_norm": 3.3874785900115967, "learning_rate": 0.0002, "loss": 1.6525, "step": 21710 }, { "epoch": 0.09, "grad_norm": 1.5708048343658447, "learning_rate": 0.0002, "loss": 1.547, "step": 21720 }, { "epoch": 0.09, "grad_norm": 1.8248467445373535, "learning_rate": 0.0002, "loss": 1.4757, "step": 21730 }, { "epoch": 0.09, "grad_norm": 2.441685914993286, "learning_rate": 0.0002, "loss": 1.3707, "step": 21740 }, { "epoch": 0.09, "grad_norm": 2.5369997024536133, "learning_rate": 0.0002, "loss": 1.6284, "step": 21750 }, { "epoch": 0.09, "grad_norm": 2.0539841651916504, "learning_rate": 0.0002, "loss": 1.3319, "step": 21760 }, { "epoch": 0.09, "grad_norm": 2.6716151237487793, "learning_rate": 0.0002, "loss": 1.5776, "step": 21770 }, { "epoch": 0.09, "grad_norm": 2.777019739151001, "learning_rate": 0.0002, "loss": 1.5277, "step": 21780 }, { "epoch": 0.09, "grad_norm": 2.723928451538086, "learning_rate": 0.0002, "loss": 1.4624, "step": 21790 }, { "epoch": 0.09, "grad_norm": 4.3509202003479, "learning_rate": 0.0002, "loss": 1.207, "step": 21800 }, { "epoch": 0.09, "grad_norm": 2.9509129524230957, "learning_rate": 0.0002, "loss": 1.517, "step": 21810 }, { "epoch": 0.09, "grad_norm": 3.923332452774048, "learning_rate": 0.0002, "loss": 1.5657, "step": 21820 }, { "epoch": 0.09, "grad_norm": 3.7461423873901367, "learning_rate": 0.0002, "loss": 1.3321, "step": 21830 }, { "epoch": 0.09, "grad_norm": 1.8373053073883057, "learning_rate": 0.0002, "loss": 1.3636, "step": 21840 }, { "epoch": 0.09, "grad_norm": 3.238175868988037, "learning_rate": 0.0002, "loss": 1.5888, "step": 21850 }, { "epoch": 0.09, "grad_norm": 1.2444688081741333, "learning_rate": 0.0002, "loss": 1.5031, "step": 21860 }, { "epoch": 0.09, "grad_norm": 2.274013042449951, "learning_rate": 0.0002, "loss": 1.5547, "step": 21870 }, { "epoch": 0.09, "grad_norm": 1.9657914638519287, "learning_rate": 0.0002, "loss": 1.5452, "step": 21880 }, { "epoch": 0.09, "grad_norm": 3.8793208599090576, "learning_rate": 0.0002, "loss": 1.566, "step": 21890 }, { "epoch": 0.09, "grad_norm": 3.4360015392303467, "learning_rate": 0.0002, "loss": 1.4933, "step": 21900 }, { "epoch": 0.09, "grad_norm": 2.6426327228546143, "learning_rate": 0.0002, "loss": 1.6302, "step": 21910 }, { "epoch": 0.09, "grad_norm": 2.18123459815979, "learning_rate": 0.0002, "loss": 1.438, "step": 21920 }, { "epoch": 0.09, "grad_norm": 2.133388042449951, "learning_rate": 0.0002, "loss": 1.5506, "step": 21930 }, { "epoch": 0.09, "grad_norm": 2.7117908000946045, "learning_rate": 0.0002, "loss": 1.6952, "step": 21940 }, { "epoch": 0.09, "grad_norm": 2.1655972003936768, "learning_rate": 0.0002, "loss": 1.6065, "step": 21950 }, { "epoch": 0.09, "grad_norm": 2.487818479537964, "learning_rate": 0.0002, "loss": 1.6117, "step": 21960 }, { "epoch": 0.09, "grad_norm": 3.2402184009552, "learning_rate": 0.0002, "loss": 1.4825, "step": 21970 }, { "epoch": 0.09, "grad_norm": 2.790008306503296, "learning_rate": 0.0002, "loss": 1.7678, "step": 21980 }, { "epoch": 0.09, "grad_norm": 3.56528639793396, "learning_rate": 0.0002, "loss": 1.6889, "step": 21990 }, { "epoch": 0.09, "grad_norm": 2.739330530166626, "learning_rate": 0.0002, "loss": 1.5987, "step": 22000 }, { "epoch": 0.09, "grad_norm": 13.49658203125, "learning_rate": 0.0002, "loss": 1.7708, "step": 22010 }, { "epoch": 0.09, "grad_norm": 1.6208460330963135, "learning_rate": 0.0002, "loss": 1.4479, "step": 22020 }, { "epoch": 0.09, "grad_norm": 2.1476781368255615, "learning_rate": 0.0002, "loss": 1.5053, "step": 22030 }, { "epoch": 0.09, "grad_norm": 2.778850793838501, "learning_rate": 0.0002, "loss": 1.3964, "step": 22040 }, { "epoch": 0.09, "grad_norm": 3.5056798458099365, "learning_rate": 0.0002, "loss": 1.6374, "step": 22050 }, { "epoch": 0.09, "grad_norm": 2.5884153842926025, "learning_rate": 0.0002, "loss": 1.6403, "step": 22060 }, { "epoch": 0.09, "grad_norm": 3.1891894340515137, "learning_rate": 0.0002, "loss": 1.1586, "step": 22070 }, { "epoch": 0.09, "grad_norm": 3.191912889480591, "learning_rate": 0.0002, "loss": 1.6592, "step": 22080 }, { "epoch": 0.09, "grad_norm": 1.748806118965149, "learning_rate": 0.0002, "loss": 1.4791, "step": 22090 }, { "epoch": 0.09, "grad_norm": 2.3810360431671143, "learning_rate": 0.0002, "loss": 1.4568, "step": 22100 }, { "epoch": 0.09, "grad_norm": 4.14363956451416, "learning_rate": 0.0002, "loss": 1.4352, "step": 22110 }, { "epoch": 0.09, "grad_norm": 2.9598772525787354, "learning_rate": 0.0002, "loss": 1.5141, "step": 22120 }, { "epoch": 0.09, "grad_norm": 2.258070707321167, "learning_rate": 0.0002, "loss": 1.4462, "step": 22130 }, { "epoch": 0.09, "grad_norm": 1.628442406654358, "learning_rate": 0.0002, "loss": 1.6553, "step": 22140 }, { "epoch": 0.09, "grad_norm": 2.155829429626465, "learning_rate": 0.0002, "loss": 1.4399, "step": 22150 }, { "epoch": 0.09, "grad_norm": 2.377225399017334, "learning_rate": 0.0002, "loss": 1.5862, "step": 22160 }, { "epoch": 0.09, "grad_norm": 1.7903268337249756, "learning_rate": 0.0002, "loss": 1.2576, "step": 22170 }, { "epoch": 0.09, "grad_norm": 4.4213056564331055, "learning_rate": 0.0002, "loss": 1.7064, "step": 22180 }, { "epoch": 0.09, "grad_norm": 4.336287021636963, "learning_rate": 0.0002, "loss": 1.6124, "step": 22190 }, { "epoch": 0.09, "grad_norm": 1.3479605913162231, "learning_rate": 0.0002, "loss": 1.6899, "step": 22200 }, { "epoch": 0.09, "grad_norm": 2.404656410217285, "learning_rate": 0.0002, "loss": 1.6828, "step": 22210 }, { "epoch": 0.09, "grad_norm": 1.7797189950942993, "learning_rate": 0.0002, "loss": 1.5148, "step": 22220 }, { "epoch": 0.09, "grad_norm": 2.2946231365203857, "learning_rate": 0.0002, "loss": 1.7349, "step": 22230 }, { "epoch": 0.09, "grad_norm": 1.8872685432434082, "learning_rate": 0.0002, "loss": 1.597, "step": 22240 }, { "epoch": 0.09, "grad_norm": 2.5361833572387695, "learning_rate": 0.0002, "loss": 1.6135, "step": 22250 }, { "epoch": 0.09, "grad_norm": 4.1546630859375, "learning_rate": 0.0002, "loss": 1.4194, "step": 22260 }, { "epoch": 0.09, "grad_norm": 3.7161126136779785, "learning_rate": 0.0002, "loss": 1.6293, "step": 22270 }, { "epoch": 0.09, "grad_norm": 1.8825205564498901, "learning_rate": 0.0002, "loss": 1.3378, "step": 22280 }, { "epoch": 0.09, "grad_norm": 2.0942957401275635, "learning_rate": 0.0002, "loss": 1.5386, "step": 22290 }, { "epoch": 0.09, "grad_norm": 2.444286346435547, "learning_rate": 0.0002, "loss": 1.4191, "step": 22300 }, { "epoch": 0.09, "grad_norm": 3.893162727355957, "learning_rate": 0.0002, "loss": 1.544, "step": 22310 }, { "epoch": 0.09, "grad_norm": 2.6354477405548096, "learning_rate": 0.0002, "loss": 1.6192, "step": 22320 }, { "epoch": 0.09, "grad_norm": 2.3263254165649414, "learning_rate": 0.0002, "loss": 1.2909, "step": 22330 }, { "epoch": 0.09, "grad_norm": 2.41375732421875, "learning_rate": 0.0002, "loss": 1.2361, "step": 22340 }, { "epoch": 0.09, "grad_norm": 2.217876434326172, "learning_rate": 0.0002, "loss": 1.6688, "step": 22350 }, { "epoch": 0.09, "grad_norm": 3.1612367630004883, "learning_rate": 0.0002, "loss": 1.7591, "step": 22360 }, { "epoch": 0.09, "grad_norm": 2.2575018405914307, "learning_rate": 0.0002, "loss": 1.5844, "step": 22370 }, { "epoch": 0.09, "grad_norm": 2.767517566680908, "learning_rate": 0.0002, "loss": 1.5962, "step": 22380 }, { "epoch": 0.09, "grad_norm": 1.79093337059021, "learning_rate": 0.0002, "loss": 1.4767, "step": 22390 }, { "epoch": 0.09, "grad_norm": 2.5680880546569824, "learning_rate": 0.0002, "loss": 1.4954, "step": 22400 }, { "epoch": 0.09, "grad_norm": 2.1213128566741943, "learning_rate": 0.0002, "loss": 1.4934, "step": 22410 }, { "epoch": 0.09, "grad_norm": 1.625685214996338, "learning_rate": 0.0002, "loss": 1.4496, "step": 22420 }, { "epoch": 0.09, "grad_norm": 2.5826354026794434, "learning_rate": 0.0002, "loss": 1.8215, "step": 22430 }, { "epoch": 0.09, "grad_norm": 1.967275619506836, "learning_rate": 0.0002, "loss": 1.5526, "step": 22440 }, { "epoch": 0.09, "grad_norm": 3.0394294261932373, "learning_rate": 0.0002, "loss": 1.6962, "step": 22450 }, { "epoch": 0.09, "grad_norm": 2.9036381244659424, "learning_rate": 0.0002, "loss": 1.5944, "step": 22460 }, { "epoch": 0.09, "grad_norm": 2.465207576751709, "learning_rate": 0.0002, "loss": 1.5984, "step": 22470 }, { "epoch": 0.09, "grad_norm": 2.9212534427642822, "learning_rate": 0.0002, "loss": 1.3183, "step": 22480 }, { "epoch": 0.09, "grad_norm": 2.6703858375549316, "learning_rate": 0.0002, "loss": 1.3984, "step": 22490 }, { "epoch": 0.09, "grad_norm": 2.3689444065093994, "learning_rate": 0.0002, "loss": 1.7242, "step": 22500 }, { "epoch": 0.09, "grad_norm": 2.9894492626190186, "learning_rate": 0.0002, "loss": 1.6942, "step": 22510 }, { "epoch": 0.09, "grad_norm": 4.237354755401611, "learning_rate": 0.0002, "loss": 1.6334, "step": 22520 }, { "epoch": 0.09, "grad_norm": 3.2268757820129395, "learning_rate": 0.0002, "loss": 1.4343, "step": 22530 }, { "epoch": 0.09, "grad_norm": 4.3955488204956055, "learning_rate": 0.0002, "loss": 1.541, "step": 22540 }, { "epoch": 0.09, "grad_norm": 2.3360393047332764, "learning_rate": 0.0002, "loss": 1.5227, "step": 22550 }, { "epoch": 0.09, "grad_norm": 1.4804788827896118, "learning_rate": 0.0002, "loss": 1.5404, "step": 22560 }, { "epoch": 0.09, "grad_norm": 2.270753860473633, "learning_rate": 0.0002, "loss": 1.7728, "step": 22570 }, { "epoch": 0.09, "grad_norm": 3.8624908924102783, "learning_rate": 0.0002, "loss": 1.6839, "step": 22580 }, { "epoch": 0.09, "grad_norm": 1.8023661375045776, "learning_rate": 0.0002, "loss": 1.4771, "step": 22590 }, { "epoch": 0.09, "grad_norm": 4.401796340942383, "learning_rate": 0.0002, "loss": 1.5656, "step": 22600 }, { "epoch": 0.09, "grad_norm": 1.787070631980896, "learning_rate": 0.0002, "loss": 1.6847, "step": 22610 }, { "epoch": 0.09, "grad_norm": 4.723184585571289, "learning_rate": 0.0002, "loss": 1.6881, "step": 22620 }, { "epoch": 0.09, "grad_norm": 2.2750236988067627, "learning_rate": 0.0002, "loss": 1.4944, "step": 22630 }, { "epoch": 0.09, "grad_norm": 2.9199929237365723, "learning_rate": 0.0002, "loss": 1.5107, "step": 22640 }, { "epoch": 0.09, "grad_norm": 3.237804651260376, "learning_rate": 0.0002, "loss": 1.6394, "step": 22650 }, { "epoch": 0.09, "grad_norm": 2.591519832611084, "learning_rate": 0.0002, "loss": 1.3896, "step": 22660 }, { "epoch": 0.09, "grad_norm": 2.2180211544036865, "learning_rate": 0.0002, "loss": 1.5916, "step": 22670 }, { "epoch": 0.09, "grad_norm": 2.29297137260437, "learning_rate": 0.0002, "loss": 1.698, "step": 22680 }, { "epoch": 0.09, "grad_norm": 3.1152491569519043, "learning_rate": 0.0002, "loss": 1.5477, "step": 22690 }, { "epoch": 0.09, "grad_norm": 3.1873233318328857, "learning_rate": 0.0002, "loss": 1.725, "step": 22700 }, { "epoch": 0.09, "grad_norm": 2.51530385017395, "learning_rate": 0.0002, "loss": 1.4417, "step": 22710 }, { "epoch": 0.09, "grad_norm": 2.501972198486328, "learning_rate": 0.0002, "loss": 1.4114, "step": 22720 }, { "epoch": 0.09, "grad_norm": 1.1667064428329468, "learning_rate": 0.0002, "loss": 1.5824, "step": 22730 }, { "epoch": 0.09, "grad_norm": 2.231757164001465, "learning_rate": 0.0002, "loss": 1.3173, "step": 22740 }, { "epoch": 0.09, "grad_norm": 2.8371810913085938, "learning_rate": 0.0002, "loss": 1.5535, "step": 22750 }, { "epoch": 0.09, "grad_norm": 1.5876264572143555, "learning_rate": 0.0002, "loss": 1.7882, "step": 22760 }, { "epoch": 0.09, "grad_norm": 1.7127479314804077, "learning_rate": 0.0002, "loss": 1.4526, "step": 22770 }, { "epoch": 0.09, "grad_norm": 2.373178482055664, "learning_rate": 0.0002, "loss": 1.7779, "step": 22780 }, { "epoch": 0.09, "grad_norm": 4.837076187133789, "learning_rate": 0.0002, "loss": 1.4779, "step": 22790 }, { "epoch": 0.09, "grad_norm": 2.684434175491333, "learning_rate": 0.0002, "loss": 1.5706, "step": 22800 }, { "epoch": 0.09, "grad_norm": 2.192366361618042, "learning_rate": 0.0002, "loss": 1.727, "step": 22810 }, { "epoch": 0.09, "grad_norm": 1.3108134269714355, "learning_rate": 0.0002, "loss": 1.3705, "step": 22820 }, { "epoch": 0.09, "grad_norm": 1.9477019309997559, "learning_rate": 0.0002, "loss": 1.6121, "step": 22830 }, { "epoch": 0.09, "grad_norm": 3.0223259925842285, "learning_rate": 0.0002, "loss": 1.5438, "step": 22840 }, { "epoch": 0.09, "grad_norm": 3.236264705657959, "learning_rate": 0.0002, "loss": 1.5126, "step": 22850 }, { "epoch": 0.09, "grad_norm": 2.919511556625366, "learning_rate": 0.0002, "loss": 1.5854, "step": 22860 }, { "epoch": 0.09, "grad_norm": 2.3742241859436035, "learning_rate": 0.0002, "loss": 1.7764, "step": 22870 }, { "epoch": 0.09, "grad_norm": 2.6880414485931396, "learning_rate": 0.0002, "loss": 1.5567, "step": 22880 }, { "epoch": 0.09, "grad_norm": 2.693965435028076, "learning_rate": 0.0002, "loss": 1.3707, "step": 22890 }, { "epoch": 0.09, "grad_norm": 2.743278980255127, "learning_rate": 0.0002, "loss": 1.4073, "step": 22900 }, { "epoch": 0.09, "grad_norm": 2.481567621231079, "learning_rate": 0.0002, "loss": 1.4729, "step": 22910 }, { "epoch": 0.09, "grad_norm": 2.075455904006958, "learning_rate": 0.0002, "loss": 1.5265, "step": 22920 }, { "epoch": 0.09, "grad_norm": 2.4448812007904053, "learning_rate": 0.0002, "loss": 1.3036, "step": 22930 }, { "epoch": 0.09, "grad_norm": 2.8133702278137207, "learning_rate": 0.0002, "loss": 1.5327, "step": 22940 }, { "epoch": 0.09, "grad_norm": 3.1014351844787598, "learning_rate": 0.0002, "loss": 1.5342, "step": 22950 }, { "epoch": 0.09, "grad_norm": 3.4370064735412598, "learning_rate": 0.0002, "loss": 1.58, "step": 22960 }, { "epoch": 0.09, "grad_norm": 1.8385107517242432, "learning_rate": 0.0002, "loss": 1.3473, "step": 22970 }, { "epoch": 0.09, "grad_norm": 2.893556833267212, "learning_rate": 0.0002, "loss": 1.3589, "step": 22980 }, { "epoch": 0.09, "grad_norm": 2.3741378784179688, "learning_rate": 0.0002, "loss": 1.5588, "step": 22990 }, { "epoch": 0.09, "grad_norm": 2.186066150665283, "learning_rate": 0.0002, "loss": 1.8099, "step": 23000 }, { "epoch": 0.09, "grad_norm": 3.3065719604492188, "learning_rate": 0.0002, "loss": 1.8408, "step": 23010 }, { "epoch": 0.09, "grad_norm": 3.401256799697876, "learning_rate": 0.0002, "loss": 1.5238, "step": 23020 }, { "epoch": 0.09, "grad_norm": 3.7196028232574463, "learning_rate": 0.0002, "loss": 1.4655, "step": 23030 }, { "epoch": 0.09, "grad_norm": 2.713559627532959, "learning_rate": 0.0002, "loss": 1.7203, "step": 23040 }, { "epoch": 0.09, "grad_norm": 1.9281628131866455, "learning_rate": 0.0002, "loss": 1.7028, "step": 23050 }, { "epoch": 0.09, "grad_norm": 1.3508598804473877, "learning_rate": 0.0002, "loss": 1.5878, "step": 23060 }, { "epoch": 0.09, "grad_norm": 1.8386223316192627, "learning_rate": 0.0002, "loss": 1.6766, "step": 23070 }, { "epoch": 0.09, "grad_norm": 2.8274013996124268, "learning_rate": 0.0002, "loss": 1.4878, "step": 23080 }, { "epoch": 0.09, "grad_norm": 5.4335503578186035, "learning_rate": 0.0002, "loss": 1.6533, "step": 23090 }, { "epoch": 0.09, "grad_norm": 2.1645357608795166, "learning_rate": 0.0002, "loss": 1.7322, "step": 23100 }, { "epoch": 0.09, "grad_norm": 1.704575538635254, "learning_rate": 0.0002, "loss": 1.4802, "step": 23110 }, { "epoch": 0.09, "grad_norm": 3.4086132049560547, "learning_rate": 0.0002, "loss": 1.5212, "step": 23120 }, { "epoch": 0.09, "grad_norm": 3.140316963195801, "learning_rate": 0.0002, "loss": 1.5924, "step": 23130 }, { "epoch": 0.09, "grad_norm": 2.47487473487854, "learning_rate": 0.0002, "loss": 1.6072, "step": 23140 }, { "epoch": 0.09, "grad_norm": 2.0287506580352783, "learning_rate": 0.0002, "loss": 1.75, "step": 23150 }, { "epoch": 0.09, "grad_norm": 1.1743899583816528, "learning_rate": 0.0002, "loss": 1.4403, "step": 23160 }, { "epoch": 0.09, "grad_norm": 2.1042797565460205, "learning_rate": 0.0002, "loss": 1.8692, "step": 23170 }, { "epoch": 0.09, "grad_norm": 2.710587501525879, "learning_rate": 0.0002, "loss": 1.3911, "step": 23180 }, { "epoch": 0.09, "grad_norm": 8.590960502624512, "learning_rate": 0.0002, "loss": 1.3917, "step": 23190 }, { "epoch": 0.09, "grad_norm": 2.526578903198242, "learning_rate": 0.0002, "loss": 1.4335, "step": 23200 }, { "epoch": 0.09, "grad_norm": 3.44848370552063, "learning_rate": 0.0002, "loss": 1.3326, "step": 23210 }, { "epoch": 0.09, "grad_norm": 2.147181987762451, "learning_rate": 0.0002, "loss": 1.6341, "step": 23220 }, { "epoch": 0.09, "grad_norm": 1.0689388513565063, "learning_rate": 0.0002, "loss": 1.3477, "step": 23230 }, { "epoch": 0.09, "grad_norm": 4.590764999389648, "learning_rate": 0.0002, "loss": 1.4378, "step": 23240 }, { "epoch": 0.09, "grad_norm": 1.771703839302063, "learning_rate": 0.0002, "loss": 1.7286, "step": 23250 }, { "epoch": 0.09, "grad_norm": 2.675457000732422, "learning_rate": 0.0002, "loss": 1.2107, "step": 23260 }, { "epoch": 0.09, "grad_norm": 1.8442515134811401, "learning_rate": 0.0002, "loss": 1.4756, "step": 23270 }, { "epoch": 0.09, "grad_norm": 3.8142261505126953, "learning_rate": 0.0002, "loss": 1.5792, "step": 23280 }, { "epoch": 0.09, "grad_norm": 2.7648072242736816, "learning_rate": 0.0002, "loss": 1.6792, "step": 23290 }, { "epoch": 0.09, "grad_norm": 2.780097723007202, "learning_rate": 0.0002, "loss": 1.5769, "step": 23300 }, { "epoch": 0.09, "grad_norm": 2.0253586769104004, "learning_rate": 0.0002, "loss": 1.6311, "step": 23310 }, { "epoch": 0.09, "grad_norm": 3.4930922985076904, "learning_rate": 0.0002, "loss": 1.7065, "step": 23320 }, { "epoch": 0.09, "grad_norm": 2.8152012825012207, "learning_rate": 0.0002, "loss": 1.717, "step": 23330 }, { "epoch": 0.1, "grad_norm": 4.196621894836426, "learning_rate": 0.0002, "loss": 1.5189, "step": 23340 }, { "epoch": 0.1, "grad_norm": 2.2498488426208496, "learning_rate": 0.0002, "loss": 1.4575, "step": 23350 }, { "epoch": 0.1, "grad_norm": 1.8835644721984863, "learning_rate": 0.0002, "loss": 1.5069, "step": 23360 }, { "epoch": 0.1, "grad_norm": 2.3593714237213135, "learning_rate": 0.0002, "loss": 1.5691, "step": 23370 }, { "epoch": 0.1, "grad_norm": 2.3535311222076416, "learning_rate": 0.0002, "loss": 1.4176, "step": 23380 }, { "epoch": 0.1, "grad_norm": 2.4292616844177246, "learning_rate": 0.0002, "loss": 1.4247, "step": 23390 }, { "epoch": 0.1, "grad_norm": 3.292191982269287, "learning_rate": 0.0002, "loss": 1.5038, "step": 23400 }, { "epoch": 0.1, "grad_norm": 2.852095365524292, "learning_rate": 0.0002, "loss": 1.5263, "step": 23410 }, { "epoch": 0.1, "grad_norm": 2.064642906188965, "learning_rate": 0.0002, "loss": 1.6147, "step": 23420 }, { "epoch": 0.1, "grad_norm": 1.8230153322219849, "learning_rate": 0.0002, "loss": 1.2634, "step": 23430 }, { "epoch": 0.1, "grad_norm": 2.549680709838867, "learning_rate": 0.0002, "loss": 1.4838, "step": 23440 }, { "epoch": 0.1, "grad_norm": 2.534205675125122, "learning_rate": 0.0002, "loss": 1.2089, "step": 23450 }, { "epoch": 0.1, "grad_norm": 2.0137951374053955, "learning_rate": 0.0002, "loss": 1.4296, "step": 23460 }, { "epoch": 0.1, "grad_norm": 1.910715937614441, "learning_rate": 0.0002, "loss": 1.5862, "step": 23470 }, { "epoch": 0.1, "grad_norm": 2.647428274154663, "learning_rate": 0.0002, "loss": 1.5767, "step": 23480 }, { "epoch": 0.1, "grad_norm": 2.0895488262176514, "learning_rate": 0.0002, "loss": 1.4904, "step": 23490 }, { "epoch": 0.1, "grad_norm": 2.296091318130493, "learning_rate": 0.0002, "loss": 1.4045, "step": 23500 }, { "epoch": 0.1, "grad_norm": 2.7179744243621826, "learning_rate": 0.0002, "loss": 1.5728, "step": 23510 }, { "epoch": 0.1, "grad_norm": 2.9695260524749756, "learning_rate": 0.0002, "loss": 1.3993, "step": 23520 }, { "epoch": 0.1, "grad_norm": 3.753267288208008, "learning_rate": 0.0002, "loss": 1.2883, "step": 23530 }, { "epoch": 0.1, "grad_norm": 2.6508824825286865, "learning_rate": 0.0002, "loss": 1.3101, "step": 23540 }, { "epoch": 0.1, "grad_norm": 2.188663959503174, "learning_rate": 0.0002, "loss": 1.5675, "step": 23550 }, { "epoch": 0.1, "grad_norm": 3.0997743606567383, "learning_rate": 0.0002, "loss": 1.4844, "step": 23560 }, { "epoch": 0.1, "grad_norm": 2.594477653503418, "learning_rate": 0.0002, "loss": 1.5884, "step": 23570 }, { "epoch": 0.1, "grad_norm": 2.1160669326782227, "learning_rate": 0.0002, "loss": 1.5121, "step": 23580 }, { "epoch": 0.1, "grad_norm": 2.1465377807617188, "learning_rate": 0.0002, "loss": 1.46, "step": 23590 }, { "epoch": 0.1, "grad_norm": 2.4048919677734375, "learning_rate": 0.0002, "loss": 1.5818, "step": 23600 }, { "epoch": 0.1, "grad_norm": 2.33125376701355, "learning_rate": 0.0002, "loss": 1.4318, "step": 23610 }, { "epoch": 0.1, "grad_norm": 2.6539864540100098, "learning_rate": 0.0002, "loss": 1.496, "step": 23620 }, { "epoch": 0.1, "grad_norm": 3.333007335662842, "learning_rate": 0.0002, "loss": 1.4985, "step": 23630 }, { "epoch": 0.1, "grad_norm": 3.2369983196258545, "learning_rate": 0.0002, "loss": 1.5606, "step": 23640 }, { "epoch": 0.1, "grad_norm": 1.4520263671875, "learning_rate": 0.0002, "loss": 1.5559, "step": 23650 }, { "epoch": 0.1, "grad_norm": 3.335742235183716, "learning_rate": 0.0002, "loss": 1.3877, "step": 23660 }, { "epoch": 0.1, "grad_norm": 4.572993278503418, "learning_rate": 0.0002, "loss": 1.4027, "step": 23670 }, { "epoch": 0.1, "grad_norm": 2.007429599761963, "learning_rate": 0.0002, "loss": 1.6584, "step": 23680 }, { "epoch": 0.1, "grad_norm": 2.7145209312438965, "learning_rate": 0.0002, "loss": 1.4959, "step": 23690 }, { "epoch": 0.1, "grad_norm": 2.2312192916870117, "learning_rate": 0.0002, "loss": 1.5832, "step": 23700 }, { "epoch": 0.1, "grad_norm": 1.2442584037780762, "learning_rate": 0.0002, "loss": 1.7055, "step": 23710 }, { "epoch": 0.1, "grad_norm": 2.501950740814209, "learning_rate": 0.0002, "loss": 1.6038, "step": 23720 }, { "epoch": 0.1, "grad_norm": 4.925215721130371, "learning_rate": 0.0002, "loss": 1.1658, "step": 23730 }, { "epoch": 0.1, "grad_norm": 2.4308550357818604, "learning_rate": 0.0002, "loss": 1.5936, "step": 23740 }, { "epoch": 0.1, "grad_norm": 3.393599510192871, "learning_rate": 0.0002, "loss": 1.2635, "step": 23750 }, { "epoch": 0.1, "grad_norm": 3.3599696159362793, "learning_rate": 0.0002, "loss": 1.4532, "step": 23760 }, { "epoch": 0.1, "grad_norm": 3.1442463397979736, "learning_rate": 0.0002, "loss": 1.555, "step": 23770 }, { "epoch": 0.1, "grad_norm": 2.720533847808838, "learning_rate": 0.0002, "loss": 1.4232, "step": 23780 }, { "epoch": 0.1, "grad_norm": 2.6818745136260986, "learning_rate": 0.0002, "loss": 1.5821, "step": 23790 }, { "epoch": 0.1, "grad_norm": 2.857677936553955, "learning_rate": 0.0002, "loss": 1.7406, "step": 23800 }, { "epoch": 0.1, "grad_norm": 2.6761789321899414, "learning_rate": 0.0002, "loss": 1.6396, "step": 23810 }, { "epoch": 0.1, "grad_norm": 2.3810696601867676, "learning_rate": 0.0002, "loss": 1.7714, "step": 23820 }, { "epoch": 0.1, "grad_norm": 2.143313407897949, "learning_rate": 0.0002, "loss": 1.4976, "step": 23830 }, { "epoch": 0.1, "grad_norm": 3.072354316711426, "learning_rate": 0.0002, "loss": 1.437, "step": 23840 }, { "epoch": 0.1, "grad_norm": 3.1677095890045166, "learning_rate": 0.0002, "loss": 1.4272, "step": 23850 }, { "epoch": 0.1, "grad_norm": 2.4953501224517822, "learning_rate": 0.0002, "loss": 1.6324, "step": 23860 }, { "epoch": 0.1, "grad_norm": 2.059945583343506, "learning_rate": 0.0002, "loss": 1.6364, "step": 23870 }, { "epoch": 0.1, "grad_norm": 3.385831117630005, "learning_rate": 0.0002, "loss": 1.3816, "step": 23880 }, { "epoch": 0.1, "grad_norm": 1.800626516342163, "learning_rate": 0.0002, "loss": 1.5055, "step": 23890 }, { "epoch": 0.1, "grad_norm": 4.655955791473389, "learning_rate": 0.0002, "loss": 1.3334, "step": 23900 }, { "epoch": 0.1, "grad_norm": 1.7351959943771362, "learning_rate": 0.0002, "loss": 1.4818, "step": 23910 }, { "epoch": 0.1, "grad_norm": 3.1674346923828125, "learning_rate": 0.0002, "loss": 1.3954, "step": 23920 }, { "epoch": 0.1, "grad_norm": 4.411452293395996, "learning_rate": 0.0002, "loss": 1.5773, "step": 23930 }, { "epoch": 0.1, "grad_norm": 2.0256845951080322, "learning_rate": 0.0002, "loss": 1.6816, "step": 23940 }, { "epoch": 0.1, "grad_norm": 2.1475768089294434, "learning_rate": 0.0002, "loss": 1.4107, "step": 23950 }, { "epoch": 0.1, "grad_norm": 3.365471124649048, "learning_rate": 0.0002, "loss": 1.6746, "step": 23960 }, { "epoch": 0.1, "grad_norm": 2.220874309539795, "learning_rate": 0.0002, "loss": 1.774, "step": 23970 }, { "epoch": 0.1, "grad_norm": 2.520181179046631, "learning_rate": 0.0002, "loss": 1.375, "step": 23980 }, { "epoch": 0.1, "grad_norm": 2.9815917015075684, "learning_rate": 0.0002, "loss": 1.254, "step": 23990 }, { "epoch": 0.1, "grad_norm": 3.0009524822235107, "learning_rate": 0.0002, "loss": 1.4316, "step": 24000 }, { "epoch": 0.1, "grad_norm": 1.6016945838928223, "learning_rate": 0.0002, "loss": 1.5444, "step": 24010 }, { "epoch": 0.1, "grad_norm": 3.2134788036346436, "learning_rate": 0.0002, "loss": 1.4704, "step": 24020 }, { "epoch": 0.1, "grad_norm": 2.5281002521514893, "learning_rate": 0.0002, "loss": 1.453, "step": 24030 }, { "epoch": 0.1, "grad_norm": 2.033281087875366, "learning_rate": 0.0002, "loss": 1.6947, "step": 24040 }, { "epoch": 0.1, "grad_norm": 2.8222603797912598, "learning_rate": 0.0002, "loss": 1.29, "step": 24050 }, { "epoch": 0.1, "grad_norm": 3.40444278717041, "learning_rate": 0.0002, "loss": 1.6769, "step": 24060 }, { "epoch": 0.1, "grad_norm": 3.255019426345825, "learning_rate": 0.0002, "loss": 1.4935, "step": 24070 }, { "epoch": 0.1, "grad_norm": 2.6802406311035156, "learning_rate": 0.0002, "loss": 1.7243, "step": 24080 }, { "epoch": 0.1, "grad_norm": 3.430797576904297, "learning_rate": 0.0002, "loss": 1.4588, "step": 24090 }, { "epoch": 0.1, "grad_norm": 2.757725954055786, "learning_rate": 0.0002, "loss": 1.5615, "step": 24100 }, { "epoch": 0.1, "grad_norm": 2.797210454940796, "learning_rate": 0.0002, "loss": 1.5704, "step": 24110 }, { "epoch": 0.1, "grad_norm": 2.996601104736328, "learning_rate": 0.0002, "loss": 1.5118, "step": 24120 }, { "epoch": 0.1, "grad_norm": 2.4483022689819336, "learning_rate": 0.0002, "loss": 1.5143, "step": 24130 }, { "epoch": 0.1, "grad_norm": 2.8071417808532715, "learning_rate": 0.0002, "loss": 1.5752, "step": 24140 }, { "epoch": 0.1, "grad_norm": 3.0826632976531982, "learning_rate": 0.0002, "loss": 1.6656, "step": 24150 }, { "epoch": 0.1, "grad_norm": 1.9968476295471191, "learning_rate": 0.0002, "loss": 1.5179, "step": 24160 }, { "epoch": 0.1, "grad_norm": 3.5949602127075195, "learning_rate": 0.0002, "loss": 1.4528, "step": 24170 }, { "epoch": 0.1, "grad_norm": 1.6354811191558838, "learning_rate": 0.0002, "loss": 1.4863, "step": 24180 }, { "epoch": 0.1, "grad_norm": 1.927276372909546, "learning_rate": 0.0002, "loss": 1.5085, "step": 24190 }, { "epoch": 0.1, "grad_norm": 2.879296064376831, "learning_rate": 0.0002, "loss": 1.5797, "step": 24200 }, { "epoch": 0.1, "grad_norm": 2.6450750827789307, "learning_rate": 0.0002, "loss": 1.5045, "step": 24210 }, { "epoch": 0.1, "grad_norm": 3.7201333045959473, "learning_rate": 0.0002, "loss": 1.5868, "step": 24220 }, { "epoch": 0.1, "grad_norm": 2.507378339767456, "learning_rate": 0.0002, "loss": 1.6166, "step": 24230 }, { "epoch": 0.1, "grad_norm": 1.4825161695480347, "learning_rate": 0.0002, "loss": 1.424, "step": 24240 }, { "epoch": 0.1, "grad_norm": 2.091766595840454, "learning_rate": 0.0002, "loss": 1.5451, "step": 24250 }, { "epoch": 0.1, "grad_norm": 1.646433711051941, "learning_rate": 0.0002, "loss": 1.5294, "step": 24260 }, { "epoch": 0.1, "grad_norm": 2.437295913696289, "learning_rate": 0.0002, "loss": 1.4782, "step": 24270 }, { "epoch": 0.1, "grad_norm": 2.0196962356567383, "learning_rate": 0.0002, "loss": 1.581, "step": 24280 }, { "epoch": 0.1, "grad_norm": 3.901500701904297, "learning_rate": 0.0002, "loss": 1.2685, "step": 24290 }, { "epoch": 0.1, "grad_norm": 3.304198980331421, "learning_rate": 0.0002, "loss": 1.4535, "step": 24300 }, { "epoch": 0.1, "grad_norm": 2.846446990966797, "learning_rate": 0.0002, "loss": 1.3858, "step": 24310 }, { "epoch": 0.1, "grad_norm": 2.6167263984680176, "learning_rate": 0.0002, "loss": 1.8218, "step": 24320 }, { "epoch": 0.1, "grad_norm": 3.3056836128234863, "learning_rate": 0.0002, "loss": 1.5237, "step": 24330 }, { "epoch": 0.1, "grad_norm": 2.5708706378936768, "learning_rate": 0.0002, "loss": 1.6809, "step": 24340 }, { "epoch": 0.1, "grad_norm": 2.313208818435669, "learning_rate": 0.0002, "loss": 1.4657, "step": 24350 }, { "epoch": 0.1, "grad_norm": 2.374476671218872, "learning_rate": 0.0002, "loss": 1.574, "step": 24360 }, { "epoch": 0.1, "grad_norm": 3.2242040634155273, "learning_rate": 0.0002, "loss": 1.6139, "step": 24370 }, { "epoch": 0.1, "grad_norm": 3.5134031772613525, "learning_rate": 0.0002, "loss": 1.5066, "step": 24380 }, { "epoch": 0.1, "grad_norm": 2.608140230178833, "learning_rate": 0.0002, "loss": 1.7092, "step": 24390 }, { "epoch": 0.1, "grad_norm": 1.9296104907989502, "learning_rate": 0.0002, "loss": 1.4645, "step": 24400 }, { "epoch": 0.1, "grad_norm": 5.177892684936523, "learning_rate": 0.0002, "loss": 1.7411, "step": 24410 }, { "epoch": 0.1, "grad_norm": 2.746833562850952, "learning_rate": 0.0002, "loss": 1.3881, "step": 24420 }, { "epoch": 0.1, "grad_norm": 2.7417640686035156, "learning_rate": 0.0002, "loss": 1.4952, "step": 24430 }, { "epoch": 0.1, "grad_norm": 2.8965296745300293, "learning_rate": 0.0002, "loss": 1.4221, "step": 24440 }, { "epoch": 0.1, "grad_norm": 2.832780122756958, "learning_rate": 0.0002, "loss": 1.5621, "step": 24450 }, { "epoch": 0.1, "grad_norm": 1.8590688705444336, "learning_rate": 0.0002, "loss": 1.4724, "step": 24460 }, { "epoch": 0.1, "grad_norm": 2.430743455886841, "learning_rate": 0.0002, "loss": 1.3212, "step": 24470 }, { "epoch": 0.1, "grad_norm": 2.551568031311035, "learning_rate": 0.0002, "loss": 1.5647, "step": 24480 }, { "epoch": 0.1, "grad_norm": 3.127302408218384, "learning_rate": 0.0002, "loss": 1.5576, "step": 24490 }, { "epoch": 0.1, "grad_norm": 3.5562729835510254, "learning_rate": 0.0002, "loss": 1.4317, "step": 24500 }, { "epoch": 0.1, "grad_norm": 2.4244420528411865, "learning_rate": 0.0002, "loss": 1.6801, "step": 24510 }, { "epoch": 0.1, "grad_norm": 2.684809923171997, "learning_rate": 0.0002, "loss": 1.5936, "step": 24520 }, { "epoch": 0.1, "grad_norm": 6.7053422927856445, "learning_rate": 0.0002, "loss": 1.5734, "step": 24530 }, { "epoch": 0.1, "grad_norm": 2.5563342571258545, "learning_rate": 0.0002, "loss": 1.718, "step": 24540 }, { "epoch": 0.1, "grad_norm": 2.1291401386260986, "learning_rate": 0.0002, "loss": 1.6401, "step": 24550 }, { "epoch": 0.1, "grad_norm": 1.6699440479278564, "learning_rate": 0.0002, "loss": 1.4351, "step": 24560 }, { "epoch": 0.1, "grad_norm": 2.777484893798828, "learning_rate": 0.0002, "loss": 1.5968, "step": 24570 }, { "epoch": 0.1, "grad_norm": 3.756131410598755, "learning_rate": 0.0002, "loss": 1.6739, "step": 24580 }, { "epoch": 0.1, "grad_norm": 4.72047758102417, "learning_rate": 0.0002, "loss": 1.5425, "step": 24590 }, { "epoch": 0.1, "grad_norm": 3.548846483230591, "learning_rate": 0.0002, "loss": 1.682, "step": 24600 }, { "epoch": 0.1, "grad_norm": 2.54101300239563, "learning_rate": 0.0002, "loss": 1.4129, "step": 24610 }, { "epoch": 0.1, "grad_norm": 3.034342050552368, "learning_rate": 0.0002, "loss": 1.4924, "step": 24620 }, { "epoch": 0.1, "grad_norm": 2.338239908218384, "learning_rate": 0.0002, "loss": 1.7071, "step": 24630 }, { "epoch": 0.1, "grad_norm": 3.370579719543457, "learning_rate": 0.0002, "loss": 1.434, "step": 24640 }, { "epoch": 0.1, "grad_norm": 2.467899799346924, "learning_rate": 0.0002, "loss": 1.3037, "step": 24650 }, { "epoch": 0.1, "grad_norm": 2.3751842975616455, "learning_rate": 0.0002, "loss": 1.5507, "step": 24660 }, { "epoch": 0.1, "grad_norm": 1.759171724319458, "learning_rate": 0.0002, "loss": 1.4879, "step": 24670 }, { "epoch": 0.1, "grad_norm": 1.9592934846878052, "learning_rate": 0.0002, "loss": 1.519, "step": 24680 }, { "epoch": 0.1, "grad_norm": 2.3101439476013184, "learning_rate": 0.0002, "loss": 1.648, "step": 24690 }, { "epoch": 0.1, "grad_norm": 2.5734333992004395, "learning_rate": 0.0002, "loss": 1.2651, "step": 24700 }, { "epoch": 0.1, "grad_norm": 3.6168954372406006, "learning_rate": 0.0002, "loss": 1.6054, "step": 24710 }, { "epoch": 0.1, "grad_norm": 3.5693583488464355, "learning_rate": 0.0002, "loss": 1.6944, "step": 24720 }, { "epoch": 0.1, "grad_norm": 3.5966644287109375, "learning_rate": 0.0002, "loss": 1.4651, "step": 24730 }, { "epoch": 0.1, "grad_norm": 3.2200510501861572, "learning_rate": 0.0002, "loss": 1.5644, "step": 24740 }, { "epoch": 0.1, "grad_norm": 2.7951912879943848, "learning_rate": 0.0002, "loss": 1.5367, "step": 24750 }, { "epoch": 0.1, "grad_norm": 2.3859634399414062, "learning_rate": 0.0002, "loss": 1.5353, "step": 24760 }, { "epoch": 0.1, "grad_norm": 4.061084270477295, "learning_rate": 0.0002, "loss": 1.4014, "step": 24770 }, { "epoch": 0.1, "grad_norm": 1.8142356872558594, "learning_rate": 0.0002, "loss": 1.5964, "step": 24780 }, { "epoch": 0.1, "grad_norm": 2.1473007202148438, "learning_rate": 0.0002, "loss": 1.7255, "step": 24790 }, { "epoch": 0.1, "grad_norm": 2.4599769115448, "learning_rate": 0.0002, "loss": 1.8098, "step": 24800 }, { "epoch": 0.1, "grad_norm": 2.5723423957824707, "learning_rate": 0.0002, "loss": 1.3597, "step": 24810 }, { "epoch": 0.1, "grad_norm": 3.7902863025665283, "learning_rate": 0.0002, "loss": 1.4809, "step": 24820 }, { "epoch": 0.1, "grad_norm": 2.477308750152588, "learning_rate": 0.0002, "loss": 1.3274, "step": 24830 }, { "epoch": 0.1, "grad_norm": 5.183752536773682, "learning_rate": 0.0002, "loss": 1.3055, "step": 24840 }, { "epoch": 0.1, "grad_norm": 1.441998839378357, "learning_rate": 0.0002, "loss": 1.5172, "step": 24850 }, { "epoch": 0.1, "grad_norm": 1.359745979309082, "learning_rate": 0.0002, "loss": 1.3529, "step": 24860 }, { "epoch": 0.1, "grad_norm": 2.0921809673309326, "learning_rate": 0.0002, "loss": 1.5145, "step": 24870 }, { "epoch": 0.1, "grad_norm": 2.7886271476745605, "learning_rate": 0.0002, "loss": 1.828, "step": 24880 }, { "epoch": 0.1, "grad_norm": 1.1661821603775024, "learning_rate": 0.0002, "loss": 1.6261, "step": 24890 }, { "epoch": 0.1, "grad_norm": 2.7220780849456787, "learning_rate": 0.0002, "loss": 1.5573, "step": 24900 }, { "epoch": 0.1, "grad_norm": 1.9944418668746948, "learning_rate": 0.0002, "loss": 1.6821, "step": 24910 }, { "epoch": 0.1, "grad_norm": 4.457098007202148, "learning_rate": 0.0002, "loss": 1.5354, "step": 24920 }, { "epoch": 0.1, "grad_norm": 1.1178674697875977, "learning_rate": 0.0002, "loss": 1.5945, "step": 24930 }, { "epoch": 0.1, "grad_norm": 3.7393710613250732, "learning_rate": 0.0002, "loss": 1.5443, "step": 24940 }, { "epoch": 0.1, "grad_norm": 2.1495234966278076, "learning_rate": 0.0002, "loss": 1.6792, "step": 24950 }, { "epoch": 0.1, "grad_norm": 2.906822919845581, "learning_rate": 0.0002, "loss": 1.4815, "step": 24960 }, { "epoch": 0.1, "grad_norm": 2.2863471508026123, "learning_rate": 0.0002, "loss": 1.6454, "step": 24970 }, { "epoch": 0.1, "grad_norm": 3.0376479625701904, "learning_rate": 0.0002, "loss": 1.7103, "step": 24980 }, { "epoch": 0.1, "grad_norm": 2.4374475479125977, "learning_rate": 0.0002, "loss": 1.6741, "step": 24990 }, { "epoch": 0.1, "grad_norm": 2.7591280937194824, "learning_rate": 0.0002, "loss": 1.6048, "step": 25000 }, { "epoch": 0.1, "grad_norm": 1.7040059566497803, "learning_rate": 0.0002, "loss": 1.418, "step": 25010 }, { "epoch": 0.1, "grad_norm": 3.282034158706665, "learning_rate": 0.0002, "loss": 1.618, "step": 25020 }, { "epoch": 0.1, "grad_norm": 2.8917298316955566, "learning_rate": 0.0002, "loss": 1.7058, "step": 25030 }, { "epoch": 0.1, "grad_norm": 2.0399389266967773, "learning_rate": 0.0002, "loss": 1.3139, "step": 25040 }, { "epoch": 0.1, "grad_norm": 2.8800253868103027, "learning_rate": 0.0002, "loss": 1.4621, "step": 25050 }, { "epoch": 0.1, "grad_norm": 2.553208589553833, "learning_rate": 0.0002, "loss": 1.5699, "step": 25060 }, { "epoch": 0.1, "grad_norm": 3.181018352508545, "learning_rate": 0.0002, "loss": 1.4983, "step": 25070 }, { "epoch": 0.1, "grad_norm": 4.266862869262695, "learning_rate": 0.0002, "loss": 1.7077, "step": 25080 }, { "epoch": 0.1, "grad_norm": 2.0631392002105713, "learning_rate": 0.0002, "loss": 1.6202, "step": 25090 }, { "epoch": 0.1, "grad_norm": 2.379429340362549, "learning_rate": 0.0002, "loss": 1.5141, "step": 25100 }, { "epoch": 0.1, "grad_norm": 2.322143316268921, "learning_rate": 0.0002, "loss": 1.5815, "step": 25110 }, { "epoch": 0.1, "grad_norm": 2.0951223373413086, "learning_rate": 0.0002, "loss": 1.449, "step": 25120 }, { "epoch": 0.1, "grad_norm": 2.322969436645508, "learning_rate": 0.0002, "loss": 1.5699, "step": 25130 }, { "epoch": 0.1, "grad_norm": 1.7242372035980225, "learning_rate": 0.0002, "loss": 1.6495, "step": 25140 }, { "epoch": 0.1, "grad_norm": 2.8920488357543945, "learning_rate": 0.0002, "loss": 1.6381, "step": 25150 }, { "epoch": 0.1, "grad_norm": 1.9998745918273926, "learning_rate": 0.0002, "loss": 1.7596, "step": 25160 }, { "epoch": 0.1, "grad_norm": 3.8581507205963135, "learning_rate": 0.0002, "loss": 1.3501, "step": 25170 }, { "epoch": 0.1, "grad_norm": 3.3478665351867676, "learning_rate": 0.0002, "loss": 1.5882, "step": 25180 }, { "epoch": 0.1, "grad_norm": 3.1179027557373047, "learning_rate": 0.0002, "loss": 1.611, "step": 25190 }, { "epoch": 0.1, "grad_norm": 2.5164175033569336, "learning_rate": 0.0002, "loss": 1.2747, "step": 25200 }, { "epoch": 0.1, "grad_norm": 2.710847854614258, "learning_rate": 0.0002, "loss": 1.4024, "step": 25210 }, { "epoch": 0.1, "grad_norm": 2.996472120285034, "learning_rate": 0.0002, "loss": 1.5499, "step": 25220 }, { "epoch": 0.1, "grad_norm": 2.662992000579834, "learning_rate": 0.0002, "loss": 1.453, "step": 25230 }, { "epoch": 0.1, "grad_norm": 2.901707649230957, "learning_rate": 0.0002, "loss": 1.4329, "step": 25240 }, { "epoch": 0.1, "grad_norm": 2.5031301975250244, "learning_rate": 0.0002, "loss": 1.6642, "step": 25250 }, { "epoch": 0.1, "grad_norm": 2.329401969909668, "learning_rate": 0.0002, "loss": 1.446, "step": 25260 }, { "epoch": 0.1, "grad_norm": 3.3914952278137207, "learning_rate": 0.0002, "loss": 1.6221, "step": 25270 }, { "epoch": 0.1, "grad_norm": 3.0679664611816406, "learning_rate": 0.0002, "loss": 1.6786, "step": 25280 }, { "epoch": 0.1, "grad_norm": 3.686049699783325, "learning_rate": 0.0002, "loss": 1.6381, "step": 25290 }, { "epoch": 0.1, "grad_norm": 3.3208813667297363, "learning_rate": 0.0002, "loss": 1.6015, "step": 25300 }, { "epoch": 0.1, "grad_norm": 3.9716601371765137, "learning_rate": 0.0002, "loss": 1.4965, "step": 25310 }, { "epoch": 0.1, "grad_norm": 2.7597038745880127, "learning_rate": 0.0002, "loss": 1.6129, "step": 25320 }, { "epoch": 0.1, "grad_norm": 2.590627908706665, "learning_rate": 0.0002, "loss": 1.4345, "step": 25330 }, { "epoch": 0.1, "grad_norm": 3.668210506439209, "learning_rate": 0.0002, "loss": 1.4396, "step": 25340 }, { "epoch": 0.1, "grad_norm": 2.2660582065582275, "learning_rate": 0.0002, "loss": 1.4435, "step": 25350 }, { "epoch": 0.1, "grad_norm": 3.058520555496216, "learning_rate": 0.0002, "loss": 1.5267, "step": 25360 }, { "epoch": 0.1, "grad_norm": 2.0902323722839355, "learning_rate": 0.0002, "loss": 1.6993, "step": 25370 }, { "epoch": 0.1, "grad_norm": 2.9051342010498047, "learning_rate": 0.0002, "loss": 1.6451, "step": 25380 }, { "epoch": 0.1, "grad_norm": 2.5315699577331543, "learning_rate": 0.0002, "loss": 1.7175, "step": 25390 }, { "epoch": 0.1, "grad_norm": 2.440678834915161, "learning_rate": 0.0002, "loss": 1.473, "step": 25400 }, { "epoch": 0.1, "grad_norm": 3.399094820022583, "learning_rate": 0.0002, "loss": 1.6461, "step": 25410 }, { "epoch": 0.1, "grad_norm": 2.9171969890594482, "learning_rate": 0.0002, "loss": 1.5975, "step": 25420 }, { "epoch": 0.1, "grad_norm": 3.3904244899749756, "learning_rate": 0.0002, "loss": 1.377, "step": 25430 }, { "epoch": 0.1, "grad_norm": 1.5368961095809937, "learning_rate": 0.0002, "loss": 1.7289, "step": 25440 }, { "epoch": 0.1, "grad_norm": 2.3563294410705566, "learning_rate": 0.0002, "loss": 1.7149, "step": 25450 }, { "epoch": 0.1, "grad_norm": 3.2200655937194824, "learning_rate": 0.0002, "loss": 1.5136, "step": 25460 }, { "epoch": 0.1, "grad_norm": 4.267578125, "learning_rate": 0.0002, "loss": 1.7305, "step": 25470 }, { "epoch": 0.1, "grad_norm": 5.080331325531006, "learning_rate": 0.0002, "loss": 1.5236, "step": 25480 }, { "epoch": 0.1, "grad_norm": 1.9304091930389404, "learning_rate": 0.0002, "loss": 1.3094, "step": 25490 }, { "epoch": 0.1, "grad_norm": 3.7401528358459473, "learning_rate": 0.0002, "loss": 1.5063, "step": 25500 }, { "epoch": 0.1, "grad_norm": 3.238760471343994, "learning_rate": 0.0002, "loss": 1.2148, "step": 25510 }, { "epoch": 0.1, "grad_norm": 2.3409249782562256, "learning_rate": 0.0002, "loss": 1.3979, "step": 25520 }, { "epoch": 0.1, "grad_norm": 4.876424312591553, "learning_rate": 0.0002, "loss": 1.3725, "step": 25530 }, { "epoch": 0.1, "grad_norm": 2.0878939628601074, "learning_rate": 0.0002, "loss": 1.47, "step": 25540 }, { "epoch": 0.1, "grad_norm": 1.7253520488739014, "learning_rate": 0.0002, "loss": 1.2977, "step": 25550 }, { "epoch": 0.1, "grad_norm": 3.944467306137085, "learning_rate": 0.0002, "loss": 1.3338, "step": 25560 }, { "epoch": 0.1, "grad_norm": 3.0925798416137695, "learning_rate": 0.0002, "loss": 1.7026, "step": 25570 }, { "epoch": 0.1, "grad_norm": 2.4256198406219482, "learning_rate": 0.0002, "loss": 1.5282, "step": 25580 }, { "epoch": 0.1, "grad_norm": 2.918769359588623, "learning_rate": 0.0002, "loss": 1.7268, "step": 25590 }, { "epoch": 0.1, "grad_norm": 2.0791356563568115, "learning_rate": 0.0002, "loss": 1.6018, "step": 25600 }, { "epoch": 0.1, "grad_norm": 2.2635321617126465, "learning_rate": 0.0002, "loss": 1.611, "step": 25610 }, { "epoch": 0.1, "grad_norm": 4.511003494262695, "learning_rate": 0.0002, "loss": 1.5465, "step": 25620 }, { "epoch": 0.1, "grad_norm": 1.8132908344268799, "learning_rate": 0.0002, "loss": 1.2116, "step": 25630 }, { "epoch": 0.1, "grad_norm": 2.6225717067718506, "learning_rate": 0.0002, "loss": 1.5807, "step": 25640 }, { "epoch": 0.1, "grad_norm": 2.6487791538238525, "learning_rate": 0.0002, "loss": 1.6655, "step": 25650 }, { "epoch": 0.1, "grad_norm": 2.5666890144348145, "learning_rate": 0.0002, "loss": 1.2565, "step": 25660 }, { "epoch": 0.1, "grad_norm": 2.6299779415130615, "learning_rate": 0.0002, "loss": 1.563, "step": 25670 }, { "epoch": 0.1, "grad_norm": 2.0475704669952393, "learning_rate": 0.0002, "loss": 1.2332, "step": 25680 }, { "epoch": 0.1, "grad_norm": 3.4554147720336914, "learning_rate": 0.0002, "loss": 1.6857, "step": 25690 }, { "epoch": 0.1, "grad_norm": 2.4782466888427734, "learning_rate": 0.0002, "loss": 1.4489, "step": 25700 }, { "epoch": 0.1, "grad_norm": 2.7642455101013184, "learning_rate": 0.0002, "loss": 1.6797, "step": 25710 }, { "epoch": 0.1, "grad_norm": 3.121621608734131, "learning_rate": 0.0002, "loss": 1.3618, "step": 25720 }, { "epoch": 0.1, "grad_norm": 2.2055914402008057, "learning_rate": 0.0002, "loss": 1.6704, "step": 25730 }, { "epoch": 0.1, "grad_norm": 3.648705005645752, "learning_rate": 0.0002, "loss": 1.7206, "step": 25740 }, { "epoch": 0.1, "grad_norm": 1.6554328203201294, "learning_rate": 0.0002, "loss": 1.2113, "step": 25750 }, { "epoch": 0.1, "grad_norm": 3.2896664142608643, "learning_rate": 0.0002, "loss": 1.6775, "step": 25760 }, { "epoch": 0.1, "grad_norm": 1.969316840171814, "learning_rate": 0.0002, "loss": 1.7198, "step": 25770 }, { "epoch": 0.1, "grad_norm": 2.8580143451690674, "learning_rate": 0.0002, "loss": 1.4936, "step": 25780 }, { "epoch": 0.1, "grad_norm": 4.131762981414795, "learning_rate": 0.0002, "loss": 1.5977, "step": 25790 }, { "epoch": 0.11, "grad_norm": 2.0692403316497803, "learning_rate": 0.0002, "loss": 1.5056, "step": 25800 }, { "epoch": 0.11, "grad_norm": 1.6555261611938477, "learning_rate": 0.0002, "loss": 1.3771, "step": 25810 }, { "epoch": 0.11, "grad_norm": 1.8397610187530518, "learning_rate": 0.0002, "loss": 1.5047, "step": 25820 }, { "epoch": 0.11, "grad_norm": 2.7014060020446777, "learning_rate": 0.0002, "loss": 1.4975, "step": 25830 }, { "epoch": 0.11, "grad_norm": 2.6634747982025146, "learning_rate": 0.0002, "loss": 1.4494, "step": 25840 }, { "epoch": 0.11, "grad_norm": 1.966864824295044, "learning_rate": 0.0002, "loss": 1.674, "step": 25850 }, { "epoch": 0.11, "grad_norm": 3.026275873184204, "learning_rate": 0.0002, "loss": 1.5464, "step": 25860 }, { "epoch": 0.11, "grad_norm": 2.3478662967681885, "learning_rate": 0.0002, "loss": 1.3505, "step": 25870 }, { "epoch": 0.11, "grad_norm": 2.290703058242798, "learning_rate": 0.0002, "loss": 1.5722, "step": 25880 }, { "epoch": 0.11, "grad_norm": 3.5616254806518555, "learning_rate": 0.0002, "loss": 1.5936, "step": 25890 }, { "epoch": 0.11, "grad_norm": 3.345024347305298, "learning_rate": 0.0002, "loss": 1.4731, "step": 25900 }, { "epoch": 0.11, "grad_norm": 1.9973421096801758, "learning_rate": 0.0002, "loss": 1.3114, "step": 25910 }, { "epoch": 0.11, "grad_norm": 3.7572591304779053, "learning_rate": 0.0002, "loss": 1.569, "step": 25920 }, { "epoch": 0.11, "grad_norm": 3.157355546951294, "learning_rate": 0.0002, "loss": 1.5141, "step": 25930 }, { "epoch": 0.11, "grad_norm": 3.242595672607422, "learning_rate": 0.0002, "loss": 1.5963, "step": 25940 }, { "epoch": 0.11, "grad_norm": 1.7761012315750122, "learning_rate": 0.0002, "loss": 1.2651, "step": 25950 }, { "epoch": 0.11, "grad_norm": 3.3727095127105713, "learning_rate": 0.0002, "loss": 1.5736, "step": 25960 }, { "epoch": 0.11, "grad_norm": 2.5119287967681885, "learning_rate": 0.0002, "loss": 1.5151, "step": 25970 }, { "epoch": 0.11, "grad_norm": 1.7566907405853271, "learning_rate": 0.0002, "loss": 1.5816, "step": 25980 }, { "epoch": 0.11, "grad_norm": 4.352044105529785, "learning_rate": 0.0002, "loss": 1.5385, "step": 25990 }, { "epoch": 0.11, "grad_norm": 2.775785207748413, "learning_rate": 0.0002, "loss": 1.6824, "step": 26000 }, { "epoch": 0.11, "grad_norm": 3.5114924907684326, "learning_rate": 0.0002, "loss": 1.6772, "step": 26010 }, { "epoch": 0.11, "grad_norm": 3.007758855819702, "learning_rate": 0.0002, "loss": 1.3535, "step": 26020 }, { "epoch": 0.11, "grad_norm": 2.383978843688965, "learning_rate": 0.0002, "loss": 1.3515, "step": 26030 }, { "epoch": 0.11, "grad_norm": 3.1329562664031982, "learning_rate": 0.0002, "loss": 1.3574, "step": 26040 }, { "epoch": 0.11, "grad_norm": 1.6564632654190063, "learning_rate": 0.0002, "loss": 1.4762, "step": 26050 }, { "epoch": 0.11, "grad_norm": 3.1162209510803223, "learning_rate": 0.0002, "loss": 1.6183, "step": 26060 }, { "epoch": 0.11, "grad_norm": 2.767627000808716, "learning_rate": 0.0002, "loss": 1.663, "step": 26070 }, { "epoch": 0.11, "grad_norm": 2.3360214233398438, "learning_rate": 0.0002, "loss": 1.4672, "step": 26080 }, { "epoch": 0.11, "grad_norm": 2.8299708366394043, "learning_rate": 0.0002, "loss": 1.5292, "step": 26090 }, { "epoch": 0.11, "grad_norm": 3.9879326820373535, "learning_rate": 0.0002, "loss": 1.5188, "step": 26100 }, { "epoch": 0.11, "grad_norm": 2.939241647720337, "learning_rate": 0.0002, "loss": 1.6631, "step": 26110 }, { "epoch": 0.11, "grad_norm": 2.8702499866485596, "learning_rate": 0.0002, "loss": 1.6777, "step": 26120 }, { "epoch": 0.11, "grad_norm": 1.8264857530593872, "learning_rate": 0.0002, "loss": 1.5435, "step": 26130 }, { "epoch": 0.11, "grad_norm": 2.200803756713867, "learning_rate": 0.0002, "loss": 1.6424, "step": 26140 }, { "epoch": 0.11, "grad_norm": 2.123955488204956, "learning_rate": 0.0002, "loss": 1.5984, "step": 26150 }, { "epoch": 0.11, "grad_norm": 2.120506763458252, "learning_rate": 0.0002, "loss": 1.5452, "step": 26160 }, { "epoch": 0.11, "grad_norm": 2.234532117843628, "learning_rate": 0.0002, "loss": 1.4544, "step": 26170 }, { "epoch": 0.11, "grad_norm": 1.9966429471969604, "learning_rate": 0.0002, "loss": 1.5738, "step": 26180 }, { "epoch": 0.11, "grad_norm": 3.163994312286377, "learning_rate": 0.0002, "loss": 1.5016, "step": 26190 }, { "epoch": 0.11, "grad_norm": 2.614168882369995, "learning_rate": 0.0002, "loss": 1.5562, "step": 26200 }, { "epoch": 0.11, "grad_norm": 2.3482115268707275, "learning_rate": 0.0002, "loss": 1.5109, "step": 26210 }, { "epoch": 0.11, "grad_norm": 4.023293972015381, "learning_rate": 0.0002, "loss": 1.2857, "step": 26220 }, { "epoch": 0.11, "grad_norm": 1.890121579170227, "learning_rate": 0.0002, "loss": 1.7206, "step": 26230 }, { "epoch": 0.11, "grad_norm": 3.0325679779052734, "learning_rate": 0.0002, "loss": 1.689, "step": 26240 }, { "epoch": 0.11, "grad_norm": 1.8351006507873535, "learning_rate": 0.0002, "loss": 1.5039, "step": 26250 }, { "epoch": 0.11, "grad_norm": 2.256042003631592, "learning_rate": 0.0002, "loss": 1.5464, "step": 26260 }, { "epoch": 0.11, "grad_norm": 2.4987294673919678, "learning_rate": 0.0002, "loss": 1.6709, "step": 26270 }, { "epoch": 0.11, "grad_norm": 3.641310691833496, "learning_rate": 0.0002, "loss": 1.4494, "step": 26280 }, { "epoch": 0.11, "grad_norm": 3.0231990814208984, "learning_rate": 0.0002, "loss": 1.683, "step": 26290 }, { "epoch": 0.11, "grad_norm": 4.977132797241211, "learning_rate": 0.0002, "loss": 1.3482, "step": 26300 }, { "epoch": 0.11, "grad_norm": 2.279872417449951, "learning_rate": 0.0002, "loss": 1.7066, "step": 26310 }, { "epoch": 0.11, "grad_norm": 2.796259641647339, "learning_rate": 0.0002, "loss": 1.6466, "step": 26320 }, { "epoch": 0.11, "grad_norm": 2.295542001724243, "learning_rate": 0.0002, "loss": 1.6021, "step": 26330 }, { "epoch": 0.11, "grad_norm": 2.2967987060546875, "learning_rate": 0.0002, "loss": 1.6339, "step": 26340 }, { "epoch": 0.11, "grad_norm": 1.668882131576538, "learning_rate": 0.0002, "loss": 1.4111, "step": 26350 }, { "epoch": 0.11, "grad_norm": 2.597534656524658, "learning_rate": 0.0002, "loss": 1.5051, "step": 26360 }, { "epoch": 0.11, "grad_norm": 4.143869400024414, "learning_rate": 0.0002, "loss": 1.2824, "step": 26370 }, { "epoch": 0.11, "grad_norm": 3.640631914138794, "learning_rate": 0.0002, "loss": 1.4864, "step": 26380 }, { "epoch": 0.11, "grad_norm": 2.9828054904937744, "learning_rate": 0.0002, "loss": 1.3753, "step": 26390 }, { "epoch": 0.11, "grad_norm": 5.337001323699951, "learning_rate": 0.0002, "loss": 1.508, "step": 26400 }, { "epoch": 0.11, "grad_norm": 3.219829797744751, "learning_rate": 0.0002, "loss": 1.3794, "step": 26410 }, { "epoch": 0.11, "grad_norm": 2.0368363857269287, "learning_rate": 0.0002, "loss": 1.4851, "step": 26420 }, { "epoch": 0.11, "grad_norm": 2.4346766471862793, "learning_rate": 0.0002, "loss": 1.3748, "step": 26430 }, { "epoch": 0.11, "grad_norm": 2.9380686283111572, "learning_rate": 0.0002, "loss": 1.5345, "step": 26440 }, { "epoch": 0.11, "grad_norm": 2.702901601791382, "learning_rate": 0.0002, "loss": 1.5841, "step": 26450 }, { "epoch": 0.11, "grad_norm": 3.3383283615112305, "learning_rate": 0.0002, "loss": 1.7933, "step": 26460 }, { "epoch": 0.11, "grad_norm": 4.634119510650635, "learning_rate": 0.0002, "loss": 1.5227, "step": 26470 }, { "epoch": 0.11, "grad_norm": 2.056971549987793, "learning_rate": 0.0002, "loss": 1.5667, "step": 26480 }, { "epoch": 0.11, "grad_norm": 2.8649816513061523, "learning_rate": 0.0002, "loss": 1.4055, "step": 26490 }, { "epoch": 0.11, "grad_norm": 2.541701555252075, "learning_rate": 0.0002, "loss": 1.5074, "step": 26500 }, { "epoch": 0.11, "grad_norm": 4.33270788192749, "learning_rate": 0.0002, "loss": 1.5978, "step": 26510 }, { "epoch": 0.11, "grad_norm": 3.7486085891723633, "learning_rate": 0.0002, "loss": 1.425, "step": 26520 }, { "epoch": 0.11, "grad_norm": 2.4098801612854004, "learning_rate": 0.0002, "loss": 1.5445, "step": 26530 }, { "epoch": 0.11, "grad_norm": 1.558599829673767, "learning_rate": 0.0002, "loss": 1.6059, "step": 26540 }, { "epoch": 0.11, "grad_norm": 2.2123687267303467, "learning_rate": 0.0002, "loss": 1.7046, "step": 26550 }, { "epoch": 0.11, "grad_norm": 3.1190035343170166, "learning_rate": 0.0002, "loss": 1.9121, "step": 26560 }, { "epoch": 0.11, "grad_norm": 12.130480766296387, "learning_rate": 0.0002, "loss": 1.3719, "step": 26570 }, { "epoch": 0.11, "grad_norm": 2.0422747135162354, "learning_rate": 0.0002, "loss": 1.4594, "step": 26580 }, { "epoch": 0.11, "grad_norm": 2.5832669734954834, "learning_rate": 0.0002, "loss": 1.4782, "step": 26590 }, { "epoch": 0.11, "grad_norm": 2.583174228668213, "learning_rate": 0.0002, "loss": 1.7013, "step": 26600 }, { "epoch": 0.11, "grad_norm": 3.073202610015869, "learning_rate": 0.0002, "loss": 1.3672, "step": 26610 }, { "epoch": 0.11, "grad_norm": 3.0241165161132812, "learning_rate": 0.0002, "loss": 1.5639, "step": 26620 }, { "epoch": 0.11, "grad_norm": 3.0154643058776855, "learning_rate": 0.0002, "loss": 1.3806, "step": 26630 }, { "epoch": 0.11, "grad_norm": 2.5668771266937256, "learning_rate": 0.0002, "loss": 1.5048, "step": 26640 }, { "epoch": 0.11, "grad_norm": 1.6201571226119995, "learning_rate": 0.0002, "loss": 1.7351, "step": 26650 }, { "epoch": 0.11, "grad_norm": 2.9009921550750732, "learning_rate": 0.0002, "loss": 1.3911, "step": 26660 }, { "epoch": 0.11, "grad_norm": 2.6786487102508545, "learning_rate": 0.0002, "loss": 1.6771, "step": 26670 }, { "epoch": 0.11, "grad_norm": 2.7844574451446533, "learning_rate": 0.0002, "loss": 1.4127, "step": 26680 }, { "epoch": 0.11, "grad_norm": 2.344125986099243, "learning_rate": 0.0002, "loss": 1.5564, "step": 26690 }, { "epoch": 0.11, "grad_norm": 2.047452688217163, "learning_rate": 0.0002, "loss": 1.349, "step": 26700 }, { "epoch": 0.11, "grad_norm": 2.025503635406494, "learning_rate": 0.0002, "loss": 1.4341, "step": 26710 }, { "epoch": 0.11, "grad_norm": 2.739144802093506, "learning_rate": 0.0002, "loss": 1.4308, "step": 26720 }, { "epoch": 0.11, "grad_norm": 3.3389735221862793, "learning_rate": 0.0002, "loss": 1.3204, "step": 26730 }, { "epoch": 0.11, "grad_norm": 2.1105637550354004, "learning_rate": 0.0002, "loss": 1.5644, "step": 26740 }, { "epoch": 0.11, "grad_norm": 1.864851713180542, "learning_rate": 0.0002, "loss": 1.4498, "step": 26750 }, { "epoch": 0.11, "grad_norm": 3.0106122493743896, "learning_rate": 0.0002, "loss": 1.4969, "step": 26760 }, { "epoch": 0.11, "grad_norm": 2.304643392562866, "learning_rate": 0.0002, "loss": 1.5311, "step": 26770 }, { "epoch": 0.11, "grad_norm": 1.3341697454452515, "learning_rate": 0.0002, "loss": 1.578, "step": 26780 }, { "epoch": 0.11, "grad_norm": 2.6214065551757812, "learning_rate": 0.0002, "loss": 1.3723, "step": 26790 }, { "epoch": 0.11, "grad_norm": 2.671891450881958, "learning_rate": 0.0002, "loss": 1.723, "step": 26800 }, { "epoch": 0.11, "grad_norm": 2.7677063941955566, "learning_rate": 0.0002, "loss": 1.5236, "step": 26810 }, { "epoch": 0.11, "grad_norm": 2.6418848037719727, "learning_rate": 0.0002, "loss": 1.3408, "step": 26820 }, { "epoch": 0.11, "grad_norm": 1.7009527683258057, "learning_rate": 0.0002, "loss": 1.5222, "step": 26830 }, { "epoch": 0.11, "grad_norm": 2.952791452407837, "learning_rate": 0.0002, "loss": 1.418, "step": 26840 }, { "epoch": 0.11, "grad_norm": 2.0328376293182373, "learning_rate": 0.0002, "loss": 1.3537, "step": 26850 }, { "epoch": 0.11, "grad_norm": 3.061866521835327, "learning_rate": 0.0002, "loss": 1.8005, "step": 26860 }, { "epoch": 0.11, "grad_norm": 1.8954142332077026, "learning_rate": 0.0002, "loss": 1.6572, "step": 26870 }, { "epoch": 0.11, "grad_norm": 3.1389973163604736, "learning_rate": 0.0002, "loss": 1.5972, "step": 26880 }, { "epoch": 0.11, "grad_norm": 2.850322723388672, "learning_rate": 0.0002, "loss": 1.6827, "step": 26890 }, { "epoch": 0.11, "grad_norm": 6.303369522094727, "learning_rate": 0.0002, "loss": 1.7179, "step": 26900 }, { "epoch": 0.11, "grad_norm": 2.3797316551208496, "learning_rate": 0.0002, "loss": 1.7752, "step": 26910 }, { "epoch": 0.11, "grad_norm": 2.4984688758850098, "learning_rate": 0.0002, "loss": 1.5148, "step": 26920 }, { "epoch": 0.11, "grad_norm": 3.535942554473877, "learning_rate": 0.0002, "loss": 1.3616, "step": 26930 }, { "epoch": 0.11, "grad_norm": 3.445889949798584, "learning_rate": 0.0002, "loss": 1.6088, "step": 26940 }, { "epoch": 0.11, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.4826, "step": 26950 }, { "epoch": 0.11, "grad_norm": 6.623419284820557, "learning_rate": 0.0002, "loss": 1.7313, "step": 26960 }, { "epoch": 0.11, "grad_norm": 3.0405595302581787, "learning_rate": 0.0002, "loss": 1.4932, "step": 26970 }, { "epoch": 0.11, "grad_norm": 2.6461408138275146, "learning_rate": 0.0002, "loss": 1.5245, "step": 26980 }, { "epoch": 0.11, "grad_norm": 1.7968964576721191, "learning_rate": 0.0002, "loss": 1.3299, "step": 26990 }, { "epoch": 0.11, "grad_norm": 2.1113500595092773, "learning_rate": 0.0002, "loss": 1.3802, "step": 27000 }, { "epoch": 0.11, "grad_norm": 2.4554154872894287, "learning_rate": 0.0002, "loss": 1.4559, "step": 27010 }, { "epoch": 0.11, "grad_norm": 3.6531708240509033, "learning_rate": 0.0002, "loss": 1.5954, "step": 27020 }, { "epoch": 0.11, "grad_norm": 2.855760335922241, "learning_rate": 0.0002, "loss": 1.438, "step": 27030 }, { "epoch": 0.11, "grad_norm": 2.215036630630493, "learning_rate": 0.0002, "loss": 1.5967, "step": 27040 }, { "epoch": 0.11, "grad_norm": 2.9446704387664795, "learning_rate": 0.0002, "loss": 1.4542, "step": 27050 }, { "epoch": 0.11, "grad_norm": 5.334181785583496, "learning_rate": 0.0002, "loss": 1.7915, "step": 27060 }, { "epoch": 0.11, "grad_norm": 2.1319282054901123, "learning_rate": 0.0002, "loss": 1.4047, "step": 27070 }, { "epoch": 0.11, "grad_norm": 2.0520613193511963, "learning_rate": 0.0002, "loss": 1.7213, "step": 27080 }, { "epoch": 0.11, "grad_norm": 2.152665138244629, "learning_rate": 0.0002, "loss": 1.3565, "step": 27090 }, { "epoch": 0.11, "grad_norm": 4.345574855804443, "learning_rate": 0.0002, "loss": 1.6763, "step": 27100 }, { "epoch": 0.11, "grad_norm": 5.414093971252441, "learning_rate": 0.0002, "loss": 1.8275, "step": 27110 }, { "epoch": 0.11, "grad_norm": 1.9903165102005005, "learning_rate": 0.0002, "loss": 1.5701, "step": 27120 }, { "epoch": 0.11, "grad_norm": 2.2108216285705566, "learning_rate": 0.0002, "loss": 1.6628, "step": 27130 }, { "epoch": 0.11, "grad_norm": 3.2955873012542725, "learning_rate": 0.0002, "loss": 1.6383, "step": 27140 }, { "epoch": 0.11, "grad_norm": 1.955370306968689, "learning_rate": 0.0002, "loss": 1.4546, "step": 27150 }, { "epoch": 0.11, "grad_norm": 1.9629192352294922, "learning_rate": 0.0002, "loss": 1.5495, "step": 27160 }, { "epoch": 0.11, "grad_norm": 4.64094877243042, "learning_rate": 0.0002, "loss": 1.3735, "step": 27170 }, { "epoch": 0.11, "grad_norm": 2.5497958660125732, "learning_rate": 0.0002, "loss": 1.4927, "step": 27180 }, { "epoch": 0.11, "grad_norm": 2.2463765144348145, "learning_rate": 0.0002, "loss": 1.3896, "step": 27190 }, { "epoch": 0.11, "grad_norm": 2.3817052841186523, "learning_rate": 0.0002, "loss": 1.6862, "step": 27200 }, { "epoch": 0.11, "grad_norm": 5.0954084396362305, "learning_rate": 0.0002, "loss": 1.5967, "step": 27210 }, { "epoch": 0.11, "grad_norm": 2.1784398555755615, "learning_rate": 0.0002, "loss": 1.5331, "step": 27220 }, { "epoch": 0.11, "grad_norm": 2.9391257762908936, "learning_rate": 0.0002, "loss": 1.4056, "step": 27230 }, { "epoch": 0.11, "grad_norm": 6.181065082550049, "learning_rate": 0.0002, "loss": 1.6301, "step": 27240 }, { "epoch": 0.11, "grad_norm": 2.5531864166259766, "learning_rate": 0.0002, "loss": 1.399, "step": 27250 }, { "epoch": 0.11, "grad_norm": 3.5296413898468018, "learning_rate": 0.0002, "loss": 1.2649, "step": 27260 }, { "epoch": 0.11, "grad_norm": 1.3612074851989746, "learning_rate": 0.0002, "loss": 1.5912, "step": 27270 }, { "epoch": 0.11, "grad_norm": 4.681140899658203, "learning_rate": 0.0002, "loss": 1.5172, "step": 27280 }, { "epoch": 0.11, "grad_norm": 3.1596839427948, "learning_rate": 0.0002, "loss": 1.495, "step": 27290 }, { "epoch": 0.11, "grad_norm": 3.2574784755706787, "learning_rate": 0.0002, "loss": 1.5841, "step": 27300 }, { "epoch": 0.11, "grad_norm": 2.379091739654541, "learning_rate": 0.0002, "loss": 1.4695, "step": 27310 }, { "epoch": 0.11, "grad_norm": 5.492181777954102, "learning_rate": 0.0002, "loss": 1.4898, "step": 27320 }, { "epoch": 0.11, "grad_norm": 1.7988677024841309, "learning_rate": 0.0002, "loss": 1.702, "step": 27330 }, { "epoch": 0.11, "grad_norm": 3.3300728797912598, "learning_rate": 0.0002, "loss": 1.7259, "step": 27340 }, { "epoch": 0.11, "grad_norm": 1.5493674278259277, "learning_rate": 0.0002, "loss": 1.4136, "step": 27350 }, { "epoch": 0.11, "grad_norm": 2.893134832382202, "learning_rate": 0.0002, "loss": 1.426, "step": 27360 }, { "epoch": 0.11, "grad_norm": 2.0993592739105225, "learning_rate": 0.0002, "loss": 1.4453, "step": 27370 }, { "epoch": 0.11, "grad_norm": 2.163743495941162, "learning_rate": 0.0002, "loss": 1.4991, "step": 27380 }, { "epoch": 0.11, "grad_norm": 3.7790298461914062, "learning_rate": 0.0002, "loss": 1.3695, "step": 27390 }, { "epoch": 0.11, "grad_norm": 1.8006999492645264, "learning_rate": 0.0002, "loss": 1.3999, "step": 27400 }, { "epoch": 0.11, "grad_norm": 2.13381290435791, "learning_rate": 0.0002, "loss": 1.6518, "step": 27410 }, { "epoch": 0.11, "grad_norm": 2.2575905323028564, "learning_rate": 0.0002, "loss": 1.4401, "step": 27420 }, { "epoch": 0.11, "grad_norm": 1.9801636934280396, "learning_rate": 0.0002, "loss": 1.3669, "step": 27430 }, { "epoch": 0.11, "grad_norm": 2.390327215194702, "learning_rate": 0.0002, "loss": 1.6384, "step": 27440 }, { "epoch": 0.11, "grad_norm": 1.5517131090164185, "learning_rate": 0.0002, "loss": 1.469, "step": 27450 }, { "epoch": 0.11, "grad_norm": 2.393817663192749, "learning_rate": 0.0002, "loss": 1.613, "step": 27460 }, { "epoch": 0.11, "grad_norm": 1.8452250957489014, "learning_rate": 0.0002, "loss": 1.6438, "step": 27470 }, { "epoch": 0.11, "grad_norm": 1.7147914171218872, "learning_rate": 0.0002, "loss": 1.4184, "step": 27480 }, { "epoch": 0.11, "grad_norm": 3.9510037899017334, "learning_rate": 0.0002, "loss": 1.8027, "step": 27490 }, { "epoch": 0.11, "grad_norm": 3.6504933834075928, "learning_rate": 0.0002, "loss": 1.6513, "step": 27500 }, { "epoch": 0.11, "grad_norm": 1.0117521286010742, "learning_rate": 0.0002, "loss": 1.7174, "step": 27510 }, { "epoch": 0.11, "grad_norm": 4.527946472167969, "learning_rate": 0.0002, "loss": 1.5217, "step": 27520 }, { "epoch": 0.11, "grad_norm": 2.6946535110473633, "learning_rate": 0.0002, "loss": 1.691, "step": 27530 }, { "epoch": 0.11, "grad_norm": 3.424387216567993, "learning_rate": 0.0002, "loss": 1.6998, "step": 27540 }, { "epoch": 0.11, "grad_norm": 1.7253187894821167, "learning_rate": 0.0002, "loss": 1.5588, "step": 27550 }, { "epoch": 0.11, "grad_norm": 2.7949023246765137, "learning_rate": 0.0002, "loss": 1.5549, "step": 27560 }, { "epoch": 0.11, "grad_norm": 2.321605920791626, "learning_rate": 0.0002, "loss": 1.7374, "step": 27570 }, { "epoch": 0.11, "grad_norm": 3.051018238067627, "learning_rate": 0.0002, "loss": 1.582, "step": 27580 }, { "epoch": 0.11, "grad_norm": 4.084060192108154, "learning_rate": 0.0002, "loss": 1.4836, "step": 27590 }, { "epoch": 0.11, "grad_norm": 2.882244825363159, "learning_rate": 0.0002, "loss": 1.5065, "step": 27600 }, { "epoch": 0.11, "grad_norm": 2.9846878051757812, "learning_rate": 0.0002, "loss": 1.4601, "step": 27610 }, { "epoch": 0.11, "grad_norm": 2.62461256980896, "learning_rate": 0.0002, "loss": 1.5428, "step": 27620 }, { "epoch": 0.11, "grad_norm": 2.6654841899871826, "learning_rate": 0.0002, "loss": 1.635, "step": 27630 }, { "epoch": 0.11, "grad_norm": 3.367002248764038, "learning_rate": 0.0002, "loss": 1.5312, "step": 27640 }, { "epoch": 0.11, "grad_norm": 3.43072509765625, "learning_rate": 0.0002, "loss": 1.4985, "step": 27650 }, { "epoch": 0.11, "grad_norm": 3.012266159057617, "learning_rate": 0.0002, "loss": 1.5418, "step": 27660 }, { "epoch": 0.11, "grad_norm": 3.5160064697265625, "learning_rate": 0.0002, "loss": 1.6949, "step": 27670 }, { "epoch": 0.11, "grad_norm": 3.043010711669922, "learning_rate": 0.0002, "loss": 1.2782, "step": 27680 }, { "epoch": 0.11, "grad_norm": 2.6565558910369873, "learning_rate": 0.0002, "loss": 1.4871, "step": 27690 }, { "epoch": 0.11, "grad_norm": 2.1896438598632812, "learning_rate": 0.0002, "loss": 1.6819, "step": 27700 }, { "epoch": 0.11, "grad_norm": 2.5979936122894287, "learning_rate": 0.0002, "loss": 1.585, "step": 27710 }, { "epoch": 0.11, "grad_norm": 2.4760372638702393, "learning_rate": 0.0002, "loss": 1.507, "step": 27720 }, { "epoch": 0.11, "grad_norm": 2.4837193489074707, "learning_rate": 0.0002, "loss": 1.6871, "step": 27730 }, { "epoch": 0.11, "grad_norm": 1.5299186706542969, "learning_rate": 0.0002, "loss": 1.4514, "step": 27740 }, { "epoch": 0.11, "grad_norm": 3.6157126426696777, "learning_rate": 0.0002, "loss": 1.4421, "step": 27750 }, { "epoch": 0.11, "grad_norm": 3.9136338233947754, "learning_rate": 0.0002, "loss": 1.7049, "step": 27760 }, { "epoch": 0.11, "grad_norm": 2.3796072006225586, "learning_rate": 0.0002, "loss": 1.6438, "step": 27770 }, { "epoch": 0.11, "grad_norm": 3.5336475372314453, "learning_rate": 0.0002, "loss": 1.7585, "step": 27780 }, { "epoch": 0.11, "grad_norm": 2.6124155521392822, "learning_rate": 0.0002, "loss": 1.3758, "step": 27790 }, { "epoch": 0.11, "grad_norm": 3.128169059753418, "learning_rate": 0.0002, "loss": 1.3905, "step": 27800 }, { "epoch": 0.11, "grad_norm": 2.939539670944214, "learning_rate": 0.0002, "loss": 1.423, "step": 27810 }, { "epoch": 0.11, "grad_norm": 1.3542715311050415, "learning_rate": 0.0002, "loss": 1.6695, "step": 27820 }, { "epoch": 0.11, "grad_norm": 2.406010150909424, "learning_rate": 0.0002, "loss": 1.4403, "step": 27830 }, { "epoch": 0.11, "grad_norm": 5.296232223510742, "learning_rate": 0.0002, "loss": 1.4635, "step": 27840 }, { "epoch": 0.11, "grad_norm": 3.214754819869995, "learning_rate": 0.0002, "loss": 1.5338, "step": 27850 }, { "epoch": 0.11, "grad_norm": 1.8263347148895264, "learning_rate": 0.0002, "loss": 1.8517, "step": 27860 }, { "epoch": 0.11, "grad_norm": 1.8670629262924194, "learning_rate": 0.0002, "loss": 1.5689, "step": 27870 }, { "epoch": 0.11, "grad_norm": 3.0802783966064453, "learning_rate": 0.0002, "loss": 1.4721, "step": 27880 }, { "epoch": 0.11, "grad_norm": 1.77230966091156, "learning_rate": 0.0002, "loss": 1.7114, "step": 27890 }, { "epoch": 0.11, "grad_norm": 1.6190956830978394, "learning_rate": 0.0002, "loss": 1.5435, "step": 27900 }, { "epoch": 0.11, "grad_norm": 2.5912601947784424, "learning_rate": 0.0002, "loss": 1.4105, "step": 27910 }, { "epoch": 0.11, "grad_norm": 4.047062873840332, "learning_rate": 0.0002, "loss": 1.6001, "step": 27920 }, { "epoch": 0.11, "grad_norm": 3.643092632293701, "learning_rate": 0.0002, "loss": 1.4096, "step": 27930 }, { "epoch": 0.11, "grad_norm": 3.414088726043701, "learning_rate": 0.0002, "loss": 1.5095, "step": 27940 }, { "epoch": 0.11, "grad_norm": 3.238912343978882, "learning_rate": 0.0002, "loss": 1.5817, "step": 27950 }, { "epoch": 0.11, "grad_norm": 3.3677878379821777, "learning_rate": 0.0002, "loss": 1.3574, "step": 27960 }, { "epoch": 0.11, "grad_norm": 1.9242361783981323, "learning_rate": 0.0002, "loss": 1.5535, "step": 27970 }, { "epoch": 0.11, "grad_norm": 1.522535800933838, "learning_rate": 0.0002, "loss": 1.428, "step": 27980 }, { "epoch": 0.11, "grad_norm": 2.680748224258423, "learning_rate": 0.0002, "loss": 1.4099, "step": 27990 }, { "epoch": 0.11, "grad_norm": 2.652621030807495, "learning_rate": 0.0002, "loss": 1.6085, "step": 28000 }, { "epoch": 0.11, "grad_norm": 1.5992029905319214, "learning_rate": 0.0002, "loss": 1.5221, "step": 28010 }, { "epoch": 0.11, "grad_norm": 3.7392547130584717, "learning_rate": 0.0002, "loss": 1.4417, "step": 28020 }, { "epoch": 0.11, "grad_norm": 3.4362783432006836, "learning_rate": 0.0002, "loss": 1.7823, "step": 28030 }, { "epoch": 0.11, "grad_norm": 2.836646318435669, "learning_rate": 0.0002, "loss": 1.38, "step": 28040 }, { "epoch": 0.11, "grad_norm": 2.0677058696746826, "learning_rate": 0.0002, "loss": 1.4254, "step": 28050 }, { "epoch": 0.11, "grad_norm": 2.2426013946533203, "learning_rate": 0.0002, "loss": 1.5765, "step": 28060 }, { "epoch": 0.11, "grad_norm": 2.7723867893218994, "learning_rate": 0.0002, "loss": 1.5786, "step": 28070 }, { "epoch": 0.11, "grad_norm": 1.8189119100570679, "learning_rate": 0.0002, "loss": 1.2887, "step": 28080 }, { "epoch": 0.11, "grad_norm": 2.5659866333007812, "learning_rate": 0.0002, "loss": 1.6867, "step": 28090 }, { "epoch": 0.11, "grad_norm": 3.637737989425659, "learning_rate": 0.0002, "loss": 1.6384, "step": 28100 }, { "epoch": 0.11, "grad_norm": 2.5390727519989014, "learning_rate": 0.0002, "loss": 1.7199, "step": 28110 }, { "epoch": 0.11, "grad_norm": 1.9449889659881592, "learning_rate": 0.0002, "loss": 1.4876, "step": 28120 }, { "epoch": 0.11, "grad_norm": 2.15716814994812, "learning_rate": 0.0002, "loss": 1.5703, "step": 28130 }, { "epoch": 0.11, "grad_norm": 3.9399380683898926, "learning_rate": 0.0002, "loss": 1.4569, "step": 28140 }, { "epoch": 0.11, "grad_norm": 3.1299853324890137, "learning_rate": 0.0002, "loss": 1.7398, "step": 28150 }, { "epoch": 0.11, "grad_norm": 3.728882074356079, "learning_rate": 0.0002, "loss": 1.6592, "step": 28160 }, { "epoch": 0.11, "grad_norm": 4.198960304260254, "learning_rate": 0.0002, "loss": 1.7176, "step": 28170 }, { "epoch": 0.11, "grad_norm": 8.174386024475098, "learning_rate": 0.0002, "loss": 1.5118, "step": 28180 }, { "epoch": 0.11, "grad_norm": 1.3298041820526123, "learning_rate": 0.0002, "loss": 1.5017, "step": 28190 }, { "epoch": 0.11, "grad_norm": 7.7067766189575195, "learning_rate": 0.0002, "loss": 1.4479, "step": 28200 }, { "epoch": 0.11, "grad_norm": 2.265751600265503, "learning_rate": 0.0002, "loss": 1.4707, "step": 28210 }, { "epoch": 0.11, "grad_norm": 5.065028667449951, "learning_rate": 0.0002, "loss": 1.7352, "step": 28220 }, { "epoch": 0.11, "grad_norm": 3.6850404739379883, "learning_rate": 0.0002, "loss": 1.5686, "step": 28230 }, { "epoch": 0.11, "grad_norm": 1.5413984060287476, "learning_rate": 0.0002, "loss": 1.7714, "step": 28240 }, { "epoch": 0.12, "grad_norm": 2.036875009536743, "learning_rate": 0.0002, "loss": 1.5597, "step": 28250 }, { "epoch": 0.12, "grad_norm": 5.4801154136657715, "learning_rate": 0.0002, "loss": 1.615, "step": 28260 }, { "epoch": 0.12, "grad_norm": 2.7803475856781006, "learning_rate": 0.0002, "loss": 1.3861, "step": 28270 }, { "epoch": 0.12, "grad_norm": 3.0942070484161377, "learning_rate": 0.0002, "loss": 1.5705, "step": 28280 }, { "epoch": 0.12, "grad_norm": 2.409017562866211, "learning_rate": 0.0002, "loss": 1.5531, "step": 28290 }, { "epoch": 0.12, "grad_norm": 3.5183963775634766, "learning_rate": 0.0002, "loss": 1.4616, "step": 28300 }, { "epoch": 0.12, "grad_norm": 2.1130919456481934, "learning_rate": 0.0002, "loss": 1.6151, "step": 28310 }, { "epoch": 0.12, "grad_norm": 2.451939105987549, "learning_rate": 0.0002, "loss": 1.6353, "step": 28320 }, { "epoch": 0.12, "grad_norm": 2.6873438358306885, "learning_rate": 0.0002, "loss": 1.4585, "step": 28330 }, { "epoch": 0.12, "grad_norm": 1.3693360090255737, "learning_rate": 0.0002, "loss": 1.3216, "step": 28340 }, { "epoch": 0.12, "grad_norm": 3.7087020874023438, "learning_rate": 0.0002, "loss": 1.5269, "step": 28350 }, { "epoch": 0.12, "grad_norm": 2.8872287273406982, "learning_rate": 0.0002, "loss": 1.7272, "step": 28360 }, { "epoch": 0.12, "grad_norm": 2.974109649658203, "learning_rate": 0.0002, "loss": 1.4853, "step": 28370 }, { "epoch": 0.12, "grad_norm": 3.107379913330078, "learning_rate": 0.0002, "loss": 1.547, "step": 28380 }, { "epoch": 0.12, "grad_norm": 2.090304136276245, "learning_rate": 0.0002, "loss": 1.4337, "step": 28390 }, { "epoch": 0.12, "grad_norm": 3.032627582550049, "learning_rate": 0.0002, "loss": 1.4921, "step": 28400 }, { "epoch": 0.12, "grad_norm": 2.7197868824005127, "learning_rate": 0.0002, "loss": 1.5142, "step": 28410 }, { "epoch": 0.12, "grad_norm": 2.65959095954895, "learning_rate": 0.0002, "loss": 1.5361, "step": 28420 }, { "epoch": 0.12, "grad_norm": 2.104731559753418, "learning_rate": 0.0002, "loss": 1.5991, "step": 28430 }, { "epoch": 0.12, "grad_norm": 2.3742079734802246, "learning_rate": 0.0002, "loss": 1.5666, "step": 28440 }, { "epoch": 0.12, "grad_norm": 2.88517165184021, "learning_rate": 0.0002, "loss": 1.8304, "step": 28450 }, { "epoch": 0.12, "grad_norm": 2.137423276901245, "learning_rate": 0.0002, "loss": 1.4177, "step": 28460 }, { "epoch": 0.12, "grad_norm": 1.9569393396377563, "learning_rate": 0.0002, "loss": 1.4607, "step": 28470 }, { "epoch": 0.12, "grad_norm": 3.5280163288116455, "learning_rate": 0.0002, "loss": 1.6778, "step": 28480 }, { "epoch": 0.12, "grad_norm": 1.8916643857955933, "learning_rate": 0.0002, "loss": 1.3932, "step": 28490 }, { "epoch": 0.12, "grad_norm": 3.954197883605957, "learning_rate": 0.0002, "loss": 1.4863, "step": 28500 }, { "epoch": 0.12, "grad_norm": 1.9795894622802734, "learning_rate": 0.0002, "loss": 1.6511, "step": 28510 }, { "epoch": 0.12, "grad_norm": 2.8829829692840576, "learning_rate": 0.0002, "loss": 1.4398, "step": 28520 }, { "epoch": 0.12, "grad_norm": 2.6284797191619873, "learning_rate": 0.0002, "loss": 1.7036, "step": 28530 }, { "epoch": 0.12, "grad_norm": 3.4680986404418945, "learning_rate": 0.0002, "loss": 1.5529, "step": 28540 }, { "epoch": 0.12, "grad_norm": 3.3399903774261475, "learning_rate": 0.0002, "loss": 1.7125, "step": 28550 }, { "epoch": 0.12, "grad_norm": 2.9144070148468018, "learning_rate": 0.0002, "loss": 1.6244, "step": 28560 }, { "epoch": 0.12, "grad_norm": 1.577575445175171, "learning_rate": 0.0002, "loss": 1.6501, "step": 28570 }, { "epoch": 0.12, "grad_norm": 2.106559991836548, "learning_rate": 0.0002, "loss": 1.4649, "step": 28580 }, { "epoch": 0.12, "grad_norm": 3.734273672103882, "learning_rate": 0.0002, "loss": 1.3942, "step": 28590 }, { "epoch": 0.12, "grad_norm": 2.0348169803619385, "learning_rate": 0.0002, "loss": 1.642, "step": 28600 }, { "epoch": 0.12, "grad_norm": 4.169249057769775, "learning_rate": 0.0002, "loss": 1.6033, "step": 28610 }, { "epoch": 0.12, "grad_norm": 2.7932021617889404, "learning_rate": 0.0002, "loss": 1.4466, "step": 28620 }, { "epoch": 0.12, "grad_norm": 3.1523613929748535, "learning_rate": 0.0002, "loss": 1.4595, "step": 28630 }, { "epoch": 0.12, "grad_norm": 2.523538827896118, "learning_rate": 0.0002, "loss": 1.4338, "step": 28640 }, { "epoch": 0.12, "grad_norm": 4.545161724090576, "learning_rate": 0.0002, "loss": 1.6975, "step": 28650 }, { "epoch": 0.12, "grad_norm": 3.6954691410064697, "learning_rate": 0.0002, "loss": 1.5944, "step": 28660 }, { "epoch": 0.12, "grad_norm": 2.6521406173706055, "learning_rate": 0.0002, "loss": 1.3934, "step": 28670 }, { "epoch": 0.12, "grad_norm": 2.0128488540649414, "learning_rate": 0.0002, "loss": 1.6722, "step": 28680 }, { "epoch": 0.12, "grad_norm": 2.9571266174316406, "learning_rate": 0.0002, "loss": 1.5039, "step": 28690 }, { "epoch": 0.12, "grad_norm": 2.6077284812927246, "learning_rate": 0.0002, "loss": 1.6697, "step": 28700 }, { "epoch": 0.12, "grad_norm": 6.056484222412109, "learning_rate": 0.0002, "loss": 1.4482, "step": 28710 }, { "epoch": 0.12, "grad_norm": 2.7402584552764893, "learning_rate": 0.0002, "loss": 1.5604, "step": 28720 }, { "epoch": 0.12, "grad_norm": 3.0815305709838867, "learning_rate": 0.0002, "loss": 1.7233, "step": 28730 }, { "epoch": 0.12, "grad_norm": 2.139357566833496, "learning_rate": 0.0002, "loss": 1.6024, "step": 28740 }, { "epoch": 0.12, "grad_norm": 3.1429741382598877, "learning_rate": 0.0002, "loss": 1.5969, "step": 28750 }, { "epoch": 0.12, "grad_norm": 2.6408300399780273, "learning_rate": 0.0002, "loss": 1.4581, "step": 28760 }, { "epoch": 0.12, "grad_norm": 2.7061305046081543, "learning_rate": 0.0002, "loss": 1.7021, "step": 28770 }, { "epoch": 0.12, "grad_norm": 2.9258928298950195, "learning_rate": 0.0002, "loss": 1.4655, "step": 28780 }, { "epoch": 0.12, "grad_norm": 2.598116397857666, "learning_rate": 0.0002, "loss": 1.6307, "step": 28790 }, { "epoch": 0.12, "grad_norm": 3.729031801223755, "learning_rate": 0.0002, "loss": 1.6308, "step": 28800 }, { "epoch": 0.12, "grad_norm": 3.5513100624084473, "learning_rate": 0.0002, "loss": 1.5854, "step": 28810 }, { "epoch": 0.12, "grad_norm": 3.24543833732605, "learning_rate": 0.0002, "loss": 1.3534, "step": 28820 }, { "epoch": 0.12, "grad_norm": 1.9608618021011353, "learning_rate": 0.0002, "loss": 1.3832, "step": 28830 }, { "epoch": 0.12, "grad_norm": 2.3102729320526123, "learning_rate": 0.0002, "loss": 1.6229, "step": 28840 }, { "epoch": 0.12, "grad_norm": 2.646620512008667, "learning_rate": 0.0002, "loss": 1.547, "step": 28850 }, { "epoch": 0.12, "grad_norm": 3.339702844619751, "learning_rate": 0.0002, "loss": 1.551, "step": 28860 }, { "epoch": 0.12, "grad_norm": 4.364441871643066, "learning_rate": 0.0002, "loss": 1.4431, "step": 28870 }, { "epoch": 0.12, "grad_norm": 2.210887908935547, "learning_rate": 0.0002, "loss": 1.4323, "step": 28880 }, { "epoch": 0.12, "grad_norm": 4.032818794250488, "learning_rate": 0.0002, "loss": 1.6003, "step": 28890 }, { "epoch": 0.12, "grad_norm": 2.1691324710845947, "learning_rate": 0.0002, "loss": 1.3852, "step": 28900 }, { "epoch": 0.12, "grad_norm": 1.8477176427841187, "learning_rate": 0.0002, "loss": 1.5313, "step": 28910 }, { "epoch": 0.12, "grad_norm": 1.6748524904251099, "learning_rate": 0.0002, "loss": 1.5602, "step": 28920 }, { "epoch": 0.12, "grad_norm": 2.6346445083618164, "learning_rate": 0.0002, "loss": 1.5275, "step": 28930 }, { "epoch": 0.12, "grad_norm": 2.6156301498413086, "learning_rate": 0.0002, "loss": 1.4957, "step": 28940 }, { "epoch": 0.12, "grad_norm": 3.2184815406799316, "learning_rate": 0.0002, "loss": 1.5654, "step": 28950 }, { "epoch": 0.12, "grad_norm": 3.0372095108032227, "learning_rate": 0.0002, "loss": 1.4936, "step": 28960 }, { "epoch": 0.12, "grad_norm": 3.233447313308716, "learning_rate": 0.0002, "loss": 1.3413, "step": 28970 }, { "epoch": 0.12, "grad_norm": 2.6106841564178467, "learning_rate": 0.0002, "loss": 1.4149, "step": 28980 }, { "epoch": 0.12, "grad_norm": 2.591646194458008, "learning_rate": 0.0002, "loss": 1.7507, "step": 28990 }, { "epoch": 0.12, "grad_norm": 2.711601972579956, "learning_rate": 0.0002, "loss": 1.4625, "step": 29000 }, { "epoch": 0.12, "grad_norm": 1.5056706666946411, "learning_rate": 0.0002, "loss": 1.5017, "step": 29010 }, { "epoch": 0.12, "grad_norm": 1.9789998531341553, "learning_rate": 0.0002, "loss": 1.4137, "step": 29020 }, { "epoch": 0.12, "grad_norm": 3.669037342071533, "learning_rate": 0.0002, "loss": 1.6001, "step": 29030 }, { "epoch": 0.12, "grad_norm": 1.809706449508667, "learning_rate": 0.0002, "loss": 1.5599, "step": 29040 }, { "epoch": 0.12, "grad_norm": 1.5540838241577148, "learning_rate": 0.0002, "loss": 1.5223, "step": 29050 }, { "epoch": 0.12, "grad_norm": 5.321897506713867, "learning_rate": 0.0002, "loss": 1.3477, "step": 29060 }, { "epoch": 0.12, "grad_norm": 1.8659026622772217, "learning_rate": 0.0002, "loss": 1.7854, "step": 29070 }, { "epoch": 0.12, "grad_norm": 1.6658955812454224, "learning_rate": 0.0002, "loss": 1.3108, "step": 29080 }, { "epoch": 0.12, "grad_norm": 2.934999465942383, "learning_rate": 0.0002, "loss": 1.545, "step": 29090 }, { "epoch": 0.12, "grad_norm": 1.9523112773895264, "learning_rate": 0.0002, "loss": 1.3585, "step": 29100 }, { "epoch": 0.12, "grad_norm": 2.7465262413024902, "learning_rate": 0.0002, "loss": 1.4558, "step": 29110 }, { "epoch": 0.12, "grad_norm": 2.5624568462371826, "learning_rate": 0.0002, "loss": 1.5739, "step": 29120 }, { "epoch": 0.12, "grad_norm": 3.4915285110473633, "learning_rate": 0.0002, "loss": 1.5036, "step": 29130 }, { "epoch": 0.12, "grad_norm": 2.9861526489257812, "learning_rate": 0.0002, "loss": 1.5114, "step": 29140 }, { "epoch": 0.12, "grad_norm": 1.914683222770691, "learning_rate": 0.0002, "loss": 1.5288, "step": 29150 }, { "epoch": 0.12, "grad_norm": 2.5269100666046143, "learning_rate": 0.0002, "loss": 1.4804, "step": 29160 }, { "epoch": 0.12, "grad_norm": 3.2447733879089355, "learning_rate": 0.0002, "loss": 1.4274, "step": 29170 }, { "epoch": 0.12, "grad_norm": 1.7198048830032349, "learning_rate": 0.0002, "loss": 1.5942, "step": 29180 }, { "epoch": 0.12, "grad_norm": 2.6538822650909424, "learning_rate": 0.0002, "loss": 1.5695, "step": 29190 }, { "epoch": 0.12, "grad_norm": 3.3672568798065186, "learning_rate": 0.0002, "loss": 1.6168, "step": 29200 }, { "epoch": 0.12, "grad_norm": 2.0998082160949707, "learning_rate": 0.0002, "loss": 1.5595, "step": 29210 }, { "epoch": 0.12, "grad_norm": 2.298194646835327, "learning_rate": 0.0002, "loss": 1.3828, "step": 29220 }, { "epoch": 0.12, "grad_norm": 2.831997871398926, "learning_rate": 0.0002, "loss": 1.6956, "step": 29230 }, { "epoch": 0.12, "grad_norm": 2.4967541694641113, "learning_rate": 0.0002, "loss": 1.5743, "step": 29240 }, { "epoch": 0.12, "grad_norm": 6.142807483673096, "learning_rate": 0.0002, "loss": 1.6354, "step": 29250 }, { "epoch": 0.12, "grad_norm": 2.311610460281372, "learning_rate": 0.0002, "loss": 1.5276, "step": 29260 }, { "epoch": 0.12, "grad_norm": 3.9396657943725586, "learning_rate": 0.0002, "loss": 1.56, "step": 29270 }, { "epoch": 0.12, "grad_norm": 1.8205673694610596, "learning_rate": 0.0002, "loss": 1.4231, "step": 29280 }, { "epoch": 0.12, "grad_norm": 1.2460665702819824, "learning_rate": 0.0002, "loss": 1.6113, "step": 29290 }, { "epoch": 0.12, "grad_norm": 2.0759050846099854, "learning_rate": 0.0002, "loss": 1.4563, "step": 29300 }, { "epoch": 0.12, "grad_norm": 2.581354856491089, "learning_rate": 0.0002, "loss": 1.5511, "step": 29310 }, { "epoch": 0.12, "grad_norm": 2.2674636840820312, "learning_rate": 0.0002, "loss": 1.8646, "step": 29320 }, { "epoch": 0.12, "grad_norm": 3.8266234397888184, "learning_rate": 0.0002, "loss": 1.4817, "step": 29330 }, { "epoch": 0.12, "grad_norm": 4.360745429992676, "learning_rate": 0.0002, "loss": 1.6012, "step": 29340 }, { "epoch": 0.12, "grad_norm": 2.823136568069458, "learning_rate": 0.0002, "loss": 1.5315, "step": 29350 }, { "epoch": 0.12, "grad_norm": 1.5675060749053955, "learning_rate": 0.0002, "loss": 1.6685, "step": 29360 }, { "epoch": 0.12, "grad_norm": 3.093360424041748, "learning_rate": 0.0002, "loss": 1.7377, "step": 29370 }, { "epoch": 0.12, "grad_norm": 3.047363758087158, "learning_rate": 0.0002, "loss": 1.8319, "step": 29380 }, { "epoch": 0.12, "grad_norm": 2.406447172164917, "learning_rate": 0.0002, "loss": 1.3204, "step": 29390 }, { "epoch": 0.12, "grad_norm": 1.541755199432373, "learning_rate": 0.0002, "loss": 1.4397, "step": 29400 }, { "epoch": 0.12, "grad_norm": 1.9881423711776733, "learning_rate": 0.0002, "loss": 1.4133, "step": 29410 }, { "epoch": 0.12, "grad_norm": 2.056170701980591, "learning_rate": 0.0002, "loss": 1.6286, "step": 29420 }, { "epoch": 0.12, "grad_norm": 2.709263563156128, "learning_rate": 0.0002, "loss": 1.3947, "step": 29430 }, { "epoch": 0.12, "grad_norm": 4.1051201820373535, "learning_rate": 0.0002, "loss": 1.4113, "step": 29440 }, { "epoch": 0.12, "grad_norm": 2.559011697769165, "learning_rate": 0.0002, "loss": 1.2775, "step": 29450 }, { "epoch": 0.12, "grad_norm": 2.526266574859619, "learning_rate": 0.0002, "loss": 1.3928, "step": 29460 }, { "epoch": 0.12, "grad_norm": 3.8429601192474365, "learning_rate": 0.0002, "loss": 1.79, "step": 29470 }, { "epoch": 0.12, "grad_norm": 1.4395616054534912, "learning_rate": 0.0002, "loss": 1.5236, "step": 29480 }, { "epoch": 0.12, "grad_norm": 3.662407398223877, "learning_rate": 0.0002, "loss": 1.6097, "step": 29490 }, { "epoch": 0.12, "grad_norm": 3.6251749992370605, "learning_rate": 0.0002, "loss": 1.6309, "step": 29500 }, { "epoch": 0.12, "grad_norm": 3.694929599761963, "learning_rate": 0.0002, "loss": 1.8164, "step": 29510 }, { "epoch": 0.12, "grad_norm": 3.2030396461486816, "learning_rate": 0.0002, "loss": 1.7449, "step": 29520 }, { "epoch": 0.12, "grad_norm": 3.6496634483337402, "learning_rate": 0.0002, "loss": 1.3879, "step": 29530 }, { "epoch": 0.12, "grad_norm": 1.9833556413650513, "learning_rate": 0.0002, "loss": 1.5908, "step": 29540 }, { "epoch": 0.12, "grad_norm": 2.5879855155944824, "learning_rate": 0.0002, "loss": 1.508, "step": 29550 }, { "epoch": 0.12, "grad_norm": 3.3369221687316895, "learning_rate": 0.0002, "loss": 1.606, "step": 29560 }, { "epoch": 0.12, "grad_norm": 2.384021759033203, "learning_rate": 0.0002, "loss": 1.3828, "step": 29570 }, { "epoch": 0.12, "grad_norm": 2.4894940853118896, "learning_rate": 0.0002, "loss": 1.5073, "step": 29580 }, { "epoch": 0.12, "grad_norm": 2.6510918140411377, "learning_rate": 0.0002, "loss": 1.3761, "step": 29590 }, { "epoch": 0.12, "grad_norm": 2.021568775177002, "learning_rate": 0.0002, "loss": 1.6735, "step": 29600 }, { "epoch": 0.12, "grad_norm": 1.5913597345352173, "learning_rate": 0.0002, "loss": 1.518, "step": 29610 }, { "epoch": 0.12, "grad_norm": 2.9794185161590576, "learning_rate": 0.0002, "loss": 1.3153, "step": 29620 }, { "epoch": 0.12, "grad_norm": 2.722465753555298, "learning_rate": 0.0002, "loss": 1.5257, "step": 29630 }, { "epoch": 0.12, "grad_norm": 2.355778455734253, "learning_rate": 0.0002, "loss": 1.4737, "step": 29640 }, { "epoch": 0.12, "grad_norm": 3.1229052543640137, "learning_rate": 0.0002, "loss": 1.5763, "step": 29650 }, { "epoch": 0.12, "grad_norm": 2.273313522338867, "learning_rate": 0.0002, "loss": 1.4284, "step": 29660 }, { "epoch": 0.12, "grad_norm": 3.7648439407348633, "learning_rate": 0.0002, "loss": 1.5445, "step": 29670 }, { "epoch": 0.12, "grad_norm": 1.9822213649749756, "learning_rate": 0.0002, "loss": 1.4111, "step": 29680 }, { "epoch": 0.12, "grad_norm": 2.2540292739868164, "learning_rate": 0.0002, "loss": 1.2848, "step": 29690 }, { "epoch": 0.12, "grad_norm": 2.5648252964019775, "learning_rate": 0.0002, "loss": 1.6033, "step": 29700 }, { "epoch": 0.12, "grad_norm": 2.1101293563842773, "learning_rate": 0.0002, "loss": 1.5325, "step": 29710 }, { "epoch": 0.12, "grad_norm": 2.639385938644409, "learning_rate": 0.0002, "loss": 1.5006, "step": 29720 }, { "epoch": 0.12, "grad_norm": 2.8536336421966553, "learning_rate": 0.0002, "loss": 1.3265, "step": 29730 }, { "epoch": 0.12, "grad_norm": 3.4655966758728027, "learning_rate": 0.0002, "loss": 1.3102, "step": 29740 }, { "epoch": 0.12, "grad_norm": 1.3194688558578491, "learning_rate": 0.0002, "loss": 1.6111, "step": 29750 }, { "epoch": 0.12, "grad_norm": 2.6934168338775635, "learning_rate": 0.0002, "loss": 1.5426, "step": 29760 }, { "epoch": 0.12, "grad_norm": 2.8735053539276123, "learning_rate": 0.0002, "loss": 1.5522, "step": 29770 }, { "epoch": 0.12, "grad_norm": 2.3496923446655273, "learning_rate": 0.0002, "loss": 1.4366, "step": 29780 }, { "epoch": 0.12, "grad_norm": 3.3407158851623535, "learning_rate": 0.0002, "loss": 1.2537, "step": 29790 }, { "epoch": 0.12, "grad_norm": 2.596200704574585, "learning_rate": 0.0002, "loss": 1.518, "step": 29800 }, { "epoch": 0.12, "grad_norm": 7.65170955657959, "learning_rate": 0.0002, "loss": 1.7024, "step": 29810 }, { "epoch": 0.12, "grad_norm": 2.8351283073425293, "learning_rate": 0.0002, "loss": 1.6444, "step": 29820 }, { "epoch": 0.12, "grad_norm": 3.20917010307312, "learning_rate": 0.0002, "loss": 1.6643, "step": 29830 }, { "epoch": 0.12, "grad_norm": 3.9653732776641846, "learning_rate": 0.0002, "loss": 1.4982, "step": 29840 }, { "epoch": 0.12, "grad_norm": 3.229647159576416, "learning_rate": 0.0002, "loss": 1.5202, "step": 29850 }, { "epoch": 0.12, "grad_norm": 1.3544387817382812, "learning_rate": 0.0002, "loss": 1.6118, "step": 29860 }, { "epoch": 0.12, "grad_norm": 1.8969634771347046, "learning_rate": 0.0002, "loss": 1.627, "step": 29870 }, { "epoch": 0.12, "grad_norm": 3.3110454082489014, "learning_rate": 0.0002, "loss": 1.6793, "step": 29880 }, { "epoch": 0.12, "grad_norm": 1.9596840143203735, "learning_rate": 0.0002, "loss": 1.4416, "step": 29890 }, { "epoch": 0.12, "grad_norm": 2.30810809135437, "learning_rate": 0.0002, "loss": 1.4664, "step": 29900 }, { "epoch": 0.12, "grad_norm": 1.9252119064331055, "learning_rate": 0.0002, "loss": 1.5853, "step": 29910 }, { "epoch": 0.12, "grad_norm": 2.4923903942108154, "learning_rate": 0.0002, "loss": 1.4607, "step": 29920 }, { "epoch": 0.12, "grad_norm": 2.7300045490264893, "learning_rate": 0.0002, "loss": 1.3804, "step": 29930 }, { "epoch": 0.12, "grad_norm": 3.28190016746521, "learning_rate": 0.0002, "loss": 1.6883, "step": 29940 }, { "epoch": 0.12, "grad_norm": 2.8720104694366455, "learning_rate": 0.0002, "loss": 1.5852, "step": 29950 }, { "epoch": 0.12, "grad_norm": 3.3064303398132324, "learning_rate": 0.0002, "loss": 1.6159, "step": 29960 }, { "epoch": 0.12, "grad_norm": 2.915215492248535, "learning_rate": 0.0002, "loss": 1.4135, "step": 29970 }, { "epoch": 0.12, "grad_norm": 3.1459741592407227, "learning_rate": 0.0002, "loss": 1.707, "step": 29980 }, { "epoch": 0.12, "grad_norm": 2.987107753753662, "learning_rate": 0.0002, "loss": 1.4151, "step": 29990 }, { "epoch": 0.12, "grad_norm": 3.312089204788208, "learning_rate": 0.0002, "loss": 1.3251, "step": 30000 }, { "epoch": 0.12, "grad_norm": 2.0898945331573486, "learning_rate": 0.0002, "loss": 1.7137, "step": 30010 }, { "epoch": 0.12, "grad_norm": 2.5708796977996826, "learning_rate": 0.0002, "loss": 1.7401, "step": 30020 }, { "epoch": 0.12, "grad_norm": 3.6315195560455322, "learning_rate": 0.0002, "loss": 1.6053, "step": 30030 }, { "epoch": 0.12, "grad_norm": 2.5471713542938232, "learning_rate": 0.0002, "loss": 1.2058, "step": 30040 }, { "epoch": 0.12, "grad_norm": 1.7831705808639526, "learning_rate": 0.0002, "loss": 1.587, "step": 30050 }, { "epoch": 0.12, "grad_norm": 2.7786483764648438, "learning_rate": 0.0002, "loss": 1.545, "step": 30060 }, { "epoch": 0.12, "grad_norm": 3.345700740814209, "learning_rate": 0.0002, "loss": 1.426, "step": 30070 }, { "epoch": 0.12, "grad_norm": 3.359302520751953, "learning_rate": 0.0002, "loss": 1.5848, "step": 30080 }, { "epoch": 0.12, "grad_norm": 2.4544148445129395, "learning_rate": 0.0002, "loss": 1.5306, "step": 30090 }, { "epoch": 0.12, "grad_norm": 1.8775193691253662, "learning_rate": 0.0002, "loss": 1.6922, "step": 30100 }, { "epoch": 0.12, "grad_norm": 5.279895782470703, "learning_rate": 0.0002, "loss": 1.7276, "step": 30110 }, { "epoch": 0.12, "grad_norm": 2.6851966381073, "learning_rate": 0.0002, "loss": 1.6484, "step": 30120 }, { "epoch": 0.12, "grad_norm": 3.14125394821167, "learning_rate": 0.0002, "loss": 1.3081, "step": 30130 }, { "epoch": 0.12, "grad_norm": 4.507335186004639, "learning_rate": 0.0002, "loss": 1.5398, "step": 30140 }, { "epoch": 0.12, "grad_norm": 2.4205124378204346, "learning_rate": 0.0002, "loss": 1.4718, "step": 30150 }, { "epoch": 0.12, "grad_norm": 3.777553081512451, "learning_rate": 0.0002, "loss": 1.5652, "step": 30160 }, { "epoch": 0.12, "grad_norm": 3.0678725242614746, "learning_rate": 0.0002, "loss": 1.4028, "step": 30170 }, { "epoch": 0.12, "grad_norm": 2.8067188262939453, "learning_rate": 0.0002, "loss": 1.5347, "step": 30180 }, { "epoch": 0.12, "grad_norm": 2.450756788253784, "learning_rate": 0.0002, "loss": 1.3862, "step": 30190 }, { "epoch": 0.12, "grad_norm": 2.7668237686157227, "learning_rate": 0.0002, "loss": 1.2559, "step": 30200 }, { "epoch": 0.12, "grad_norm": 2.1302249431610107, "learning_rate": 0.0002, "loss": 1.528, "step": 30210 }, { "epoch": 0.12, "grad_norm": 3.502579689025879, "learning_rate": 0.0002, "loss": 1.4528, "step": 30220 }, { "epoch": 0.12, "grad_norm": 2.0424954891204834, "learning_rate": 0.0002, "loss": 1.4165, "step": 30230 }, { "epoch": 0.12, "grad_norm": 2.1127326488494873, "learning_rate": 0.0002, "loss": 1.4501, "step": 30240 }, { "epoch": 0.12, "grad_norm": 2.764605760574341, "learning_rate": 0.0002, "loss": 1.5236, "step": 30250 }, { "epoch": 0.12, "grad_norm": 1.6555140018463135, "learning_rate": 0.0002, "loss": 1.7577, "step": 30260 }, { "epoch": 0.12, "grad_norm": 2.602604627609253, "learning_rate": 0.0002, "loss": 1.8383, "step": 30270 }, { "epoch": 0.12, "grad_norm": 2.1810052394866943, "learning_rate": 0.0002, "loss": 1.5663, "step": 30280 }, { "epoch": 0.12, "grad_norm": 1.582029938697815, "learning_rate": 0.0002, "loss": 1.6815, "step": 30290 }, { "epoch": 0.12, "grad_norm": 2.1612467765808105, "learning_rate": 0.0002, "loss": 1.5235, "step": 30300 }, { "epoch": 0.12, "grad_norm": 1.4615401029586792, "learning_rate": 0.0002, "loss": 1.5249, "step": 30310 }, { "epoch": 0.12, "grad_norm": 2.2661449909210205, "learning_rate": 0.0002, "loss": 1.5609, "step": 30320 }, { "epoch": 0.12, "grad_norm": 1.8482153415679932, "learning_rate": 0.0002, "loss": 1.5598, "step": 30330 }, { "epoch": 0.12, "grad_norm": 2.656456708908081, "learning_rate": 0.0002, "loss": 1.3967, "step": 30340 }, { "epoch": 0.12, "grad_norm": 4.223151683807373, "learning_rate": 0.0002, "loss": 1.5387, "step": 30350 }, { "epoch": 0.12, "grad_norm": 2.854491949081421, "learning_rate": 0.0002, "loss": 1.4683, "step": 30360 }, { "epoch": 0.12, "grad_norm": 1.6508816480636597, "learning_rate": 0.0002, "loss": 1.216, "step": 30370 }, { "epoch": 0.12, "grad_norm": 2.5967602729797363, "learning_rate": 0.0002, "loss": 1.4918, "step": 30380 }, { "epoch": 0.12, "grad_norm": 1.749540090560913, "learning_rate": 0.0002, "loss": 1.6257, "step": 30390 }, { "epoch": 0.12, "grad_norm": 1.4892749786376953, "learning_rate": 0.0002, "loss": 1.63, "step": 30400 }, { "epoch": 0.12, "grad_norm": 1.528883695602417, "learning_rate": 0.0002, "loss": 1.5994, "step": 30410 }, { "epoch": 0.12, "grad_norm": 1.5010509490966797, "learning_rate": 0.0002, "loss": 1.5946, "step": 30420 }, { "epoch": 0.12, "grad_norm": 2.9953505992889404, "learning_rate": 0.0002, "loss": 1.6693, "step": 30430 }, { "epoch": 0.12, "grad_norm": 2.5664401054382324, "learning_rate": 0.0002, "loss": 1.6889, "step": 30440 }, { "epoch": 0.12, "grad_norm": 3.926828622817993, "learning_rate": 0.0002, "loss": 1.67, "step": 30450 }, { "epoch": 0.12, "grad_norm": 2.620608329772949, "learning_rate": 0.0002, "loss": 1.4663, "step": 30460 }, { "epoch": 0.12, "grad_norm": 2.5972766876220703, "learning_rate": 0.0002, "loss": 1.6196, "step": 30470 }, { "epoch": 0.12, "grad_norm": 3.1296987533569336, "learning_rate": 0.0002, "loss": 1.8513, "step": 30480 }, { "epoch": 0.12, "grad_norm": 2.583239793777466, "learning_rate": 0.0002, "loss": 1.4378, "step": 30490 }, { "epoch": 0.12, "grad_norm": 1.8330557346343994, "learning_rate": 0.0002, "loss": 1.5084, "step": 30500 }, { "epoch": 0.12, "grad_norm": 3.650681972503662, "learning_rate": 0.0002, "loss": 1.6091, "step": 30510 }, { "epoch": 0.12, "grad_norm": 3.0947723388671875, "learning_rate": 0.0002, "loss": 1.6865, "step": 30520 }, { "epoch": 0.12, "grad_norm": 2.605269193649292, "learning_rate": 0.0002, "loss": 1.547, "step": 30530 }, { "epoch": 0.12, "grad_norm": 3.474839448928833, "learning_rate": 0.0002, "loss": 1.5621, "step": 30540 }, { "epoch": 0.12, "grad_norm": 2.954867124557495, "learning_rate": 0.0002, "loss": 1.3706, "step": 30550 }, { "epoch": 0.12, "grad_norm": 2.260206699371338, "learning_rate": 0.0002, "loss": 1.5989, "step": 30560 }, { "epoch": 0.12, "grad_norm": 2.8511722087860107, "learning_rate": 0.0002, "loss": 1.3723, "step": 30570 }, { "epoch": 0.12, "grad_norm": 3.372933864593506, "learning_rate": 0.0002, "loss": 1.706, "step": 30580 }, { "epoch": 0.12, "grad_norm": 2.293943405151367, "learning_rate": 0.0002, "loss": 1.729, "step": 30590 }, { "epoch": 0.12, "grad_norm": 2.946744203567505, "learning_rate": 0.0002, "loss": 1.5507, "step": 30600 }, { "epoch": 0.12, "grad_norm": 3.056394100189209, "learning_rate": 0.0002, "loss": 1.4113, "step": 30610 }, { "epoch": 0.12, "grad_norm": 4.070985317230225, "learning_rate": 0.0002, "loss": 1.4364, "step": 30620 }, { "epoch": 0.12, "grad_norm": 2.445704698562622, "learning_rate": 0.0002, "loss": 1.5629, "step": 30630 }, { "epoch": 0.12, "grad_norm": 3.1596949100494385, "learning_rate": 0.0002, "loss": 1.7433, "step": 30640 }, { "epoch": 0.12, "grad_norm": 4.057275295257568, "learning_rate": 0.0002, "loss": 1.4147, "step": 30650 }, { "epoch": 0.12, "grad_norm": 2.361283302307129, "learning_rate": 0.0002, "loss": 1.4084, "step": 30660 }, { "epoch": 0.12, "grad_norm": 2.27618408203125, "learning_rate": 0.0002, "loss": 1.5068, "step": 30670 }, { "epoch": 0.12, "grad_norm": 2.8373284339904785, "learning_rate": 0.0002, "loss": 1.5269, "step": 30680 }, { "epoch": 0.12, "grad_norm": 2.749160051345825, "learning_rate": 0.0002, "loss": 1.611, "step": 30690 }, { "epoch": 0.12, "grad_norm": 2.2826173305511475, "learning_rate": 0.0002, "loss": 1.5895, "step": 30700 }, { "epoch": 0.13, "grad_norm": 4.69254732131958, "learning_rate": 0.0002, "loss": 1.5244, "step": 30710 }, { "epoch": 0.13, "grad_norm": 1.801403284072876, "learning_rate": 0.0002, "loss": 1.4021, "step": 30720 }, { "epoch": 0.13, "grad_norm": 4.58438777923584, "learning_rate": 0.0002, "loss": 1.49, "step": 30730 }, { "epoch": 0.13, "grad_norm": 2.47393536567688, "learning_rate": 0.0002, "loss": 1.3113, "step": 30740 }, { "epoch": 0.13, "grad_norm": 2.661317825317383, "learning_rate": 0.0002, "loss": 1.4442, "step": 30750 }, { "epoch": 0.13, "grad_norm": 2.037411689758301, "learning_rate": 0.0002, "loss": 1.5269, "step": 30760 }, { "epoch": 0.13, "grad_norm": 2.240935802459717, "learning_rate": 0.0002, "loss": 1.4445, "step": 30770 }, { "epoch": 0.13, "grad_norm": 2.051048994064331, "learning_rate": 0.0002, "loss": 1.646, "step": 30780 }, { "epoch": 0.13, "grad_norm": 2.3007781505584717, "learning_rate": 0.0002, "loss": 1.4375, "step": 30790 }, { "epoch": 0.13, "grad_norm": 2.8736586570739746, "learning_rate": 0.0002, "loss": 1.4584, "step": 30800 }, { "epoch": 0.13, "grad_norm": 2.6088123321533203, "learning_rate": 0.0002, "loss": 1.6049, "step": 30810 }, { "epoch": 0.13, "grad_norm": 3.165694236755371, "learning_rate": 0.0002, "loss": 1.3148, "step": 30820 }, { "epoch": 0.13, "grad_norm": 3.0630125999450684, "learning_rate": 0.0002, "loss": 1.3913, "step": 30830 }, { "epoch": 0.13, "grad_norm": 3.266648530960083, "learning_rate": 0.0002, "loss": 1.6079, "step": 30840 }, { "epoch": 0.13, "grad_norm": 2.745680809020996, "learning_rate": 0.0002, "loss": 1.4298, "step": 30850 }, { "epoch": 0.13, "grad_norm": 5.478568077087402, "learning_rate": 0.0002, "loss": 1.4855, "step": 30860 }, { "epoch": 0.13, "grad_norm": 2.219027519226074, "learning_rate": 0.0002, "loss": 1.3167, "step": 30870 }, { "epoch": 0.13, "grad_norm": 2.9562973976135254, "learning_rate": 0.0002, "loss": 1.2859, "step": 30880 }, { "epoch": 0.13, "grad_norm": 1.6920859813690186, "learning_rate": 0.0002, "loss": 1.5713, "step": 30890 }, { "epoch": 0.13, "grad_norm": 3.3005409240722656, "learning_rate": 0.0002, "loss": 1.5749, "step": 30900 }, { "epoch": 0.13, "grad_norm": 2.7499260902404785, "learning_rate": 0.0002, "loss": 1.3276, "step": 30910 }, { "epoch": 0.13, "grad_norm": 3.8388755321502686, "learning_rate": 0.0002, "loss": 1.5834, "step": 30920 }, { "epoch": 0.13, "grad_norm": 1.704751968383789, "learning_rate": 0.0002, "loss": 1.4211, "step": 30930 }, { "epoch": 0.13, "grad_norm": 3.3300163745880127, "learning_rate": 0.0002, "loss": 1.3355, "step": 30940 }, { "epoch": 0.13, "grad_norm": 3.359070062637329, "learning_rate": 0.0002, "loss": 1.3478, "step": 30950 }, { "epoch": 0.13, "grad_norm": 3.2382185459136963, "learning_rate": 0.0002, "loss": 1.4718, "step": 30960 }, { "epoch": 0.13, "grad_norm": 1.3034156560897827, "learning_rate": 0.0002, "loss": 1.7203, "step": 30970 }, { "epoch": 0.13, "grad_norm": 2.977168560028076, "learning_rate": 0.0002, "loss": 1.7918, "step": 30980 }, { "epoch": 0.13, "grad_norm": 2.7969696521759033, "learning_rate": 0.0002, "loss": 1.6161, "step": 30990 }, { "epoch": 0.13, "grad_norm": 2.0088019371032715, "learning_rate": 0.0002, "loss": 1.4894, "step": 31000 }, { "epoch": 0.13, "grad_norm": 1.8912861347198486, "learning_rate": 0.0002, "loss": 1.3744, "step": 31010 }, { "epoch": 0.13, "grad_norm": 3.269080877304077, "learning_rate": 0.0002, "loss": 1.3403, "step": 31020 }, { "epoch": 0.13, "grad_norm": 2.4079372882843018, "learning_rate": 0.0002, "loss": 1.4292, "step": 31030 }, { "epoch": 0.13, "grad_norm": 2.9012582302093506, "learning_rate": 0.0002, "loss": 1.1951, "step": 31040 }, { "epoch": 0.13, "grad_norm": 1.7947033643722534, "learning_rate": 0.0002, "loss": 1.4637, "step": 31050 }, { "epoch": 0.13, "grad_norm": 2.5797364711761475, "learning_rate": 0.0002, "loss": 1.6587, "step": 31060 }, { "epoch": 0.13, "grad_norm": 2.520904064178467, "learning_rate": 0.0002, "loss": 1.4374, "step": 31070 }, { "epoch": 0.13, "grad_norm": 3.9593746662139893, "learning_rate": 0.0002, "loss": 1.4509, "step": 31080 }, { "epoch": 0.13, "grad_norm": 1.7983843088150024, "learning_rate": 0.0002, "loss": 1.3946, "step": 31090 }, { "epoch": 0.13, "grad_norm": 3.5693211555480957, "learning_rate": 0.0002, "loss": 1.5635, "step": 31100 }, { "epoch": 0.13, "grad_norm": 3.274874448776245, "learning_rate": 0.0002, "loss": 1.4201, "step": 31110 }, { "epoch": 0.13, "grad_norm": 3.675825595855713, "learning_rate": 0.0002, "loss": 1.2861, "step": 31120 }, { "epoch": 0.13, "grad_norm": 1.7848968505859375, "learning_rate": 0.0002, "loss": 1.7097, "step": 31130 }, { "epoch": 0.13, "grad_norm": 2.576345682144165, "learning_rate": 0.0002, "loss": 1.6004, "step": 31140 }, { "epoch": 0.13, "grad_norm": 1.782493233680725, "learning_rate": 0.0002, "loss": 1.4616, "step": 31150 }, { "epoch": 0.13, "grad_norm": 5.373935222625732, "learning_rate": 0.0002, "loss": 1.3148, "step": 31160 }, { "epoch": 0.13, "grad_norm": 2.590927839279175, "learning_rate": 0.0002, "loss": 1.5728, "step": 31170 }, { "epoch": 0.13, "grad_norm": 1.1574194431304932, "learning_rate": 0.0002, "loss": 1.4361, "step": 31180 }, { "epoch": 0.13, "grad_norm": 2.82797908782959, "learning_rate": 0.0002, "loss": 1.291, "step": 31190 }, { "epoch": 0.13, "grad_norm": 2.742830276489258, "learning_rate": 0.0002, "loss": 1.3754, "step": 31200 }, { "epoch": 0.13, "grad_norm": 3.006810426712036, "learning_rate": 0.0002, "loss": 1.5626, "step": 31210 }, { "epoch": 0.13, "grad_norm": 2.822545051574707, "learning_rate": 0.0002, "loss": 1.4609, "step": 31220 }, { "epoch": 0.13, "grad_norm": 2.416571855545044, "learning_rate": 0.0002, "loss": 1.7572, "step": 31230 }, { "epoch": 0.13, "grad_norm": 4.3149309158325195, "learning_rate": 0.0002, "loss": 1.5111, "step": 31240 }, { "epoch": 0.13, "grad_norm": 2.8357481956481934, "learning_rate": 0.0002, "loss": 1.432, "step": 31250 }, { "epoch": 0.13, "grad_norm": 2.399291753768921, "learning_rate": 0.0002, "loss": 1.6365, "step": 31260 }, { "epoch": 0.13, "grad_norm": 4.016722679138184, "learning_rate": 0.0002, "loss": 1.5277, "step": 31270 }, { "epoch": 0.13, "grad_norm": 1.2433114051818848, "learning_rate": 0.0002, "loss": 1.5155, "step": 31280 }, { "epoch": 0.13, "grad_norm": 2.5681533813476562, "learning_rate": 0.0002, "loss": 1.4078, "step": 31290 }, { "epoch": 0.13, "grad_norm": 2.104501247406006, "learning_rate": 0.0002, "loss": 1.5037, "step": 31300 }, { "epoch": 0.13, "grad_norm": 1.405920386314392, "learning_rate": 0.0002, "loss": 1.5938, "step": 31310 }, { "epoch": 0.13, "grad_norm": 2.4490702152252197, "learning_rate": 0.0002, "loss": 1.3443, "step": 31320 }, { "epoch": 0.13, "grad_norm": 3.811026096343994, "learning_rate": 0.0002, "loss": 1.5645, "step": 31330 }, { "epoch": 0.13, "grad_norm": 2.707725763320923, "learning_rate": 0.0002, "loss": 1.4206, "step": 31340 }, { "epoch": 0.13, "grad_norm": 1.3157312870025635, "learning_rate": 0.0002, "loss": 1.5432, "step": 31350 }, { "epoch": 0.13, "grad_norm": 2.7693161964416504, "learning_rate": 0.0002, "loss": 1.4993, "step": 31360 }, { "epoch": 0.13, "grad_norm": 4.314516544342041, "learning_rate": 0.0002, "loss": 1.414, "step": 31370 }, { "epoch": 0.13, "grad_norm": 2.958756923675537, "learning_rate": 0.0002, "loss": 1.4663, "step": 31380 }, { "epoch": 0.13, "grad_norm": 3.3709945678710938, "learning_rate": 0.0002, "loss": 1.6952, "step": 31390 }, { "epoch": 0.13, "grad_norm": 2.302260637283325, "learning_rate": 0.0002, "loss": 1.419, "step": 31400 }, { "epoch": 0.13, "grad_norm": 2.961299419403076, "learning_rate": 0.0002, "loss": 1.5605, "step": 31410 }, { "epoch": 0.13, "grad_norm": 3.581965446472168, "learning_rate": 0.0002, "loss": 1.7215, "step": 31420 }, { "epoch": 0.13, "grad_norm": 2.9337823390960693, "learning_rate": 0.0002, "loss": 1.5431, "step": 31430 }, { "epoch": 0.13, "grad_norm": 2.3430230617523193, "learning_rate": 0.0002, "loss": 1.6025, "step": 31440 }, { "epoch": 0.13, "grad_norm": 2.2491981983184814, "learning_rate": 0.0002, "loss": 1.5941, "step": 31450 }, { "epoch": 0.13, "grad_norm": 2.495013952255249, "learning_rate": 0.0002, "loss": 1.5591, "step": 31460 }, { "epoch": 0.13, "grad_norm": 2.5429635047912598, "learning_rate": 0.0002, "loss": 1.5786, "step": 31470 }, { "epoch": 0.13, "grad_norm": 2.423149585723877, "learning_rate": 0.0002, "loss": 1.4901, "step": 31480 }, { "epoch": 0.13, "grad_norm": 1.7147704362869263, "learning_rate": 0.0002, "loss": 1.4986, "step": 31490 }, { "epoch": 0.13, "grad_norm": 3.8937602043151855, "learning_rate": 0.0002, "loss": 1.5565, "step": 31500 }, { "epoch": 0.13, "grad_norm": 3.0167911052703857, "learning_rate": 0.0002, "loss": 1.5153, "step": 31510 }, { "epoch": 0.13, "grad_norm": 9.462910652160645, "learning_rate": 0.0002, "loss": 1.5423, "step": 31520 }, { "epoch": 0.13, "grad_norm": 3.610581159591675, "learning_rate": 0.0002, "loss": 1.2549, "step": 31530 }, { "epoch": 0.13, "grad_norm": 3.006077289581299, "learning_rate": 0.0002, "loss": 1.4562, "step": 31540 }, { "epoch": 0.13, "grad_norm": 3.4331748485565186, "learning_rate": 0.0002, "loss": 1.7799, "step": 31550 }, { "epoch": 0.13, "grad_norm": 1.3224940299987793, "learning_rate": 0.0002, "loss": 1.7733, "step": 31560 }, { "epoch": 0.13, "grad_norm": 1.7797443866729736, "learning_rate": 0.0002, "loss": 1.8761, "step": 31570 }, { "epoch": 0.13, "grad_norm": 4.199153900146484, "learning_rate": 0.0002, "loss": 1.7956, "step": 31580 }, { "epoch": 0.13, "grad_norm": 4.171722412109375, "learning_rate": 0.0002, "loss": 1.4363, "step": 31590 }, { "epoch": 0.13, "grad_norm": 2.089047431945801, "learning_rate": 0.0002, "loss": 1.5143, "step": 31600 }, { "epoch": 0.13, "grad_norm": 8.485922813415527, "learning_rate": 0.0002, "loss": 1.6052, "step": 31610 }, { "epoch": 0.13, "grad_norm": 4.083344459533691, "learning_rate": 0.0002, "loss": 1.4353, "step": 31620 }, { "epoch": 0.13, "grad_norm": 1.8898402452468872, "learning_rate": 0.0002, "loss": 1.6916, "step": 31630 }, { "epoch": 0.13, "grad_norm": 2.751964807510376, "learning_rate": 0.0002, "loss": 1.4638, "step": 31640 }, { "epoch": 0.13, "grad_norm": 2.3176252841949463, "learning_rate": 0.0002, "loss": 1.4958, "step": 31650 }, { "epoch": 0.13, "grad_norm": 2.443629741668701, "learning_rate": 0.0002, "loss": 1.4038, "step": 31660 }, { "epoch": 0.13, "grad_norm": 3.3905138969421387, "learning_rate": 0.0002, "loss": 1.3909, "step": 31670 }, { "epoch": 0.13, "grad_norm": 2.594081163406372, "learning_rate": 0.0002, "loss": 1.9625, "step": 31680 }, { "epoch": 0.13, "grad_norm": 3.792123317718506, "learning_rate": 0.0002, "loss": 1.5908, "step": 31690 }, { "epoch": 0.13, "grad_norm": 3.368661880493164, "learning_rate": 0.0002, "loss": 1.445, "step": 31700 }, { "epoch": 0.13, "grad_norm": 2.665541410446167, "learning_rate": 0.0002, "loss": 1.3642, "step": 31710 }, { "epoch": 0.13, "grad_norm": 2.365668773651123, "learning_rate": 0.0002, "loss": 1.3649, "step": 31720 }, { "epoch": 0.13, "grad_norm": 3.549075126647949, "learning_rate": 0.0002, "loss": 1.6904, "step": 31730 }, { "epoch": 0.13, "grad_norm": 1.6915037631988525, "learning_rate": 0.0002, "loss": 1.2698, "step": 31740 }, { "epoch": 0.13, "grad_norm": 4.215758800506592, "learning_rate": 0.0002, "loss": 1.2437, "step": 31750 }, { "epoch": 0.13, "grad_norm": 1.8703866004943848, "learning_rate": 0.0002, "loss": 1.6097, "step": 31760 }, { "epoch": 0.13, "grad_norm": 2.7872440814971924, "learning_rate": 0.0002, "loss": 1.4773, "step": 31770 }, { "epoch": 0.13, "grad_norm": 5.039217948913574, "learning_rate": 0.0002, "loss": 1.5446, "step": 31780 }, { "epoch": 0.13, "grad_norm": 2.6887574195861816, "learning_rate": 0.0002, "loss": 1.4187, "step": 31790 }, { "epoch": 0.13, "grad_norm": 1.9579719305038452, "learning_rate": 0.0002, "loss": 1.7316, "step": 31800 }, { "epoch": 0.13, "grad_norm": 1.6511861085891724, "learning_rate": 0.0002, "loss": 1.6822, "step": 31810 }, { "epoch": 0.13, "grad_norm": 3.503598690032959, "learning_rate": 0.0002, "loss": 1.5541, "step": 31820 }, { "epoch": 0.13, "grad_norm": 2.9199302196502686, "learning_rate": 0.0002, "loss": 1.7049, "step": 31830 }, { "epoch": 0.13, "grad_norm": 2.3408517837524414, "learning_rate": 0.0002, "loss": 1.6274, "step": 31840 }, { "epoch": 0.13, "grad_norm": 2.1444766521453857, "learning_rate": 0.0002, "loss": 1.4535, "step": 31850 }, { "epoch": 0.13, "grad_norm": 2.125805139541626, "learning_rate": 0.0002, "loss": 1.5663, "step": 31860 }, { "epoch": 0.13, "grad_norm": 3.1803410053253174, "learning_rate": 0.0002, "loss": 1.5464, "step": 31870 }, { "epoch": 0.13, "grad_norm": 2.481090784072876, "learning_rate": 0.0002, "loss": 1.4855, "step": 31880 }, { "epoch": 0.13, "grad_norm": 1.2183886766433716, "learning_rate": 0.0002, "loss": 1.453, "step": 31890 }, { "epoch": 0.13, "grad_norm": 3.1665244102478027, "learning_rate": 0.0002, "loss": 1.4423, "step": 31900 }, { "epoch": 0.13, "grad_norm": 4.243392467498779, "learning_rate": 0.0002, "loss": 1.6102, "step": 31910 }, { "epoch": 0.13, "grad_norm": 2.1416661739349365, "learning_rate": 0.0002, "loss": 1.6472, "step": 31920 }, { "epoch": 0.13, "grad_norm": 2.979734420776367, "learning_rate": 0.0002, "loss": 1.3859, "step": 31930 }, { "epoch": 0.13, "grad_norm": 2.5728507041931152, "learning_rate": 0.0002, "loss": 1.5089, "step": 31940 }, { "epoch": 0.13, "grad_norm": 1.658892035484314, "learning_rate": 0.0002, "loss": 1.2237, "step": 31950 }, { "epoch": 0.13, "grad_norm": 2.9815008640289307, "learning_rate": 0.0002, "loss": 1.5137, "step": 31960 }, { "epoch": 0.13, "grad_norm": 1.7753783464431763, "learning_rate": 0.0002, "loss": 1.4778, "step": 31970 }, { "epoch": 0.13, "grad_norm": 3.254425287246704, "learning_rate": 0.0002, "loss": 1.6401, "step": 31980 }, { "epoch": 0.13, "grad_norm": 6.469662189483643, "learning_rate": 0.0002, "loss": 1.5922, "step": 31990 }, { "epoch": 0.13, "grad_norm": 4.740315914154053, "learning_rate": 0.0002, "loss": 1.5436, "step": 32000 }, { "epoch": 0.13, "grad_norm": 2.9473087787628174, "learning_rate": 0.0002, "loss": 1.5303, "step": 32010 }, { "epoch": 0.13, "grad_norm": 2.8623874187469482, "learning_rate": 0.0002, "loss": 1.5605, "step": 32020 }, { "epoch": 0.13, "grad_norm": 2.488013982772827, "learning_rate": 0.0002, "loss": 1.2866, "step": 32030 }, { "epoch": 0.13, "grad_norm": 3.5828332901000977, "learning_rate": 0.0002, "loss": 1.642, "step": 32040 }, { "epoch": 0.13, "grad_norm": 2.2060787677764893, "learning_rate": 0.0002, "loss": 1.328, "step": 32050 }, { "epoch": 0.13, "grad_norm": 3.791715621948242, "learning_rate": 0.0002, "loss": 1.4579, "step": 32060 }, { "epoch": 0.13, "grad_norm": 2.818779230117798, "learning_rate": 0.0002, "loss": 1.2697, "step": 32070 }, { "epoch": 0.13, "grad_norm": 3.100367307662964, "learning_rate": 0.0002, "loss": 1.3637, "step": 32080 }, { "epoch": 0.13, "grad_norm": 5.621219635009766, "learning_rate": 0.0002, "loss": 1.7509, "step": 32090 }, { "epoch": 0.13, "grad_norm": 1.6147791147232056, "learning_rate": 0.0002, "loss": 1.3846, "step": 32100 }, { "epoch": 0.13, "grad_norm": 3.207767963409424, "learning_rate": 0.0002, "loss": 1.2932, "step": 32110 }, { "epoch": 0.13, "grad_norm": 3.630815029144287, "learning_rate": 0.0002, "loss": 1.6648, "step": 32120 }, { "epoch": 0.13, "grad_norm": 1.8639456033706665, "learning_rate": 0.0002, "loss": 1.5715, "step": 32130 }, { "epoch": 0.13, "grad_norm": 2.4483630657196045, "learning_rate": 0.0002, "loss": 1.6008, "step": 32140 }, { "epoch": 0.13, "grad_norm": 1.8941856622695923, "learning_rate": 0.0002, "loss": 1.6202, "step": 32150 }, { "epoch": 0.13, "grad_norm": 4.340956211090088, "learning_rate": 0.0002, "loss": 1.7161, "step": 32160 }, { "epoch": 0.13, "grad_norm": 2.5021986961364746, "learning_rate": 0.0002, "loss": 1.4875, "step": 32170 }, { "epoch": 0.13, "grad_norm": 2.9166572093963623, "learning_rate": 0.0002, "loss": 1.8115, "step": 32180 }, { "epoch": 0.13, "grad_norm": 1.5487812757492065, "learning_rate": 0.0002, "loss": 1.575, "step": 32190 }, { "epoch": 0.13, "grad_norm": 2.382882595062256, "learning_rate": 0.0002, "loss": 1.6372, "step": 32200 }, { "epoch": 0.13, "grad_norm": 2.3261353969573975, "learning_rate": 0.0002, "loss": 1.6229, "step": 32210 }, { "epoch": 0.13, "grad_norm": 3.012815237045288, "learning_rate": 0.0002, "loss": 1.5528, "step": 32220 }, { "epoch": 0.13, "grad_norm": 2.946295738220215, "learning_rate": 0.0002, "loss": 1.6054, "step": 32230 }, { "epoch": 0.13, "grad_norm": 2.53067946434021, "learning_rate": 0.0002, "loss": 1.6521, "step": 32240 }, { "epoch": 0.13, "grad_norm": 2.918412208557129, "learning_rate": 0.0002, "loss": 1.491, "step": 32250 }, { "epoch": 0.13, "grad_norm": 2.620249032974243, "learning_rate": 0.0002, "loss": 1.5588, "step": 32260 }, { "epoch": 0.13, "grad_norm": 2.369143486022949, "learning_rate": 0.0002, "loss": 1.6996, "step": 32270 }, { "epoch": 0.13, "grad_norm": 2.4939818382263184, "learning_rate": 0.0002, "loss": 1.5454, "step": 32280 }, { "epoch": 0.13, "grad_norm": 2.1212406158447266, "learning_rate": 0.0002, "loss": 1.5874, "step": 32290 }, { "epoch": 0.13, "grad_norm": 1.3766343593597412, "learning_rate": 0.0002, "loss": 1.6842, "step": 32300 }, { "epoch": 0.13, "grad_norm": 3.430257558822632, "learning_rate": 0.0002, "loss": 1.5835, "step": 32310 }, { "epoch": 0.13, "grad_norm": 4.425400257110596, "learning_rate": 0.0002, "loss": 1.3437, "step": 32320 }, { "epoch": 0.13, "grad_norm": 2.23991322517395, "learning_rate": 0.0002, "loss": 1.3783, "step": 32330 }, { "epoch": 0.13, "grad_norm": 3.2672359943389893, "learning_rate": 0.0002, "loss": 1.4983, "step": 32340 }, { "epoch": 0.13, "grad_norm": 3.3638956546783447, "learning_rate": 0.0002, "loss": 1.4683, "step": 32350 }, { "epoch": 0.13, "grad_norm": 7.014698505401611, "learning_rate": 0.0002, "loss": 1.7875, "step": 32360 }, { "epoch": 0.13, "grad_norm": 4.131831169128418, "learning_rate": 0.0002, "loss": 1.6205, "step": 32370 }, { "epoch": 0.13, "grad_norm": 1.9319567680358887, "learning_rate": 0.0002, "loss": 1.3417, "step": 32380 }, { "epoch": 0.13, "grad_norm": 2.972581148147583, "learning_rate": 0.0002, "loss": 1.3904, "step": 32390 }, { "epoch": 0.13, "grad_norm": 3.3741836547851562, "learning_rate": 0.0002, "loss": 1.5517, "step": 32400 }, { "epoch": 0.13, "grad_norm": 4.4148430824279785, "learning_rate": 0.0002, "loss": 1.4017, "step": 32410 }, { "epoch": 0.13, "grad_norm": 2.040686845779419, "learning_rate": 0.0002, "loss": 1.6608, "step": 32420 }, { "epoch": 0.13, "grad_norm": 2.927025318145752, "learning_rate": 0.0002, "loss": 1.5916, "step": 32430 }, { "epoch": 0.13, "grad_norm": 2.6895787715911865, "learning_rate": 0.0002, "loss": 1.395, "step": 32440 }, { "epoch": 0.13, "grad_norm": 6.068900108337402, "learning_rate": 0.0002, "loss": 1.3306, "step": 32450 }, { "epoch": 0.13, "grad_norm": 2.442549228668213, "learning_rate": 0.0002, "loss": 1.3277, "step": 32460 }, { "epoch": 0.13, "grad_norm": 1.9108610153198242, "learning_rate": 0.0002, "loss": 1.6905, "step": 32470 }, { "epoch": 0.13, "grad_norm": 2.782421350479126, "learning_rate": 0.0002, "loss": 1.645, "step": 32480 }, { "epoch": 0.13, "grad_norm": 7.353684425354004, "learning_rate": 0.0002, "loss": 1.7573, "step": 32490 }, { "epoch": 0.13, "grad_norm": 2.362269639968872, "learning_rate": 0.0002, "loss": 1.4622, "step": 32500 }, { "epoch": 0.13, "grad_norm": 1.4311714172363281, "learning_rate": 0.0002, "loss": 1.6444, "step": 32510 }, { "epoch": 0.13, "grad_norm": 3.8236122131347656, "learning_rate": 0.0002, "loss": 1.3862, "step": 32520 }, { "epoch": 0.13, "grad_norm": 1.739070177078247, "learning_rate": 0.0002, "loss": 1.7162, "step": 32530 }, { "epoch": 0.13, "grad_norm": 4.535881519317627, "learning_rate": 0.0002, "loss": 1.541, "step": 32540 }, { "epoch": 0.13, "grad_norm": 2.2269272804260254, "learning_rate": 0.0002, "loss": 1.6023, "step": 32550 }, { "epoch": 0.13, "grad_norm": 1.546985149383545, "learning_rate": 0.0002, "loss": 1.7396, "step": 32560 }, { "epoch": 0.13, "grad_norm": 3.3701841831207275, "learning_rate": 0.0002, "loss": 1.4733, "step": 32570 }, { "epoch": 0.13, "grad_norm": 2.340930938720703, "learning_rate": 0.0002, "loss": 1.4927, "step": 32580 }, { "epoch": 0.13, "grad_norm": 4.256556987762451, "learning_rate": 0.0002, "loss": 1.6338, "step": 32590 }, { "epoch": 0.13, "grad_norm": 2.8064022064208984, "learning_rate": 0.0002, "loss": 1.6384, "step": 32600 }, { "epoch": 0.13, "grad_norm": 3.359487533569336, "learning_rate": 0.0002, "loss": 1.5429, "step": 32610 }, { "epoch": 0.13, "grad_norm": 0.9903825521469116, "learning_rate": 0.0002, "loss": 1.5644, "step": 32620 }, { "epoch": 0.13, "grad_norm": 2.8350696563720703, "learning_rate": 0.0002, "loss": 1.4903, "step": 32630 }, { "epoch": 0.13, "grad_norm": 1.7155574560165405, "learning_rate": 0.0002, "loss": 1.5268, "step": 32640 }, { "epoch": 0.13, "grad_norm": 1.7932653427124023, "learning_rate": 0.0002, "loss": 1.909, "step": 32650 }, { "epoch": 0.13, "grad_norm": 3.524487018585205, "learning_rate": 0.0002, "loss": 1.6108, "step": 32660 }, { "epoch": 0.13, "grad_norm": 3.99782133102417, "learning_rate": 0.0002, "loss": 1.4915, "step": 32670 }, { "epoch": 0.13, "grad_norm": 3.5535264015197754, "learning_rate": 0.0002, "loss": 1.3305, "step": 32680 }, { "epoch": 0.13, "grad_norm": 2.7162163257598877, "learning_rate": 0.0002, "loss": 1.6546, "step": 32690 }, { "epoch": 0.13, "grad_norm": 2.1194822788238525, "learning_rate": 0.0002, "loss": 1.2906, "step": 32700 }, { "epoch": 0.13, "grad_norm": 3.020813226699829, "learning_rate": 0.0002, "loss": 1.345, "step": 32710 }, { "epoch": 0.13, "grad_norm": 1.7547842264175415, "learning_rate": 0.0002, "loss": 1.4407, "step": 32720 }, { "epoch": 0.13, "grad_norm": 2.694007158279419, "learning_rate": 0.0002, "loss": 1.6617, "step": 32730 }, { "epoch": 0.13, "grad_norm": 2.859727144241333, "learning_rate": 0.0002, "loss": 1.4985, "step": 32740 }, { "epoch": 0.13, "grad_norm": 2.965174674987793, "learning_rate": 0.0002, "loss": 1.8337, "step": 32750 }, { "epoch": 0.13, "grad_norm": 2.9660425186157227, "learning_rate": 0.0002, "loss": 1.5682, "step": 32760 }, { "epoch": 0.13, "grad_norm": 2.3113908767700195, "learning_rate": 0.0002, "loss": 1.3465, "step": 32770 }, { "epoch": 0.13, "grad_norm": 3.0865209102630615, "learning_rate": 0.0002, "loss": 1.5604, "step": 32780 }, { "epoch": 0.13, "grad_norm": 4.392728805541992, "learning_rate": 0.0002, "loss": 1.8286, "step": 32790 }, { "epoch": 0.13, "grad_norm": 3.263317584991455, "learning_rate": 0.0002, "loss": 1.6193, "step": 32800 }, { "epoch": 0.13, "grad_norm": 3.5521388053894043, "learning_rate": 0.0002, "loss": 1.5914, "step": 32810 }, { "epoch": 0.13, "grad_norm": 2.5830578804016113, "learning_rate": 0.0002, "loss": 1.5952, "step": 32820 }, { "epoch": 0.13, "grad_norm": 2.223177909851074, "learning_rate": 0.0002, "loss": 1.5425, "step": 32830 }, { "epoch": 0.13, "grad_norm": 3.051988124847412, "learning_rate": 0.0002, "loss": 1.4132, "step": 32840 }, { "epoch": 0.13, "grad_norm": 2.5183377265930176, "learning_rate": 0.0002, "loss": 1.7229, "step": 32850 }, { "epoch": 0.13, "grad_norm": 2.0237274169921875, "learning_rate": 0.0002, "loss": 1.748, "step": 32860 }, { "epoch": 0.13, "grad_norm": 1.9322806596755981, "learning_rate": 0.0002, "loss": 1.6437, "step": 32870 }, { "epoch": 0.13, "grad_norm": 2.5919394493103027, "learning_rate": 0.0002, "loss": 1.5555, "step": 32880 }, { "epoch": 0.13, "grad_norm": 3.071043014526367, "learning_rate": 0.0002, "loss": 1.5118, "step": 32890 }, { "epoch": 0.13, "grad_norm": 3.7258408069610596, "learning_rate": 0.0002, "loss": 1.512, "step": 32900 }, { "epoch": 0.13, "grad_norm": 4.726340293884277, "learning_rate": 0.0002, "loss": 1.8347, "step": 32910 }, { "epoch": 0.13, "grad_norm": 2.166200876235962, "learning_rate": 0.0002, "loss": 1.3799, "step": 32920 }, { "epoch": 0.13, "grad_norm": 3.325042247772217, "learning_rate": 0.0002, "loss": 1.5084, "step": 32930 }, { "epoch": 0.13, "grad_norm": 3.5565152168273926, "learning_rate": 0.0002, "loss": 1.7826, "step": 32940 }, { "epoch": 0.13, "grad_norm": 2.517972946166992, "learning_rate": 0.0002, "loss": 1.5093, "step": 32950 }, { "epoch": 0.13, "grad_norm": 1.677240252494812, "learning_rate": 0.0002, "loss": 1.6968, "step": 32960 }, { "epoch": 0.13, "grad_norm": 3.194413423538208, "learning_rate": 0.0002, "loss": 1.5288, "step": 32970 }, { "epoch": 0.13, "grad_norm": 1.8054685592651367, "learning_rate": 0.0002, "loss": 1.5413, "step": 32980 }, { "epoch": 0.13, "grad_norm": 3.7156553268432617, "learning_rate": 0.0002, "loss": 1.5655, "step": 32990 }, { "epoch": 0.13, "grad_norm": 2.455780029296875, "learning_rate": 0.0002, "loss": 1.5299, "step": 33000 }, { "epoch": 0.13, "grad_norm": 1.3603352308273315, "learning_rate": 0.0002, "loss": 1.8159, "step": 33010 }, { "epoch": 0.13, "grad_norm": 1.677162528038025, "learning_rate": 0.0002, "loss": 1.4964, "step": 33020 }, { "epoch": 0.13, "grad_norm": 3.9534401893615723, "learning_rate": 0.0002, "loss": 1.5938, "step": 33030 }, { "epoch": 0.13, "grad_norm": 2.6093428134918213, "learning_rate": 0.0002, "loss": 1.6286, "step": 33040 }, { "epoch": 0.13, "grad_norm": 3.2916595935821533, "learning_rate": 0.0002, "loss": 1.5219, "step": 33050 }, { "epoch": 0.13, "grad_norm": 3.23443341255188, "learning_rate": 0.0002, "loss": 1.5717, "step": 33060 }, { "epoch": 0.13, "grad_norm": 2.4186670780181885, "learning_rate": 0.0002, "loss": 1.7533, "step": 33070 }, { "epoch": 0.13, "grad_norm": 2.4054977893829346, "learning_rate": 0.0002, "loss": 1.4894, "step": 33080 }, { "epoch": 0.13, "grad_norm": 2.8935043811798096, "learning_rate": 0.0002, "loss": 1.6591, "step": 33090 }, { "epoch": 0.13, "grad_norm": 3.032256841659546, "learning_rate": 0.0002, "loss": 1.4896, "step": 33100 }, { "epoch": 0.13, "grad_norm": 2.56370210647583, "learning_rate": 0.0002, "loss": 1.3703, "step": 33110 }, { "epoch": 0.13, "grad_norm": 3.1871159076690674, "learning_rate": 0.0002, "loss": 1.4996, "step": 33120 }, { "epoch": 0.13, "grad_norm": 1.489699363708496, "learning_rate": 0.0002, "loss": 1.8029, "step": 33130 }, { "epoch": 0.13, "grad_norm": 2.4262475967407227, "learning_rate": 0.0002, "loss": 1.4578, "step": 33140 }, { "epoch": 0.13, "grad_norm": 2.9207515716552734, "learning_rate": 0.0002, "loss": 1.3867, "step": 33150 }, { "epoch": 0.13, "grad_norm": 4.85026741027832, "learning_rate": 0.0002, "loss": 1.7493, "step": 33160 }, { "epoch": 0.14, "grad_norm": 2.316704750061035, "learning_rate": 0.0002, "loss": 1.5758, "step": 33170 }, { "epoch": 0.14, "grad_norm": 3.262190818786621, "learning_rate": 0.0002, "loss": 1.6619, "step": 33180 }, { "epoch": 0.14, "grad_norm": 2.494776725769043, "learning_rate": 0.0002, "loss": 1.5733, "step": 33190 }, { "epoch": 0.14, "grad_norm": 2.858025312423706, "learning_rate": 0.0002, "loss": 1.7663, "step": 33200 }, { "epoch": 0.14, "grad_norm": 2.0623607635498047, "learning_rate": 0.0002, "loss": 1.5915, "step": 33210 }, { "epoch": 0.14, "grad_norm": 2.383455753326416, "learning_rate": 0.0002, "loss": 1.5354, "step": 33220 }, { "epoch": 0.14, "grad_norm": 2.0549376010894775, "learning_rate": 0.0002, "loss": 1.6585, "step": 33230 }, { "epoch": 0.14, "grad_norm": 2.733764886856079, "learning_rate": 0.0002, "loss": 1.6741, "step": 33240 }, { "epoch": 0.14, "grad_norm": 2.817605495452881, "learning_rate": 0.0002, "loss": 1.4384, "step": 33250 }, { "epoch": 0.14, "grad_norm": 2.441850423812866, "learning_rate": 0.0002, "loss": 1.4735, "step": 33260 }, { "epoch": 0.14, "grad_norm": 3.622450828552246, "learning_rate": 0.0002, "loss": 1.6229, "step": 33270 }, { "epoch": 0.14, "grad_norm": 2.453122615814209, "learning_rate": 0.0002, "loss": 1.3558, "step": 33280 }, { "epoch": 0.14, "grad_norm": 2.5708110332489014, "learning_rate": 0.0002, "loss": 1.6318, "step": 33290 }, { "epoch": 0.14, "grad_norm": 3.063382387161255, "learning_rate": 0.0002, "loss": 1.6031, "step": 33300 }, { "epoch": 0.14, "grad_norm": 5.861618518829346, "learning_rate": 0.0002, "loss": 1.7327, "step": 33310 }, { "epoch": 0.14, "grad_norm": 2.7357449531555176, "learning_rate": 0.0002, "loss": 1.5674, "step": 33320 }, { "epoch": 0.14, "grad_norm": 6.20182991027832, "learning_rate": 0.0002, "loss": 1.7101, "step": 33330 }, { "epoch": 0.14, "grad_norm": 2.632512331008911, "learning_rate": 0.0002, "loss": 1.6279, "step": 33340 }, { "epoch": 0.14, "grad_norm": 1.8669778108596802, "learning_rate": 0.0002, "loss": 1.4357, "step": 33350 }, { "epoch": 0.14, "grad_norm": 2.944939613342285, "learning_rate": 0.0002, "loss": 1.5052, "step": 33360 }, { "epoch": 0.14, "grad_norm": 1.51935875415802, "learning_rate": 0.0002, "loss": 1.6434, "step": 33370 }, { "epoch": 0.14, "grad_norm": 2.247390031814575, "learning_rate": 0.0002, "loss": 1.5732, "step": 33380 }, { "epoch": 0.14, "grad_norm": 2.188319683074951, "learning_rate": 0.0002, "loss": 1.4907, "step": 33390 }, { "epoch": 0.14, "grad_norm": 3.2768802642822266, "learning_rate": 0.0002, "loss": 1.5622, "step": 33400 }, { "epoch": 0.14, "grad_norm": 1.637466549873352, "learning_rate": 0.0002, "loss": 1.3613, "step": 33410 }, { "epoch": 0.14, "grad_norm": 3.859714984893799, "learning_rate": 0.0002, "loss": 1.4265, "step": 33420 }, { "epoch": 0.14, "grad_norm": 3.5099217891693115, "learning_rate": 0.0002, "loss": 1.5122, "step": 33430 }, { "epoch": 0.14, "grad_norm": 1.8791121244430542, "learning_rate": 0.0002, "loss": 1.417, "step": 33440 }, { "epoch": 0.14, "grad_norm": 3.6841161251068115, "learning_rate": 0.0002, "loss": 1.3725, "step": 33450 }, { "epoch": 0.14, "grad_norm": 3.152883768081665, "learning_rate": 0.0002, "loss": 1.6344, "step": 33460 }, { "epoch": 0.14, "grad_norm": 3.594789981842041, "learning_rate": 0.0002, "loss": 1.4869, "step": 33470 }, { "epoch": 0.14, "grad_norm": 3.1142005920410156, "learning_rate": 0.0002, "loss": 1.5535, "step": 33480 }, { "epoch": 0.14, "grad_norm": 1.9424583911895752, "learning_rate": 0.0002, "loss": 1.6353, "step": 33490 }, { "epoch": 0.14, "grad_norm": 4.5644121170043945, "learning_rate": 0.0002, "loss": 1.4805, "step": 33500 }, { "epoch": 0.14, "grad_norm": 2.924668550491333, "learning_rate": 0.0002, "loss": 1.6068, "step": 33510 }, { "epoch": 0.14, "grad_norm": 6.7521071434021, "learning_rate": 0.0002, "loss": 1.4769, "step": 33520 }, { "epoch": 0.14, "grad_norm": 3.338320016860962, "learning_rate": 0.0002, "loss": 1.9135, "step": 33530 }, { "epoch": 0.14, "grad_norm": 1.7727015018463135, "learning_rate": 0.0002, "loss": 1.6722, "step": 33540 }, { "epoch": 0.14, "grad_norm": 2.6126487255096436, "learning_rate": 0.0002, "loss": 1.5564, "step": 33550 }, { "epoch": 0.14, "grad_norm": 2.683222770690918, "learning_rate": 0.0002, "loss": 1.4924, "step": 33560 }, { "epoch": 0.14, "grad_norm": 2.6027421951293945, "learning_rate": 0.0002, "loss": 1.5433, "step": 33570 }, { "epoch": 0.14, "grad_norm": 3.8661980628967285, "learning_rate": 0.0002, "loss": 1.8229, "step": 33580 }, { "epoch": 0.14, "grad_norm": 2.6513781547546387, "learning_rate": 0.0002, "loss": 1.3906, "step": 33590 }, { "epoch": 0.14, "grad_norm": 1.7589657306671143, "learning_rate": 0.0002, "loss": 1.472, "step": 33600 }, { "epoch": 0.14, "grad_norm": 2.170264959335327, "learning_rate": 0.0002, "loss": 1.5964, "step": 33610 }, { "epoch": 0.14, "grad_norm": 2.6246397495269775, "learning_rate": 0.0002, "loss": 1.4213, "step": 33620 }, { "epoch": 0.14, "grad_norm": 3.484957695007324, "learning_rate": 0.0002, "loss": 1.5303, "step": 33630 }, { "epoch": 0.14, "grad_norm": 4.085269451141357, "learning_rate": 0.0002, "loss": 1.4209, "step": 33640 }, { "epoch": 0.14, "grad_norm": 2.792144298553467, "learning_rate": 0.0002, "loss": 1.4818, "step": 33650 }, { "epoch": 0.14, "grad_norm": 2.355093002319336, "learning_rate": 0.0002, "loss": 1.3432, "step": 33660 }, { "epoch": 0.14, "grad_norm": 3.252622127532959, "learning_rate": 0.0002, "loss": 1.7021, "step": 33670 }, { "epoch": 0.14, "grad_norm": 3.0245697498321533, "learning_rate": 0.0002, "loss": 1.6054, "step": 33680 }, { "epoch": 0.14, "grad_norm": 1.4027763605117798, "learning_rate": 0.0002, "loss": 1.4211, "step": 33690 }, { "epoch": 0.14, "grad_norm": 1.8304396867752075, "learning_rate": 0.0002, "loss": 1.6391, "step": 33700 }, { "epoch": 0.14, "grad_norm": 4.61044979095459, "learning_rate": 0.0002, "loss": 1.6165, "step": 33710 }, { "epoch": 0.14, "grad_norm": 2.6648809909820557, "learning_rate": 0.0002, "loss": 1.5601, "step": 33720 }, { "epoch": 0.14, "grad_norm": 2.9924685955047607, "learning_rate": 0.0002, "loss": 1.4317, "step": 33730 }, { "epoch": 0.14, "grad_norm": 3.0055205821990967, "learning_rate": 0.0002, "loss": 1.5513, "step": 33740 }, { "epoch": 0.14, "grad_norm": 2.305119514465332, "learning_rate": 0.0002, "loss": 1.6374, "step": 33750 }, { "epoch": 0.14, "grad_norm": 2.74816632270813, "learning_rate": 0.0002, "loss": 1.4811, "step": 33760 }, { "epoch": 0.14, "grad_norm": 2.3632466793060303, "learning_rate": 0.0002, "loss": 1.5181, "step": 33770 }, { "epoch": 0.14, "grad_norm": 1.5432156324386597, "learning_rate": 0.0002, "loss": 1.4044, "step": 33780 }, { "epoch": 0.14, "grad_norm": 2.667569875717163, "learning_rate": 0.0002, "loss": 1.4664, "step": 33790 }, { "epoch": 0.14, "grad_norm": 3.188335418701172, "learning_rate": 0.0002, "loss": 1.6636, "step": 33800 }, { "epoch": 0.14, "grad_norm": 2.794522285461426, "learning_rate": 0.0002, "loss": 1.357, "step": 33810 }, { "epoch": 0.14, "grad_norm": 5.703272342681885, "learning_rate": 0.0002, "loss": 1.4563, "step": 33820 }, { "epoch": 0.14, "grad_norm": 2.345681667327881, "learning_rate": 0.0002, "loss": 1.4563, "step": 33830 }, { "epoch": 0.14, "grad_norm": 3.6173202991485596, "learning_rate": 0.0002, "loss": 1.4801, "step": 33840 }, { "epoch": 0.14, "grad_norm": 2.5870308876037598, "learning_rate": 0.0002, "loss": 1.8445, "step": 33850 }, { "epoch": 0.14, "grad_norm": 2.271963119506836, "learning_rate": 0.0002, "loss": 1.554, "step": 33860 }, { "epoch": 0.14, "grad_norm": 3.4352879524230957, "learning_rate": 0.0002, "loss": 1.497, "step": 33870 }, { "epoch": 0.14, "grad_norm": 2.8017213344573975, "learning_rate": 0.0002, "loss": 1.6748, "step": 33880 }, { "epoch": 0.14, "grad_norm": 2.283287763595581, "learning_rate": 0.0002, "loss": 1.5473, "step": 33890 }, { "epoch": 0.14, "grad_norm": 2.9284703731536865, "learning_rate": 0.0002, "loss": 1.6137, "step": 33900 }, { "epoch": 0.14, "grad_norm": 2.758293390274048, "learning_rate": 0.0002, "loss": 1.6224, "step": 33910 }, { "epoch": 0.14, "grad_norm": 1.4873640537261963, "learning_rate": 0.0002, "loss": 1.7657, "step": 33920 }, { "epoch": 0.14, "grad_norm": 3.892670154571533, "learning_rate": 0.0002, "loss": 1.6902, "step": 33930 }, { "epoch": 0.14, "grad_norm": 2.6619889736175537, "learning_rate": 0.0002, "loss": 1.5558, "step": 33940 }, { "epoch": 0.14, "grad_norm": 1.983452320098877, "learning_rate": 0.0002, "loss": 1.5314, "step": 33950 }, { "epoch": 0.14, "grad_norm": 2.9709205627441406, "learning_rate": 0.0002, "loss": 1.617, "step": 33960 }, { "epoch": 0.14, "grad_norm": 3.9798035621643066, "learning_rate": 0.0002, "loss": 1.662, "step": 33970 }, { "epoch": 0.14, "grad_norm": 3.4276480674743652, "learning_rate": 0.0002, "loss": 1.6564, "step": 33980 }, { "epoch": 0.14, "grad_norm": 3.617412805557251, "learning_rate": 0.0002, "loss": 1.5876, "step": 33990 }, { "epoch": 0.14, "grad_norm": 2.239128351211548, "learning_rate": 0.0002, "loss": 1.612, "step": 34000 }, { "epoch": 0.14, "grad_norm": 2.5213372707366943, "learning_rate": 0.0002, "loss": 1.6267, "step": 34010 }, { "epoch": 0.14, "grad_norm": 3.4861812591552734, "learning_rate": 0.0002, "loss": 1.6892, "step": 34020 }, { "epoch": 0.14, "grad_norm": 2.5261998176574707, "learning_rate": 0.0002, "loss": 1.477, "step": 34030 }, { "epoch": 0.14, "grad_norm": 1.8097457885742188, "learning_rate": 0.0002, "loss": 1.4809, "step": 34040 }, { "epoch": 0.14, "grad_norm": 2.306100368499756, "learning_rate": 0.0002, "loss": 1.3771, "step": 34050 }, { "epoch": 0.14, "grad_norm": 4.875732421875, "learning_rate": 0.0002, "loss": 1.7273, "step": 34060 }, { "epoch": 0.14, "grad_norm": 1.858256459236145, "learning_rate": 0.0002, "loss": 1.5601, "step": 34070 }, { "epoch": 0.14, "grad_norm": 3.3128466606140137, "learning_rate": 0.0002, "loss": 1.5909, "step": 34080 }, { "epoch": 0.14, "grad_norm": 2.339962959289551, "learning_rate": 0.0002, "loss": 1.5778, "step": 34090 }, { "epoch": 0.14, "grad_norm": 1.8465442657470703, "learning_rate": 0.0002, "loss": 1.7638, "step": 34100 }, { "epoch": 0.14, "grad_norm": 3.3994364738464355, "learning_rate": 0.0002, "loss": 1.4803, "step": 34110 }, { "epoch": 0.14, "grad_norm": 2.0944385528564453, "learning_rate": 0.0002, "loss": 1.5028, "step": 34120 }, { "epoch": 0.14, "grad_norm": 2.3303160667419434, "learning_rate": 0.0002, "loss": 1.6275, "step": 34130 }, { "epoch": 0.14, "grad_norm": 3.8345813751220703, "learning_rate": 0.0002, "loss": 1.6433, "step": 34140 }, { "epoch": 0.14, "grad_norm": 1.8214412927627563, "learning_rate": 0.0002, "loss": 1.706, "step": 34150 }, { "epoch": 0.14, "grad_norm": 1.485376238822937, "learning_rate": 0.0002, "loss": 1.3989, "step": 34160 }, { "epoch": 0.14, "grad_norm": 2.575773239135742, "learning_rate": 0.0002, "loss": 1.6052, "step": 34170 }, { "epoch": 0.14, "grad_norm": 4.111771106719971, "learning_rate": 0.0002, "loss": 1.6398, "step": 34180 }, { "epoch": 0.14, "grad_norm": 2.9089913368225098, "learning_rate": 0.0002, "loss": 1.6642, "step": 34190 }, { "epoch": 0.14, "grad_norm": 2.4188292026519775, "learning_rate": 0.0002, "loss": 1.6709, "step": 34200 }, { "epoch": 0.14, "grad_norm": 2.4634456634521484, "learning_rate": 0.0002, "loss": 1.4718, "step": 34210 }, { "epoch": 0.14, "grad_norm": 5.243656635284424, "learning_rate": 0.0002, "loss": 1.6686, "step": 34220 }, { "epoch": 0.14, "grad_norm": 2.3437061309814453, "learning_rate": 0.0002, "loss": 1.6506, "step": 34230 }, { "epoch": 0.14, "grad_norm": 3.041025400161743, "learning_rate": 0.0002, "loss": 1.7095, "step": 34240 }, { "epoch": 0.14, "grad_norm": 3.2295005321502686, "learning_rate": 0.0002, "loss": 1.55, "step": 34250 }, { "epoch": 0.14, "grad_norm": 3.394674062728882, "learning_rate": 0.0002, "loss": 1.4647, "step": 34260 }, { "epoch": 0.14, "grad_norm": 3.1022987365722656, "learning_rate": 0.0002, "loss": 1.4661, "step": 34270 }, { "epoch": 0.14, "grad_norm": 2.515133857727051, "learning_rate": 0.0002, "loss": 1.5313, "step": 34280 }, { "epoch": 0.14, "grad_norm": 2.0705573558807373, "learning_rate": 0.0002, "loss": 1.6093, "step": 34290 }, { "epoch": 0.14, "grad_norm": 2.388972520828247, "learning_rate": 0.0002, "loss": 1.7694, "step": 34300 }, { "epoch": 0.14, "grad_norm": 2.086458921432495, "learning_rate": 0.0002, "loss": 1.5454, "step": 34310 }, { "epoch": 0.14, "grad_norm": 1.9718059301376343, "learning_rate": 0.0002, "loss": 1.6959, "step": 34320 }, { "epoch": 0.14, "grad_norm": 3.0180089473724365, "learning_rate": 0.0002, "loss": 1.4633, "step": 34330 }, { "epoch": 0.14, "grad_norm": 2.762282133102417, "learning_rate": 0.0002, "loss": 1.4443, "step": 34340 }, { "epoch": 0.14, "grad_norm": 3.6597087383270264, "learning_rate": 0.0002, "loss": 1.4628, "step": 34350 }, { "epoch": 0.14, "grad_norm": 2.9780614376068115, "learning_rate": 0.0002, "loss": 1.4577, "step": 34360 }, { "epoch": 0.14, "grad_norm": 2.171596050262451, "learning_rate": 0.0002, "loss": 1.597, "step": 34370 }, { "epoch": 0.14, "grad_norm": 2.7658700942993164, "learning_rate": 0.0002, "loss": 1.6899, "step": 34380 }, { "epoch": 0.14, "grad_norm": 2.8629820346832275, "learning_rate": 0.0002, "loss": 1.3878, "step": 34390 }, { "epoch": 0.14, "grad_norm": 4.013698577880859, "learning_rate": 0.0002, "loss": 1.6719, "step": 34400 }, { "epoch": 0.14, "grad_norm": 4.192431449890137, "learning_rate": 0.0002, "loss": 1.8315, "step": 34410 }, { "epoch": 0.14, "grad_norm": 2.75195050239563, "learning_rate": 0.0002, "loss": 1.6418, "step": 34420 }, { "epoch": 0.14, "grad_norm": 2.247694969177246, "learning_rate": 0.0002, "loss": 1.5326, "step": 34430 }, { "epoch": 0.14, "grad_norm": 1.6109055280685425, "learning_rate": 0.0002, "loss": 1.5474, "step": 34440 }, { "epoch": 0.14, "grad_norm": 2.525606393814087, "learning_rate": 0.0002, "loss": 1.284, "step": 34450 }, { "epoch": 0.14, "grad_norm": 3.9661619663238525, "learning_rate": 0.0002, "loss": 1.6663, "step": 34460 }, { "epoch": 0.14, "grad_norm": 5.519237995147705, "learning_rate": 0.0002, "loss": 1.4952, "step": 34470 }, { "epoch": 0.14, "grad_norm": 2.41274356842041, "learning_rate": 0.0002, "loss": 1.3848, "step": 34480 }, { "epoch": 0.14, "grad_norm": 2.7256600856781006, "learning_rate": 0.0002, "loss": 1.7947, "step": 34490 }, { "epoch": 0.14, "grad_norm": 2.1198770999908447, "learning_rate": 0.0002, "loss": 1.6093, "step": 34500 }, { "epoch": 0.14, "grad_norm": 2.4910972118377686, "learning_rate": 0.0002, "loss": 1.7993, "step": 34510 }, { "epoch": 0.14, "grad_norm": 2.2790346145629883, "learning_rate": 0.0002, "loss": 1.5649, "step": 34520 }, { "epoch": 0.14, "grad_norm": 3.604989767074585, "learning_rate": 0.0002, "loss": 1.375, "step": 34530 }, { "epoch": 0.14, "grad_norm": 3.9042508602142334, "learning_rate": 0.0002, "loss": 1.4615, "step": 34540 }, { "epoch": 0.14, "grad_norm": 1.988263487815857, "learning_rate": 0.0002, "loss": 1.5995, "step": 34550 }, { "epoch": 0.14, "grad_norm": 6.705137729644775, "learning_rate": 0.0002, "loss": 1.4927, "step": 34560 }, { "epoch": 0.14, "grad_norm": 3.6654374599456787, "learning_rate": 0.0002, "loss": 1.3388, "step": 34570 }, { "epoch": 0.14, "grad_norm": 2.7973804473876953, "learning_rate": 0.0002, "loss": 1.8294, "step": 34580 }, { "epoch": 0.14, "grad_norm": 2.8394646644592285, "learning_rate": 0.0002, "loss": 1.543, "step": 34590 }, { "epoch": 0.14, "grad_norm": 3.9193592071533203, "learning_rate": 0.0002, "loss": 1.5143, "step": 34600 }, { "epoch": 0.14, "grad_norm": 3.013329029083252, "learning_rate": 0.0002, "loss": 1.433, "step": 34610 }, { "epoch": 0.14, "grad_norm": 3.6777145862579346, "learning_rate": 0.0002, "loss": 1.4751, "step": 34620 }, { "epoch": 0.14, "grad_norm": 3.1644790172576904, "learning_rate": 0.0002, "loss": 1.4871, "step": 34630 }, { "epoch": 0.14, "grad_norm": 2.245134115219116, "learning_rate": 0.0002, "loss": 1.6583, "step": 34640 }, { "epoch": 0.14, "grad_norm": 4.16561222076416, "learning_rate": 0.0002, "loss": 1.5689, "step": 34650 }, { "epoch": 0.14, "grad_norm": 2.226308584213257, "learning_rate": 0.0002, "loss": 1.5728, "step": 34660 }, { "epoch": 0.14, "grad_norm": 1.7818354368209839, "learning_rate": 0.0002, "loss": 1.4333, "step": 34670 }, { "epoch": 0.14, "grad_norm": 1.7596670389175415, "learning_rate": 0.0002, "loss": 1.486, "step": 34680 }, { "epoch": 0.14, "grad_norm": 4.119341850280762, "learning_rate": 0.0002, "loss": 1.5723, "step": 34690 }, { "epoch": 0.14, "grad_norm": 2.6021411418914795, "learning_rate": 0.0002, "loss": 1.4858, "step": 34700 }, { "epoch": 0.14, "grad_norm": 3.3429694175720215, "learning_rate": 0.0002, "loss": 1.6519, "step": 34710 }, { "epoch": 0.14, "grad_norm": 4.911736488342285, "learning_rate": 0.0002, "loss": 1.629, "step": 34720 }, { "epoch": 0.14, "grad_norm": 1.553317904472351, "learning_rate": 0.0002, "loss": 1.5808, "step": 34730 }, { "epoch": 0.14, "grad_norm": 2.4527876377105713, "learning_rate": 0.0002, "loss": 1.4435, "step": 34740 }, { "epoch": 0.14, "grad_norm": 3.3607122898101807, "learning_rate": 0.0002, "loss": 1.5578, "step": 34750 }, { "epoch": 0.14, "grad_norm": 5.976861476898193, "learning_rate": 0.0002, "loss": 1.2739, "step": 34760 }, { "epoch": 0.14, "grad_norm": 2.174278736114502, "learning_rate": 0.0002, "loss": 1.4149, "step": 34770 }, { "epoch": 0.14, "grad_norm": 4.045132160186768, "learning_rate": 0.0002, "loss": 1.7387, "step": 34780 }, { "epoch": 0.14, "grad_norm": 1.7307897806167603, "learning_rate": 0.0002, "loss": 1.6535, "step": 34790 }, { "epoch": 0.14, "grad_norm": 4.200463771820068, "learning_rate": 0.0002, "loss": 1.5969, "step": 34800 }, { "epoch": 0.14, "grad_norm": 1.9800533056259155, "learning_rate": 0.0002, "loss": 1.7302, "step": 34810 }, { "epoch": 0.14, "grad_norm": 1.8424806594848633, "learning_rate": 0.0002, "loss": 1.3625, "step": 34820 }, { "epoch": 0.14, "grad_norm": 2.4064624309539795, "learning_rate": 0.0002, "loss": 1.6493, "step": 34830 }, { "epoch": 0.14, "grad_norm": 1.2995905876159668, "learning_rate": 0.0002, "loss": 1.5585, "step": 34840 }, { "epoch": 0.14, "grad_norm": 2.0510101318359375, "learning_rate": 0.0002, "loss": 1.5787, "step": 34850 }, { "epoch": 0.14, "grad_norm": 3.1470894813537598, "learning_rate": 0.0002, "loss": 1.4767, "step": 34860 }, { "epoch": 0.14, "grad_norm": 3.0652709007263184, "learning_rate": 0.0002, "loss": 1.7308, "step": 34870 }, { "epoch": 0.14, "grad_norm": 3.1654860973358154, "learning_rate": 0.0002, "loss": 1.4709, "step": 34880 }, { "epoch": 0.14, "grad_norm": 2.7551510334014893, "learning_rate": 0.0002, "loss": 1.3715, "step": 34890 }, { "epoch": 0.14, "grad_norm": 3.568934679031372, "learning_rate": 0.0002, "loss": 1.4379, "step": 34900 }, { "epoch": 0.14, "grad_norm": 3.6812469959259033, "learning_rate": 0.0002, "loss": 1.4802, "step": 34910 }, { "epoch": 0.14, "grad_norm": 3.7777109146118164, "learning_rate": 0.0002, "loss": 1.2661, "step": 34920 }, { "epoch": 0.14, "grad_norm": 3.0888941287994385, "learning_rate": 0.0002, "loss": 1.6155, "step": 34930 }, { "epoch": 0.14, "grad_norm": 4.126332759857178, "learning_rate": 0.0002, "loss": 1.4204, "step": 34940 }, { "epoch": 0.14, "grad_norm": 4.078556537628174, "learning_rate": 0.0002, "loss": 1.6937, "step": 34950 }, { "epoch": 0.14, "grad_norm": 2.505459785461426, "learning_rate": 0.0002, "loss": 1.657, "step": 34960 }, { "epoch": 0.14, "grad_norm": 5.141067981719971, "learning_rate": 0.0002, "loss": 1.4674, "step": 34970 }, { "epoch": 0.14, "grad_norm": 3.687330484390259, "learning_rate": 0.0002, "loss": 1.4611, "step": 34980 }, { "epoch": 0.14, "grad_norm": 2.118471145629883, "learning_rate": 0.0002, "loss": 1.5477, "step": 34990 }, { "epoch": 0.14, "grad_norm": 1.6835793256759644, "learning_rate": 0.0002, "loss": 1.6163, "step": 35000 }, { "epoch": 0.14, "grad_norm": 2.733523368835449, "learning_rate": 0.0002, "loss": 1.2162, "step": 35010 }, { "epoch": 0.14, "grad_norm": 2.4534354209899902, "learning_rate": 0.0002, "loss": 1.4347, "step": 35020 }, { "epoch": 0.14, "grad_norm": 5.48153829574585, "learning_rate": 0.0002, "loss": 1.5768, "step": 35030 }, { "epoch": 0.14, "grad_norm": 2.97706937789917, "learning_rate": 0.0002, "loss": 1.7282, "step": 35040 }, { "epoch": 0.14, "grad_norm": 2.026846170425415, "learning_rate": 0.0002, "loss": 1.4759, "step": 35050 }, { "epoch": 0.14, "grad_norm": 3.635438919067383, "learning_rate": 0.0002, "loss": 1.4327, "step": 35060 }, { "epoch": 0.14, "grad_norm": 3.630679130554199, "learning_rate": 0.0002, "loss": 1.6292, "step": 35070 }, { "epoch": 0.14, "grad_norm": 2.1345202922821045, "learning_rate": 0.0002, "loss": 1.4113, "step": 35080 }, { "epoch": 0.14, "grad_norm": 2.139051675796509, "learning_rate": 0.0002, "loss": 1.3433, "step": 35090 }, { "epoch": 0.14, "grad_norm": 2.457296133041382, "learning_rate": 0.0002, "loss": 1.3551, "step": 35100 }, { "epoch": 0.14, "grad_norm": 2.8800902366638184, "learning_rate": 0.0002, "loss": 1.5365, "step": 35110 }, { "epoch": 0.14, "grad_norm": 3.859482526779175, "learning_rate": 0.0002, "loss": 1.572, "step": 35120 }, { "epoch": 0.14, "grad_norm": 1.2742677927017212, "learning_rate": 0.0002, "loss": 1.7, "step": 35130 }, { "epoch": 0.14, "grad_norm": 1.8109002113342285, "learning_rate": 0.0002, "loss": 1.5052, "step": 35140 }, { "epoch": 0.14, "grad_norm": 2.093888521194458, "learning_rate": 0.0002, "loss": 1.7044, "step": 35150 }, { "epoch": 0.14, "grad_norm": 2.4807000160217285, "learning_rate": 0.0002, "loss": 1.6714, "step": 35160 }, { "epoch": 0.14, "grad_norm": 2.9093828201293945, "learning_rate": 0.0002, "loss": 1.5333, "step": 35170 }, { "epoch": 0.14, "grad_norm": 1.6674773693084717, "learning_rate": 0.0002, "loss": 1.5499, "step": 35180 }, { "epoch": 0.14, "grad_norm": 2.986262559890747, "learning_rate": 0.0002, "loss": 1.4635, "step": 35190 }, { "epoch": 0.14, "grad_norm": 2.898914337158203, "learning_rate": 0.0002, "loss": 1.6908, "step": 35200 }, { "epoch": 0.14, "grad_norm": 2.514441967010498, "learning_rate": 0.0002, "loss": 1.4351, "step": 35210 }, { "epoch": 0.14, "grad_norm": 3.784665822982788, "learning_rate": 0.0002, "loss": 1.8883, "step": 35220 }, { "epoch": 0.14, "grad_norm": 3.2679123878479004, "learning_rate": 0.0002, "loss": 1.3696, "step": 35230 }, { "epoch": 0.14, "grad_norm": 2.8592581748962402, "learning_rate": 0.0002, "loss": 1.6242, "step": 35240 }, { "epoch": 0.14, "grad_norm": 3.822429656982422, "learning_rate": 0.0002, "loss": 1.4008, "step": 35250 }, { "epoch": 0.14, "grad_norm": 1.9023325443267822, "learning_rate": 0.0002, "loss": 1.4242, "step": 35260 }, { "epoch": 0.14, "grad_norm": 6.115183353424072, "learning_rate": 0.0002, "loss": 1.4439, "step": 35270 }, { "epoch": 0.14, "grad_norm": 6.529855251312256, "learning_rate": 0.0002, "loss": 1.7236, "step": 35280 }, { "epoch": 0.14, "grad_norm": 2.9486234188079834, "learning_rate": 0.0002, "loss": 1.6688, "step": 35290 }, { "epoch": 0.14, "grad_norm": 2.619170904159546, "learning_rate": 0.0002, "loss": 1.5461, "step": 35300 }, { "epoch": 0.14, "grad_norm": 2.4875524044036865, "learning_rate": 0.0002, "loss": 1.7063, "step": 35310 }, { "epoch": 0.14, "grad_norm": 3.6073696613311768, "learning_rate": 0.0002, "loss": 1.6319, "step": 35320 }, { "epoch": 0.14, "grad_norm": 1.9204188585281372, "learning_rate": 0.0002, "loss": 1.4203, "step": 35330 }, { "epoch": 0.14, "grad_norm": 2.64862060546875, "learning_rate": 0.0002, "loss": 1.6225, "step": 35340 }, { "epoch": 0.14, "grad_norm": 1.9476277828216553, "learning_rate": 0.0002, "loss": 1.566, "step": 35350 }, { "epoch": 0.14, "grad_norm": 1.6951111555099487, "learning_rate": 0.0002, "loss": 1.4032, "step": 35360 }, { "epoch": 0.14, "grad_norm": 2.107534170150757, "learning_rate": 0.0002, "loss": 1.5422, "step": 35370 }, { "epoch": 0.14, "grad_norm": 2.8493058681488037, "learning_rate": 0.0002, "loss": 1.5985, "step": 35380 }, { "epoch": 0.14, "grad_norm": 2.3300347328186035, "learning_rate": 0.0002, "loss": 1.496, "step": 35390 }, { "epoch": 0.14, "grad_norm": 2.009730100631714, "learning_rate": 0.0002, "loss": 1.3622, "step": 35400 }, { "epoch": 0.14, "grad_norm": 1.7809568643569946, "learning_rate": 0.0002, "loss": 1.5273, "step": 35410 }, { "epoch": 0.14, "grad_norm": 2.8733084201812744, "learning_rate": 0.0002, "loss": 1.7671, "step": 35420 }, { "epoch": 0.14, "grad_norm": 2.634500741958618, "learning_rate": 0.0002, "loss": 1.5259, "step": 35430 }, { "epoch": 0.14, "grad_norm": 2.6701998710632324, "learning_rate": 0.0002, "loss": 1.8117, "step": 35440 }, { "epoch": 0.14, "grad_norm": 3.1694655418395996, "learning_rate": 0.0002, "loss": 1.7049, "step": 35450 }, { "epoch": 0.14, "grad_norm": 4.627549171447754, "learning_rate": 0.0002, "loss": 1.822, "step": 35460 }, { "epoch": 0.14, "grad_norm": 2.6474335193634033, "learning_rate": 0.0002, "loss": 1.5374, "step": 35470 }, { "epoch": 0.14, "grad_norm": 1.824230432510376, "learning_rate": 0.0002, "loss": 1.6036, "step": 35480 }, { "epoch": 0.14, "grad_norm": 3.3470444679260254, "learning_rate": 0.0002, "loss": 1.5436, "step": 35490 }, { "epoch": 0.14, "grad_norm": 4.201387405395508, "learning_rate": 0.0002, "loss": 1.5499, "step": 35500 }, { "epoch": 0.14, "grad_norm": 3.0107641220092773, "learning_rate": 0.0002, "loss": 1.7308, "step": 35510 }, { "epoch": 0.14, "grad_norm": 2.1535890102386475, "learning_rate": 0.0002, "loss": 1.307, "step": 35520 }, { "epoch": 0.14, "grad_norm": 1.661996841430664, "learning_rate": 0.0002, "loss": 1.5968, "step": 35530 }, { "epoch": 0.14, "grad_norm": 2.662313461303711, "learning_rate": 0.0002, "loss": 1.4666, "step": 35540 }, { "epoch": 0.14, "grad_norm": 3.2008936405181885, "learning_rate": 0.0002, "loss": 1.486, "step": 35550 }, { "epoch": 0.14, "grad_norm": 3.577833414077759, "learning_rate": 0.0002, "loss": 1.6355, "step": 35560 }, { "epoch": 0.14, "grad_norm": 3.3828935623168945, "learning_rate": 0.0002, "loss": 1.4624, "step": 35570 }, { "epoch": 0.14, "grad_norm": 1.77092444896698, "learning_rate": 0.0002, "loss": 1.4667, "step": 35580 }, { "epoch": 0.14, "grad_norm": 2.2714123725891113, "learning_rate": 0.0002, "loss": 1.5102, "step": 35590 }, { "epoch": 0.14, "grad_norm": 3.704336404800415, "learning_rate": 0.0002, "loss": 1.6498, "step": 35600 }, { "epoch": 0.14, "grad_norm": 3.014723300933838, "learning_rate": 0.0002, "loss": 1.4881, "step": 35610 }, { "epoch": 0.15, "grad_norm": 2.5337512493133545, "learning_rate": 0.0002, "loss": 1.4863, "step": 35620 }, { "epoch": 0.15, "grad_norm": 2.188589334487915, "learning_rate": 0.0002, "loss": 1.5113, "step": 35630 }, { "epoch": 0.15, "grad_norm": 2.311046838760376, "learning_rate": 0.0002, "loss": 1.5045, "step": 35640 }, { "epoch": 0.15, "grad_norm": 9.864656448364258, "learning_rate": 0.0002, "loss": 1.6332, "step": 35650 }, { "epoch": 0.15, "grad_norm": 1.886946201324463, "learning_rate": 0.0002, "loss": 1.6086, "step": 35660 }, { "epoch": 0.15, "grad_norm": 1.9604591131210327, "learning_rate": 0.0002, "loss": 1.5046, "step": 35670 }, { "epoch": 0.15, "grad_norm": 2.438539981842041, "learning_rate": 0.0002, "loss": 1.488, "step": 35680 }, { "epoch": 0.15, "grad_norm": 1.994531512260437, "learning_rate": 0.0002, "loss": 1.6764, "step": 35690 }, { "epoch": 0.15, "grad_norm": 4.9043402671813965, "learning_rate": 0.0002, "loss": 1.5359, "step": 35700 }, { "epoch": 0.15, "grad_norm": 2.0131618976593018, "learning_rate": 0.0002, "loss": 1.5517, "step": 35710 }, { "epoch": 0.15, "grad_norm": 2.127509355545044, "learning_rate": 0.0002, "loss": 1.3446, "step": 35720 }, { "epoch": 0.15, "grad_norm": 2.755802631378174, "learning_rate": 0.0002, "loss": 1.6148, "step": 35730 }, { "epoch": 0.15, "grad_norm": 2.7413554191589355, "learning_rate": 0.0002, "loss": 1.5606, "step": 35740 }, { "epoch": 0.15, "grad_norm": 2.355156421661377, "learning_rate": 0.0002, "loss": 1.721, "step": 35750 }, { "epoch": 0.15, "grad_norm": 2.283367156982422, "learning_rate": 0.0002, "loss": 1.6272, "step": 35760 }, { "epoch": 0.15, "grad_norm": 2.0274810791015625, "learning_rate": 0.0002, "loss": 1.5281, "step": 35770 }, { "epoch": 0.15, "grad_norm": 2.4384100437164307, "learning_rate": 0.0002, "loss": 1.7653, "step": 35780 }, { "epoch": 0.15, "grad_norm": 2.8601927757263184, "learning_rate": 0.0002, "loss": 1.634, "step": 35790 }, { "epoch": 0.15, "grad_norm": 2.2163500785827637, "learning_rate": 0.0002, "loss": 1.3713, "step": 35800 }, { "epoch": 0.15, "grad_norm": 2.835860013961792, "learning_rate": 0.0002, "loss": 1.5138, "step": 35810 }, { "epoch": 0.15, "grad_norm": 2.3584306240081787, "learning_rate": 0.0002, "loss": 1.5343, "step": 35820 }, { "epoch": 0.15, "grad_norm": 2.3015055656433105, "learning_rate": 0.0002, "loss": 1.5185, "step": 35830 }, { "epoch": 0.15, "grad_norm": 2.9439938068389893, "learning_rate": 0.0002, "loss": 1.566, "step": 35840 }, { "epoch": 0.15, "grad_norm": 2.1449434757232666, "learning_rate": 0.0002, "loss": 1.4802, "step": 35850 }, { "epoch": 0.15, "grad_norm": 2.4290647506713867, "learning_rate": 0.0002, "loss": 1.6023, "step": 35860 }, { "epoch": 0.15, "grad_norm": 1.3055518865585327, "learning_rate": 0.0002, "loss": 1.5226, "step": 35870 }, { "epoch": 0.15, "grad_norm": 2.8855767250061035, "learning_rate": 0.0002, "loss": 1.5571, "step": 35880 }, { "epoch": 0.15, "grad_norm": 3.4890682697296143, "learning_rate": 0.0002, "loss": 1.5998, "step": 35890 }, { "epoch": 0.15, "grad_norm": 4.041129112243652, "learning_rate": 0.0002, "loss": 1.549, "step": 35900 }, { "epoch": 0.15, "grad_norm": 1.6233865022659302, "learning_rate": 0.0002, "loss": 1.4879, "step": 35910 }, { "epoch": 0.15, "grad_norm": 2.664450168609619, "learning_rate": 0.0002, "loss": 1.5308, "step": 35920 }, { "epoch": 0.15, "grad_norm": 1.8061373233795166, "learning_rate": 0.0002, "loss": 1.305, "step": 35930 }, { "epoch": 0.15, "grad_norm": 2.247591733932495, "learning_rate": 0.0002, "loss": 1.7283, "step": 35940 }, { "epoch": 0.15, "grad_norm": 2.185760498046875, "learning_rate": 0.0002, "loss": 1.7761, "step": 35950 }, { "epoch": 0.15, "grad_norm": 2.6540985107421875, "learning_rate": 0.0002, "loss": 1.7247, "step": 35960 }, { "epoch": 0.15, "grad_norm": 3.1061577796936035, "learning_rate": 0.0002, "loss": 1.3192, "step": 35970 }, { "epoch": 0.15, "grad_norm": 3.1941728591918945, "learning_rate": 0.0002, "loss": 1.6756, "step": 35980 }, { "epoch": 0.15, "grad_norm": 3.222649335861206, "learning_rate": 0.0002, "loss": 1.4987, "step": 35990 }, { "epoch": 0.15, "grad_norm": 2.860546588897705, "learning_rate": 0.0002, "loss": 1.5433, "step": 36000 }, { "epoch": 0.15, "grad_norm": 2.513792037963867, "learning_rate": 0.0002, "loss": 1.3535, "step": 36010 }, { "epoch": 0.15, "grad_norm": 2.302497386932373, "learning_rate": 0.0002, "loss": 1.4881, "step": 36020 }, { "epoch": 0.15, "grad_norm": 3.6569364070892334, "learning_rate": 0.0002, "loss": 1.4843, "step": 36030 }, { "epoch": 0.15, "grad_norm": 2.5057480335235596, "learning_rate": 0.0002, "loss": 1.6442, "step": 36040 }, { "epoch": 0.15, "grad_norm": 3.319434642791748, "learning_rate": 0.0002, "loss": 1.6929, "step": 36050 }, { "epoch": 0.15, "grad_norm": 6.9552903175354, "learning_rate": 0.0002, "loss": 1.6307, "step": 36060 }, { "epoch": 0.15, "grad_norm": 1.9230303764343262, "learning_rate": 0.0002, "loss": 1.7408, "step": 36070 }, { "epoch": 0.15, "grad_norm": 1.5455387830734253, "learning_rate": 0.0002, "loss": 1.4534, "step": 36080 }, { "epoch": 0.15, "grad_norm": 3.4063827991485596, "learning_rate": 0.0002, "loss": 1.6812, "step": 36090 }, { "epoch": 0.15, "grad_norm": 4.738742828369141, "learning_rate": 0.0002, "loss": 1.4463, "step": 36100 }, { "epoch": 0.15, "grad_norm": 3.002847909927368, "learning_rate": 0.0002, "loss": 1.638, "step": 36110 }, { "epoch": 0.15, "grad_norm": 1.9221875667572021, "learning_rate": 0.0002, "loss": 1.5566, "step": 36120 }, { "epoch": 0.15, "grad_norm": 2.6964147090911865, "learning_rate": 0.0002, "loss": 1.4941, "step": 36130 }, { "epoch": 0.15, "grad_norm": 2.212184429168701, "learning_rate": 0.0002, "loss": 1.3697, "step": 36140 }, { "epoch": 0.15, "grad_norm": 2.5548365116119385, "learning_rate": 0.0002, "loss": 1.5549, "step": 36150 }, { "epoch": 0.15, "grad_norm": 3.6505842208862305, "learning_rate": 0.0002, "loss": 1.6117, "step": 36160 }, { "epoch": 0.15, "grad_norm": 2.9462220668792725, "learning_rate": 0.0002, "loss": 1.4584, "step": 36170 }, { "epoch": 0.15, "grad_norm": 1.6118745803833008, "learning_rate": 0.0002, "loss": 1.2347, "step": 36180 }, { "epoch": 0.15, "grad_norm": 2.510969400405884, "learning_rate": 0.0002, "loss": 1.8617, "step": 36190 }, { "epoch": 0.15, "grad_norm": 2.163376569747925, "learning_rate": 0.0002, "loss": 1.577, "step": 36200 }, { "epoch": 0.15, "grad_norm": 1.4920247793197632, "learning_rate": 0.0002, "loss": 1.4132, "step": 36210 }, { "epoch": 0.15, "grad_norm": 2.65464186668396, "learning_rate": 0.0002, "loss": 1.2992, "step": 36220 }, { "epoch": 0.15, "grad_norm": 3.182016134262085, "learning_rate": 0.0002, "loss": 1.6279, "step": 36230 }, { "epoch": 0.15, "grad_norm": 2.3439600467681885, "learning_rate": 0.0002, "loss": 1.5496, "step": 36240 }, { "epoch": 0.15, "grad_norm": 4.802595615386963, "learning_rate": 0.0002, "loss": 2.0344, "step": 36250 }, { "epoch": 0.15, "grad_norm": 3.5421199798583984, "learning_rate": 0.0002, "loss": 1.6308, "step": 36260 }, { "epoch": 0.15, "grad_norm": 3.1902196407318115, "learning_rate": 0.0002, "loss": 1.5893, "step": 36270 }, { "epoch": 0.15, "grad_norm": 2.3535208702087402, "learning_rate": 0.0002, "loss": 1.4689, "step": 36280 }, { "epoch": 0.15, "grad_norm": 3.0393006801605225, "learning_rate": 0.0002, "loss": 1.5708, "step": 36290 }, { "epoch": 0.15, "grad_norm": 4.052380084991455, "learning_rate": 0.0002, "loss": 1.6397, "step": 36300 }, { "epoch": 0.15, "grad_norm": 3.050781488418579, "learning_rate": 0.0002, "loss": 1.3629, "step": 36310 }, { "epoch": 0.15, "grad_norm": 1.6259483098983765, "learning_rate": 0.0002, "loss": 1.4933, "step": 36320 }, { "epoch": 0.15, "grad_norm": 6.2557854652404785, "learning_rate": 0.0002, "loss": 1.5971, "step": 36330 }, { "epoch": 0.15, "grad_norm": 2.1479809284210205, "learning_rate": 0.0002, "loss": 1.5073, "step": 36340 }, { "epoch": 0.15, "grad_norm": 3.163254737854004, "learning_rate": 0.0002, "loss": 1.4846, "step": 36350 }, { "epoch": 0.15, "grad_norm": 2.6566755771636963, "learning_rate": 0.0002, "loss": 1.3889, "step": 36360 }, { "epoch": 0.15, "grad_norm": 2.0440516471862793, "learning_rate": 0.0002, "loss": 1.6749, "step": 36370 }, { "epoch": 0.15, "grad_norm": 3.5590813159942627, "learning_rate": 0.0002, "loss": 1.5924, "step": 36380 }, { "epoch": 0.15, "grad_norm": 3.193333387374878, "learning_rate": 0.0002, "loss": 1.7145, "step": 36390 }, { "epoch": 0.15, "grad_norm": 2.352262258529663, "learning_rate": 0.0002, "loss": 1.5595, "step": 36400 }, { "epoch": 0.15, "grad_norm": 2.8808400630950928, "learning_rate": 0.0002, "loss": 1.5056, "step": 36410 }, { "epoch": 0.15, "grad_norm": 3.492481231689453, "learning_rate": 0.0002, "loss": 1.5113, "step": 36420 }, { "epoch": 0.15, "grad_norm": 3.4931347370147705, "learning_rate": 0.0002, "loss": 1.6788, "step": 36430 }, { "epoch": 0.15, "grad_norm": 1.8634732961654663, "learning_rate": 0.0002, "loss": 1.5267, "step": 36440 }, { "epoch": 0.15, "grad_norm": 2.2627339363098145, "learning_rate": 0.0002, "loss": 1.6863, "step": 36450 }, { "epoch": 0.15, "grad_norm": 3.4337892532348633, "learning_rate": 0.0002, "loss": 1.4921, "step": 36460 }, { "epoch": 0.15, "grad_norm": 4.334169864654541, "learning_rate": 0.0002, "loss": 1.5682, "step": 36470 }, { "epoch": 0.15, "grad_norm": 3.2231156826019287, "learning_rate": 0.0002, "loss": 1.701, "step": 36480 }, { "epoch": 0.15, "grad_norm": 2.6368026733398438, "learning_rate": 0.0002, "loss": 1.6408, "step": 36490 }, { "epoch": 0.15, "grad_norm": 2.4584293365478516, "learning_rate": 0.0002, "loss": 1.5206, "step": 36500 }, { "epoch": 0.15, "grad_norm": 2.65276837348938, "learning_rate": 0.0002, "loss": 1.3943, "step": 36510 }, { "epoch": 0.15, "grad_norm": 3.798259735107422, "learning_rate": 0.0002, "loss": 1.6575, "step": 36520 }, { "epoch": 0.15, "grad_norm": 1.9310179948806763, "learning_rate": 0.0002, "loss": 1.4527, "step": 36530 }, { "epoch": 0.15, "grad_norm": 1.8816944360733032, "learning_rate": 0.0002, "loss": 1.4086, "step": 36540 }, { "epoch": 0.15, "grad_norm": 2.538437843322754, "learning_rate": 0.0002, "loss": 1.4099, "step": 36550 }, { "epoch": 0.15, "grad_norm": 1.4266316890716553, "learning_rate": 0.0002, "loss": 1.5746, "step": 36560 }, { "epoch": 0.15, "grad_norm": 2.486351251602173, "learning_rate": 0.0002, "loss": 1.4218, "step": 36570 }, { "epoch": 0.15, "grad_norm": 3.0540695190429688, "learning_rate": 0.0002, "loss": 1.455, "step": 36580 }, { "epoch": 0.15, "grad_norm": 4.489884376525879, "learning_rate": 0.0002, "loss": 1.6076, "step": 36590 }, { "epoch": 0.15, "grad_norm": 1.8647762537002563, "learning_rate": 0.0002, "loss": 1.3932, "step": 36600 }, { "epoch": 0.15, "grad_norm": 2.3476924896240234, "learning_rate": 0.0002, "loss": 1.4739, "step": 36610 }, { "epoch": 0.15, "grad_norm": 2.3100149631500244, "learning_rate": 0.0002, "loss": 1.7703, "step": 36620 }, { "epoch": 0.15, "grad_norm": 2.9238791465759277, "learning_rate": 0.0002, "loss": 1.2929, "step": 36630 }, { "epoch": 0.15, "grad_norm": 4.602470874786377, "learning_rate": 0.0002, "loss": 1.6969, "step": 36640 }, { "epoch": 0.15, "grad_norm": 1.9663585424423218, "learning_rate": 0.0002, "loss": 1.3923, "step": 36650 }, { "epoch": 0.15, "grad_norm": 2.5135600566864014, "learning_rate": 0.0002, "loss": 1.4445, "step": 36660 }, { "epoch": 0.15, "grad_norm": 1.7088937759399414, "learning_rate": 0.0002, "loss": 1.3972, "step": 36670 }, { "epoch": 0.15, "grad_norm": 2.5924365520477295, "learning_rate": 0.0002, "loss": 1.2753, "step": 36680 }, { "epoch": 0.15, "grad_norm": 6.331651210784912, "learning_rate": 0.0002, "loss": 1.6896, "step": 36690 }, { "epoch": 0.15, "grad_norm": 2.2950222492218018, "learning_rate": 0.0002, "loss": 1.3847, "step": 36700 }, { "epoch": 0.15, "grad_norm": 2.3484907150268555, "learning_rate": 0.0002, "loss": 1.4539, "step": 36710 }, { "epoch": 0.15, "grad_norm": 3.1476573944091797, "learning_rate": 0.0002, "loss": 1.242, "step": 36720 }, { "epoch": 0.15, "grad_norm": 3.8328919410705566, "learning_rate": 0.0002, "loss": 1.6073, "step": 36730 }, { "epoch": 0.15, "grad_norm": 4.019923686981201, "learning_rate": 0.0002, "loss": 1.6402, "step": 36740 }, { "epoch": 0.15, "grad_norm": 2.096447467803955, "learning_rate": 0.0002, "loss": 1.6402, "step": 36750 }, { "epoch": 0.15, "grad_norm": 2.6023313999176025, "learning_rate": 0.0002, "loss": 1.7239, "step": 36760 }, { "epoch": 0.15, "grad_norm": 2.7378346920013428, "learning_rate": 0.0002, "loss": 1.438, "step": 36770 }, { "epoch": 0.15, "grad_norm": 2.082568645477295, "learning_rate": 0.0002, "loss": 1.5979, "step": 36780 }, { "epoch": 0.15, "grad_norm": 1.9720779657363892, "learning_rate": 0.0002, "loss": 1.6583, "step": 36790 }, { "epoch": 0.15, "grad_norm": 3.350682020187378, "learning_rate": 0.0002, "loss": 1.5482, "step": 36800 }, { "epoch": 0.15, "grad_norm": 2.907200813293457, "learning_rate": 0.0002, "loss": 1.4977, "step": 36810 }, { "epoch": 0.15, "grad_norm": 3.938661575317383, "learning_rate": 0.0002, "loss": 1.4246, "step": 36820 }, { "epoch": 0.15, "grad_norm": 2.9429972171783447, "learning_rate": 0.0002, "loss": 1.3134, "step": 36830 }, { "epoch": 0.15, "grad_norm": 2.759472131729126, "learning_rate": 0.0002, "loss": 1.5549, "step": 36840 }, { "epoch": 0.15, "grad_norm": 2.7318437099456787, "learning_rate": 0.0002, "loss": 1.666, "step": 36850 }, { "epoch": 0.15, "grad_norm": 1.4676684141159058, "learning_rate": 0.0002, "loss": 1.4786, "step": 36860 }, { "epoch": 0.15, "grad_norm": 2.5009829998016357, "learning_rate": 0.0002, "loss": 1.518, "step": 36870 }, { "epoch": 0.15, "grad_norm": 5.2632670402526855, "learning_rate": 0.0002, "loss": 1.7161, "step": 36880 }, { "epoch": 0.15, "grad_norm": 2.0844624042510986, "learning_rate": 0.0002, "loss": 1.4615, "step": 36890 }, { "epoch": 0.15, "grad_norm": 3.0304505825042725, "learning_rate": 0.0002, "loss": 1.6809, "step": 36900 }, { "epoch": 0.15, "grad_norm": 2.752136468887329, "learning_rate": 0.0002, "loss": 1.6829, "step": 36910 }, { "epoch": 0.15, "grad_norm": 2.528637647628784, "learning_rate": 0.0002, "loss": 1.5037, "step": 36920 }, { "epoch": 0.15, "grad_norm": 2.4906208515167236, "learning_rate": 0.0002, "loss": 1.6114, "step": 36930 }, { "epoch": 0.15, "grad_norm": 1.584646463394165, "learning_rate": 0.0002, "loss": 1.6292, "step": 36940 }, { "epoch": 0.15, "grad_norm": 3.214421033859253, "learning_rate": 0.0002, "loss": 1.7047, "step": 36950 }, { "epoch": 0.15, "grad_norm": 2.0626158714294434, "learning_rate": 0.0002, "loss": 1.5572, "step": 36960 }, { "epoch": 0.15, "grad_norm": 2.6731948852539062, "learning_rate": 0.0002, "loss": 1.4843, "step": 36970 }, { "epoch": 0.15, "grad_norm": 3.7041304111480713, "learning_rate": 0.0002, "loss": 1.4303, "step": 36980 }, { "epoch": 0.15, "grad_norm": 3.0925724506378174, "learning_rate": 0.0002, "loss": 1.616, "step": 36990 }, { "epoch": 0.15, "grad_norm": 3.5291855335235596, "learning_rate": 0.0002, "loss": 1.5103, "step": 37000 }, { "epoch": 0.15, "grad_norm": 3.8171095848083496, "learning_rate": 0.0002, "loss": 1.5711, "step": 37010 }, { "epoch": 0.15, "grad_norm": 1.2967965602874756, "learning_rate": 0.0002, "loss": 1.4228, "step": 37020 }, { "epoch": 0.15, "grad_norm": 3.467292547225952, "learning_rate": 0.0002, "loss": 1.284, "step": 37030 }, { "epoch": 0.15, "grad_norm": 3.863300085067749, "learning_rate": 0.0002, "loss": 1.6577, "step": 37040 }, { "epoch": 0.15, "grad_norm": 2.987226724624634, "learning_rate": 0.0002, "loss": 1.4685, "step": 37050 }, { "epoch": 0.15, "grad_norm": 2.162478446960449, "learning_rate": 0.0002, "loss": 1.5697, "step": 37060 }, { "epoch": 0.15, "grad_norm": 2.187300205230713, "learning_rate": 0.0002, "loss": 1.4566, "step": 37070 }, { "epoch": 0.15, "grad_norm": 1.6453102827072144, "learning_rate": 0.0002, "loss": 1.5301, "step": 37080 }, { "epoch": 0.15, "grad_norm": 2.797729969024658, "learning_rate": 0.0002, "loss": 1.4697, "step": 37090 }, { "epoch": 0.15, "grad_norm": 2.112119197845459, "learning_rate": 0.0002, "loss": 1.6463, "step": 37100 }, { "epoch": 0.15, "grad_norm": 3.2189152240753174, "learning_rate": 0.0002, "loss": 1.4471, "step": 37110 }, { "epoch": 0.15, "grad_norm": 1.6465295553207397, "learning_rate": 0.0002, "loss": 1.6065, "step": 37120 }, { "epoch": 0.15, "grad_norm": 1.6696324348449707, "learning_rate": 0.0002, "loss": 1.4517, "step": 37130 }, { "epoch": 0.15, "grad_norm": 2.551542043685913, "learning_rate": 0.0002, "loss": 1.4742, "step": 37140 }, { "epoch": 0.15, "grad_norm": 3.6437036991119385, "learning_rate": 0.0002, "loss": 1.4629, "step": 37150 }, { "epoch": 0.15, "grad_norm": 3.0029232501983643, "learning_rate": 0.0002, "loss": 1.7536, "step": 37160 }, { "epoch": 0.15, "grad_norm": 5.933969974517822, "learning_rate": 0.0002, "loss": 1.566, "step": 37170 }, { "epoch": 0.15, "grad_norm": 2.8952200412750244, "learning_rate": 0.0002, "loss": 1.505, "step": 37180 }, { "epoch": 0.15, "grad_norm": 2.843555450439453, "learning_rate": 0.0002, "loss": 1.6371, "step": 37190 }, { "epoch": 0.15, "grad_norm": 2.280219316482544, "learning_rate": 0.0002, "loss": 1.3498, "step": 37200 }, { "epoch": 0.15, "grad_norm": 3.278130531311035, "learning_rate": 0.0002, "loss": 1.7341, "step": 37210 }, { "epoch": 0.15, "grad_norm": 1.768969178199768, "learning_rate": 0.0002, "loss": 1.6076, "step": 37220 }, { "epoch": 0.15, "grad_norm": 3.4191572666168213, "learning_rate": 0.0002, "loss": 1.7423, "step": 37230 }, { "epoch": 0.15, "grad_norm": 2.364619255065918, "learning_rate": 0.0002, "loss": 1.5188, "step": 37240 }, { "epoch": 0.15, "grad_norm": 3.2221202850341797, "learning_rate": 0.0002, "loss": 1.6271, "step": 37250 }, { "epoch": 0.15, "grad_norm": 3.2422635555267334, "learning_rate": 0.0002, "loss": 1.3943, "step": 37260 }, { "epoch": 0.15, "grad_norm": 3.0152854919433594, "learning_rate": 0.0002, "loss": 1.6656, "step": 37270 }, { "epoch": 0.15, "grad_norm": 3.255152463912964, "learning_rate": 0.0002, "loss": 1.3935, "step": 37280 }, { "epoch": 0.15, "grad_norm": 4.276217937469482, "learning_rate": 0.0002, "loss": 1.5503, "step": 37290 }, { "epoch": 0.15, "grad_norm": 2.7012858390808105, "learning_rate": 0.0002, "loss": 1.8346, "step": 37300 }, { "epoch": 0.15, "grad_norm": 4.625742435455322, "learning_rate": 0.0002, "loss": 1.4439, "step": 37310 }, { "epoch": 0.15, "grad_norm": 2.0149335861206055, "learning_rate": 0.0002, "loss": 1.2395, "step": 37320 }, { "epoch": 0.15, "grad_norm": 7.768395900726318, "learning_rate": 0.0002, "loss": 1.6695, "step": 37330 }, { "epoch": 0.15, "grad_norm": 4.973001956939697, "learning_rate": 0.0002, "loss": 1.4566, "step": 37340 }, { "epoch": 0.15, "grad_norm": 4.282678127288818, "learning_rate": 0.0002, "loss": 1.6982, "step": 37350 }, { "epoch": 0.15, "grad_norm": 2.109236717224121, "learning_rate": 0.0002, "loss": 1.4743, "step": 37360 }, { "epoch": 0.15, "grad_norm": 1.3516968488693237, "learning_rate": 0.0002, "loss": 1.5486, "step": 37370 }, { "epoch": 0.15, "grad_norm": 2.304269552230835, "learning_rate": 0.0002, "loss": 1.7351, "step": 37380 }, { "epoch": 0.15, "grad_norm": 1.3639110326766968, "learning_rate": 0.0002, "loss": 1.2165, "step": 37390 }, { "epoch": 0.15, "grad_norm": 3.1956417560577393, "learning_rate": 0.0002, "loss": 1.3473, "step": 37400 }, { "epoch": 0.15, "grad_norm": 2.5169482231140137, "learning_rate": 0.0002, "loss": 1.7048, "step": 37410 }, { "epoch": 0.15, "grad_norm": 3.457409143447876, "learning_rate": 0.0002, "loss": 1.4431, "step": 37420 }, { "epoch": 0.15, "grad_norm": 2.3252084255218506, "learning_rate": 0.0002, "loss": 1.8269, "step": 37430 }, { "epoch": 0.15, "grad_norm": 3.5087826251983643, "learning_rate": 0.0002, "loss": 1.4804, "step": 37440 }, { "epoch": 0.15, "grad_norm": 3.097106456756592, "learning_rate": 0.0002, "loss": 1.4896, "step": 37450 }, { "epoch": 0.15, "grad_norm": 3.7397241592407227, "learning_rate": 0.0002, "loss": 1.6616, "step": 37460 }, { "epoch": 0.15, "grad_norm": 2.9914238452911377, "learning_rate": 0.0002, "loss": 1.5822, "step": 37470 }, { "epoch": 0.15, "grad_norm": 3.706068754196167, "learning_rate": 0.0002, "loss": 1.6963, "step": 37480 }, { "epoch": 0.15, "grad_norm": 2.5899229049682617, "learning_rate": 0.0002, "loss": 1.542, "step": 37490 }, { "epoch": 0.15, "grad_norm": 3.7781310081481934, "learning_rate": 0.0002, "loss": 1.585, "step": 37500 }, { "epoch": 0.15, "grad_norm": 2.55702805519104, "learning_rate": 0.0002, "loss": 1.3269, "step": 37510 }, { "epoch": 0.15, "grad_norm": 4.423364162445068, "learning_rate": 0.0002, "loss": 1.341, "step": 37520 }, { "epoch": 0.15, "grad_norm": 2.404371976852417, "learning_rate": 0.0002, "loss": 1.5616, "step": 37530 }, { "epoch": 0.15, "grad_norm": 2.9291164875030518, "learning_rate": 0.0002, "loss": 1.7112, "step": 37540 }, { "epoch": 0.15, "grad_norm": 2.7515125274658203, "learning_rate": 0.0002, "loss": 1.6286, "step": 37550 }, { "epoch": 0.15, "grad_norm": 2.537074565887451, "learning_rate": 0.0002, "loss": 1.4451, "step": 37560 }, { "epoch": 0.15, "grad_norm": 2.037182092666626, "learning_rate": 0.0002, "loss": 1.5981, "step": 37570 }, { "epoch": 0.15, "grad_norm": 4.424961090087891, "learning_rate": 0.0002, "loss": 1.4016, "step": 37580 }, { "epoch": 0.15, "grad_norm": 2.925459146499634, "learning_rate": 0.0002, "loss": 1.5122, "step": 37590 }, { "epoch": 0.15, "grad_norm": 2.324514865875244, "learning_rate": 0.0002, "loss": 1.7803, "step": 37600 }, { "epoch": 0.15, "grad_norm": 2.70588755607605, "learning_rate": 0.0002, "loss": 1.2127, "step": 37610 }, { "epoch": 0.15, "grad_norm": 2.641561985015869, "learning_rate": 0.0002, "loss": 1.6808, "step": 37620 }, { "epoch": 0.15, "grad_norm": 3.2888166904449463, "learning_rate": 0.0002, "loss": 1.5, "step": 37630 }, { "epoch": 0.15, "grad_norm": 3.8794069290161133, "learning_rate": 0.0002, "loss": 1.6552, "step": 37640 }, { "epoch": 0.15, "grad_norm": 4.291093349456787, "learning_rate": 0.0002, "loss": 1.4032, "step": 37650 }, { "epoch": 0.15, "grad_norm": 2.567382335662842, "learning_rate": 0.0002, "loss": 1.4089, "step": 37660 }, { "epoch": 0.15, "grad_norm": 1.7561856508255005, "learning_rate": 0.0002, "loss": 1.641, "step": 37670 }, { "epoch": 0.15, "grad_norm": 3.507157325744629, "learning_rate": 0.0002, "loss": 1.4671, "step": 37680 }, { "epoch": 0.15, "grad_norm": 2.9293999671936035, "learning_rate": 0.0002, "loss": 1.9124, "step": 37690 }, { "epoch": 0.15, "grad_norm": 2.3676061630249023, "learning_rate": 0.0002, "loss": 1.4137, "step": 37700 }, { "epoch": 0.15, "grad_norm": 2.145576000213623, "learning_rate": 0.0002, "loss": 1.3666, "step": 37710 }, { "epoch": 0.15, "grad_norm": 1.4212034940719604, "learning_rate": 0.0002, "loss": 1.2993, "step": 37720 }, { "epoch": 0.15, "grad_norm": 4.7351789474487305, "learning_rate": 0.0002, "loss": 1.652, "step": 37730 }, { "epoch": 0.15, "grad_norm": 4.204655647277832, "learning_rate": 0.0002, "loss": 1.4359, "step": 37740 }, { "epoch": 0.15, "grad_norm": 3.729447603225708, "learning_rate": 0.0002, "loss": 1.4297, "step": 37750 }, { "epoch": 0.15, "grad_norm": 2.950885772705078, "learning_rate": 0.0002, "loss": 1.501, "step": 37760 }, { "epoch": 0.15, "grad_norm": 3.082045793533325, "learning_rate": 0.0002, "loss": 1.5392, "step": 37770 }, { "epoch": 0.15, "grad_norm": 4.74533224105835, "learning_rate": 0.0002, "loss": 1.6053, "step": 37780 }, { "epoch": 0.15, "grad_norm": 3.0013086795806885, "learning_rate": 0.0002, "loss": 1.7125, "step": 37790 }, { "epoch": 0.15, "grad_norm": 3.0442628860473633, "learning_rate": 0.0002, "loss": 1.6313, "step": 37800 }, { "epoch": 0.15, "grad_norm": 3.057023048400879, "learning_rate": 0.0002, "loss": 1.5307, "step": 37810 }, { "epoch": 0.15, "grad_norm": 4.337264537811279, "learning_rate": 0.0002, "loss": 1.7088, "step": 37820 }, { "epoch": 0.15, "grad_norm": 5.202800750732422, "learning_rate": 0.0002, "loss": 1.7087, "step": 37830 }, { "epoch": 0.15, "grad_norm": 1.8363040685653687, "learning_rate": 0.0002, "loss": 1.6157, "step": 37840 }, { "epoch": 0.15, "grad_norm": 3.4829185009002686, "learning_rate": 0.0002, "loss": 1.3825, "step": 37850 }, { "epoch": 0.15, "grad_norm": 2.1644318103790283, "learning_rate": 0.0002, "loss": 1.7401, "step": 37860 }, { "epoch": 0.15, "grad_norm": 5.231853008270264, "learning_rate": 0.0002, "loss": 1.3038, "step": 37870 }, { "epoch": 0.15, "grad_norm": 2.291571617126465, "learning_rate": 0.0002, "loss": 1.4423, "step": 37880 }, { "epoch": 0.15, "grad_norm": 5.6742377281188965, "learning_rate": 0.0002, "loss": 1.5681, "step": 37890 }, { "epoch": 0.15, "grad_norm": 2.3503713607788086, "learning_rate": 0.0002, "loss": 1.5292, "step": 37900 }, { "epoch": 0.15, "grad_norm": 3.929429531097412, "learning_rate": 0.0002, "loss": 1.6856, "step": 37910 }, { "epoch": 0.15, "grad_norm": 2.2042675018310547, "learning_rate": 0.0002, "loss": 1.4947, "step": 37920 }, { "epoch": 0.15, "grad_norm": 2.2848751544952393, "learning_rate": 0.0002, "loss": 1.4044, "step": 37930 }, { "epoch": 0.15, "grad_norm": 2.360712766647339, "learning_rate": 0.0002, "loss": 1.3034, "step": 37940 }, { "epoch": 0.15, "grad_norm": 1.9622445106506348, "learning_rate": 0.0002, "loss": 1.6481, "step": 37950 }, { "epoch": 0.15, "grad_norm": 3.2974777221679688, "learning_rate": 0.0002, "loss": 1.4979, "step": 37960 }, { "epoch": 0.15, "grad_norm": 2.0158042907714844, "learning_rate": 0.0002, "loss": 1.5731, "step": 37970 }, { "epoch": 0.15, "grad_norm": 1.8824102878570557, "learning_rate": 0.0002, "loss": 1.6389, "step": 37980 }, { "epoch": 0.15, "grad_norm": 2.0158090591430664, "learning_rate": 0.0002, "loss": 1.359, "step": 37990 }, { "epoch": 0.15, "grad_norm": 4.7785868644714355, "learning_rate": 0.0002, "loss": 1.7009, "step": 38000 }, { "epoch": 0.15, "grad_norm": 3.3938043117523193, "learning_rate": 0.0002, "loss": 1.5537, "step": 38010 }, { "epoch": 0.15, "grad_norm": 2.2482314109802246, "learning_rate": 0.0002, "loss": 1.3406, "step": 38020 }, { "epoch": 0.15, "grad_norm": 3.6910486221313477, "learning_rate": 0.0002, "loss": 1.6413, "step": 38030 }, { "epoch": 0.15, "grad_norm": 3.380369186401367, "learning_rate": 0.0002, "loss": 1.5821, "step": 38040 }, { "epoch": 0.15, "grad_norm": 2.435077667236328, "learning_rate": 0.0002, "loss": 1.6738, "step": 38050 }, { "epoch": 0.15, "grad_norm": 2.791682720184326, "learning_rate": 0.0002, "loss": 1.4078, "step": 38060 }, { "epoch": 0.15, "grad_norm": 2.237976551055908, "learning_rate": 0.0002, "loss": 1.502, "step": 38070 }, { "epoch": 0.16, "grad_norm": 3.4638309478759766, "learning_rate": 0.0002, "loss": 1.6043, "step": 38080 }, { "epoch": 0.16, "grad_norm": 3.8016233444213867, "learning_rate": 0.0002, "loss": 1.4851, "step": 38090 }, { "epoch": 0.16, "grad_norm": 3.447222948074341, "learning_rate": 0.0002, "loss": 1.6002, "step": 38100 }, { "epoch": 0.16, "grad_norm": 3.4992496967315674, "learning_rate": 0.0002, "loss": 1.5387, "step": 38110 }, { "epoch": 0.16, "grad_norm": 3.265937566757202, "learning_rate": 0.0002, "loss": 1.5051, "step": 38120 }, { "epoch": 0.16, "grad_norm": 4.275895118713379, "learning_rate": 0.0002, "loss": 1.8031, "step": 38130 }, { "epoch": 0.16, "grad_norm": 3.1637990474700928, "learning_rate": 0.0002, "loss": 1.8041, "step": 38140 }, { "epoch": 0.16, "grad_norm": 2.4916129112243652, "learning_rate": 0.0002, "loss": 1.4409, "step": 38150 }, { "epoch": 0.16, "grad_norm": 3.569352388381958, "learning_rate": 0.0002, "loss": 1.5513, "step": 38160 }, { "epoch": 0.16, "grad_norm": 2.5274674892425537, "learning_rate": 0.0002, "loss": 1.4686, "step": 38170 }, { "epoch": 0.16, "grad_norm": 3.9217376708984375, "learning_rate": 0.0002, "loss": 1.5195, "step": 38180 }, { "epoch": 0.16, "grad_norm": 2.681169033050537, "learning_rate": 0.0002, "loss": 1.7803, "step": 38190 }, { "epoch": 0.16, "grad_norm": 1.8774292469024658, "learning_rate": 0.0002, "loss": 1.2859, "step": 38200 }, { "epoch": 0.16, "grad_norm": 1.5029051303863525, "learning_rate": 0.0002, "loss": 1.2867, "step": 38210 }, { "epoch": 0.16, "grad_norm": 3.96860933303833, "learning_rate": 0.0002, "loss": 1.6455, "step": 38220 }, { "epoch": 0.16, "grad_norm": 3.7019853591918945, "learning_rate": 0.0002, "loss": 1.7099, "step": 38230 }, { "epoch": 0.16, "grad_norm": 2.966505527496338, "learning_rate": 0.0002, "loss": 1.8333, "step": 38240 }, { "epoch": 0.16, "grad_norm": 3.1222689151763916, "learning_rate": 0.0002, "loss": 1.4237, "step": 38250 }, { "epoch": 0.16, "grad_norm": 2.7223575115203857, "learning_rate": 0.0002, "loss": 1.4965, "step": 38260 }, { "epoch": 0.16, "grad_norm": 3.961477279663086, "learning_rate": 0.0002, "loss": 1.5086, "step": 38270 }, { "epoch": 0.16, "grad_norm": 2.489168643951416, "learning_rate": 0.0002, "loss": 1.691, "step": 38280 }, { "epoch": 0.16, "grad_norm": 2.9312753677368164, "learning_rate": 0.0002, "loss": 1.634, "step": 38290 }, { "epoch": 0.16, "grad_norm": 2.350367546081543, "learning_rate": 0.0002, "loss": 1.5765, "step": 38300 }, { "epoch": 0.16, "grad_norm": 4.562899589538574, "learning_rate": 0.0002, "loss": 1.626, "step": 38310 }, { "epoch": 0.16, "grad_norm": 3.4741451740264893, "learning_rate": 0.0002, "loss": 1.7154, "step": 38320 }, { "epoch": 0.16, "grad_norm": 2.0234758853912354, "learning_rate": 0.0002, "loss": 1.4858, "step": 38330 }, { "epoch": 0.16, "grad_norm": 2.484187364578247, "learning_rate": 0.0002, "loss": 1.3309, "step": 38340 }, { "epoch": 0.16, "grad_norm": 4.691081523895264, "learning_rate": 0.0002, "loss": 1.6195, "step": 38350 }, { "epoch": 0.16, "grad_norm": 2.4524428844451904, "learning_rate": 0.0002, "loss": 1.3807, "step": 38360 }, { "epoch": 0.16, "grad_norm": 1.9382978677749634, "learning_rate": 0.0002, "loss": 1.4825, "step": 38370 }, { "epoch": 0.16, "grad_norm": 1.9110631942749023, "learning_rate": 0.0002, "loss": 1.4848, "step": 38380 }, { "epoch": 0.16, "grad_norm": 4.419838905334473, "learning_rate": 0.0002, "loss": 1.7235, "step": 38390 }, { "epoch": 0.16, "grad_norm": 1.7767939567565918, "learning_rate": 0.0002, "loss": 1.435, "step": 38400 }, { "epoch": 0.16, "grad_norm": 3.4360580444335938, "learning_rate": 0.0002, "loss": 1.5324, "step": 38410 }, { "epoch": 0.16, "grad_norm": 2.881408452987671, "learning_rate": 0.0002, "loss": 1.9608, "step": 38420 }, { "epoch": 0.16, "grad_norm": 2.545144557952881, "learning_rate": 0.0002, "loss": 1.5632, "step": 38430 }, { "epoch": 0.16, "grad_norm": 3.346930980682373, "learning_rate": 0.0002, "loss": 1.573, "step": 38440 }, { "epoch": 0.16, "grad_norm": 3.2086102962493896, "learning_rate": 0.0002, "loss": 1.606, "step": 38450 }, { "epoch": 0.16, "grad_norm": 2.0786592960357666, "learning_rate": 0.0002, "loss": 1.4898, "step": 38460 }, { "epoch": 0.16, "grad_norm": 2.6182188987731934, "learning_rate": 0.0002, "loss": 1.4145, "step": 38470 }, { "epoch": 0.16, "grad_norm": 4.274809837341309, "learning_rate": 0.0002, "loss": 1.6419, "step": 38480 }, { "epoch": 0.16, "grad_norm": 2.4005789756774902, "learning_rate": 0.0002, "loss": 1.8254, "step": 38490 }, { "epoch": 0.16, "grad_norm": 3.887054204940796, "learning_rate": 0.0002, "loss": 1.622, "step": 38500 }, { "epoch": 0.16, "grad_norm": 2.565633535385132, "learning_rate": 0.0002, "loss": 1.3718, "step": 38510 }, { "epoch": 0.16, "grad_norm": 1.9441145658493042, "learning_rate": 0.0002, "loss": 1.6042, "step": 38520 }, { "epoch": 0.16, "grad_norm": 3.0139405727386475, "learning_rate": 0.0002, "loss": 1.737, "step": 38530 }, { "epoch": 0.16, "grad_norm": 2.738367795944214, "learning_rate": 0.0002, "loss": 1.6408, "step": 38540 }, { "epoch": 0.16, "grad_norm": 4.081211566925049, "learning_rate": 0.0002, "loss": 1.5903, "step": 38550 }, { "epoch": 0.16, "grad_norm": 2.1811115741729736, "learning_rate": 0.0002, "loss": 1.5832, "step": 38560 }, { "epoch": 0.16, "grad_norm": 2.338914155960083, "learning_rate": 0.0002, "loss": 1.7133, "step": 38570 }, { "epoch": 0.16, "grad_norm": 2.4682834148406982, "learning_rate": 0.0002, "loss": 1.4615, "step": 38580 }, { "epoch": 0.16, "grad_norm": 3.3587183952331543, "learning_rate": 0.0002, "loss": 1.4816, "step": 38590 }, { "epoch": 0.16, "grad_norm": 1.8476512432098389, "learning_rate": 0.0002, "loss": 1.487, "step": 38600 }, { "epoch": 0.16, "grad_norm": 2.80871844291687, "learning_rate": 0.0002, "loss": 1.7959, "step": 38610 }, { "epoch": 0.16, "grad_norm": 2.471327781677246, "learning_rate": 0.0002, "loss": 1.76, "step": 38620 }, { "epoch": 0.16, "grad_norm": 2.8580355644226074, "learning_rate": 0.0002, "loss": 1.4886, "step": 38630 }, { "epoch": 0.16, "grad_norm": 1.9744923114776611, "learning_rate": 0.0002, "loss": 1.3955, "step": 38640 }, { "epoch": 0.16, "grad_norm": 2.8480262756347656, "learning_rate": 0.0002, "loss": 1.4827, "step": 38650 }, { "epoch": 0.16, "grad_norm": 3.146252155303955, "learning_rate": 0.0002, "loss": 1.6968, "step": 38660 }, { "epoch": 0.16, "grad_norm": 3.0008411407470703, "learning_rate": 0.0002, "loss": 1.7281, "step": 38670 }, { "epoch": 0.16, "grad_norm": 2.4587106704711914, "learning_rate": 0.0002, "loss": 1.3403, "step": 38680 }, { "epoch": 0.16, "grad_norm": 2.9537720680236816, "learning_rate": 0.0002, "loss": 1.5668, "step": 38690 }, { "epoch": 0.16, "grad_norm": 2.519904851913452, "learning_rate": 0.0002, "loss": 1.825, "step": 38700 }, { "epoch": 0.16, "grad_norm": 3.1586058139801025, "learning_rate": 0.0002, "loss": 1.514, "step": 38710 }, { "epoch": 0.16, "grad_norm": 1.6959601640701294, "learning_rate": 0.0002, "loss": 1.6062, "step": 38720 }, { "epoch": 0.16, "grad_norm": 3.021080732345581, "learning_rate": 0.0002, "loss": 1.5528, "step": 38730 }, { "epoch": 0.16, "grad_norm": 3.1265170574188232, "learning_rate": 0.0002, "loss": 1.6566, "step": 38740 }, { "epoch": 0.16, "grad_norm": 3.418274164199829, "learning_rate": 0.0002, "loss": 1.6792, "step": 38750 }, { "epoch": 0.16, "grad_norm": 3.1890289783477783, "learning_rate": 0.0002, "loss": 1.5666, "step": 38760 }, { "epoch": 0.16, "grad_norm": 4.172685146331787, "learning_rate": 0.0002, "loss": 1.7688, "step": 38770 }, { "epoch": 0.16, "grad_norm": 2.329486846923828, "learning_rate": 0.0002, "loss": 1.3344, "step": 38780 }, { "epoch": 0.16, "grad_norm": 3.248980760574341, "learning_rate": 0.0002, "loss": 1.2501, "step": 38790 }, { "epoch": 0.16, "grad_norm": 1.3634289503097534, "learning_rate": 0.0002, "loss": 1.6279, "step": 38800 }, { "epoch": 0.16, "grad_norm": 2.22385835647583, "learning_rate": 0.0002, "loss": 1.7361, "step": 38810 }, { "epoch": 0.16, "grad_norm": 2.8889613151550293, "learning_rate": 0.0002, "loss": 1.6536, "step": 38820 }, { "epoch": 0.16, "grad_norm": 2.5068891048431396, "learning_rate": 0.0002, "loss": 1.3739, "step": 38830 }, { "epoch": 0.16, "grad_norm": 3.209665060043335, "learning_rate": 0.0002, "loss": 1.461, "step": 38840 }, { "epoch": 0.16, "grad_norm": 2.5475351810455322, "learning_rate": 0.0002, "loss": 1.6816, "step": 38850 }, { "epoch": 0.16, "grad_norm": 3.4791691303253174, "learning_rate": 0.0002, "loss": 1.509, "step": 38860 }, { "epoch": 0.16, "grad_norm": 2.7019617557525635, "learning_rate": 0.0002, "loss": 1.7858, "step": 38870 }, { "epoch": 0.16, "grad_norm": 2.79959774017334, "learning_rate": 0.0002, "loss": 1.6103, "step": 38880 }, { "epoch": 0.16, "grad_norm": 4.862663269042969, "learning_rate": 0.0002, "loss": 1.6398, "step": 38890 }, { "epoch": 0.16, "grad_norm": 3.034726142883301, "learning_rate": 0.0002, "loss": 1.3665, "step": 38900 }, { "epoch": 0.16, "grad_norm": 3.085221290588379, "learning_rate": 0.0002, "loss": 1.5329, "step": 38910 }, { "epoch": 0.16, "grad_norm": 2.9530415534973145, "learning_rate": 0.0002, "loss": 1.5778, "step": 38920 }, { "epoch": 0.16, "grad_norm": 2.499648094177246, "learning_rate": 0.0002, "loss": 1.6192, "step": 38930 }, { "epoch": 0.16, "grad_norm": 2.1374430656433105, "learning_rate": 0.0002, "loss": 1.4787, "step": 38940 }, { "epoch": 0.16, "grad_norm": 5.722001075744629, "learning_rate": 0.0002, "loss": 1.4066, "step": 38950 }, { "epoch": 0.16, "grad_norm": 4.340073585510254, "learning_rate": 0.0002, "loss": 1.573, "step": 38960 }, { "epoch": 0.16, "grad_norm": 3.26379132270813, "learning_rate": 0.0002, "loss": 1.7285, "step": 38970 }, { "epoch": 0.16, "grad_norm": 2.4995357990264893, "learning_rate": 0.0002, "loss": 1.4097, "step": 38980 }, { "epoch": 0.16, "grad_norm": 3.6685147285461426, "learning_rate": 0.0002, "loss": 1.5752, "step": 38990 }, { "epoch": 0.16, "grad_norm": 3.027111768722534, "learning_rate": 0.0002, "loss": 1.2875, "step": 39000 }, { "epoch": 0.16, "grad_norm": 2.542299509048462, "learning_rate": 0.0002, "loss": 1.609, "step": 39010 }, { "epoch": 0.16, "grad_norm": 2.1651365756988525, "learning_rate": 0.0002, "loss": 1.8548, "step": 39020 }, { "epoch": 0.16, "grad_norm": 3.9441661834716797, "learning_rate": 0.0002, "loss": 1.7648, "step": 39030 }, { "epoch": 0.16, "grad_norm": 4.199585437774658, "learning_rate": 0.0002, "loss": 1.5176, "step": 39040 }, { "epoch": 0.16, "grad_norm": 2.555950164794922, "learning_rate": 0.0002, "loss": 1.2158, "step": 39050 }, { "epoch": 0.16, "grad_norm": 3.3984334468841553, "learning_rate": 0.0002, "loss": 1.7074, "step": 39060 }, { "epoch": 0.16, "grad_norm": 2.33975887298584, "learning_rate": 0.0002, "loss": 1.5955, "step": 39070 }, { "epoch": 0.16, "grad_norm": 2.8306682109832764, "learning_rate": 0.0002, "loss": 1.4588, "step": 39080 }, { "epoch": 0.16, "grad_norm": 1.8932045698165894, "learning_rate": 0.0002, "loss": 1.6066, "step": 39090 }, { "epoch": 0.16, "grad_norm": 2.1702311038970947, "learning_rate": 0.0002, "loss": 1.3623, "step": 39100 }, { "epoch": 0.16, "grad_norm": 2.8285927772521973, "learning_rate": 0.0002, "loss": 1.5207, "step": 39110 }, { "epoch": 0.16, "grad_norm": 4.554954528808594, "learning_rate": 0.0002, "loss": 1.4546, "step": 39120 }, { "epoch": 0.16, "grad_norm": 2.3791799545288086, "learning_rate": 0.0002, "loss": 1.4738, "step": 39130 }, { "epoch": 0.16, "grad_norm": 4.132991790771484, "learning_rate": 0.0002, "loss": 1.6067, "step": 39140 }, { "epoch": 0.16, "grad_norm": 3.271505355834961, "learning_rate": 0.0002, "loss": 1.6209, "step": 39150 }, { "epoch": 0.16, "grad_norm": 4.065838813781738, "learning_rate": 0.0002, "loss": 1.6036, "step": 39160 }, { "epoch": 0.16, "grad_norm": 2.02022123336792, "learning_rate": 0.0002, "loss": 1.6558, "step": 39170 }, { "epoch": 0.16, "grad_norm": 2.2536098957061768, "learning_rate": 0.0002, "loss": 1.6465, "step": 39180 }, { "epoch": 0.16, "grad_norm": 3.535383701324463, "learning_rate": 0.0002, "loss": 1.7425, "step": 39190 }, { "epoch": 0.16, "grad_norm": 3.9239771366119385, "learning_rate": 0.0002, "loss": 1.4432, "step": 39200 }, { "epoch": 0.16, "grad_norm": 6.883757591247559, "learning_rate": 0.0002, "loss": 1.5471, "step": 39210 }, { "epoch": 0.16, "grad_norm": 1.4538977146148682, "learning_rate": 0.0002, "loss": 1.5421, "step": 39220 }, { "epoch": 0.16, "grad_norm": 2.29217267036438, "learning_rate": 0.0002, "loss": 1.4501, "step": 39230 }, { "epoch": 0.16, "grad_norm": 2.339585542678833, "learning_rate": 0.0002, "loss": 1.6997, "step": 39240 }, { "epoch": 0.16, "grad_norm": 3.909097909927368, "learning_rate": 0.0002, "loss": 1.768, "step": 39250 }, { "epoch": 0.16, "grad_norm": 2.709637403488159, "learning_rate": 0.0002, "loss": 1.4762, "step": 39260 }, { "epoch": 0.16, "grad_norm": 2.9929113388061523, "learning_rate": 0.0002, "loss": 1.7208, "step": 39270 }, { "epoch": 0.16, "grad_norm": 3.1258833408355713, "learning_rate": 0.0002, "loss": 1.6529, "step": 39280 }, { "epoch": 0.16, "grad_norm": 2.3511033058166504, "learning_rate": 0.0002, "loss": 1.514, "step": 39290 }, { "epoch": 0.16, "grad_norm": 2.3466556072235107, "learning_rate": 0.0002, "loss": 1.4248, "step": 39300 }, { "epoch": 0.16, "grad_norm": 2.2193803787231445, "learning_rate": 0.0002, "loss": 1.824, "step": 39310 }, { "epoch": 0.16, "grad_norm": 4.5760884284973145, "learning_rate": 0.0002, "loss": 1.5277, "step": 39320 }, { "epoch": 0.16, "grad_norm": 2.0677380561828613, "learning_rate": 0.0002, "loss": 1.3361, "step": 39330 }, { "epoch": 0.16, "grad_norm": 2.8655762672424316, "learning_rate": 0.0002, "loss": 1.6883, "step": 39340 }, { "epoch": 0.16, "grad_norm": 4.084624767303467, "learning_rate": 0.0002, "loss": 1.6353, "step": 39350 }, { "epoch": 0.16, "grad_norm": 2.112492799758911, "learning_rate": 0.0002, "loss": 1.6358, "step": 39360 }, { "epoch": 0.16, "grad_norm": 1.5927077531814575, "learning_rate": 0.0002, "loss": 1.6681, "step": 39370 }, { "epoch": 0.16, "grad_norm": 2.103264808654785, "learning_rate": 0.0002, "loss": 1.6435, "step": 39380 }, { "epoch": 0.16, "grad_norm": 7.774765968322754, "learning_rate": 0.0002, "loss": 1.3812, "step": 39390 }, { "epoch": 0.16, "grad_norm": 2.865095615386963, "learning_rate": 0.0002, "loss": 1.5177, "step": 39400 }, { "epoch": 0.16, "grad_norm": 3.4962151050567627, "learning_rate": 0.0002, "loss": 1.4507, "step": 39410 }, { "epoch": 0.16, "grad_norm": 2.421720027923584, "learning_rate": 0.0002, "loss": 1.4687, "step": 39420 }, { "epoch": 0.16, "grad_norm": 2.154650926589966, "learning_rate": 0.0002, "loss": 1.545, "step": 39430 }, { "epoch": 0.16, "grad_norm": 3.9605460166931152, "learning_rate": 0.0002, "loss": 1.5542, "step": 39440 }, { "epoch": 0.16, "grad_norm": 2.6126673221588135, "learning_rate": 0.0002, "loss": 1.7769, "step": 39450 }, { "epoch": 0.16, "grad_norm": 2.5453133583068848, "learning_rate": 0.0002, "loss": 1.6446, "step": 39460 }, { "epoch": 0.16, "grad_norm": 6.977862358093262, "learning_rate": 0.0002, "loss": 1.7503, "step": 39470 }, { "epoch": 0.16, "grad_norm": 3.5777289867401123, "learning_rate": 0.0002, "loss": 1.4944, "step": 39480 }, { "epoch": 0.16, "grad_norm": 2.8089871406555176, "learning_rate": 0.0002, "loss": 1.4215, "step": 39490 }, { "epoch": 0.16, "grad_norm": 2.8012866973876953, "learning_rate": 0.0002, "loss": 1.5948, "step": 39500 }, { "epoch": 0.16, "grad_norm": 2.5457754135131836, "learning_rate": 0.0002, "loss": 1.4392, "step": 39510 }, { "epoch": 0.16, "grad_norm": 1.9424402713775635, "learning_rate": 0.0002, "loss": 1.6355, "step": 39520 }, { "epoch": 0.16, "grad_norm": 0.9802007079124451, "learning_rate": 0.0002, "loss": 1.6407, "step": 39530 }, { "epoch": 0.16, "grad_norm": 4.794707298278809, "learning_rate": 0.0002, "loss": 1.4879, "step": 39540 }, { "epoch": 0.16, "grad_norm": 3.970128059387207, "learning_rate": 0.0002, "loss": 1.5066, "step": 39550 }, { "epoch": 0.16, "grad_norm": 3.0900189876556396, "learning_rate": 0.0002, "loss": 1.5465, "step": 39560 }, { "epoch": 0.16, "grad_norm": 2.5603630542755127, "learning_rate": 0.0002, "loss": 1.6025, "step": 39570 }, { "epoch": 0.16, "grad_norm": 3.3999359607696533, "learning_rate": 0.0002, "loss": 1.4882, "step": 39580 }, { "epoch": 0.16, "grad_norm": 4.272558212280273, "learning_rate": 0.0002, "loss": 1.4436, "step": 39590 }, { "epoch": 0.16, "grad_norm": 2.026829719543457, "learning_rate": 0.0002, "loss": 1.6133, "step": 39600 }, { "epoch": 0.16, "grad_norm": 2.7535974979400635, "learning_rate": 0.0002, "loss": 1.6081, "step": 39610 }, { "epoch": 0.16, "grad_norm": 2.6039249897003174, "learning_rate": 0.0002, "loss": 1.4392, "step": 39620 }, { "epoch": 0.16, "grad_norm": 3.9506912231445312, "learning_rate": 0.0002, "loss": 1.6556, "step": 39630 }, { "epoch": 0.16, "grad_norm": 3.3071818351745605, "learning_rate": 0.0002, "loss": 1.5923, "step": 39640 }, { "epoch": 0.16, "grad_norm": 3.502887010574341, "learning_rate": 0.0002, "loss": 1.4749, "step": 39650 }, { "epoch": 0.16, "grad_norm": 2.503492593765259, "learning_rate": 0.0002, "loss": 1.7013, "step": 39660 }, { "epoch": 0.16, "grad_norm": 1.2986881732940674, "learning_rate": 0.0002, "loss": 1.658, "step": 39670 }, { "epoch": 0.16, "grad_norm": 2.5720467567443848, "learning_rate": 0.0002, "loss": 1.4265, "step": 39680 }, { "epoch": 0.16, "grad_norm": 1.66302490234375, "learning_rate": 0.0002, "loss": 1.578, "step": 39690 }, { "epoch": 0.16, "grad_norm": 2.1461853981018066, "learning_rate": 0.0002, "loss": 1.4397, "step": 39700 }, { "epoch": 0.16, "grad_norm": 2.0754623413085938, "learning_rate": 0.0002, "loss": 1.81, "step": 39710 }, { "epoch": 0.16, "grad_norm": 2.997304916381836, "learning_rate": 0.0002, "loss": 1.469, "step": 39720 }, { "epoch": 0.16, "grad_norm": 2.8275997638702393, "learning_rate": 0.0002, "loss": 1.6864, "step": 39730 }, { "epoch": 0.16, "grad_norm": 3.519868850708008, "learning_rate": 0.0002, "loss": 1.4382, "step": 39740 }, { "epoch": 0.16, "grad_norm": 4.55117654800415, "learning_rate": 0.0002, "loss": 1.6133, "step": 39750 }, { "epoch": 0.16, "grad_norm": 2.735079765319824, "learning_rate": 0.0002, "loss": 1.4842, "step": 39760 }, { "epoch": 0.16, "grad_norm": 3.846191167831421, "learning_rate": 0.0002, "loss": 1.5231, "step": 39770 }, { "epoch": 0.16, "grad_norm": 2.0034120082855225, "learning_rate": 0.0002, "loss": 1.6385, "step": 39780 }, { "epoch": 0.16, "grad_norm": 2.5811686515808105, "learning_rate": 0.0002, "loss": 1.3483, "step": 39790 }, { "epoch": 0.16, "grad_norm": 2.7455694675445557, "learning_rate": 0.0002, "loss": 1.7556, "step": 39800 }, { "epoch": 0.16, "grad_norm": 3.802605152130127, "learning_rate": 0.0002, "loss": 1.6569, "step": 39810 }, { "epoch": 0.16, "grad_norm": 3.1049437522888184, "learning_rate": 0.0002, "loss": 1.4012, "step": 39820 }, { "epoch": 0.16, "grad_norm": 3.4102463722229004, "learning_rate": 0.0002, "loss": 1.7442, "step": 39830 }, { "epoch": 0.16, "grad_norm": 2.0631000995635986, "learning_rate": 0.0002, "loss": 1.7797, "step": 39840 }, { "epoch": 0.16, "grad_norm": 3.2319321632385254, "learning_rate": 0.0002, "loss": 1.7365, "step": 39850 }, { "epoch": 0.16, "grad_norm": 3.2327733039855957, "learning_rate": 0.0002, "loss": 2.0245, "step": 39860 }, { "epoch": 0.16, "grad_norm": 2.6415791511535645, "learning_rate": 0.0002, "loss": 1.5007, "step": 39870 }, { "epoch": 0.16, "grad_norm": 2.976961135864258, "learning_rate": 0.0002, "loss": 1.5377, "step": 39880 }, { "epoch": 0.16, "grad_norm": 3.015125274658203, "learning_rate": 0.0002, "loss": 1.5311, "step": 39890 }, { "epoch": 0.16, "grad_norm": 2.8468551635742188, "learning_rate": 0.0002, "loss": 1.6568, "step": 39900 }, { "epoch": 0.16, "grad_norm": 7.694157600402832, "learning_rate": 0.0002, "loss": 1.672, "step": 39910 }, { "epoch": 0.16, "grad_norm": 3.4519121646881104, "learning_rate": 0.0002, "loss": 1.4467, "step": 39920 }, { "epoch": 0.16, "grad_norm": 2.1328582763671875, "learning_rate": 0.0002, "loss": 1.3875, "step": 39930 }, { "epoch": 0.16, "grad_norm": 3.7663228511810303, "learning_rate": 0.0002, "loss": 1.4768, "step": 39940 }, { "epoch": 0.16, "grad_norm": 2.0307021141052246, "learning_rate": 0.0002, "loss": 1.5984, "step": 39950 }, { "epoch": 0.16, "grad_norm": 2.9079082012176514, "learning_rate": 0.0002, "loss": 1.6011, "step": 39960 }, { "epoch": 0.16, "grad_norm": 3.1948680877685547, "learning_rate": 0.0002, "loss": 1.6803, "step": 39970 }, { "epoch": 0.16, "grad_norm": 2.53198504447937, "learning_rate": 0.0002, "loss": 1.5751, "step": 39980 }, { "epoch": 0.16, "grad_norm": 4.586694717407227, "learning_rate": 0.0002, "loss": 1.4567, "step": 39990 }, { "epoch": 0.16, "grad_norm": 2.56742262840271, "learning_rate": 0.0002, "loss": 1.8789, "step": 40000 }, { "epoch": 0.16, "grad_norm": 6.089840412139893, "learning_rate": 0.0002, "loss": 1.377, "step": 40010 }, { "epoch": 0.16, "grad_norm": 2.977438449859619, "learning_rate": 0.0002, "loss": 1.4827, "step": 40020 }, { "epoch": 0.16, "grad_norm": 4.435213088989258, "learning_rate": 0.0002, "loss": 1.6721, "step": 40030 }, { "epoch": 0.16, "grad_norm": 3.097458839416504, "learning_rate": 0.0002, "loss": 1.6253, "step": 40040 }, { "epoch": 0.16, "grad_norm": 3.6012086868286133, "learning_rate": 0.0002, "loss": 1.6497, "step": 40050 }, { "epoch": 0.16, "grad_norm": 1.9148002862930298, "learning_rate": 0.0002, "loss": 1.6356, "step": 40060 }, { "epoch": 0.16, "grad_norm": 3.1237592697143555, "learning_rate": 0.0002, "loss": 1.7078, "step": 40070 }, { "epoch": 0.16, "grad_norm": 2.887016773223877, "learning_rate": 0.0002, "loss": 1.561, "step": 40080 }, { "epoch": 0.16, "grad_norm": 5.369974136352539, "learning_rate": 0.0002, "loss": 1.3935, "step": 40090 }, { "epoch": 0.16, "grad_norm": 2.96488094329834, "learning_rate": 0.0002, "loss": 1.6446, "step": 40100 }, { "epoch": 0.16, "grad_norm": 3.0545411109924316, "learning_rate": 0.0002, "loss": 1.438, "step": 40110 }, { "epoch": 0.16, "grad_norm": 2.8253636360168457, "learning_rate": 0.0002, "loss": 1.3975, "step": 40120 }, { "epoch": 0.16, "grad_norm": 3.613633871078491, "learning_rate": 0.0002, "loss": 1.4688, "step": 40130 }, { "epoch": 0.16, "grad_norm": 2.154013156890869, "learning_rate": 0.0002, "loss": 1.5137, "step": 40140 }, { "epoch": 0.16, "grad_norm": 2.8488006591796875, "learning_rate": 0.0002, "loss": 1.8054, "step": 40150 }, { "epoch": 0.16, "grad_norm": 2.7778754234313965, "learning_rate": 0.0002, "loss": 1.5315, "step": 40160 }, { "epoch": 0.16, "grad_norm": 2.870457649230957, "learning_rate": 0.0002, "loss": 1.5005, "step": 40170 }, { "epoch": 0.16, "grad_norm": 3.3619589805603027, "learning_rate": 0.0002, "loss": 1.4708, "step": 40180 }, { "epoch": 0.16, "grad_norm": 3.2335543632507324, "learning_rate": 0.0002, "loss": 1.6078, "step": 40190 }, { "epoch": 0.16, "grad_norm": 4.43333625793457, "learning_rate": 0.0002, "loss": 1.6398, "step": 40200 }, { "epoch": 0.16, "grad_norm": 2.0557475090026855, "learning_rate": 0.0002, "loss": 1.7625, "step": 40210 }, { "epoch": 0.16, "grad_norm": 2.6234536170959473, "learning_rate": 0.0002, "loss": 1.7423, "step": 40220 }, { "epoch": 0.16, "grad_norm": 1.7659281492233276, "learning_rate": 0.0002, "loss": 1.6151, "step": 40230 }, { "epoch": 0.16, "grad_norm": 2.6911754608154297, "learning_rate": 0.0002, "loss": 1.4314, "step": 40240 }, { "epoch": 0.16, "grad_norm": 4.182023525238037, "learning_rate": 0.0002, "loss": 1.3768, "step": 40250 }, { "epoch": 0.16, "grad_norm": 2.8130671977996826, "learning_rate": 0.0002, "loss": 1.2966, "step": 40260 }, { "epoch": 0.16, "grad_norm": 1.708680510520935, "learning_rate": 0.0002, "loss": 1.4336, "step": 40270 }, { "epoch": 0.16, "grad_norm": 2.1711676120758057, "learning_rate": 0.0002, "loss": 1.6057, "step": 40280 }, { "epoch": 0.16, "grad_norm": 3.2895097732543945, "learning_rate": 0.0002, "loss": 1.9029, "step": 40290 }, { "epoch": 0.16, "grad_norm": 3.1848926544189453, "learning_rate": 0.0002, "loss": 1.2625, "step": 40300 }, { "epoch": 0.16, "grad_norm": 1.7350904941558838, "learning_rate": 0.0002, "loss": 1.7331, "step": 40310 }, { "epoch": 0.16, "grad_norm": 3.4209797382354736, "learning_rate": 0.0002, "loss": 1.6588, "step": 40320 }, { "epoch": 0.16, "grad_norm": 2.3431203365325928, "learning_rate": 0.0002, "loss": 1.6822, "step": 40330 }, { "epoch": 0.16, "grad_norm": 2.372751474380493, "learning_rate": 0.0002, "loss": 1.4788, "step": 40340 }, { "epoch": 0.16, "grad_norm": 2.1992485523223877, "learning_rate": 0.0002, "loss": 1.5373, "step": 40350 }, { "epoch": 0.16, "grad_norm": 2.0047528743743896, "learning_rate": 0.0002, "loss": 1.3604, "step": 40360 }, { "epoch": 0.16, "grad_norm": 2.243473768234253, "learning_rate": 0.0002, "loss": 1.6446, "step": 40370 }, { "epoch": 0.16, "grad_norm": 2.381666421890259, "learning_rate": 0.0002, "loss": 1.5408, "step": 40380 }, { "epoch": 0.16, "grad_norm": 2.0779531002044678, "learning_rate": 0.0002, "loss": 1.3683, "step": 40390 }, { "epoch": 0.16, "grad_norm": 9.79006576538086, "learning_rate": 0.0002, "loss": 1.4461, "step": 40400 }, { "epoch": 0.16, "grad_norm": 2.445009231567383, "learning_rate": 0.0002, "loss": 1.4764, "step": 40410 }, { "epoch": 0.16, "grad_norm": 3.5027639865875244, "learning_rate": 0.0002, "loss": 1.5414, "step": 40420 }, { "epoch": 0.16, "grad_norm": 1.9882421493530273, "learning_rate": 0.0002, "loss": 1.3637, "step": 40430 }, { "epoch": 0.16, "grad_norm": 5.5572943687438965, "learning_rate": 0.0002, "loss": 1.8647, "step": 40440 }, { "epoch": 0.16, "grad_norm": 2.753171920776367, "learning_rate": 0.0002, "loss": 1.6244, "step": 40450 }, { "epoch": 0.16, "grad_norm": 4.146735668182373, "learning_rate": 0.0002, "loss": 1.804, "step": 40460 }, { "epoch": 0.16, "grad_norm": 3.9100048542022705, "learning_rate": 0.0002, "loss": 1.8779, "step": 40470 }, { "epoch": 0.16, "grad_norm": 2.4123597145080566, "learning_rate": 0.0002, "loss": 1.5399, "step": 40480 }, { "epoch": 0.16, "grad_norm": 3.044565200805664, "learning_rate": 0.0002, "loss": 1.6138, "step": 40490 }, { "epoch": 0.16, "grad_norm": 2.8523471355438232, "learning_rate": 0.0002, "loss": 1.5479, "step": 40500 }, { "epoch": 0.16, "grad_norm": 1.7149115800857544, "learning_rate": 0.0002, "loss": 1.5584, "step": 40510 }, { "epoch": 0.16, "grad_norm": 3.1794886589050293, "learning_rate": 0.0002, "loss": 1.6563, "step": 40520 }, { "epoch": 0.16, "grad_norm": 1.9432072639465332, "learning_rate": 0.0002, "loss": 1.6414, "step": 40530 }, { "epoch": 0.17, "grad_norm": 3.395859956741333, "learning_rate": 0.0002, "loss": 1.6025, "step": 40540 }, { "epoch": 0.17, "grad_norm": 3.92301869392395, "learning_rate": 0.0002, "loss": 1.6042, "step": 40550 }, { "epoch": 0.17, "grad_norm": 3.593259334564209, "learning_rate": 0.0002, "loss": 1.6715, "step": 40560 }, { "epoch": 0.17, "grad_norm": 2.9475789070129395, "learning_rate": 0.0002, "loss": 1.532, "step": 40570 }, { "epoch": 0.17, "grad_norm": 2.1101038455963135, "learning_rate": 0.0002, "loss": 1.4821, "step": 40580 }, { "epoch": 0.17, "grad_norm": 3.1293301582336426, "learning_rate": 0.0002, "loss": 1.4038, "step": 40590 }, { "epoch": 0.17, "grad_norm": 4.420048713684082, "learning_rate": 0.0002, "loss": 1.5985, "step": 40600 }, { "epoch": 0.17, "grad_norm": 2.517798662185669, "learning_rate": 0.0002, "loss": 1.6238, "step": 40610 }, { "epoch": 0.17, "grad_norm": 2.2208688259124756, "learning_rate": 0.0002, "loss": 1.5073, "step": 40620 }, { "epoch": 0.17, "grad_norm": 4.046832084655762, "learning_rate": 0.0002, "loss": 1.7008, "step": 40630 }, { "epoch": 0.17, "grad_norm": 2.3439929485321045, "learning_rate": 0.0002, "loss": 1.4072, "step": 40640 }, { "epoch": 0.17, "grad_norm": 3.4106216430664062, "learning_rate": 0.0002, "loss": 1.4298, "step": 40650 }, { "epoch": 0.17, "grad_norm": 4.044257164001465, "learning_rate": 0.0002, "loss": 1.3267, "step": 40660 }, { "epoch": 0.17, "grad_norm": 1.7117761373519897, "learning_rate": 0.0002, "loss": 1.5319, "step": 40670 }, { "epoch": 0.17, "grad_norm": 1.8422374725341797, "learning_rate": 0.0002, "loss": 1.4877, "step": 40680 }, { "epoch": 0.17, "grad_norm": 3.80833101272583, "learning_rate": 0.0002, "loss": 1.6684, "step": 40690 }, { "epoch": 0.17, "grad_norm": 1.992762565612793, "learning_rate": 0.0002, "loss": 1.6658, "step": 40700 }, { "epoch": 0.17, "grad_norm": 3.546459197998047, "learning_rate": 0.0002, "loss": 1.6373, "step": 40710 }, { "epoch": 0.17, "grad_norm": 4.187590599060059, "learning_rate": 0.0002, "loss": 1.7867, "step": 40720 }, { "epoch": 0.17, "grad_norm": 1.6472679376602173, "learning_rate": 0.0002, "loss": 1.4358, "step": 40730 }, { "epoch": 0.17, "grad_norm": 2.634852409362793, "learning_rate": 0.0002, "loss": 1.4738, "step": 40740 }, { "epoch": 0.17, "grad_norm": 3.6612792015075684, "learning_rate": 0.0002, "loss": 1.5523, "step": 40750 }, { "epoch": 0.17, "grad_norm": 3.4926857948303223, "learning_rate": 0.0002, "loss": 1.6946, "step": 40760 }, { "epoch": 0.17, "grad_norm": 2.643113374710083, "learning_rate": 0.0002, "loss": 1.5585, "step": 40770 }, { "epoch": 0.17, "grad_norm": 3.041288375854492, "learning_rate": 0.0002, "loss": 1.8062, "step": 40780 }, { "epoch": 0.17, "grad_norm": 2.0918784141540527, "learning_rate": 0.0002, "loss": 1.7293, "step": 40790 }, { "epoch": 0.17, "grad_norm": 2.351205348968506, "learning_rate": 0.0002, "loss": 1.5027, "step": 40800 }, { "epoch": 0.17, "grad_norm": 2.9748916625976562, "learning_rate": 0.0002, "loss": 1.3869, "step": 40810 }, { "epoch": 0.17, "grad_norm": 1.5876588821411133, "learning_rate": 0.0002, "loss": 1.1817, "step": 40820 }, { "epoch": 0.17, "grad_norm": 2.504818916320801, "learning_rate": 0.0002, "loss": 1.5387, "step": 40830 }, { "epoch": 0.17, "grad_norm": 2.7252495288848877, "learning_rate": 0.0002, "loss": 1.5974, "step": 40840 }, { "epoch": 0.17, "grad_norm": 3.325172185897827, "learning_rate": 0.0002, "loss": 1.4509, "step": 40850 }, { "epoch": 0.17, "grad_norm": 1.9639402627944946, "learning_rate": 0.0002, "loss": 1.6569, "step": 40860 }, { "epoch": 0.17, "grad_norm": 3.254364252090454, "learning_rate": 0.0002, "loss": 1.8693, "step": 40870 }, { "epoch": 0.17, "grad_norm": 2.6919682025909424, "learning_rate": 0.0002, "loss": 1.4284, "step": 40880 }, { "epoch": 0.17, "grad_norm": 2.3356423377990723, "learning_rate": 0.0002, "loss": 1.6542, "step": 40890 }, { "epoch": 0.17, "grad_norm": 2.0618574619293213, "learning_rate": 0.0002, "loss": 1.7039, "step": 40900 }, { "epoch": 0.17, "grad_norm": 2.24501371383667, "learning_rate": 0.0002, "loss": 1.636, "step": 40910 }, { "epoch": 0.17, "grad_norm": 2.902313232421875, "learning_rate": 0.0002, "loss": 1.4628, "step": 40920 }, { "epoch": 0.17, "grad_norm": 3.1895296573638916, "learning_rate": 0.0002, "loss": 1.5684, "step": 40930 }, { "epoch": 0.17, "grad_norm": 3.2294962406158447, "learning_rate": 0.0002, "loss": 1.4428, "step": 40940 }, { "epoch": 0.17, "grad_norm": 4.009686470031738, "learning_rate": 0.0002, "loss": 1.7693, "step": 40950 }, { "epoch": 0.17, "grad_norm": 1.866342544555664, "learning_rate": 0.0002, "loss": 1.2854, "step": 40960 }, { "epoch": 0.17, "grad_norm": 3.1469292640686035, "learning_rate": 0.0002, "loss": 1.616, "step": 40970 }, { "epoch": 0.17, "grad_norm": 2.289238214492798, "learning_rate": 0.0002, "loss": 1.3559, "step": 40980 }, { "epoch": 0.17, "grad_norm": 2.3457422256469727, "learning_rate": 0.0002, "loss": 1.5285, "step": 40990 }, { "epoch": 0.17, "grad_norm": 3.901928424835205, "learning_rate": 0.0002, "loss": 1.5719, "step": 41000 }, { "epoch": 0.17, "grad_norm": 2.8852272033691406, "learning_rate": 0.0002, "loss": 1.5266, "step": 41010 }, { "epoch": 0.17, "grad_norm": 1.6908317804336548, "learning_rate": 0.0002, "loss": 1.5796, "step": 41020 }, { "epoch": 0.17, "grad_norm": 2.274125576019287, "learning_rate": 0.0002, "loss": 1.5152, "step": 41030 }, { "epoch": 0.17, "grad_norm": 3.2474138736724854, "learning_rate": 0.0002, "loss": 1.7786, "step": 41040 }, { "epoch": 0.17, "grad_norm": 2.4960525035858154, "learning_rate": 0.0002, "loss": 1.5057, "step": 41050 }, { "epoch": 0.17, "grad_norm": 1.7565253973007202, "learning_rate": 0.0002, "loss": 1.8428, "step": 41060 }, { "epoch": 0.17, "grad_norm": 2.209428548812866, "learning_rate": 0.0002, "loss": 1.5221, "step": 41070 }, { "epoch": 0.17, "grad_norm": 3.0927579402923584, "learning_rate": 0.0002, "loss": 1.7067, "step": 41080 }, { "epoch": 0.17, "grad_norm": 3.875483751296997, "learning_rate": 0.0002, "loss": 1.5684, "step": 41090 }, { "epoch": 0.17, "grad_norm": 2.5609805583953857, "learning_rate": 0.0002, "loss": 1.4707, "step": 41100 }, { "epoch": 0.17, "grad_norm": 1.8491140604019165, "learning_rate": 0.0002, "loss": 1.3731, "step": 41110 }, { "epoch": 0.17, "grad_norm": 1.8551974296569824, "learning_rate": 0.0002, "loss": 1.3592, "step": 41120 }, { "epoch": 0.17, "grad_norm": 1.401168942451477, "learning_rate": 0.0002, "loss": 1.7119, "step": 41130 }, { "epoch": 0.17, "grad_norm": 2.2874555587768555, "learning_rate": 0.0002, "loss": 1.4489, "step": 41140 }, { "epoch": 0.17, "grad_norm": 2.6020426750183105, "learning_rate": 0.0002, "loss": 1.6601, "step": 41150 }, { "epoch": 0.17, "grad_norm": 2.5146424770355225, "learning_rate": 0.0002, "loss": 1.4101, "step": 41160 }, { "epoch": 0.17, "grad_norm": 3.715679168701172, "learning_rate": 0.0002, "loss": 1.3732, "step": 41170 }, { "epoch": 0.17, "grad_norm": 3.3582279682159424, "learning_rate": 0.0002, "loss": 1.4824, "step": 41180 }, { "epoch": 0.17, "grad_norm": 2.440763235092163, "learning_rate": 0.0002, "loss": 1.2874, "step": 41190 }, { "epoch": 0.17, "grad_norm": 1.5489022731781006, "learning_rate": 0.0002, "loss": 1.5782, "step": 41200 }, { "epoch": 0.17, "grad_norm": 2.4066524505615234, "learning_rate": 0.0002, "loss": 1.5212, "step": 41210 }, { "epoch": 0.17, "grad_norm": 1.3441545963287354, "learning_rate": 0.0002, "loss": 1.6031, "step": 41220 }, { "epoch": 0.17, "grad_norm": 2.68129301071167, "learning_rate": 0.0002, "loss": 1.618, "step": 41230 }, { "epoch": 0.17, "grad_norm": 2.585071086883545, "learning_rate": 0.0002, "loss": 1.6985, "step": 41240 }, { "epoch": 0.17, "grad_norm": 4.344153881072998, "learning_rate": 0.0002, "loss": 1.4406, "step": 41250 }, { "epoch": 0.17, "grad_norm": 2.3162834644317627, "learning_rate": 0.0002, "loss": 1.3942, "step": 41260 }, { "epoch": 0.17, "grad_norm": 1.3316384553909302, "learning_rate": 0.0002, "loss": 1.5928, "step": 41270 }, { "epoch": 0.17, "grad_norm": 1.0724666118621826, "learning_rate": 0.0002, "loss": 1.6738, "step": 41280 }, { "epoch": 0.17, "grad_norm": 2.488236665725708, "learning_rate": 0.0002, "loss": 1.6086, "step": 41290 }, { "epoch": 0.17, "grad_norm": 1.808204174041748, "learning_rate": 0.0002, "loss": 1.846, "step": 41300 }, { "epoch": 0.17, "grad_norm": 2.1522812843322754, "learning_rate": 0.0002, "loss": 1.3305, "step": 41310 }, { "epoch": 0.17, "grad_norm": 2.919128656387329, "learning_rate": 0.0002, "loss": 1.3639, "step": 41320 }, { "epoch": 0.17, "grad_norm": 2.415302276611328, "learning_rate": 0.0002, "loss": 1.3668, "step": 41330 }, { "epoch": 0.17, "grad_norm": 3.016417980194092, "learning_rate": 0.0002, "loss": 1.4668, "step": 41340 }, { "epoch": 0.17, "grad_norm": 2.2272982597351074, "learning_rate": 0.0002, "loss": 1.7159, "step": 41350 }, { "epoch": 0.17, "grad_norm": 2.577277183532715, "learning_rate": 0.0002, "loss": 1.3295, "step": 41360 }, { "epoch": 0.17, "grad_norm": 2.0130679607391357, "learning_rate": 0.0002, "loss": 1.6205, "step": 41370 }, { "epoch": 0.17, "grad_norm": 2.652550220489502, "learning_rate": 0.0002, "loss": 1.5175, "step": 41380 }, { "epoch": 0.17, "grad_norm": 2.345781087875366, "learning_rate": 0.0002, "loss": 1.5792, "step": 41390 }, { "epoch": 0.17, "grad_norm": 1.897637128829956, "learning_rate": 0.0002, "loss": 1.3521, "step": 41400 }, { "epoch": 0.17, "grad_norm": 2.0568201541900635, "learning_rate": 0.0002, "loss": 1.5488, "step": 41410 }, { "epoch": 0.17, "grad_norm": 3.1303298473358154, "learning_rate": 0.0002, "loss": 1.5163, "step": 41420 }, { "epoch": 0.17, "grad_norm": 2.2978811264038086, "learning_rate": 0.0002, "loss": 1.6155, "step": 41430 }, { "epoch": 0.17, "grad_norm": 2.8543035984039307, "learning_rate": 0.0002, "loss": 1.6042, "step": 41440 }, { "epoch": 0.17, "grad_norm": 2.5981264114379883, "learning_rate": 0.0002, "loss": 1.6129, "step": 41450 }, { "epoch": 0.17, "grad_norm": 2.4751293659210205, "learning_rate": 0.0002, "loss": 1.5831, "step": 41460 }, { "epoch": 0.17, "grad_norm": 1.662885069847107, "learning_rate": 0.0002, "loss": 1.4116, "step": 41470 }, { "epoch": 0.17, "grad_norm": 3.6712896823883057, "learning_rate": 0.0002, "loss": 1.8061, "step": 41480 }, { "epoch": 0.17, "grad_norm": 2.6085739135742188, "learning_rate": 0.0002, "loss": 1.6837, "step": 41490 }, { "epoch": 0.17, "grad_norm": 3.9722001552581787, "learning_rate": 0.0002, "loss": 1.8716, "step": 41500 }, { "epoch": 0.17, "grad_norm": 3.2030248641967773, "learning_rate": 0.0002, "loss": 1.6058, "step": 41510 }, { "epoch": 0.17, "grad_norm": 2.9342315196990967, "learning_rate": 0.0002, "loss": 1.5972, "step": 41520 }, { "epoch": 0.17, "grad_norm": 3.1542587280273438, "learning_rate": 0.0002, "loss": 1.5086, "step": 41530 }, { "epoch": 0.17, "grad_norm": 2.207340717315674, "learning_rate": 0.0002, "loss": 1.5354, "step": 41540 }, { "epoch": 0.17, "grad_norm": 3.9323880672454834, "learning_rate": 0.0002, "loss": 1.7621, "step": 41550 }, { "epoch": 0.17, "grad_norm": 2.092620849609375, "learning_rate": 0.0002, "loss": 1.4766, "step": 41560 }, { "epoch": 0.17, "grad_norm": 3.2257184982299805, "learning_rate": 0.0002, "loss": 1.543, "step": 41570 }, { "epoch": 0.17, "grad_norm": 4.165554046630859, "learning_rate": 0.0002, "loss": 1.6119, "step": 41580 }, { "epoch": 0.17, "grad_norm": 2.719071388244629, "learning_rate": 0.0002, "loss": 1.6899, "step": 41590 }, { "epoch": 0.17, "grad_norm": 2.2996931076049805, "learning_rate": 0.0002, "loss": 1.4978, "step": 41600 }, { "epoch": 0.17, "grad_norm": 2.108773946762085, "learning_rate": 0.0002, "loss": 1.3552, "step": 41610 }, { "epoch": 0.17, "grad_norm": 2.4869384765625, "learning_rate": 0.0002, "loss": 1.5753, "step": 41620 }, { "epoch": 0.17, "grad_norm": 3.2753257751464844, "learning_rate": 0.0002, "loss": 1.66, "step": 41630 }, { "epoch": 0.17, "grad_norm": 4.940680027008057, "learning_rate": 0.0002, "loss": 1.4313, "step": 41640 }, { "epoch": 0.17, "grad_norm": 3.339995861053467, "learning_rate": 0.0002, "loss": 1.4023, "step": 41650 }, { "epoch": 0.17, "grad_norm": 3.4359352588653564, "learning_rate": 0.0002, "loss": 1.3807, "step": 41660 }, { "epoch": 0.17, "grad_norm": 2.9738872051239014, "learning_rate": 0.0002, "loss": 1.5741, "step": 41670 }, { "epoch": 0.17, "grad_norm": 2.2843992710113525, "learning_rate": 0.0002, "loss": 1.6904, "step": 41680 }, { "epoch": 0.17, "grad_norm": 2.866572380065918, "learning_rate": 0.0002, "loss": 1.8485, "step": 41690 }, { "epoch": 0.17, "grad_norm": 3.8153481483459473, "learning_rate": 0.0002, "loss": 1.5054, "step": 41700 }, { "epoch": 0.17, "grad_norm": 2.708512783050537, "learning_rate": 0.0002, "loss": 1.4962, "step": 41710 }, { "epoch": 0.17, "grad_norm": 1.8778425455093384, "learning_rate": 0.0002, "loss": 1.2545, "step": 41720 }, { "epoch": 0.17, "grad_norm": 2.4169836044311523, "learning_rate": 0.0002, "loss": 1.6498, "step": 41730 }, { "epoch": 0.17, "grad_norm": 1.9886388778686523, "learning_rate": 0.0002, "loss": 1.5745, "step": 41740 }, { "epoch": 0.17, "grad_norm": 2.7988479137420654, "learning_rate": 0.0002, "loss": 1.6749, "step": 41750 }, { "epoch": 0.17, "grad_norm": 2.9573707580566406, "learning_rate": 0.0002, "loss": 1.3829, "step": 41760 }, { "epoch": 0.17, "grad_norm": 2.3747992515563965, "learning_rate": 0.0002, "loss": 1.2568, "step": 41770 }, { "epoch": 0.17, "grad_norm": 2.960240364074707, "learning_rate": 0.0002, "loss": 1.534, "step": 41780 }, { "epoch": 0.17, "grad_norm": 2.135690689086914, "learning_rate": 0.0002, "loss": 1.6055, "step": 41790 }, { "epoch": 0.17, "grad_norm": 6.3157501220703125, "learning_rate": 0.0002, "loss": 1.4938, "step": 41800 }, { "epoch": 0.17, "grad_norm": 2.159799814224243, "learning_rate": 0.0002, "loss": 1.6245, "step": 41810 }, { "epoch": 0.17, "grad_norm": 2.5078256130218506, "learning_rate": 0.0002, "loss": 1.4319, "step": 41820 }, { "epoch": 0.17, "grad_norm": 2.7695577144622803, "learning_rate": 0.0002, "loss": 1.5225, "step": 41830 }, { "epoch": 0.17, "grad_norm": 4.82796573638916, "learning_rate": 0.0002, "loss": 1.315, "step": 41840 }, { "epoch": 0.17, "grad_norm": 3.334810733795166, "learning_rate": 0.0002, "loss": 1.6129, "step": 41850 }, { "epoch": 0.17, "grad_norm": 2.2234201431274414, "learning_rate": 0.0002, "loss": 1.4364, "step": 41860 }, { "epoch": 0.17, "grad_norm": 3.373293399810791, "learning_rate": 0.0002, "loss": 1.5427, "step": 41870 }, { "epoch": 0.17, "grad_norm": 2.8439502716064453, "learning_rate": 0.0002, "loss": 1.6688, "step": 41880 }, { "epoch": 0.17, "grad_norm": 2.3294358253479004, "learning_rate": 0.0002, "loss": 1.699, "step": 41890 }, { "epoch": 0.17, "grad_norm": 3.2095744609832764, "learning_rate": 0.0002, "loss": 1.617, "step": 41900 }, { "epoch": 0.17, "grad_norm": 3.704547166824341, "learning_rate": 0.0002, "loss": 1.6278, "step": 41910 }, { "epoch": 0.17, "grad_norm": 3.1305129528045654, "learning_rate": 0.0002, "loss": 1.6898, "step": 41920 }, { "epoch": 0.17, "grad_norm": 3.055530309677124, "learning_rate": 0.0002, "loss": 1.4901, "step": 41930 }, { "epoch": 0.17, "grad_norm": 2.891995429992676, "learning_rate": 0.0002, "loss": 1.4946, "step": 41940 }, { "epoch": 0.17, "grad_norm": 2.0422091484069824, "learning_rate": 0.0002, "loss": 1.5264, "step": 41950 }, { "epoch": 0.17, "grad_norm": 4.6537089347839355, "learning_rate": 0.0002, "loss": 1.6488, "step": 41960 }, { "epoch": 0.17, "grad_norm": 2.6944756507873535, "learning_rate": 0.0002, "loss": 1.5707, "step": 41970 }, { "epoch": 0.17, "grad_norm": 1.66249418258667, "learning_rate": 0.0002, "loss": 1.5432, "step": 41980 }, { "epoch": 0.17, "grad_norm": 2.8501391410827637, "learning_rate": 0.0002, "loss": 1.6904, "step": 41990 }, { "epoch": 0.17, "grad_norm": 3.3162002563476562, "learning_rate": 0.0002, "loss": 1.6993, "step": 42000 }, { "epoch": 0.17, "grad_norm": 2.9723317623138428, "learning_rate": 0.0002, "loss": 1.575, "step": 42010 }, { "epoch": 0.17, "grad_norm": 5.234152317047119, "learning_rate": 0.0002, "loss": 1.5326, "step": 42020 }, { "epoch": 0.17, "grad_norm": 2.7015864849090576, "learning_rate": 0.0002, "loss": 1.4536, "step": 42030 }, { "epoch": 0.17, "grad_norm": 2.4864389896392822, "learning_rate": 0.0002, "loss": 1.6788, "step": 42040 }, { "epoch": 0.17, "grad_norm": 3.1117875576019287, "learning_rate": 0.0002, "loss": 1.492, "step": 42050 }, { "epoch": 0.17, "grad_norm": 5.6133952140808105, "learning_rate": 0.0002, "loss": 1.5831, "step": 42060 }, { "epoch": 0.17, "grad_norm": 3.4947595596313477, "learning_rate": 0.0002, "loss": 1.6339, "step": 42070 }, { "epoch": 0.17, "grad_norm": 5.331313610076904, "learning_rate": 0.0002, "loss": 1.6696, "step": 42080 }, { "epoch": 0.17, "grad_norm": 4.361379623413086, "learning_rate": 0.0002, "loss": 1.5906, "step": 42090 }, { "epoch": 0.17, "grad_norm": 3.0664103031158447, "learning_rate": 0.0002, "loss": 1.5418, "step": 42100 }, { "epoch": 0.17, "grad_norm": 2.163003444671631, "learning_rate": 0.0002, "loss": 1.5713, "step": 42110 }, { "epoch": 0.17, "grad_norm": 2.272439956665039, "learning_rate": 0.0002, "loss": 1.2682, "step": 42120 }, { "epoch": 0.17, "grad_norm": 2.82423734664917, "learning_rate": 0.0002, "loss": 1.3104, "step": 42130 }, { "epoch": 0.17, "grad_norm": 4.497176647186279, "learning_rate": 0.0002, "loss": 1.7104, "step": 42140 }, { "epoch": 0.17, "grad_norm": 3.8618431091308594, "learning_rate": 0.0002, "loss": 1.5626, "step": 42150 }, { "epoch": 0.17, "grad_norm": 2.3316218852996826, "learning_rate": 0.0002, "loss": 1.5618, "step": 42160 }, { "epoch": 0.17, "grad_norm": 3.9534194469451904, "learning_rate": 0.0002, "loss": 1.5185, "step": 42170 }, { "epoch": 0.17, "grad_norm": 2.8289434909820557, "learning_rate": 0.0002, "loss": 1.4, "step": 42180 }, { "epoch": 0.17, "grad_norm": 9.602402687072754, "learning_rate": 0.0002, "loss": 1.5498, "step": 42190 }, { "epoch": 0.17, "grad_norm": 1.7780072689056396, "learning_rate": 0.0002, "loss": 1.5704, "step": 42200 }, { "epoch": 0.17, "grad_norm": 2.8051555156707764, "learning_rate": 0.0002, "loss": 1.6707, "step": 42210 }, { "epoch": 0.17, "grad_norm": 2.352762460708618, "learning_rate": 0.0002, "loss": 1.8052, "step": 42220 }, { "epoch": 0.17, "grad_norm": 3.7879836559295654, "learning_rate": 0.0002, "loss": 1.6487, "step": 42230 }, { "epoch": 0.17, "grad_norm": 2.373985767364502, "learning_rate": 0.0002, "loss": 1.5142, "step": 42240 }, { "epoch": 0.17, "grad_norm": 2.0507426261901855, "learning_rate": 0.0002, "loss": 1.5565, "step": 42250 }, { "epoch": 0.17, "grad_norm": 4.38948392868042, "learning_rate": 0.0002, "loss": 1.5889, "step": 42260 }, { "epoch": 0.17, "grad_norm": 4.053574562072754, "learning_rate": 0.0002, "loss": 1.4636, "step": 42270 }, { "epoch": 0.17, "grad_norm": 4.109431266784668, "learning_rate": 0.0002, "loss": 1.4376, "step": 42280 }, { "epoch": 0.17, "grad_norm": 3.397566795349121, "learning_rate": 0.0002, "loss": 1.4078, "step": 42290 }, { "epoch": 0.17, "grad_norm": 2.707608938217163, "learning_rate": 0.0002, "loss": 1.696, "step": 42300 }, { "epoch": 0.17, "grad_norm": 2.455209970474243, "learning_rate": 0.0002, "loss": 1.6054, "step": 42310 }, { "epoch": 0.17, "grad_norm": 2.9927849769592285, "learning_rate": 0.0002, "loss": 1.4061, "step": 42320 }, { "epoch": 0.17, "grad_norm": 2.243364095687866, "learning_rate": 0.0002, "loss": 1.4122, "step": 42330 }, { "epoch": 0.17, "grad_norm": 3.527663230895996, "learning_rate": 0.0002, "loss": 1.4759, "step": 42340 }, { "epoch": 0.17, "grad_norm": 2.006516695022583, "learning_rate": 0.0002, "loss": 1.6344, "step": 42350 }, { "epoch": 0.17, "grad_norm": 1.83455491065979, "learning_rate": 0.0002, "loss": 1.5851, "step": 42360 }, { "epoch": 0.17, "grad_norm": 1.4688029289245605, "learning_rate": 0.0002, "loss": 1.4711, "step": 42370 }, { "epoch": 0.17, "grad_norm": 3.709688901901245, "learning_rate": 0.0002, "loss": 1.5079, "step": 42380 }, { "epoch": 0.17, "grad_norm": 1.7738317251205444, "learning_rate": 0.0002, "loss": 1.639, "step": 42390 }, { "epoch": 0.17, "grad_norm": 2.882310628890991, "learning_rate": 0.0002, "loss": 1.4744, "step": 42400 }, { "epoch": 0.17, "grad_norm": 6.346664905548096, "learning_rate": 0.0002, "loss": 1.4607, "step": 42410 }, { "epoch": 0.17, "grad_norm": 2.453026533126831, "learning_rate": 0.0002, "loss": 1.4289, "step": 42420 }, { "epoch": 0.17, "grad_norm": 2.0381860733032227, "learning_rate": 0.0002, "loss": 1.4814, "step": 42430 }, { "epoch": 0.17, "grad_norm": 3.327672243118286, "learning_rate": 0.0002, "loss": 1.4005, "step": 42440 }, { "epoch": 0.17, "grad_norm": 3.3737359046936035, "learning_rate": 0.0002, "loss": 1.665, "step": 42450 }, { "epoch": 0.17, "grad_norm": 2.2758219242095947, "learning_rate": 0.0002, "loss": 1.4596, "step": 42460 }, { "epoch": 0.17, "grad_norm": 2.5107614994049072, "learning_rate": 0.0002, "loss": 1.4647, "step": 42470 }, { "epoch": 0.17, "grad_norm": 3.7881484031677246, "learning_rate": 0.0002, "loss": 1.671, "step": 42480 }, { "epoch": 0.17, "grad_norm": 2.9683685302734375, "learning_rate": 0.0002, "loss": 1.2791, "step": 42490 }, { "epoch": 0.17, "grad_norm": 3.722109317779541, "learning_rate": 0.0002, "loss": 1.3388, "step": 42500 }, { "epoch": 0.17, "grad_norm": 2.10019850730896, "learning_rate": 0.0002, "loss": 1.5711, "step": 42510 }, { "epoch": 0.17, "grad_norm": 1.7993123531341553, "learning_rate": 0.0002, "loss": 1.2572, "step": 42520 }, { "epoch": 0.17, "grad_norm": 3.591733455657959, "learning_rate": 0.0002, "loss": 1.8406, "step": 42530 }, { "epoch": 0.17, "grad_norm": 1.7107187509536743, "learning_rate": 0.0002, "loss": 1.489, "step": 42540 }, { "epoch": 0.17, "grad_norm": 4.10110330581665, "learning_rate": 0.0002, "loss": 1.4716, "step": 42550 }, { "epoch": 0.17, "grad_norm": 3.1208837032318115, "learning_rate": 0.0002, "loss": 1.6439, "step": 42560 }, { "epoch": 0.17, "grad_norm": 2.0816900730133057, "learning_rate": 0.0002, "loss": 1.4359, "step": 42570 }, { "epoch": 0.17, "grad_norm": 2.859804630279541, "learning_rate": 0.0002, "loss": 1.5271, "step": 42580 }, { "epoch": 0.17, "grad_norm": 2.7422125339508057, "learning_rate": 0.0002, "loss": 1.3885, "step": 42590 }, { "epoch": 0.17, "grad_norm": 2.1806840896606445, "learning_rate": 0.0002, "loss": 1.7266, "step": 42600 }, { "epoch": 0.17, "grad_norm": 2.8978500366210938, "learning_rate": 0.0002, "loss": 1.4966, "step": 42610 }, { "epoch": 0.17, "grad_norm": 2.7635982036590576, "learning_rate": 0.0002, "loss": 1.672, "step": 42620 }, { "epoch": 0.17, "grad_norm": 3.143322229385376, "learning_rate": 0.0002, "loss": 1.5599, "step": 42630 }, { "epoch": 0.17, "grad_norm": 3.5590929985046387, "learning_rate": 0.0002, "loss": 1.5744, "step": 42640 }, { "epoch": 0.17, "grad_norm": 3.6319072246551514, "learning_rate": 0.0002, "loss": 1.7383, "step": 42650 }, { "epoch": 0.17, "grad_norm": 2.7668354511260986, "learning_rate": 0.0002, "loss": 1.7364, "step": 42660 }, { "epoch": 0.17, "grad_norm": 2.121734380722046, "learning_rate": 0.0002, "loss": 1.4998, "step": 42670 }, { "epoch": 0.17, "grad_norm": 2.2384042739868164, "learning_rate": 0.0002, "loss": 1.5757, "step": 42680 }, { "epoch": 0.17, "grad_norm": 3.1665093898773193, "learning_rate": 0.0002, "loss": 1.614, "step": 42690 }, { "epoch": 0.17, "grad_norm": 2.150170087814331, "learning_rate": 0.0002, "loss": 1.3978, "step": 42700 }, { "epoch": 0.17, "grad_norm": 1.5128740072250366, "learning_rate": 0.0002, "loss": 1.5594, "step": 42710 }, { "epoch": 0.17, "grad_norm": 2.6707053184509277, "learning_rate": 0.0002, "loss": 1.3373, "step": 42720 }, { "epoch": 0.17, "grad_norm": 3.622788190841675, "learning_rate": 0.0002, "loss": 1.4602, "step": 42730 }, { "epoch": 0.17, "grad_norm": 2.651399850845337, "learning_rate": 0.0002, "loss": 1.5984, "step": 42740 }, { "epoch": 0.17, "grad_norm": 3.35286021232605, "learning_rate": 0.0002, "loss": 1.5241, "step": 42750 }, { "epoch": 0.17, "grad_norm": 1.5918540954589844, "learning_rate": 0.0002, "loss": 1.4404, "step": 42760 }, { "epoch": 0.17, "grad_norm": 2.438708782196045, "learning_rate": 0.0002, "loss": 1.7583, "step": 42770 }, { "epoch": 0.17, "grad_norm": 3.306818962097168, "learning_rate": 0.0002, "loss": 1.4903, "step": 42780 }, { "epoch": 0.17, "grad_norm": 2.687063694000244, "learning_rate": 0.0002, "loss": 1.6732, "step": 42790 }, { "epoch": 0.17, "grad_norm": 4.612335681915283, "learning_rate": 0.0002, "loss": 1.7293, "step": 42800 }, { "epoch": 0.17, "grad_norm": 2.832148790359497, "learning_rate": 0.0002, "loss": 1.7262, "step": 42810 }, { "epoch": 0.17, "grad_norm": 3.486182928085327, "learning_rate": 0.0002, "loss": 1.5821, "step": 42820 }, { "epoch": 0.17, "grad_norm": 2.8346877098083496, "learning_rate": 0.0002, "loss": 1.2876, "step": 42830 }, { "epoch": 0.17, "grad_norm": 2.533456563949585, "learning_rate": 0.0002, "loss": 1.6764, "step": 42840 }, { "epoch": 0.17, "grad_norm": 3.3649680614471436, "learning_rate": 0.0002, "loss": 1.5784, "step": 42850 }, { "epoch": 0.17, "grad_norm": 3.4163899421691895, "learning_rate": 0.0002, "loss": 1.5205, "step": 42860 }, { "epoch": 0.17, "grad_norm": 4.1504950523376465, "learning_rate": 0.0002, "loss": 1.6061, "step": 42870 }, { "epoch": 0.17, "grad_norm": 3.0273056030273438, "learning_rate": 0.0002, "loss": 1.5634, "step": 42880 }, { "epoch": 0.17, "grad_norm": 2.664095878601074, "learning_rate": 0.0002, "loss": 1.5264, "step": 42890 }, { "epoch": 0.17, "grad_norm": 2.324653148651123, "learning_rate": 0.0002, "loss": 1.6542, "step": 42900 }, { "epoch": 0.17, "grad_norm": 3.3757164478302, "learning_rate": 0.0002, "loss": 1.4636, "step": 42910 }, { "epoch": 0.17, "grad_norm": 2.391211986541748, "learning_rate": 0.0002, "loss": 1.6182, "step": 42920 }, { "epoch": 0.17, "grad_norm": 7.649874210357666, "learning_rate": 0.0002, "loss": 1.5503, "step": 42930 }, { "epoch": 0.17, "grad_norm": 4.440985679626465, "learning_rate": 0.0002, "loss": 1.506, "step": 42940 }, { "epoch": 0.17, "grad_norm": 1.5442595481872559, "learning_rate": 0.0002, "loss": 1.3582, "step": 42950 }, { "epoch": 0.17, "grad_norm": 2.0873055458068848, "learning_rate": 0.0002, "loss": 1.5756, "step": 42960 }, { "epoch": 0.17, "grad_norm": 1.924116611480713, "learning_rate": 0.0002, "loss": 1.6619, "step": 42970 }, { "epoch": 0.17, "grad_norm": 2.8762266635894775, "learning_rate": 0.0002, "loss": 1.7251, "step": 42980 }, { "epoch": 0.18, "grad_norm": 2.273400068283081, "learning_rate": 0.0002, "loss": 1.6014, "step": 42990 }, { "epoch": 0.18, "grad_norm": 1.7591602802276611, "learning_rate": 0.0002, "loss": 1.4875, "step": 43000 }, { "epoch": 0.18, "grad_norm": 1.9445120096206665, "learning_rate": 0.0002, "loss": 1.5432, "step": 43010 }, { "epoch": 0.18, "grad_norm": 3.448334217071533, "learning_rate": 0.0002, "loss": 1.4969, "step": 43020 }, { "epoch": 0.18, "grad_norm": 2.906454563140869, "learning_rate": 0.0002, "loss": 1.3962, "step": 43030 }, { "epoch": 0.18, "grad_norm": 3.061549663543701, "learning_rate": 0.0002, "loss": 1.6409, "step": 43040 }, { "epoch": 0.18, "grad_norm": 2.087172746658325, "learning_rate": 0.0002, "loss": 1.3855, "step": 43050 }, { "epoch": 0.18, "grad_norm": 4.983830451965332, "learning_rate": 0.0002, "loss": 1.6271, "step": 43060 }, { "epoch": 0.18, "grad_norm": 4.528090476989746, "learning_rate": 0.0002, "loss": 1.6498, "step": 43070 }, { "epoch": 0.18, "grad_norm": 1.871973991394043, "learning_rate": 0.0002, "loss": 1.7162, "step": 43080 }, { "epoch": 0.18, "grad_norm": 3.8157758712768555, "learning_rate": 0.0002, "loss": 1.5218, "step": 43090 }, { "epoch": 0.18, "grad_norm": 2.736435651779175, "learning_rate": 0.0002, "loss": 1.6453, "step": 43100 }, { "epoch": 0.18, "grad_norm": 1.6567797660827637, "learning_rate": 0.0002, "loss": 1.4669, "step": 43110 }, { "epoch": 0.18, "grad_norm": 2.4767346382141113, "learning_rate": 0.0002, "loss": 1.7092, "step": 43120 }, { "epoch": 0.18, "grad_norm": 2.764172077178955, "learning_rate": 0.0002, "loss": 1.6856, "step": 43130 }, { "epoch": 0.18, "grad_norm": 3.4155685901641846, "learning_rate": 0.0002, "loss": 1.553, "step": 43140 }, { "epoch": 0.18, "grad_norm": 2.268578052520752, "learning_rate": 0.0002, "loss": 1.515, "step": 43150 }, { "epoch": 0.18, "grad_norm": 3.1362032890319824, "learning_rate": 0.0002, "loss": 1.5752, "step": 43160 }, { "epoch": 0.18, "grad_norm": 3.770695447921753, "learning_rate": 0.0002, "loss": 1.7673, "step": 43170 }, { "epoch": 0.18, "grad_norm": 6.874651908874512, "learning_rate": 0.0002, "loss": 1.4058, "step": 43180 }, { "epoch": 0.18, "grad_norm": 3.682865858078003, "learning_rate": 0.0002, "loss": 1.8289, "step": 43190 }, { "epoch": 0.18, "grad_norm": 4.247284889221191, "learning_rate": 0.0002, "loss": 1.6585, "step": 43200 }, { "epoch": 0.18, "grad_norm": 3.9121806621551514, "learning_rate": 0.0002, "loss": 1.664, "step": 43210 }, { "epoch": 0.18, "grad_norm": 4.957936763763428, "learning_rate": 0.0002, "loss": 1.3948, "step": 43220 }, { "epoch": 0.18, "grad_norm": 2.9027485847473145, "learning_rate": 0.0002, "loss": 1.4176, "step": 43230 }, { "epoch": 0.18, "grad_norm": 4.387202739715576, "learning_rate": 0.0002, "loss": 1.5516, "step": 43240 }, { "epoch": 0.18, "grad_norm": 2.1255171298980713, "learning_rate": 0.0002, "loss": 1.6411, "step": 43250 }, { "epoch": 0.18, "grad_norm": 3.504746437072754, "learning_rate": 0.0002, "loss": 1.6009, "step": 43260 }, { "epoch": 0.18, "grad_norm": 3.29946231842041, "learning_rate": 0.0002, "loss": 1.7553, "step": 43270 }, { "epoch": 0.18, "grad_norm": 3.3235316276550293, "learning_rate": 0.0002, "loss": 1.6984, "step": 43280 }, { "epoch": 0.18, "grad_norm": 4.230136394500732, "learning_rate": 0.0002, "loss": 1.7357, "step": 43290 }, { "epoch": 0.18, "grad_norm": 8.452280044555664, "learning_rate": 0.0002, "loss": 1.4582, "step": 43300 }, { "epoch": 0.18, "grad_norm": 1.8447424173355103, "learning_rate": 0.0002, "loss": 1.5765, "step": 43310 }, { "epoch": 0.18, "grad_norm": 2.1506919860839844, "learning_rate": 0.0002, "loss": 1.5583, "step": 43320 }, { "epoch": 0.18, "grad_norm": 2.3500852584838867, "learning_rate": 0.0002, "loss": 1.4928, "step": 43330 }, { "epoch": 0.18, "grad_norm": 3.683828830718994, "learning_rate": 0.0002, "loss": 1.5224, "step": 43340 }, { "epoch": 0.18, "grad_norm": 2.604280710220337, "learning_rate": 0.0002, "loss": 1.354, "step": 43350 }, { "epoch": 0.18, "grad_norm": 1.528504490852356, "learning_rate": 0.0002, "loss": 1.7979, "step": 43360 }, { "epoch": 0.18, "grad_norm": 3.4584665298461914, "learning_rate": 0.0002, "loss": 1.5743, "step": 43370 }, { "epoch": 0.18, "grad_norm": 3.0755743980407715, "learning_rate": 0.0002, "loss": 1.5533, "step": 43380 }, { "epoch": 0.18, "grad_norm": 4.585995197296143, "learning_rate": 0.0002, "loss": 1.5951, "step": 43390 }, { "epoch": 0.18, "grad_norm": 2.3042335510253906, "learning_rate": 0.0002, "loss": 1.7895, "step": 43400 }, { "epoch": 0.18, "grad_norm": 5.043673992156982, "learning_rate": 0.0002, "loss": 1.8775, "step": 43410 }, { "epoch": 0.18, "grad_norm": 1.6720231771469116, "learning_rate": 0.0002, "loss": 1.644, "step": 43420 }, { "epoch": 0.18, "grad_norm": 2.8763785362243652, "learning_rate": 0.0002, "loss": 1.581, "step": 43430 }, { "epoch": 0.18, "grad_norm": 2.116391181945801, "learning_rate": 0.0002, "loss": 1.496, "step": 43440 }, { "epoch": 0.18, "grad_norm": 2.341613531112671, "learning_rate": 0.0002, "loss": 1.5882, "step": 43450 }, { "epoch": 0.18, "grad_norm": 2.678570032119751, "learning_rate": 0.0002, "loss": 1.6951, "step": 43460 }, { "epoch": 0.18, "grad_norm": 4.555282115936279, "learning_rate": 0.0002, "loss": 1.3023, "step": 43470 }, { "epoch": 0.18, "grad_norm": 3.6657028198242188, "learning_rate": 0.0002, "loss": 1.3287, "step": 43480 }, { "epoch": 0.18, "grad_norm": 1.4860234260559082, "learning_rate": 0.0002, "loss": 1.4591, "step": 43490 }, { "epoch": 0.18, "grad_norm": 2.766763210296631, "learning_rate": 0.0002, "loss": 1.4896, "step": 43500 }, { "epoch": 0.18, "grad_norm": 3.0375423431396484, "learning_rate": 0.0002, "loss": 1.7896, "step": 43510 }, { "epoch": 0.18, "grad_norm": 3.0642037391662598, "learning_rate": 0.0002, "loss": 1.4109, "step": 43520 }, { "epoch": 0.18, "grad_norm": 3.8974108695983887, "learning_rate": 0.0002, "loss": 1.3862, "step": 43530 }, { "epoch": 0.18, "grad_norm": 2.702690362930298, "learning_rate": 0.0002, "loss": 1.3679, "step": 43540 }, { "epoch": 0.18, "grad_norm": 2.39034366607666, "learning_rate": 0.0002, "loss": 1.6642, "step": 43550 }, { "epoch": 0.18, "grad_norm": 2.455353021621704, "learning_rate": 0.0002, "loss": 1.5148, "step": 43560 }, { "epoch": 0.18, "grad_norm": 1.9259076118469238, "learning_rate": 0.0002, "loss": 1.7992, "step": 43570 }, { "epoch": 0.18, "grad_norm": 3.549535036087036, "learning_rate": 0.0002, "loss": 1.6859, "step": 43580 }, { "epoch": 0.18, "grad_norm": 3.382321834564209, "learning_rate": 0.0002, "loss": 1.7068, "step": 43590 }, { "epoch": 0.18, "grad_norm": 6.917908668518066, "learning_rate": 0.0002, "loss": 1.4777, "step": 43600 }, { "epoch": 0.18, "grad_norm": 3.343385696411133, "learning_rate": 0.0002, "loss": 1.503, "step": 43610 }, { "epoch": 0.18, "grad_norm": 2.751157760620117, "learning_rate": 0.0002, "loss": 1.5563, "step": 43620 }, { "epoch": 0.18, "grad_norm": 1.9960461854934692, "learning_rate": 0.0002, "loss": 1.4616, "step": 43630 }, { "epoch": 0.18, "grad_norm": 3.633878231048584, "learning_rate": 0.0002, "loss": 1.7369, "step": 43640 }, { "epoch": 0.18, "grad_norm": 2.8883373737335205, "learning_rate": 0.0002, "loss": 1.5578, "step": 43650 }, { "epoch": 0.18, "grad_norm": 2.146289348602295, "learning_rate": 0.0002, "loss": 1.2796, "step": 43660 }, { "epoch": 0.18, "grad_norm": 4.063661098480225, "learning_rate": 0.0002, "loss": 1.3079, "step": 43670 }, { "epoch": 0.18, "grad_norm": 4.176490306854248, "learning_rate": 0.0002, "loss": 1.8556, "step": 43680 }, { "epoch": 0.18, "grad_norm": 4.938897132873535, "learning_rate": 0.0002, "loss": 1.4239, "step": 43690 }, { "epoch": 0.18, "grad_norm": 2.898371458053589, "learning_rate": 0.0002, "loss": 1.4758, "step": 43700 }, { "epoch": 0.18, "grad_norm": 1.923251748085022, "learning_rate": 0.0002, "loss": 1.5043, "step": 43710 }, { "epoch": 0.18, "grad_norm": 3.239431142807007, "learning_rate": 0.0002, "loss": 1.403, "step": 43720 }, { "epoch": 0.18, "grad_norm": 3.1249191761016846, "learning_rate": 0.0002, "loss": 1.5881, "step": 43730 }, { "epoch": 0.18, "grad_norm": 3.6379334926605225, "learning_rate": 0.0002, "loss": 1.7383, "step": 43740 }, { "epoch": 0.18, "grad_norm": 3.838595390319824, "learning_rate": 0.0002, "loss": 1.5045, "step": 43750 }, { "epoch": 0.18, "grad_norm": 2.65982723236084, "learning_rate": 0.0002, "loss": 1.6728, "step": 43760 }, { "epoch": 0.18, "grad_norm": 5.00598669052124, "learning_rate": 0.0002, "loss": 1.5623, "step": 43770 }, { "epoch": 0.18, "grad_norm": 2.6264970302581787, "learning_rate": 0.0002, "loss": 1.3313, "step": 43780 }, { "epoch": 0.18, "grad_norm": 3.001906633377075, "learning_rate": 0.0002, "loss": 1.4858, "step": 43790 }, { "epoch": 0.18, "grad_norm": 2.309046983718872, "learning_rate": 0.0002, "loss": 1.3964, "step": 43800 }, { "epoch": 0.18, "grad_norm": 2.2936160564422607, "learning_rate": 0.0002, "loss": 1.7296, "step": 43810 }, { "epoch": 0.18, "grad_norm": 3.4256603717803955, "learning_rate": 0.0002, "loss": 1.7056, "step": 43820 }, { "epoch": 0.18, "grad_norm": 2.066373348236084, "learning_rate": 0.0002, "loss": 1.6143, "step": 43830 }, { "epoch": 0.18, "grad_norm": 2.5188546180725098, "learning_rate": 0.0002, "loss": 1.5305, "step": 43840 }, { "epoch": 0.18, "grad_norm": 2.375401020050049, "learning_rate": 0.0002, "loss": 1.4277, "step": 43850 }, { "epoch": 0.18, "grad_norm": 2.3925678730010986, "learning_rate": 0.0002, "loss": 1.673, "step": 43860 }, { "epoch": 0.18, "grad_norm": 2.801469564437866, "learning_rate": 0.0002, "loss": 1.3848, "step": 43870 }, { "epoch": 0.18, "grad_norm": 2.2531073093414307, "learning_rate": 0.0002, "loss": 1.4902, "step": 43880 }, { "epoch": 0.18, "grad_norm": 2.7237067222595215, "learning_rate": 0.0002, "loss": 1.3187, "step": 43890 }, { "epoch": 0.18, "grad_norm": 4.942293643951416, "learning_rate": 0.0002, "loss": 1.6734, "step": 43900 }, { "epoch": 0.18, "grad_norm": 3.175231695175171, "learning_rate": 0.0002, "loss": 1.6954, "step": 43910 }, { "epoch": 0.18, "grad_norm": 1.3248240947723389, "learning_rate": 0.0002, "loss": 1.5478, "step": 43920 }, { "epoch": 0.18, "grad_norm": 3.446474313735962, "learning_rate": 0.0002, "loss": 1.5438, "step": 43930 }, { "epoch": 0.18, "grad_norm": 1.8166301250457764, "learning_rate": 0.0002, "loss": 1.8633, "step": 43940 }, { "epoch": 0.18, "grad_norm": 5.901370525360107, "learning_rate": 0.0002, "loss": 1.5461, "step": 43950 }, { "epoch": 0.18, "grad_norm": 2.616243839263916, "learning_rate": 0.0002, "loss": 1.7565, "step": 43960 }, { "epoch": 0.18, "grad_norm": 2.385406494140625, "learning_rate": 0.0002, "loss": 1.4698, "step": 43970 }, { "epoch": 0.18, "grad_norm": 3.1366395950317383, "learning_rate": 0.0002, "loss": 1.411, "step": 43980 }, { "epoch": 0.18, "grad_norm": 3.420438766479492, "learning_rate": 0.0002, "loss": 1.6465, "step": 43990 }, { "epoch": 0.18, "grad_norm": 2.6605896949768066, "learning_rate": 0.0002, "loss": 1.406, "step": 44000 }, { "epoch": 0.18, "grad_norm": 2.710099697113037, "learning_rate": 0.0002, "loss": 1.5809, "step": 44010 }, { "epoch": 0.18, "grad_norm": 3.087874174118042, "learning_rate": 0.0002, "loss": 1.479, "step": 44020 }, { "epoch": 0.18, "grad_norm": 2.1653759479522705, "learning_rate": 0.0002, "loss": 1.472, "step": 44030 }, { "epoch": 0.18, "grad_norm": 3.0293760299682617, "learning_rate": 0.0002, "loss": 1.4957, "step": 44040 }, { "epoch": 0.18, "grad_norm": 2.4473366737365723, "learning_rate": 0.0002, "loss": 1.5639, "step": 44050 }, { "epoch": 0.18, "grad_norm": 1.6110376119613647, "learning_rate": 0.0002, "loss": 1.5994, "step": 44060 }, { "epoch": 0.18, "grad_norm": 3.001906633377075, "learning_rate": 0.0002, "loss": 1.6631, "step": 44070 }, { "epoch": 0.18, "grad_norm": 3.843489646911621, "learning_rate": 0.0002, "loss": 1.5497, "step": 44080 }, { "epoch": 0.18, "grad_norm": 3.022132396697998, "learning_rate": 0.0002, "loss": 1.4642, "step": 44090 }, { "epoch": 0.18, "grad_norm": 6.02247953414917, "learning_rate": 0.0002, "loss": 1.8051, "step": 44100 }, { "epoch": 0.18, "grad_norm": 2.7074759006500244, "learning_rate": 0.0002, "loss": 1.3888, "step": 44110 }, { "epoch": 0.18, "grad_norm": 2.9050445556640625, "learning_rate": 0.0002, "loss": 1.3434, "step": 44120 }, { "epoch": 0.18, "grad_norm": 2.7488768100738525, "learning_rate": 0.0002, "loss": 1.5575, "step": 44130 }, { "epoch": 0.18, "grad_norm": 3.783510446548462, "learning_rate": 0.0002, "loss": 1.7082, "step": 44140 }, { "epoch": 0.18, "grad_norm": 2.1720428466796875, "learning_rate": 0.0002, "loss": 1.4056, "step": 44150 }, { "epoch": 0.18, "grad_norm": 3.0330686569213867, "learning_rate": 0.0002, "loss": 1.6441, "step": 44160 }, { "epoch": 0.18, "grad_norm": 2.912216901779175, "learning_rate": 0.0002, "loss": 1.4028, "step": 44170 }, { "epoch": 0.18, "grad_norm": 3.0417704582214355, "learning_rate": 0.0002, "loss": 1.6078, "step": 44180 }, { "epoch": 0.18, "grad_norm": 3.222512722015381, "learning_rate": 0.0002, "loss": 1.3664, "step": 44190 }, { "epoch": 0.18, "grad_norm": 5.125855445861816, "learning_rate": 0.0002, "loss": 1.668, "step": 44200 }, { "epoch": 0.18, "grad_norm": 2.4286680221557617, "learning_rate": 0.0002, "loss": 1.5895, "step": 44210 }, { "epoch": 0.18, "grad_norm": 2.5514488220214844, "learning_rate": 0.0002, "loss": 1.5086, "step": 44220 }, { "epoch": 0.18, "grad_norm": 5.148443222045898, "learning_rate": 0.0002, "loss": 1.6139, "step": 44230 }, { "epoch": 0.18, "grad_norm": 4.445087432861328, "learning_rate": 0.0002, "loss": 1.6614, "step": 44240 }, { "epoch": 0.18, "grad_norm": 2.2826409339904785, "learning_rate": 0.0002, "loss": 1.5163, "step": 44250 }, { "epoch": 0.18, "grad_norm": 1.9173070192337036, "learning_rate": 0.0002, "loss": 1.5276, "step": 44260 }, { "epoch": 0.18, "grad_norm": 2.2765393257141113, "learning_rate": 0.0002, "loss": 1.4756, "step": 44270 }, { "epoch": 0.18, "grad_norm": 3.616863965988159, "learning_rate": 0.0002, "loss": 1.538, "step": 44280 }, { "epoch": 0.18, "grad_norm": 2.3877410888671875, "learning_rate": 0.0002, "loss": 1.4658, "step": 44290 }, { "epoch": 0.18, "grad_norm": 2.5060887336730957, "learning_rate": 0.0002, "loss": 1.6067, "step": 44300 }, { "epoch": 0.18, "grad_norm": 2.3778369426727295, "learning_rate": 0.0002, "loss": 1.532, "step": 44310 }, { "epoch": 0.18, "grad_norm": 2.3049120903015137, "learning_rate": 0.0002, "loss": 1.576, "step": 44320 }, { "epoch": 0.18, "grad_norm": 2.519707679748535, "learning_rate": 0.0002, "loss": 1.5091, "step": 44330 }, { "epoch": 0.18, "grad_norm": 2.5772318840026855, "learning_rate": 0.0002, "loss": 1.8429, "step": 44340 }, { "epoch": 0.18, "grad_norm": 2.3555877208709717, "learning_rate": 0.0002, "loss": 1.8016, "step": 44350 }, { "epoch": 0.18, "grad_norm": 2.546940326690674, "learning_rate": 0.0002, "loss": 1.3814, "step": 44360 }, { "epoch": 0.18, "grad_norm": 2.7628536224365234, "learning_rate": 0.0002, "loss": 1.6905, "step": 44370 }, { "epoch": 0.18, "grad_norm": 2.312915802001953, "learning_rate": 0.0002, "loss": 1.6031, "step": 44380 }, { "epoch": 0.18, "grad_norm": 3.250054359436035, "learning_rate": 0.0002, "loss": 1.6421, "step": 44390 }, { "epoch": 0.18, "grad_norm": 2.504770278930664, "learning_rate": 0.0002, "loss": 1.5604, "step": 44400 }, { "epoch": 0.18, "grad_norm": 3.6410410404205322, "learning_rate": 0.0002, "loss": 1.5821, "step": 44410 }, { "epoch": 0.18, "grad_norm": 1.6561528444290161, "learning_rate": 0.0002, "loss": 1.424, "step": 44420 }, { "epoch": 0.18, "grad_norm": 1.8151636123657227, "learning_rate": 0.0002, "loss": 1.5026, "step": 44430 }, { "epoch": 0.18, "grad_norm": 1.5974279642105103, "learning_rate": 0.0002, "loss": 1.3256, "step": 44440 }, { "epoch": 0.18, "grad_norm": 3.6878280639648438, "learning_rate": 0.0002, "loss": 1.5291, "step": 44450 }, { "epoch": 0.18, "grad_norm": 3.5151374340057373, "learning_rate": 0.0002, "loss": 1.5446, "step": 44460 }, { "epoch": 0.18, "grad_norm": 3.6690938472747803, "learning_rate": 0.0002, "loss": 1.5053, "step": 44470 }, { "epoch": 0.18, "grad_norm": 3.744870901107788, "learning_rate": 0.0002, "loss": 1.4267, "step": 44480 }, { "epoch": 0.18, "grad_norm": 4.268899917602539, "learning_rate": 0.0002, "loss": 1.6871, "step": 44490 }, { "epoch": 0.18, "grad_norm": 1.867915153503418, "learning_rate": 0.0002, "loss": 1.4464, "step": 44500 }, { "epoch": 0.18, "grad_norm": 2.3841888904571533, "learning_rate": 0.0002, "loss": 1.4414, "step": 44510 }, { "epoch": 0.18, "grad_norm": 3.9700467586517334, "learning_rate": 0.0002, "loss": 1.4485, "step": 44520 }, { "epoch": 0.18, "grad_norm": 3.031594753265381, "learning_rate": 0.0002, "loss": 1.4997, "step": 44530 }, { "epoch": 0.18, "grad_norm": 3.4080655574798584, "learning_rate": 0.0002, "loss": 1.6573, "step": 44540 }, { "epoch": 0.18, "grad_norm": 2.00821852684021, "learning_rate": 0.0002, "loss": 1.5843, "step": 44550 }, { "epoch": 0.18, "grad_norm": 2.593059778213501, "learning_rate": 0.0002, "loss": 1.4722, "step": 44560 }, { "epoch": 0.18, "grad_norm": 2.096092939376831, "learning_rate": 0.0002, "loss": 1.6099, "step": 44570 }, { "epoch": 0.18, "grad_norm": 3.543891191482544, "learning_rate": 0.0002, "loss": 1.4452, "step": 44580 }, { "epoch": 0.18, "grad_norm": 2.1562693119049072, "learning_rate": 0.0002, "loss": 1.6136, "step": 44590 }, { "epoch": 0.18, "grad_norm": 3.9418907165527344, "learning_rate": 0.0002, "loss": 1.7837, "step": 44600 }, { "epoch": 0.18, "grad_norm": 1.6978919506072998, "learning_rate": 0.0002, "loss": 1.7508, "step": 44610 }, { "epoch": 0.18, "grad_norm": 2.3196210861206055, "learning_rate": 0.0002, "loss": 1.653, "step": 44620 }, { "epoch": 0.18, "grad_norm": 3.760507345199585, "learning_rate": 0.0002, "loss": 1.5722, "step": 44630 }, { "epoch": 0.18, "grad_norm": 2.5597786903381348, "learning_rate": 0.0002, "loss": 1.4991, "step": 44640 }, { "epoch": 0.18, "grad_norm": 2.9261314868927, "learning_rate": 0.0002, "loss": 1.4887, "step": 44650 }, { "epoch": 0.18, "grad_norm": 2.6716930866241455, "learning_rate": 0.0002, "loss": 1.2279, "step": 44660 }, { "epoch": 0.18, "grad_norm": 3.3920583724975586, "learning_rate": 0.0002, "loss": 1.5735, "step": 44670 }, { "epoch": 0.18, "grad_norm": 3.2462313175201416, "learning_rate": 0.0002, "loss": 1.6339, "step": 44680 }, { "epoch": 0.18, "grad_norm": 2.1458332538604736, "learning_rate": 0.0002, "loss": 1.4211, "step": 44690 }, { "epoch": 0.18, "grad_norm": 2.625951051712036, "learning_rate": 0.0002, "loss": 1.5322, "step": 44700 }, { "epoch": 0.18, "grad_norm": 3.839977979660034, "learning_rate": 0.0002, "loss": 1.8372, "step": 44710 }, { "epoch": 0.18, "grad_norm": 3.2326385974884033, "learning_rate": 0.0002, "loss": 1.405, "step": 44720 }, { "epoch": 0.18, "grad_norm": 3.015857696533203, "learning_rate": 0.0002, "loss": 1.7314, "step": 44730 }, { "epoch": 0.18, "grad_norm": 2.575622320175171, "learning_rate": 0.0002, "loss": 1.6182, "step": 44740 }, { "epoch": 0.18, "grad_norm": 3.4989991188049316, "learning_rate": 0.0002, "loss": 1.6006, "step": 44750 }, { "epoch": 0.18, "grad_norm": 2.657860279083252, "learning_rate": 0.0002, "loss": 1.2096, "step": 44760 }, { "epoch": 0.18, "grad_norm": 2.9593663215637207, "learning_rate": 0.0002, "loss": 1.5612, "step": 44770 }, { "epoch": 0.18, "grad_norm": 2.7144017219543457, "learning_rate": 0.0002, "loss": 1.4879, "step": 44780 }, { "epoch": 0.18, "grad_norm": 3.0109055042266846, "learning_rate": 0.0002, "loss": 1.6921, "step": 44790 }, { "epoch": 0.18, "grad_norm": 2.6882429122924805, "learning_rate": 0.0002, "loss": 1.6047, "step": 44800 }, { "epoch": 0.18, "grad_norm": 2.09259033203125, "learning_rate": 0.0002, "loss": 1.4546, "step": 44810 }, { "epoch": 0.18, "grad_norm": 3.0615158081054688, "learning_rate": 0.0002, "loss": 1.4927, "step": 44820 }, { "epoch": 0.18, "grad_norm": 3.0888710021972656, "learning_rate": 0.0002, "loss": 1.5318, "step": 44830 }, { "epoch": 0.18, "grad_norm": 2.108914375305176, "learning_rate": 0.0002, "loss": 1.5575, "step": 44840 }, { "epoch": 0.18, "grad_norm": 2.894258975982666, "learning_rate": 0.0002, "loss": 1.6352, "step": 44850 }, { "epoch": 0.18, "grad_norm": 1.5058314800262451, "learning_rate": 0.0002, "loss": 1.5007, "step": 44860 }, { "epoch": 0.18, "grad_norm": 3.240705728530884, "learning_rate": 0.0002, "loss": 1.5228, "step": 44870 }, { "epoch": 0.18, "grad_norm": 1.585523247718811, "learning_rate": 0.0002, "loss": 1.5181, "step": 44880 }, { "epoch": 0.18, "grad_norm": 4.572604179382324, "learning_rate": 0.0002, "loss": 1.6209, "step": 44890 }, { "epoch": 0.18, "grad_norm": 3.006648540496826, "learning_rate": 0.0002, "loss": 1.6052, "step": 44900 }, { "epoch": 0.18, "grad_norm": 3.7301981449127197, "learning_rate": 0.0002, "loss": 1.5029, "step": 44910 }, { "epoch": 0.18, "grad_norm": 3.2201952934265137, "learning_rate": 0.0002, "loss": 1.3976, "step": 44920 }, { "epoch": 0.18, "grad_norm": 4.26391077041626, "learning_rate": 0.0002, "loss": 1.6478, "step": 44930 }, { "epoch": 0.18, "grad_norm": 1.8528203964233398, "learning_rate": 0.0002, "loss": 1.4744, "step": 44940 }, { "epoch": 0.18, "grad_norm": 1.6520283222198486, "learning_rate": 0.0002, "loss": 1.6244, "step": 44950 }, { "epoch": 0.18, "grad_norm": 3.085693836212158, "learning_rate": 0.0002, "loss": 1.417, "step": 44960 }, { "epoch": 0.18, "grad_norm": 3.22436261177063, "learning_rate": 0.0002, "loss": 1.5928, "step": 44970 }, { "epoch": 0.18, "grad_norm": 2.8108294010162354, "learning_rate": 0.0002, "loss": 1.5352, "step": 44980 }, { "epoch": 0.18, "grad_norm": 2.8491127490997314, "learning_rate": 0.0002, "loss": 1.3611, "step": 44990 }, { "epoch": 0.18, "grad_norm": 3.3529036045074463, "learning_rate": 0.0002, "loss": 1.7948, "step": 45000 }, { "epoch": 0.18, "grad_norm": 2.3394594192504883, "learning_rate": 0.0002, "loss": 1.7111, "step": 45010 }, { "epoch": 0.18, "grad_norm": 3.8393993377685547, "learning_rate": 0.0002, "loss": 1.6463, "step": 45020 }, { "epoch": 0.18, "grad_norm": 1.9165757894515991, "learning_rate": 0.0002, "loss": 1.6245, "step": 45030 }, { "epoch": 0.18, "grad_norm": 1.899558424949646, "learning_rate": 0.0002, "loss": 1.5798, "step": 45040 }, { "epoch": 0.18, "grad_norm": 2.332822322845459, "learning_rate": 0.0002, "loss": 1.5827, "step": 45050 }, { "epoch": 0.18, "grad_norm": 2.0270955562591553, "learning_rate": 0.0002, "loss": 1.5016, "step": 45060 }, { "epoch": 0.18, "grad_norm": 3.0725433826446533, "learning_rate": 0.0002, "loss": 1.3939, "step": 45070 }, { "epoch": 0.18, "grad_norm": 2.6844820976257324, "learning_rate": 0.0002, "loss": 1.4136, "step": 45080 }, { "epoch": 0.18, "grad_norm": 2.852404832839966, "learning_rate": 0.0002, "loss": 1.6058, "step": 45090 }, { "epoch": 0.18, "grad_norm": 1.9920531511306763, "learning_rate": 0.0002, "loss": 1.6583, "step": 45100 }, { "epoch": 0.18, "grad_norm": 3.1512303352355957, "learning_rate": 0.0002, "loss": 1.2196, "step": 45110 }, { "epoch": 0.18, "grad_norm": 3.763352870941162, "learning_rate": 0.0002, "loss": 1.7569, "step": 45120 }, { "epoch": 0.18, "grad_norm": 1.9579806327819824, "learning_rate": 0.0002, "loss": 1.815, "step": 45130 }, { "epoch": 0.18, "grad_norm": 1.9591737985610962, "learning_rate": 0.0002, "loss": 1.6105, "step": 45140 }, { "epoch": 0.18, "grad_norm": 1.9568569660186768, "learning_rate": 0.0002, "loss": 1.4233, "step": 45150 }, { "epoch": 0.18, "grad_norm": 1.6178593635559082, "learning_rate": 0.0002, "loss": 1.3612, "step": 45160 }, { "epoch": 0.18, "grad_norm": 3.191521644592285, "learning_rate": 0.0002, "loss": 1.569, "step": 45170 }, { "epoch": 0.18, "grad_norm": 3.2023327350616455, "learning_rate": 0.0002, "loss": 1.8162, "step": 45180 }, { "epoch": 0.18, "grad_norm": 2.316580057144165, "learning_rate": 0.0002, "loss": 1.4214, "step": 45190 }, { "epoch": 0.18, "grad_norm": 3.4854347705841064, "learning_rate": 0.0002, "loss": 1.5492, "step": 45200 }, { "epoch": 0.18, "grad_norm": 2.772742748260498, "learning_rate": 0.0002, "loss": 1.352, "step": 45210 }, { "epoch": 0.18, "grad_norm": 2.3222787380218506, "learning_rate": 0.0002, "loss": 1.4791, "step": 45220 }, { "epoch": 0.18, "grad_norm": 3.1838877201080322, "learning_rate": 0.0002, "loss": 1.6038, "step": 45230 }, { "epoch": 0.18, "grad_norm": 3.1666319370269775, "learning_rate": 0.0002, "loss": 1.6347, "step": 45240 }, { "epoch": 0.18, "grad_norm": 1.6235517263412476, "learning_rate": 0.0002, "loss": 1.625, "step": 45250 }, { "epoch": 0.18, "grad_norm": 6.209279537200928, "learning_rate": 0.0002, "loss": 1.6808, "step": 45260 }, { "epoch": 0.18, "grad_norm": 3.6520485877990723, "learning_rate": 0.0002, "loss": 1.6681, "step": 45270 }, { "epoch": 0.18, "grad_norm": 2.6694324016571045, "learning_rate": 0.0002, "loss": 1.7226, "step": 45280 }, { "epoch": 0.18, "grad_norm": 3.1590304374694824, "learning_rate": 0.0002, "loss": 1.8346, "step": 45290 }, { "epoch": 0.18, "grad_norm": 3.6939525604248047, "learning_rate": 0.0002, "loss": 1.8777, "step": 45300 }, { "epoch": 0.18, "grad_norm": 2.1824002265930176, "learning_rate": 0.0002, "loss": 1.4229, "step": 45310 }, { "epoch": 0.18, "grad_norm": 7.836766719818115, "learning_rate": 0.0002, "loss": 1.585, "step": 45320 }, { "epoch": 0.18, "grad_norm": 6.801265239715576, "learning_rate": 0.0002, "loss": 1.4923, "step": 45330 }, { "epoch": 0.18, "grad_norm": 2.8119685649871826, "learning_rate": 0.0002, "loss": 1.4375, "step": 45340 }, { "epoch": 0.18, "grad_norm": 1.8204445838928223, "learning_rate": 0.0002, "loss": 1.4531, "step": 45350 }, { "epoch": 0.18, "grad_norm": 5.756242752075195, "learning_rate": 0.0002, "loss": 1.6467, "step": 45360 }, { "epoch": 0.18, "grad_norm": 3.089677333831787, "learning_rate": 0.0002, "loss": 1.6214, "step": 45370 }, { "epoch": 0.18, "grad_norm": 2.256129741668701, "learning_rate": 0.0002, "loss": 1.5316, "step": 45380 }, { "epoch": 0.18, "grad_norm": 2.926945686340332, "learning_rate": 0.0002, "loss": 1.4918, "step": 45390 }, { "epoch": 0.18, "grad_norm": 2.4988226890563965, "learning_rate": 0.0002, "loss": 1.3422, "step": 45400 }, { "epoch": 0.18, "grad_norm": 1.7316113710403442, "learning_rate": 0.0002, "loss": 1.6352, "step": 45410 }, { "epoch": 0.18, "grad_norm": 3.596102476119995, "learning_rate": 0.0002, "loss": 1.6653, "step": 45420 }, { "epoch": 0.18, "grad_norm": 2.268521785736084, "learning_rate": 0.0002, "loss": 1.6395, "step": 45430 }, { "epoch": 0.18, "grad_norm": 2.206622362136841, "learning_rate": 0.0002, "loss": 1.5942, "step": 45440 }, { "epoch": 0.19, "grad_norm": 4.218605995178223, "learning_rate": 0.0002, "loss": 1.7077, "step": 45450 }, { "epoch": 0.19, "grad_norm": 2.0145912170410156, "learning_rate": 0.0002, "loss": 1.4127, "step": 45460 }, { "epoch": 0.19, "grad_norm": 5.094768047332764, "learning_rate": 0.0002, "loss": 1.4482, "step": 45470 }, { "epoch": 0.19, "grad_norm": 3.216613292694092, "learning_rate": 0.0002, "loss": 1.5371, "step": 45480 }, { "epoch": 0.19, "grad_norm": 4.180473327636719, "learning_rate": 0.0002, "loss": 1.6413, "step": 45490 }, { "epoch": 0.19, "grad_norm": 3.214285373687744, "learning_rate": 0.0002, "loss": 1.4059, "step": 45500 }, { "epoch": 0.19, "grad_norm": 1.8954601287841797, "learning_rate": 0.0002, "loss": 1.6181, "step": 45510 }, { "epoch": 0.19, "grad_norm": 3.071044921875, "learning_rate": 0.0002, "loss": 1.4243, "step": 45520 }, { "epoch": 0.19, "grad_norm": 2.5930299758911133, "learning_rate": 0.0002, "loss": 1.4213, "step": 45530 }, { "epoch": 0.19, "grad_norm": 2.297348737716675, "learning_rate": 0.0002, "loss": 1.3539, "step": 45540 }, { "epoch": 0.19, "grad_norm": 2.796990394592285, "learning_rate": 0.0002, "loss": 1.6418, "step": 45550 }, { "epoch": 0.19, "grad_norm": 1.705550193786621, "learning_rate": 0.0002, "loss": 1.8882, "step": 45560 }, { "epoch": 0.19, "grad_norm": 2.020758867263794, "learning_rate": 0.0002, "loss": 1.7136, "step": 45570 }, { "epoch": 0.19, "grad_norm": 2.7438337802886963, "learning_rate": 0.0002, "loss": 1.436, "step": 45580 }, { "epoch": 0.19, "grad_norm": 2.0031025409698486, "learning_rate": 0.0002, "loss": 1.4185, "step": 45590 }, { "epoch": 0.19, "grad_norm": 4.127625942230225, "learning_rate": 0.0002, "loss": 1.4939, "step": 45600 }, { "epoch": 0.19, "grad_norm": 1.837774395942688, "learning_rate": 0.0002, "loss": 1.4405, "step": 45610 }, { "epoch": 0.19, "grad_norm": 3.2964718341827393, "learning_rate": 0.0002, "loss": 1.5129, "step": 45620 }, { "epoch": 0.19, "grad_norm": 1.678000807762146, "learning_rate": 0.0002, "loss": 1.5442, "step": 45630 }, { "epoch": 0.19, "grad_norm": 3.005708932876587, "learning_rate": 0.0002, "loss": 1.7318, "step": 45640 }, { "epoch": 0.19, "grad_norm": 3.8337182998657227, "learning_rate": 0.0002, "loss": 1.5681, "step": 45650 }, { "epoch": 0.19, "grad_norm": 4.057092666625977, "learning_rate": 0.0002, "loss": 1.4431, "step": 45660 }, { "epoch": 0.19, "grad_norm": 2.189455986022949, "learning_rate": 0.0002, "loss": 1.3296, "step": 45670 }, { "epoch": 0.19, "grad_norm": 1.7939079999923706, "learning_rate": 0.0002, "loss": 1.4659, "step": 45680 }, { "epoch": 0.19, "grad_norm": 4.942221164703369, "learning_rate": 0.0002, "loss": 1.7478, "step": 45690 }, { "epoch": 0.19, "grad_norm": 2.628309965133667, "learning_rate": 0.0002, "loss": 1.5363, "step": 45700 }, { "epoch": 0.19, "grad_norm": 3.5748817920684814, "learning_rate": 0.0002, "loss": 1.6905, "step": 45710 }, { "epoch": 0.19, "grad_norm": 2.8963820934295654, "learning_rate": 0.0002, "loss": 1.4464, "step": 45720 }, { "epoch": 0.19, "grad_norm": 2.947162628173828, "learning_rate": 0.0002, "loss": 1.6139, "step": 45730 }, { "epoch": 0.19, "grad_norm": 2.782932758331299, "learning_rate": 0.0002, "loss": 1.6116, "step": 45740 }, { "epoch": 0.19, "grad_norm": 3.0183067321777344, "learning_rate": 0.0002, "loss": 1.5434, "step": 45750 }, { "epoch": 0.19, "grad_norm": 2.694903612136841, "learning_rate": 0.0002, "loss": 1.5898, "step": 45760 }, { "epoch": 0.19, "grad_norm": 2.0259532928466797, "learning_rate": 0.0002, "loss": 1.4569, "step": 45770 }, { "epoch": 0.19, "grad_norm": 3.5044195652008057, "learning_rate": 0.0002, "loss": 1.4782, "step": 45780 }, { "epoch": 0.19, "grad_norm": 3.028195858001709, "learning_rate": 0.0002, "loss": 1.3099, "step": 45790 }, { "epoch": 0.19, "grad_norm": 2.3832340240478516, "learning_rate": 0.0002, "loss": 1.4791, "step": 45800 }, { "epoch": 0.19, "grad_norm": 2.505295991897583, "learning_rate": 0.0002, "loss": 1.4411, "step": 45810 }, { "epoch": 0.19, "grad_norm": 2.737226724624634, "learning_rate": 0.0002, "loss": 1.5722, "step": 45820 }, { "epoch": 0.19, "grad_norm": 3.578921318054199, "learning_rate": 0.0002, "loss": 1.2481, "step": 45830 }, { "epoch": 0.19, "grad_norm": 2.390169858932495, "learning_rate": 0.0002, "loss": 1.5015, "step": 45840 }, { "epoch": 0.19, "grad_norm": 2.9216134548187256, "learning_rate": 0.0002, "loss": 1.4531, "step": 45850 }, { "epoch": 0.19, "grad_norm": 3.528728485107422, "learning_rate": 0.0002, "loss": 1.7049, "step": 45860 }, { "epoch": 0.19, "grad_norm": 1.9568026065826416, "learning_rate": 0.0002, "loss": 1.3638, "step": 45870 }, { "epoch": 0.19, "grad_norm": 1.8941891193389893, "learning_rate": 0.0002, "loss": 1.6331, "step": 45880 }, { "epoch": 0.19, "grad_norm": 5.832653999328613, "learning_rate": 0.0002, "loss": 1.8816, "step": 45890 }, { "epoch": 0.19, "grad_norm": 1.9947307109832764, "learning_rate": 0.0002, "loss": 1.6855, "step": 45900 }, { "epoch": 0.19, "grad_norm": 1.488150954246521, "learning_rate": 0.0002, "loss": 1.794, "step": 45910 }, { "epoch": 0.19, "grad_norm": 2.8047521114349365, "learning_rate": 0.0002, "loss": 1.6833, "step": 45920 }, { "epoch": 0.19, "grad_norm": 3.012233257293701, "learning_rate": 0.0002, "loss": 1.653, "step": 45930 }, { "epoch": 0.19, "grad_norm": 2.202707529067993, "learning_rate": 0.0002, "loss": 1.5072, "step": 45940 }, { "epoch": 0.19, "grad_norm": 3.197291374206543, "learning_rate": 0.0002, "loss": 1.7804, "step": 45950 }, { "epoch": 0.19, "grad_norm": 3.74139666557312, "learning_rate": 0.0002, "loss": 1.8566, "step": 45960 }, { "epoch": 0.19, "grad_norm": 2.9012579917907715, "learning_rate": 0.0002, "loss": 1.5854, "step": 45970 }, { "epoch": 0.19, "grad_norm": 1.765588402748108, "learning_rate": 0.0002, "loss": 1.543, "step": 45980 }, { "epoch": 0.19, "grad_norm": 3.1123578548431396, "learning_rate": 0.0002, "loss": 1.2319, "step": 45990 }, { "epoch": 0.19, "grad_norm": 2.3498969078063965, "learning_rate": 0.0002, "loss": 1.5153, "step": 46000 }, { "epoch": 0.19, "grad_norm": 2.7143664360046387, "learning_rate": 0.0002, "loss": 1.7054, "step": 46010 }, { "epoch": 0.19, "grad_norm": 2.4825572967529297, "learning_rate": 0.0002, "loss": 1.5563, "step": 46020 }, { "epoch": 0.19, "grad_norm": 2.982233762741089, "learning_rate": 0.0002, "loss": 1.5432, "step": 46030 }, { "epoch": 0.19, "grad_norm": 2.9779446125030518, "learning_rate": 0.0002, "loss": 1.4923, "step": 46040 }, { "epoch": 0.19, "grad_norm": 1.8644086122512817, "learning_rate": 0.0002, "loss": 1.3609, "step": 46050 }, { "epoch": 0.19, "grad_norm": 3.8053364753723145, "learning_rate": 0.0002, "loss": 1.6913, "step": 46060 }, { "epoch": 0.19, "grad_norm": 2.431157350540161, "learning_rate": 0.0002, "loss": 1.5113, "step": 46070 }, { "epoch": 0.19, "grad_norm": 2.4091811180114746, "learning_rate": 0.0002, "loss": 1.8542, "step": 46080 }, { "epoch": 0.19, "grad_norm": 2.6486611366271973, "learning_rate": 0.0002, "loss": 1.8366, "step": 46090 }, { "epoch": 0.19, "grad_norm": 1.7589308023452759, "learning_rate": 0.0002, "loss": 1.5645, "step": 46100 }, { "epoch": 0.19, "grad_norm": 2.137315034866333, "learning_rate": 0.0002, "loss": 1.4703, "step": 46110 }, { "epoch": 0.19, "grad_norm": 4.524263381958008, "learning_rate": 0.0002, "loss": 1.7375, "step": 46120 }, { "epoch": 0.19, "grad_norm": 3.5197947025299072, "learning_rate": 0.0002, "loss": 1.6277, "step": 46130 }, { "epoch": 0.19, "grad_norm": 1.99274480342865, "learning_rate": 0.0002, "loss": 1.6196, "step": 46140 }, { "epoch": 0.19, "grad_norm": 3.218264579772949, "learning_rate": 0.0002, "loss": 1.6066, "step": 46150 }, { "epoch": 0.19, "grad_norm": 2.6722474098205566, "learning_rate": 0.0002, "loss": 1.6057, "step": 46160 }, { "epoch": 0.19, "grad_norm": 3.925804853439331, "learning_rate": 0.0002, "loss": 1.5703, "step": 46170 }, { "epoch": 0.19, "grad_norm": 2.710832357406616, "learning_rate": 0.0002, "loss": 1.6329, "step": 46180 }, { "epoch": 0.19, "grad_norm": 2.900862693786621, "learning_rate": 0.0002, "loss": 1.717, "step": 46190 }, { "epoch": 0.19, "grad_norm": 3.858919382095337, "learning_rate": 0.0002, "loss": 1.5212, "step": 46200 }, { "epoch": 0.19, "grad_norm": 3.7322604656219482, "learning_rate": 0.0002, "loss": 1.5384, "step": 46210 }, { "epoch": 0.19, "grad_norm": 3.482213020324707, "learning_rate": 0.0002, "loss": 1.7198, "step": 46220 }, { "epoch": 0.19, "grad_norm": 3.2293171882629395, "learning_rate": 0.0002, "loss": 1.6371, "step": 46230 }, { "epoch": 0.19, "grad_norm": 2.352001667022705, "learning_rate": 0.0002, "loss": 1.3775, "step": 46240 }, { "epoch": 0.19, "grad_norm": 3.35121488571167, "learning_rate": 0.0002, "loss": 1.3773, "step": 46250 }, { "epoch": 0.19, "grad_norm": 4.25068473815918, "learning_rate": 0.0002, "loss": 1.5841, "step": 46260 }, { "epoch": 0.19, "grad_norm": 3.22622013092041, "learning_rate": 0.0002, "loss": 1.5621, "step": 46270 }, { "epoch": 0.19, "grad_norm": 2.6694557666778564, "learning_rate": 0.0002, "loss": 1.5714, "step": 46280 }, { "epoch": 0.19, "grad_norm": 3.281299591064453, "learning_rate": 0.0002, "loss": 1.5648, "step": 46290 }, { "epoch": 0.19, "grad_norm": 3.77848219871521, "learning_rate": 0.0002, "loss": 1.4025, "step": 46300 }, { "epoch": 0.19, "grad_norm": 2.301567554473877, "learning_rate": 0.0002, "loss": 1.6481, "step": 46310 }, { "epoch": 0.19, "grad_norm": 3.4227373600006104, "learning_rate": 0.0002, "loss": 1.7962, "step": 46320 }, { "epoch": 0.19, "grad_norm": 3.8034119606018066, "learning_rate": 0.0002, "loss": 1.6777, "step": 46330 }, { "epoch": 0.19, "grad_norm": 2.063697099685669, "learning_rate": 0.0002, "loss": 1.6207, "step": 46340 }, { "epoch": 0.19, "grad_norm": 2.9742510318756104, "learning_rate": 0.0002, "loss": 1.567, "step": 46350 }, { "epoch": 0.19, "grad_norm": 1.6347531080245972, "learning_rate": 0.0002, "loss": 1.4363, "step": 46360 }, { "epoch": 0.19, "grad_norm": 4.495654582977295, "learning_rate": 0.0002, "loss": 1.2735, "step": 46370 }, { "epoch": 0.19, "grad_norm": 2.0205349922180176, "learning_rate": 0.0002, "loss": 1.5553, "step": 46380 }, { "epoch": 0.19, "grad_norm": 3.067249298095703, "learning_rate": 0.0002, "loss": 1.8378, "step": 46390 }, { "epoch": 0.19, "grad_norm": 2.950688123703003, "learning_rate": 0.0002, "loss": 1.4975, "step": 46400 }, { "epoch": 0.19, "grad_norm": 2.3335070610046387, "learning_rate": 0.0002, "loss": 1.5493, "step": 46410 }, { "epoch": 0.19, "grad_norm": 3.8413517475128174, "learning_rate": 0.0002, "loss": 1.6586, "step": 46420 }, { "epoch": 0.19, "grad_norm": 2.657693386077881, "learning_rate": 0.0002, "loss": 1.5894, "step": 46430 }, { "epoch": 0.19, "grad_norm": 2.6796884536743164, "learning_rate": 0.0002, "loss": 1.3579, "step": 46440 }, { "epoch": 0.19, "grad_norm": 2.2038443088531494, "learning_rate": 0.0002, "loss": 1.3978, "step": 46450 }, { "epoch": 0.19, "grad_norm": 3.8835790157318115, "learning_rate": 0.0002, "loss": 1.5646, "step": 46460 }, { "epoch": 0.19, "grad_norm": 1.7125074863433838, "learning_rate": 0.0002, "loss": 1.6482, "step": 46470 }, { "epoch": 0.19, "grad_norm": 3.8000330924987793, "learning_rate": 0.0002, "loss": 1.4944, "step": 46480 }, { "epoch": 0.19, "grad_norm": 2.549764633178711, "learning_rate": 0.0002, "loss": 1.7065, "step": 46490 }, { "epoch": 0.19, "grad_norm": 2.6584815979003906, "learning_rate": 0.0002, "loss": 1.5253, "step": 46500 }, { "epoch": 0.19, "grad_norm": 2.602905035018921, "learning_rate": 0.0002, "loss": 1.5927, "step": 46510 }, { "epoch": 0.19, "grad_norm": 6.892823696136475, "learning_rate": 0.0002, "loss": 1.6487, "step": 46520 }, { "epoch": 0.19, "grad_norm": 2.3676013946533203, "learning_rate": 0.0002, "loss": 1.5338, "step": 46530 }, { "epoch": 0.19, "grad_norm": 3.4152026176452637, "learning_rate": 0.0002, "loss": 1.3661, "step": 46540 }, { "epoch": 0.19, "grad_norm": 2.5472395420074463, "learning_rate": 0.0002, "loss": 1.2275, "step": 46550 }, { "epoch": 0.19, "grad_norm": 3.892099142074585, "learning_rate": 0.0002, "loss": 1.482, "step": 46560 }, { "epoch": 0.19, "grad_norm": 4.360474109649658, "learning_rate": 0.0002, "loss": 1.5146, "step": 46570 }, { "epoch": 0.19, "grad_norm": 3.306100606918335, "learning_rate": 0.0002, "loss": 1.6309, "step": 46580 }, { "epoch": 0.19, "grad_norm": 2.6548120975494385, "learning_rate": 0.0002, "loss": 1.7337, "step": 46590 }, { "epoch": 0.19, "grad_norm": 3.7672572135925293, "learning_rate": 0.0002, "loss": 1.4907, "step": 46600 }, { "epoch": 0.19, "grad_norm": 5.708339214324951, "learning_rate": 0.0002, "loss": 1.4651, "step": 46610 }, { "epoch": 0.19, "grad_norm": 2.2299301624298096, "learning_rate": 0.0002, "loss": 1.6112, "step": 46620 }, { "epoch": 0.19, "grad_norm": 2.712761640548706, "learning_rate": 0.0002, "loss": 1.6177, "step": 46630 }, { "epoch": 0.19, "grad_norm": 4.220048427581787, "learning_rate": 0.0002, "loss": 1.6221, "step": 46640 }, { "epoch": 0.19, "grad_norm": 3.5397379398345947, "learning_rate": 0.0002, "loss": 1.5519, "step": 46650 }, { "epoch": 0.19, "grad_norm": 3.0497591495513916, "learning_rate": 0.0002, "loss": 1.6301, "step": 46660 }, { "epoch": 0.19, "grad_norm": 2.7463390827178955, "learning_rate": 0.0002, "loss": 1.831, "step": 46670 }, { "epoch": 0.19, "grad_norm": 2.4362242221832275, "learning_rate": 0.0002, "loss": 1.5536, "step": 46680 }, { "epoch": 0.19, "grad_norm": 4.248058319091797, "learning_rate": 0.0002, "loss": 1.4893, "step": 46690 }, { "epoch": 0.19, "grad_norm": 3.368191719055176, "learning_rate": 0.0002, "loss": 1.4805, "step": 46700 }, { "epoch": 0.19, "grad_norm": 3.8052990436553955, "learning_rate": 0.0002, "loss": 1.5284, "step": 46710 }, { "epoch": 0.19, "grad_norm": 3.508697509765625, "learning_rate": 0.0002, "loss": 1.5149, "step": 46720 }, { "epoch": 0.19, "grad_norm": 2.256141185760498, "learning_rate": 0.0002, "loss": 1.5984, "step": 46730 }, { "epoch": 0.19, "grad_norm": 5.571456432342529, "learning_rate": 0.0002, "loss": 1.5538, "step": 46740 }, { "epoch": 0.19, "grad_norm": 4.844105243682861, "learning_rate": 0.0002, "loss": 1.5987, "step": 46750 }, { "epoch": 0.19, "grad_norm": 2.302792549133301, "learning_rate": 0.0002, "loss": 1.5101, "step": 46760 }, { "epoch": 0.19, "grad_norm": 2.763477325439453, "learning_rate": 0.0002, "loss": 1.713, "step": 46770 }, { "epoch": 0.19, "grad_norm": 3.6720287799835205, "learning_rate": 0.0002, "loss": 1.5177, "step": 46780 }, { "epoch": 0.19, "grad_norm": 1.811232566833496, "learning_rate": 0.0002, "loss": 1.5661, "step": 46790 }, { "epoch": 0.19, "grad_norm": 3.1122944355010986, "learning_rate": 0.0002, "loss": 1.58, "step": 46800 }, { "epoch": 0.19, "grad_norm": 2.1730453968048096, "learning_rate": 0.0002, "loss": 1.6522, "step": 46810 }, { "epoch": 0.19, "grad_norm": 3.13684344291687, "learning_rate": 0.0002, "loss": 1.5213, "step": 46820 }, { "epoch": 0.19, "grad_norm": 3.8708159923553467, "learning_rate": 0.0002, "loss": 1.6542, "step": 46830 }, { "epoch": 0.19, "grad_norm": 3.1538755893707275, "learning_rate": 0.0002, "loss": 1.6581, "step": 46840 }, { "epoch": 0.19, "grad_norm": 2.4797799587249756, "learning_rate": 0.0002, "loss": 1.7691, "step": 46850 }, { "epoch": 0.19, "grad_norm": 6.53585147857666, "learning_rate": 0.0002, "loss": 1.6, "step": 46860 }, { "epoch": 0.19, "grad_norm": 2.5601234436035156, "learning_rate": 0.0002, "loss": 1.3524, "step": 46870 }, { "epoch": 0.19, "grad_norm": 2.36303973197937, "learning_rate": 0.0002, "loss": 1.4523, "step": 46880 }, { "epoch": 0.19, "grad_norm": 2.166004180908203, "learning_rate": 0.0002, "loss": 1.5699, "step": 46890 }, { "epoch": 0.19, "grad_norm": 2.0082271099090576, "learning_rate": 0.0002, "loss": 1.3321, "step": 46900 }, { "epoch": 0.19, "grad_norm": 1.170238971710205, "learning_rate": 0.0002, "loss": 1.553, "step": 46910 }, { "epoch": 0.19, "grad_norm": 3.941288709640503, "learning_rate": 0.0002, "loss": 1.7354, "step": 46920 }, { "epoch": 0.19, "grad_norm": 1.7389456033706665, "learning_rate": 0.0002, "loss": 1.539, "step": 46930 }, { "epoch": 0.19, "grad_norm": 2.759589672088623, "learning_rate": 0.0002, "loss": 1.6193, "step": 46940 }, { "epoch": 0.19, "grad_norm": 1.872680425643921, "learning_rate": 0.0002, "loss": 1.7029, "step": 46950 }, { "epoch": 0.19, "grad_norm": 3.3521926403045654, "learning_rate": 0.0002, "loss": 1.4984, "step": 46960 }, { "epoch": 0.19, "grad_norm": 3.8955960273742676, "learning_rate": 0.0002, "loss": 1.4139, "step": 46970 }, { "epoch": 0.19, "grad_norm": 3.4761147499084473, "learning_rate": 0.0002, "loss": 1.5652, "step": 46980 }, { "epoch": 0.19, "grad_norm": 3.374523162841797, "learning_rate": 0.0002, "loss": 1.4518, "step": 46990 }, { "epoch": 0.19, "grad_norm": 3.8256447315216064, "learning_rate": 0.0002, "loss": 1.5268, "step": 47000 }, { "epoch": 0.19, "grad_norm": 3.444023847579956, "learning_rate": 0.0002, "loss": 1.6072, "step": 47010 }, { "epoch": 0.19, "grad_norm": 2.9497973918914795, "learning_rate": 0.0002, "loss": 1.5529, "step": 47020 }, { "epoch": 0.19, "grad_norm": 2.38116717338562, "learning_rate": 0.0002, "loss": 1.5453, "step": 47030 }, { "epoch": 0.19, "grad_norm": 4.319930076599121, "learning_rate": 0.0002, "loss": 1.6743, "step": 47040 }, { "epoch": 0.19, "grad_norm": 2.1064910888671875, "learning_rate": 0.0002, "loss": 1.592, "step": 47050 }, { "epoch": 0.19, "grad_norm": 3.966454267501831, "learning_rate": 0.0002, "loss": 1.2765, "step": 47060 }, { "epoch": 0.19, "grad_norm": 3.0456459522247314, "learning_rate": 0.0002, "loss": 1.6984, "step": 47070 }, { "epoch": 0.19, "grad_norm": 1.56630277633667, "learning_rate": 0.0002, "loss": 1.5512, "step": 47080 }, { "epoch": 0.19, "grad_norm": 4.347168445587158, "learning_rate": 0.0002, "loss": 1.5205, "step": 47090 }, { "epoch": 0.19, "grad_norm": 4.55519437789917, "learning_rate": 0.0002, "loss": 1.4041, "step": 47100 }, { "epoch": 0.19, "grad_norm": 1.6122335195541382, "learning_rate": 0.0002, "loss": 1.5958, "step": 47110 }, { "epoch": 0.19, "grad_norm": 1.9443349838256836, "learning_rate": 0.0002, "loss": 1.6079, "step": 47120 }, { "epoch": 0.19, "grad_norm": 5.068639755249023, "learning_rate": 0.0002, "loss": 1.5233, "step": 47130 }, { "epoch": 0.19, "grad_norm": 2.6999526023864746, "learning_rate": 0.0002, "loss": 1.6835, "step": 47140 }, { "epoch": 0.19, "grad_norm": 2.6446595191955566, "learning_rate": 0.0002, "loss": 1.6496, "step": 47150 }, { "epoch": 0.19, "grad_norm": 2.645847797393799, "learning_rate": 0.0002, "loss": 1.5649, "step": 47160 }, { "epoch": 0.19, "grad_norm": 1.8562599420547485, "learning_rate": 0.0002, "loss": 1.4752, "step": 47170 }, { "epoch": 0.19, "grad_norm": 3.5951452255249023, "learning_rate": 0.0002, "loss": 1.5524, "step": 47180 }, { "epoch": 0.19, "grad_norm": 2.93753981590271, "learning_rate": 0.0002, "loss": 1.6374, "step": 47190 }, { "epoch": 0.19, "grad_norm": 1.274272084236145, "learning_rate": 0.0002, "loss": 1.7605, "step": 47200 }, { "epoch": 0.19, "grad_norm": 3.8194892406463623, "learning_rate": 0.0002, "loss": 1.5649, "step": 47210 }, { "epoch": 0.19, "grad_norm": 5.352668285369873, "learning_rate": 0.0002, "loss": 1.7883, "step": 47220 }, { "epoch": 0.19, "grad_norm": 3.7672414779663086, "learning_rate": 0.0002, "loss": 1.548, "step": 47230 }, { "epoch": 0.19, "grad_norm": 2.066175937652588, "learning_rate": 0.0002, "loss": 1.5154, "step": 47240 }, { "epoch": 0.19, "grad_norm": 2.530872344970703, "learning_rate": 0.0002, "loss": 1.4203, "step": 47250 }, { "epoch": 0.19, "grad_norm": 5.610264301300049, "learning_rate": 0.0002, "loss": 1.607, "step": 47260 }, { "epoch": 0.19, "grad_norm": 2.929863214492798, "learning_rate": 0.0002, "loss": 1.5735, "step": 47270 }, { "epoch": 0.19, "grad_norm": 2.842566728591919, "learning_rate": 0.0002, "loss": 1.6431, "step": 47280 }, { "epoch": 0.19, "grad_norm": 2.7723007202148438, "learning_rate": 0.0002, "loss": 1.6836, "step": 47290 }, { "epoch": 0.19, "grad_norm": 2.090160369873047, "learning_rate": 0.0002, "loss": 1.3875, "step": 47300 }, { "epoch": 0.19, "grad_norm": 2.3331427574157715, "learning_rate": 0.0002, "loss": 1.4266, "step": 47310 }, { "epoch": 0.19, "grad_norm": 4.989058017730713, "learning_rate": 0.0002, "loss": 1.4663, "step": 47320 }, { "epoch": 0.19, "grad_norm": 2.8257172107696533, "learning_rate": 0.0002, "loss": 1.4845, "step": 47330 }, { "epoch": 0.19, "grad_norm": 4.112755298614502, "learning_rate": 0.0002, "loss": 1.5235, "step": 47340 }, { "epoch": 0.19, "grad_norm": 2.101193428039551, "learning_rate": 0.0002, "loss": 1.5073, "step": 47350 }, { "epoch": 0.19, "grad_norm": 2.7201473712921143, "learning_rate": 0.0002, "loss": 1.5245, "step": 47360 }, { "epoch": 0.19, "grad_norm": 1.4537240266799927, "learning_rate": 0.0002, "loss": 1.4893, "step": 47370 }, { "epoch": 0.19, "grad_norm": 1.7321292161941528, "learning_rate": 0.0002, "loss": 1.5753, "step": 47380 }, { "epoch": 0.19, "grad_norm": 2.033590078353882, "learning_rate": 0.0002, "loss": 1.4289, "step": 47390 }, { "epoch": 0.19, "grad_norm": 3.418403148651123, "learning_rate": 0.0002, "loss": 1.5384, "step": 47400 }, { "epoch": 0.19, "grad_norm": 2.6333374977111816, "learning_rate": 0.0002, "loss": 1.5195, "step": 47410 }, { "epoch": 0.19, "grad_norm": 3.673658847808838, "learning_rate": 0.0002, "loss": 1.5389, "step": 47420 }, { "epoch": 0.19, "grad_norm": 2.5854718685150146, "learning_rate": 0.0002, "loss": 1.4895, "step": 47430 }, { "epoch": 0.19, "grad_norm": 3.490511894226074, "learning_rate": 0.0002, "loss": 1.6148, "step": 47440 }, { "epoch": 0.19, "grad_norm": 2.712301254272461, "learning_rate": 0.0002, "loss": 1.5053, "step": 47450 }, { "epoch": 0.19, "grad_norm": 2.93495512008667, "learning_rate": 0.0002, "loss": 1.6, "step": 47460 }, { "epoch": 0.19, "grad_norm": 2.0253870487213135, "learning_rate": 0.0002, "loss": 1.4451, "step": 47470 }, { "epoch": 0.19, "grad_norm": 0.9066154360771179, "learning_rate": 0.0002, "loss": 1.4546, "step": 47480 }, { "epoch": 0.19, "grad_norm": 3.5329201221466064, "learning_rate": 0.0002, "loss": 1.4429, "step": 47490 }, { "epoch": 0.19, "grad_norm": 1.9005070924758911, "learning_rate": 0.0002, "loss": 1.4489, "step": 47500 }, { "epoch": 0.19, "grad_norm": 2.8400638103485107, "learning_rate": 0.0002, "loss": 1.4981, "step": 47510 }, { "epoch": 0.19, "grad_norm": 2.64467191696167, "learning_rate": 0.0002, "loss": 1.7784, "step": 47520 }, { "epoch": 0.19, "grad_norm": 2.0048978328704834, "learning_rate": 0.0002, "loss": 1.6431, "step": 47530 }, { "epoch": 0.19, "grad_norm": 2.548253297805786, "learning_rate": 0.0002, "loss": 1.5262, "step": 47540 }, { "epoch": 0.19, "grad_norm": 2.8129422664642334, "learning_rate": 0.0002, "loss": 1.4327, "step": 47550 }, { "epoch": 0.19, "grad_norm": 2.591996908187866, "learning_rate": 0.0002, "loss": 1.5299, "step": 47560 }, { "epoch": 0.19, "grad_norm": 2.541465997695923, "learning_rate": 0.0002, "loss": 1.8787, "step": 47570 }, { "epoch": 0.19, "grad_norm": 1.6532726287841797, "learning_rate": 0.0002, "loss": 1.2901, "step": 47580 }, { "epoch": 0.19, "grad_norm": 3.076955795288086, "learning_rate": 0.0002, "loss": 1.6512, "step": 47590 }, { "epoch": 0.19, "grad_norm": 3.78934645652771, "learning_rate": 0.0002, "loss": 1.4796, "step": 47600 }, { "epoch": 0.19, "grad_norm": 4.80752420425415, "learning_rate": 0.0002, "loss": 1.4989, "step": 47610 }, { "epoch": 0.19, "grad_norm": 1.9833402633666992, "learning_rate": 0.0002, "loss": 1.451, "step": 47620 }, { "epoch": 0.19, "grad_norm": 3.7019248008728027, "learning_rate": 0.0002, "loss": 1.4725, "step": 47630 }, { "epoch": 0.19, "grad_norm": 3.2925336360931396, "learning_rate": 0.0002, "loss": 1.4597, "step": 47640 }, { "epoch": 0.19, "grad_norm": 2.292196750640869, "learning_rate": 0.0002, "loss": 1.5732, "step": 47650 }, { "epoch": 0.19, "grad_norm": 5.3394036293029785, "learning_rate": 0.0002, "loss": 1.4518, "step": 47660 }, { "epoch": 0.19, "grad_norm": 3.386244773864746, "learning_rate": 0.0002, "loss": 1.5113, "step": 47670 }, { "epoch": 0.19, "grad_norm": 3.666278839111328, "learning_rate": 0.0002, "loss": 1.5187, "step": 47680 }, { "epoch": 0.19, "grad_norm": 2.5351204872131348, "learning_rate": 0.0002, "loss": 1.5785, "step": 47690 }, { "epoch": 0.19, "grad_norm": 5.1758880615234375, "learning_rate": 0.0002, "loss": 1.6496, "step": 47700 }, { "epoch": 0.19, "grad_norm": 5.453043460845947, "learning_rate": 0.0002, "loss": 1.7893, "step": 47710 }, { "epoch": 0.19, "grad_norm": 2.638218402862549, "learning_rate": 0.0002, "loss": 1.5094, "step": 47720 }, { "epoch": 0.19, "grad_norm": 2.8253531455993652, "learning_rate": 0.0002, "loss": 1.5536, "step": 47730 }, { "epoch": 0.19, "grad_norm": 2.77878475189209, "learning_rate": 0.0002, "loss": 1.6684, "step": 47740 }, { "epoch": 0.19, "grad_norm": 2.9331438541412354, "learning_rate": 0.0002, "loss": 1.479, "step": 47750 }, { "epoch": 0.19, "grad_norm": 2.3899362087249756, "learning_rate": 0.0002, "loss": 1.5163, "step": 47760 }, { "epoch": 0.19, "grad_norm": 4.684146881103516, "learning_rate": 0.0002, "loss": 1.5721, "step": 47770 }, { "epoch": 0.19, "grad_norm": 2.8243424892425537, "learning_rate": 0.0002, "loss": 1.7356, "step": 47780 }, { "epoch": 0.19, "grad_norm": 3.375403881072998, "learning_rate": 0.0002, "loss": 1.453, "step": 47790 }, { "epoch": 0.19, "grad_norm": 2.981501817703247, "learning_rate": 0.0002, "loss": 1.889, "step": 47800 }, { "epoch": 0.19, "grad_norm": 3.1993207931518555, "learning_rate": 0.0002, "loss": 1.6692, "step": 47810 }, { "epoch": 0.19, "grad_norm": 2.591370105743408, "learning_rate": 0.0002, "loss": 1.3036, "step": 47820 }, { "epoch": 0.19, "grad_norm": 3.4168829917907715, "learning_rate": 0.0002, "loss": 1.505, "step": 47830 }, { "epoch": 0.19, "grad_norm": 2.505629539489746, "learning_rate": 0.0002, "loss": 1.6747, "step": 47840 }, { "epoch": 0.19, "grad_norm": 2.162641763687134, "learning_rate": 0.0002, "loss": 1.6004, "step": 47850 }, { "epoch": 0.19, "grad_norm": 2.4465525150299072, "learning_rate": 0.0002, "loss": 1.6141, "step": 47860 }, { "epoch": 0.19, "grad_norm": 3.534691333770752, "learning_rate": 0.0002, "loss": 1.5642, "step": 47870 }, { "epoch": 0.19, "grad_norm": 3.8943779468536377, "learning_rate": 0.0002, "loss": 1.4985, "step": 47880 }, { "epoch": 0.19, "grad_norm": 1.8407772779464722, "learning_rate": 0.0002, "loss": 1.6191, "step": 47890 }, { "epoch": 0.19, "grad_norm": 2.616121292114258, "learning_rate": 0.0002, "loss": 1.4844, "step": 47900 }, { "epoch": 0.2, "grad_norm": 3.280644416809082, "learning_rate": 0.0002, "loss": 2.0092, "step": 47910 }, { "epoch": 0.2, "grad_norm": 1.9505997896194458, "learning_rate": 0.0002, "loss": 1.2405, "step": 47920 }, { "epoch": 0.2, "grad_norm": 7.2726616859436035, "learning_rate": 0.0002, "loss": 1.6111, "step": 47930 }, { "epoch": 0.2, "grad_norm": 2.9870007038116455, "learning_rate": 0.0002, "loss": 1.5082, "step": 47940 }, { "epoch": 0.2, "grad_norm": 2.7724978923797607, "learning_rate": 0.0002, "loss": 1.4738, "step": 47950 }, { "epoch": 0.2, "grad_norm": 2.505686044692993, "learning_rate": 0.0002, "loss": 1.4479, "step": 47960 }, { "epoch": 0.2, "grad_norm": 11.484084129333496, "learning_rate": 0.0002, "loss": 1.3775, "step": 47970 }, { "epoch": 0.2, "grad_norm": 2.601405620574951, "learning_rate": 0.0002, "loss": 1.7681, "step": 47980 }, { "epoch": 0.2, "grad_norm": 2.0015721321105957, "learning_rate": 0.0002, "loss": 1.7499, "step": 47990 }, { "epoch": 0.2, "grad_norm": 2.4077112674713135, "learning_rate": 0.0002, "loss": 1.436, "step": 48000 }, { "epoch": 0.2, "grad_norm": 2.190340042114258, "learning_rate": 0.0002, "loss": 1.5749, "step": 48010 }, { "epoch": 0.2, "grad_norm": 5.549563884735107, "learning_rate": 0.0002, "loss": 1.4739, "step": 48020 }, { "epoch": 0.2, "grad_norm": 3.244746685028076, "learning_rate": 0.0002, "loss": 1.5288, "step": 48030 }, { "epoch": 0.2, "grad_norm": 2.1040422916412354, "learning_rate": 0.0002, "loss": 1.4983, "step": 48040 }, { "epoch": 0.2, "grad_norm": 3.7202348709106445, "learning_rate": 0.0002, "loss": 1.5494, "step": 48050 }, { "epoch": 0.2, "grad_norm": 3.6685798168182373, "learning_rate": 0.0002, "loss": 1.7, "step": 48060 }, { "epoch": 0.2, "grad_norm": 3.948237180709839, "learning_rate": 0.0002, "loss": 1.4147, "step": 48070 }, { "epoch": 0.2, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.372, "step": 48080 }, { "epoch": 0.2, "grad_norm": 2.1911075115203857, "learning_rate": 0.0002, "loss": 1.5917, "step": 48090 }, { "epoch": 0.2, "grad_norm": 3.210340976715088, "learning_rate": 0.0002, "loss": 1.5721, "step": 48100 }, { "epoch": 0.2, "grad_norm": 1.5937234163284302, "learning_rate": 0.0002, "loss": 1.6179, "step": 48110 }, { "epoch": 0.2, "grad_norm": 3.257272720336914, "learning_rate": 0.0002, "loss": 1.5914, "step": 48120 }, { "epoch": 0.2, "grad_norm": 2.1335196495056152, "learning_rate": 0.0002, "loss": 1.5687, "step": 48130 }, { "epoch": 0.2, "grad_norm": 1.6608127355575562, "learning_rate": 0.0002, "loss": 1.443, "step": 48140 }, { "epoch": 0.2, "grad_norm": 2.642122745513916, "learning_rate": 0.0002, "loss": 1.8058, "step": 48150 }, { "epoch": 0.2, "grad_norm": 1.9167346954345703, "learning_rate": 0.0002, "loss": 1.5292, "step": 48160 }, { "epoch": 0.2, "grad_norm": 3.5910706520080566, "learning_rate": 0.0002, "loss": 1.4674, "step": 48170 }, { "epoch": 0.2, "grad_norm": 2.363489866256714, "learning_rate": 0.0002, "loss": 1.2974, "step": 48180 }, { "epoch": 0.2, "grad_norm": 2.0337886810302734, "learning_rate": 0.0002, "loss": 1.5059, "step": 48190 }, { "epoch": 0.2, "grad_norm": 2.664360284805298, "learning_rate": 0.0002, "loss": 1.6197, "step": 48200 }, { "epoch": 0.2, "grad_norm": 3.5428595542907715, "learning_rate": 0.0002, "loss": 1.5929, "step": 48210 }, { "epoch": 0.2, "grad_norm": 2.0103869438171387, "learning_rate": 0.0002, "loss": 1.6513, "step": 48220 }, { "epoch": 0.2, "grad_norm": 6.412272930145264, "learning_rate": 0.0002, "loss": 1.5256, "step": 48230 }, { "epoch": 0.2, "grad_norm": 2.3093013763427734, "learning_rate": 0.0002, "loss": 1.561, "step": 48240 }, { "epoch": 0.2, "grad_norm": 3.2189152240753174, "learning_rate": 0.0002, "loss": 1.6369, "step": 48250 }, { "epoch": 0.2, "grad_norm": 3.7622122764587402, "learning_rate": 0.0002, "loss": 1.3, "step": 48260 }, { "epoch": 0.2, "grad_norm": 2.8824150562286377, "learning_rate": 0.0002, "loss": 1.4701, "step": 48270 }, { "epoch": 0.2, "grad_norm": 4.447414875030518, "learning_rate": 0.0002, "loss": 1.505, "step": 48280 }, { "epoch": 0.2, "grad_norm": 2.2963099479675293, "learning_rate": 0.0002, "loss": 1.4902, "step": 48290 }, { "epoch": 0.2, "grad_norm": 2.6879825592041016, "learning_rate": 0.0002, "loss": 1.2877, "step": 48300 }, { "epoch": 0.2, "grad_norm": 3.2864882946014404, "learning_rate": 0.0002, "loss": 1.6913, "step": 48310 }, { "epoch": 0.2, "grad_norm": 3.2635204792022705, "learning_rate": 0.0002, "loss": 1.4955, "step": 48320 }, { "epoch": 0.2, "grad_norm": 2.362456798553467, "learning_rate": 0.0002, "loss": 1.7944, "step": 48330 }, { "epoch": 0.2, "grad_norm": 6.222849369049072, "learning_rate": 0.0002, "loss": 1.1822, "step": 48340 }, { "epoch": 0.2, "grad_norm": 3.9686450958251953, "learning_rate": 0.0002, "loss": 1.3498, "step": 48350 }, { "epoch": 0.2, "grad_norm": 2.4641005992889404, "learning_rate": 0.0002, "loss": 1.5681, "step": 48360 }, { "epoch": 0.2, "grad_norm": 2.6811482906341553, "learning_rate": 0.0002, "loss": 1.6899, "step": 48370 }, { "epoch": 0.2, "grad_norm": 3.662914276123047, "learning_rate": 0.0002, "loss": 1.5234, "step": 48380 }, { "epoch": 0.2, "grad_norm": 3.0865564346313477, "learning_rate": 0.0002, "loss": 1.5587, "step": 48390 }, { "epoch": 0.2, "grad_norm": 3.655219316482544, "learning_rate": 0.0002, "loss": 1.6817, "step": 48400 }, { "epoch": 0.2, "grad_norm": 1.5078550577163696, "learning_rate": 0.0002, "loss": 1.5647, "step": 48410 }, { "epoch": 0.2, "grad_norm": 3.1856002807617188, "learning_rate": 0.0002, "loss": 1.5306, "step": 48420 }, { "epoch": 0.2, "grad_norm": 3.3227810859680176, "learning_rate": 0.0002, "loss": 1.5538, "step": 48430 }, { "epoch": 0.2, "grad_norm": 2.561861753463745, "learning_rate": 0.0002, "loss": 1.6337, "step": 48440 }, { "epoch": 0.2, "grad_norm": 3.7189927101135254, "learning_rate": 0.0002, "loss": 1.5547, "step": 48450 }, { "epoch": 0.2, "grad_norm": 5.6063361167907715, "learning_rate": 0.0002, "loss": 1.5061, "step": 48460 }, { "epoch": 0.2, "grad_norm": 2.7780067920684814, "learning_rate": 0.0002, "loss": 1.6694, "step": 48470 }, { "epoch": 0.2, "grad_norm": 1.869841456413269, "learning_rate": 0.0002, "loss": 1.6224, "step": 48480 }, { "epoch": 0.2, "grad_norm": 2.572420120239258, "learning_rate": 0.0002, "loss": 1.2131, "step": 48490 }, { "epoch": 0.2, "grad_norm": 2.381848096847534, "learning_rate": 0.0002, "loss": 1.4651, "step": 48500 }, { "epoch": 0.2, "grad_norm": 3.7298762798309326, "learning_rate": 0.0002, "loss": 1.6472, "step": 48510 }, { "epoch": 0.2, "grad_norm": 2.985779047012329, "learning_rate": 0.0002, "loss": 1.619, "step": 48520 }, { "epoch": 0.2, "grad_norm": 3.1771440505981445, "learning_rate": 0.0002, "loss": 1.4898, "step": 48530 }, { "epoch": 0.2, "grad_norm": 2.54561710357666, "learning_rate": 0.0002, "loss": 1.6539, "step": 48540 }, { "epoch": 0.2, "grad_norm": 2.8374173641204834, "learning_rate": 0.0002, "loss": 1.5289, "step": 48550 }, { "epoch": 0.2, "grad_norm": 2.7043027877807617, "learning_rate": 0.0002, "loss": 1.5467, "step": 48560 }, { "epoch": 0.2, "grad_norm": 2.4651131629943848, "learning_rate": 0.0002, "loss": 1.59, "step": 48570 }, { "epoch": 0.2, "grad_norm": 2.801712989807129, "learning_rate": 0.0002, "loss": 1.5125, "step": 48580 }, { "epoch": 0.2, "grad_norm": 1.9479365348815918, "learning_rate": 0.0002, "loss": 1.6965, "step": 48590 }, { "epoch": 0.2, "grad_norm": 2.2152018547058105, "learning_rate": 0.0002, "loss": 1.588, "step": 48600 }, { "epoch": 0.2, "grad_norm": 3.1699957847595215, "learning_rate": 0.0002, "loss": 1.5071, "step": 48610 }, { "epoch": 0.2, "grad_norm": 2.214650869369507, "learning_rate": 0.0002, "loss": 1.598, "step": 48620 }, { "epoch": 0.2, "grad_norm": 2.2255117893218994, "learning_rate": 0.0002, "loss": 1.4679, "step": 48630 }, { "epoch": 0.2, "grad_norm": 3.3616273403167725, "learning_rate": 0.0002, "loss": 1.5858, "step": 48640 }, { "epoch": 0.2, "grad_norm": 3.3097143173217773, "learning_rate": 0.0002, "loss": 1.6308, "step": 48650 }, { "epoch": 0.2, "grad_norm": 1.9530147314071655, "learning_rate": 0.0002, "loss": 1.4521, "step": 48660 }, { "epoch": 0.2, "grad_norm": 2.6058287620544434, "learning_rate": 0.0002, "loss": 1.497, "step": 48670 }, { "epoch": 0.2, "grad_norm": 6.355575084686279, "learning_rate": 0.0002, "loss": 1.5602, "step": 48680 }, { "epoch": 0.2, "grad_norm": 2.8670620918273926, "learning_rate": 0.0002, "loss": 1.7037, "step": 48690 }, { "epoch": 0.2, "grad_norm": 2.2017476558685303, "learning_rate": 0.0002, "loss": 1.6634, "step": 48700 }, { "epoch": 0.2, "grad_norm": 2.653005599975586, "learning_rate": 0.0002, "loss": 1.4372, "step": 48710 }, { "epoch": 0.2, "grad_norm": 2.954820156097412, "learning_rate": 0.0002, "loss": 1.5613, "step": 48720 }, { "epoch": 0.2, "grad_norm": 2.5734758377075195, "learning_rate": 0.0002, "loss": 1.6067, "step": 48730 }, { "epoch": 0.2, "grad_norm": 2.234938383102417, "learning_rate": 0.0002, "loss": 1.5947, "step": 48740 }, { "epoch": 0.2, "grad_norm": 2.5157060623168945, "learning_rate": 0.0002, "loss": 1.4349, "step": 48750 }, { "epoch": 0.2, "grad_norm": 3.748746871948242, "learning_rate": 0.0002, "loss": 1.6808, "step": 48760 }, { "epoch": 0.2, "grad_norm": 1.5348039865493774, "learning_rate": 0.0002, "loss": 1.6545, "step": 48770 }, { "epoch": 0.2, "grad_norm": 3.188854694366455, "learning_rate": 0.0002, "loss": 1.6882, "step": 48780 }, { "epoch": 0.2, "grad_norm": 3.185046672821045, "learning_rate": 0.0002, "loss": 1.6481, "step": 48790 }, { "epoch": 0.2, "grad_norm": 4.061102390289307, "learning_rate": 0.0002, "loss": 1.5437, "step": 48800 }, { "epoch": 0.2, "grad_norm": 5.885806083679199, "learning_rate": 0.0002, "loss": 1.5541, "step": 48810 }, { "epoch": 0.2, "grad_norm": 1.5371371507644653, "learning_rate": 0.0002, "loss": 1.5049, "step": 48820 }, { "epoch": 0.2, "grad_norm": 2.4071595668792725, "learning_rate": 0.0002, "loss": 1.5668, "step": 48830 }, { "epoch": 0.2, "grad_norm": 1.7393593788146973, "learning_rate": 0.0002, "loss": 1.7897, "step": 48840 }, { "epoch": 0.2, "grad_norm": 3.335416793823242, "learning_rate": 0.0002, "loss": 1.571, "step": 48850 }, { "epoch": 0.2, "grad_norm": 4.086038112640381, "learning_rate": 0.0002, "loss": 1.5473, "step": 48860 }, { "epoch": 0.2, "grad_norm": 1.491310715675354, "learning_rate": 0.0002, "loss": 1.6169, "step": 48870 }, { "epoch": 0.2, "grad_norm": 2.1105196475982666, "learning_rate": 0.0002, "loss": 1.4196, "step": 48880 }, { "epoch": 0.2, "grad_norm": 1.8243281841278076, "learning_rate": 0.0002, "loss": 1.6617, "step": 48890 }, { "epoch": 0.2, "grad_norm": 2.3735439777374268, "learning_rate": 0.0002, "loss": 1.6321, "step": 48900 }, { "epoch": 0.2, "grad_norm": 5.162319660186768, "learning_rate": 0.0002, "loss": 1.4284, "step": 48910 }, { "epoch": 0.2, "grad_norm": 2.8411359786987305, "learning_rate": 0.0002, "loss": 1.7834, "step": 48920 }, { "epoch": 0.2, "grad_norm": 2.775380849838257, "learning_rate": 0.0002, "loss": 1.7568, "step": 48930 }, { "epoch": 0.2, "grad_norm": 3.7760984897613525, "learning_rate": 0.0002, "loss": 1.5931, "step": 48940 }, { "epoch": 0.2, "grad_norm": 3.808043956756592, "learning_rate": 0.0002, "loss": 1.5203, "step": 48950 }, { "epoch": 0.2, "grad_norm": 3.0852463245391846, "learning_rate": 0.0002, "loss": 1.4194, "step": 48960 }, { "epoch": 0.2, "grad_norm": 2.3009235858917236, "learning_rate": 0.0002, "loss": 1.4355, "step": 48970 }, { "epoch": 0.2, "grad_norm": 3.963066816329956, "learning_rate": 0.0002, "loss": 1.6568, "step": 48980 }, { "epoch": 0.2, "grad_norm": 5.087075233459473, "learning_rate": 0.0002, "loss": 1.5289, "step": 48990 }, { "epoch": 0.2, "grad_norm": 4.279707908630371, "learning_rate": 0.0002, "loss": 1.3117, "step": 49000 }, { "epoch": 0.2, "grad_norm": 3.7203779220581055, "learning_rate": 0.0002, "loss": 1.6811, "step": 49010 }, { "epoch": 0.2, "grad_norm": 2.4601430892944336, "learning_rate": 0.0002, "loss": 1.489, "step": 49020 }, { "epoch": 0.2, "grad_norm": 2.571000337600708, "learning_rate": 0.0002, "loss": 1.7407, "step": 49030 }, { "epoch": 0.2, "grad_norm": 3.026801586151123, "learning_rate": 0.0002, "loss": 1.5548, "step": 49040 }, { "epoch": 0.2, "grad_norm": 3.0027706623077393, "learning_rate": 0.0002, "loss": 1.6255, "step": 49050 }, { "epoch": 0.2, "grad_norm": 3.115511417388916, "learning_rate": 0.0002, "loss": 1.6005, "step": 49060 }, { "epoch": 0.2, "grad_norm": 4.642597675323486, "learning_rate": 0.0002, "loss": 1.6629, "step": 49070 }, { "epoch": 0.2, "grad_norm": 4.647183895111084, "learning_rate": 0.0002, "loss": 1.547, "step": 49080 }, { "epoch": 0.2, "grad_norm": 1.4328185319900513, "learning_rate": 0.0002, "loss": 1.3756, "step": 49090 }, { "epoch": 0.2, "grad_norm": 3.0808682441711426, "learning_rate": 0.0002, "loss": 1.4157, "step": 49100 }, { "epoch": 0.2, "grad_norm": 2.8968777656555176, "learning_rate": 0.0002, "loss": 1.3642, "step": 49110 }, { "epoch": 0.2, "grad_norm": 3.738605499267578, "learning_rate": 0.0002, "loss": 1.5188, "step": 49120 }, { "epoch": 0.2, "grad_norm": 1.646828532218933, "learning_rate": 0.0002, "loss": 1.5663, "step": 49130 }, { "epoch": 0.2, "grad_norm": 3.2848920822143555, "learning_rate": 0.0002, "loss": 1.6221, "step": 49140 }, { "epoch": 0.2, "grad_norm": 3.2489430904388428, "learning_rate": 0.0002, "loss": 1.4258, "step": 49150 }, { "epoch": 0.2, "grad_norm": 3.603700637817383, "learning_rate": 0.0002, "loss": 1.4873, "step": 49160 }, { "epoch": 0.2, "grad_norm": 4.082406997680664, "learning_rate": 0.0002, "loss": 1.2965, "step": 49170 }, { "epoch": 0.2, "grad_norm": 2.832092046737671, "learning_rate": 0.0002, "loss": 1.6658, "step": 49180 }, { "epoch": 0.2, "grad_norm": 2.412487745285034, "learning_rate": 0.0002, "loss": 1.4836, "step": 49190 }, { "epoch": 0.2, "grad_norm": 3.32515811920166, "learning_rate": 0.0002, "loss": 1.5099, "step": 49200 }, { "epoch": 0.2, "grad_norm": 2.441305637359619, "learning_rate": 0.0002, "loss": 1.5526, "step": 49210 }, { "epoch": 0.2, "grad_norm": 2.255629301071167, "learning_rate": 0.0002, "loss": 1.5366, "step": 49220 }, { "epoch": 0.2, "grad_norm": 2.5637381076812744, "learning_rate": 0.0002, "loss": 1.2569, "step": 49230 }, { "epoch": 0.2, "grad_norm": 3.159043550491333, "learning_rate": 0.0002, "loss": 1.3895, "step": 49240 }, { "epoch": 0.2, "grad_norm": 3.0535125732421875, "learning_rate": 0.0002, "loss": 1.5941, "step": 49250 }, { "epoch": 0.2, "grad_norm": 2.651613235473633, "learning_rate": 0.0002, "loss": 1.4453, "step": 49260 }, { "epoch": 0.2, "grad_norm": 2.280780076980591, "learning_rate": 0.0002, "loss": 1.7121, "step": 49270 }, { "epoch": 0.2, "grad_norm": 2.912083387374878, "learning_rate": 0.0002, "loss": 1.5741, "step": 49280 }, { "epoch": 0.2, "grad_norm": 2.6824960708618164, "learning_rate": 0.0002, "loss": 1.5728, "step": 49290 }, { "epoch": 0.2, "grad_norm": 5.01777982711792, "learning_rate": 0.0002, "loss": 1.6137, "step": 49300 }, { "epoch": 0.2, "grad_norm": 3.129955530166626, "learning_rate": 0.0002, "loss": 1.7179, "step": 49310 }, { "epoch": 0.2, "grad_norm": 3.255378246307373, "learning_rate": 0.0002, "loss": 1.5126, "step": 49320 }, { "epoch": 0.2, "grad_norm": 3.481161594390869, "learning_rate": 0.0002, "loss": 1.5363, "step": 49330 }, { "epoch": 0.2, "grad_norm": 1.9805136919021606, "learning_rate": 0.0002, "loss": 1.7131, "step": 49340 }, { "epoch": 0.2, "grad_norm": 2.3450591564178467, "learning_rate": 0.0002, "loss": 1.6857, "step": 49350 }, { "epoch": 0.2, "grad_norm": 3.356813430786133, "learning_rate": 0.0002, "loss": 1.3758, "step": 49360 }, { "epoch": 0.2, "grad_norm": 3.9998350143432617, "learning_rate": 0.0002, "loss": 1.5054, "step": 49370 }, { "epoch": 0.2, "grad_norm": 2.9136698246002197, "learning_rate": 0.0002, "loss": 1.708, "step": 49380 }, { "epoch": 0.2, "grad_norm": 2.556993007659912, "learning_rate": 0.0002, "loss": 1.6039, "step": 49390 }, { "epoch": 0.2, "grad_norm": 2.7922708988189697, "learning_rate": 0.0002, "loss": 1.3428, "step": 49400 }, { "epoch": 0.2, "grad_norm": 1.6588753461837769, "learning_rate": 0.0002, "loss": 1.6412, "step": 49410 }, { "epoch": 0.2, "grad_norm": 2.717324733734131, "learning_rate": 0.0002, "loss": 1.6057, "step": 49420 }, { "epoch": 0.2, "grad_norm": 2.8602099418640137, "learning_rate": 0.0002, "loss": 1.7801, "step": 49430 }, { "epoch": 0.2, "grad_norm": 3.4883458614349365, "learning_rate": 0.0002, "loss": 1.5845, "step": 49440 }, { "epoch": 0.2, "grad_norm": 2.5209858417510986, "learning_rate": 0.0002, "loss": 1.4714, "step": 49450 }, { "epoch": 0.2, "grad_norm": 17.849702835083008, "learning_rate": 0.0002, "loss": 1.6868, "step": 49460 }, { "epoch": 0.2, "grad_norm": 3.8694779872894287, "learning_rate": 0.0002, "loss": 1.5156, "step": 49470 }, { "epoch": 0.2, "grad_norm": 2.883288621902466, "learning_rate": 0.0002, "loss": 1.5589, "step": 49480 }, { "epoch": 0.2, "grad_norm": 2.978400945663452, "learning_rate": 0.0002, "loss": 1.674, "step": 49490 }, { "epoch": 0.2, "grad_norm": 2.419820785522461, "learning_rate": 0.0002, "loss": 1.7902, "step": 49500 }, { "epoch": 0.2, "grad_norm": 4.749875545501709, "learning_rate": 0.0002, "loss": 1.4792, "step": 49510 }, { "epoch": 0.2, "grad_norm": 2.4578402042388916, "learning_rate": 0.0002, "loss": 1.6217, "step": 49520 }, { "epoch": 0.2, "grad_norm": 2.3498849868774414, "learning_rate": 0.0002, "loss": 1.523, "step": 49530 }, { "epoch": 0.2, "grad_norm": 3.3350768089294434, "learning_rate": 0.0002, "loss": 1.722, "step": 49540 }, { "epoch": 0.2, "grad_norm": 1.6630748510360718, "learning_rate": 0.0002, "loss": 1.4637, "step": 49550 }, { "epoch": 0.2, "grad_norm": 2.4019734859466553, "learning_rate": 0.0002, "loss": 1.494, "step": 49560 }, { "epoch": 0.2, "grad_norm": 3.428039789199829, "learning_rate": 0.0002, "loss": 1.6, "step": 49570 }, { "epoch": 0.2, "grad_norm": 4.048640251159668, "learning_rate": 0.0002, "loss": 1.5948, "step": 49580 }, { "epoch": 0.2, "grad_norm": 2.9644625186920166, "learning_rate": 0.0002, "loss": 1.4945, "step": 49590 }, { "epoch": 0.2, "grad_norm": 6.16420316696167, "learning_rate": 0.0002, "loss": 1.605, "step": 49600 }, { "epoch": 0.2, "grad_norm": 3.7880237102508545, "learning_rate": 0.0002, "loss": 1.2517, "step": 49610 }, { "epoch": 0.2, "grad_norm": 4.049795627593994, "learning_rate": 0.0002, "loss": 1.5421, "step": 49620 }, { "epoch": 0.2, "grad_norm": 2.8006703853607178, "learning_rate": 0.0002, "loss": 1.5777, "step": 49630 }, { "epoch": 0.2, "grad_norm": 2.8593993186950684, "learning_rate": 0.0002, "loss": 1.6322, "step": 49640 }, { "epoch": 0.2, "grad_norm": 3.421477794647217, "learning_rate": 0.0002, "loss": 1.5397, "step": 49650 }, { "epoch": 0.2, "grad_norm": 4.536801338195801, "learning_rate": 0.0002, "loss": 1.9265, "step": 49660 }, { "epoch": 0.2, "grad_norm": 2.330113172531128, "learning_rate": 0.0002, "loss": 1.5257, "step": 49670 }, { "epoch": 0.2, "grad_norm": 6.753411293029785, "learning_rate": 0.0002, "loss": 1.5491, "step": 49680 }, { "epoch": 0.2, "grad_norm": 2.110049247741699, "learning_rate": 0.0002, "loss": 1.5518, "step": 49690 }, { "epoch": 0.2, "grad_norm": 3.859771251678467, "learning_rate": 0.0002, "loss": 1.5961, "step": 49700 }, { "epoch": 0.2, "grad_norm": 2.865497350692749, "learning_rate": 0.0002, "loss": 1.5908, "step": 49710 }, { "epoch": 0.2, "grad_norm": 2.059394121170044, "learning_rate": 0.0002, "loss": 1.5277, "step": 49720 }, { "epoch": 0.2, "grad_norm": 4.339895725250244, "learning_rate": 0.0002, "loss": 1.5005, "step": 49730 }, { "epoch": 0.2, "grad_norm": 2.183783769607544, "learning_rate": 0.0002, "loss": 1.5128, "step": 49740 }, { "epoch": 0.2, "grad_norm": 2.9588706493377686, "learning_rate": 0.0002, "loss": 1.4661, "step": 49750 }, { "epoch": 0.2, "grad_norm": 1.4629896879196167, "learning_rate": 0.0002, "loss": 1.4188, "step": 49760 }, { "epoch": 0.2, "grad_norm": 2.177025556564331, "learning_rate": 0.0002, "loss": 1.589, "step": 49770 }, { "epoch": 0.2, "grad_norm": 3.4298267364501953, "learning_rate": 0.0002, "loss": 1.5631, "step": 49780 }, { "epoch": 0.2, "grad_norm": 3.4418177604675293, "learning_rate": 0.0002, "loss": 1.5348, "step": 49790 }, { "epoch": 0.2, "grad_norm": 2.741347551345825, "learning_rate": 0.0002, "loss": 1.6735, "step": 49800 }, { "epoch": 0.2, "grad_norm": 3.415219306945801, "learning_rate": 0.0002, "loss": 1.5602, "step": 49810 }, { "epoch": 0.2, "grad_norm": 2.5038869380950928, "learning_rate": 0.0002, "loss": 1.6301, "step": 49820 }, { "epoch": 0.2, "grad_norm": 3.6167426109313965, "learning_rate": 0.0002, "loss": 1.6212, "step": 49830 }, { "epoch": 0.2, "grad_norm": 2.689197063446045, "learning_rate": 0.0002, "loss": 1.4164, "step": 49840 }, { "epoch": 0.2, "grad_norm": 1.5600320100784302, "learning_rate": 0.0002, "loss": 1.5127, "step": 49850 }, { "epoch": 0.2, "grad_norm": 2.96977162361145, "learning_rate": 0.0002, "loss": 1.7498, "step": 49860 }, { "epoch": 0.2, "grad_norm": 3.4320075511932373, "learning_rate": 0.0002, "loss": 1.41, "step": 49870 }, { "epoch": 0.2, "grad_norm": 3.694237470626831, "learning_rate": 0.0002, "loss": 1.9255, "step": 49880 }, { "epoch": 0.2, "grad_norm": 4.084372043609619, "learning_rate": 0.0002, "loss": 1.657, "step": 49890 }, { "epoch": 0.2, "grad_norm": 1.6368381977081299, "learning_rate": 0.0002, "loss": 1.6446, "step": 49900 }, { "epoch": 0.2, "grad_norm": 3.957195520401001, "learning_rate": 0.0002, "loss": 1.6543, "step": 49910 }, { "epoch": 0.2, "grad_norm": 4.063333511352539, "learning_rate": 0.0002, "loss": 1.7267, "step": 49920 }, { "epoch": 0.2, "grad_norm": 2.2629358768463135, "learning_rate": 0.0002, "loss": 1.5174, "step": 49930 }, { "epoch": 0.2, "grad_norm": 3.1102278232574463, "learning_rate": 0.0002, "loss": 1.6537, "step": 49940 }, { "epoch": 0.2, "grad_norm": 2.8030102252960205, "learning_rate": 0.0002, "loss": 1.3539, "step": 49950 }, { "epoch": 0.2, "grad_norm": 3.61991810798645, "learning_rate": 0.0002, "loss": 1.5781, "step": 49960 }, { "epoch": 0.2, "grad_norm": 2.0835254192352295, "learning_rate": 0.0002, "loss": 1.5343, "step": 49970 }, { "epoch": 0.2, "grad_norm": 4.082818031311035, "learning_rate": 0.0002, "loss": 1.7639, "step": 49980 }, { "epoch": 0.2, "grad_norm": 2.725011110305786, "learning_rate": 0.0002, "loss": 1.4366, "step": 49990 }, { "epoch": 0.2, "grad_norm": 2.1659581661224365, "learning_rate": 0.0002, "loss": 1.6314, "step": 50000 }, { "epoch": 0.2, "grad_norm": 3.4999382495880127, "learning_rate": 0.0002, "loss": 1.6105, "step": 50010 }, { "epoch": 0.2, "grad_norm": 3.5281620025634766, "learning_rate": 0.0002, "loss": 1.7078, "step": 50020 }, { "epoch": 0.2, "grad_norm": 3.484867811203003, "learning_rate": 0.0002, "loss": 1.5931, "step": 50030 }, { "epoch": 0.2, "grad_norm": 1.7185534238815308, "learning_rate": 0.0002, "loss": 1.5593, "step": 50040 }, { "epoch": 0.2, "grad_norm": 3.8871049880981445, "learning_rate": 0.0002, "loss": 1.3885, "step": 50050 }, { "epoch": 0.2, "grad_norm": 4.982044219970703, "learning_rate": 0.0002, "loss": 1.5178, "step": 50060 }, { "epoch": 0.2, "grad_norm": 2.6772377490997314, "learning_rate": 0.0002, "loss": 1.5481, "step": 50070 }, { "epoch": 0.2, "grad_norm": 3.439385175704956, "learning_rate": 0.0002, "loss": 1.6674, "step": 50080 }, { "epoch": 0.2, "grad_norm": 2.4087605476379395, "learning_rate": 0.0002, "loss": 1.4982, "step": 50090 }, { "epoch": 0.2, "grad_norm": 4.141027927398682, "learning_rate": 0.0002, "loss": 1.7882, "step": 50100 }, { "epoch": 0.2, "grad_norm": 2.9457650184631348, "learning_rate": 0.0002, "loss": 1.4503, "step": 50110 }, { "epoch": 0.2, "grad_norm": 2.993234395980835, "learning_rate": 0.0002, "loss": 1.6859, "step": 50120 }, { "epoch": 0.2, "grad_norm": 3.0459771156311035, "learning_rate": 0.0002, "loss": 1.5486, "step": 50130 }, { "epoch": 0.2, "grad_norm": 2.485246419906616, "learning_rate": 0.0002, "loss": 1.3006, "step": 50140 }, { "epoch": 0.2, "grad_norm": 3.645596981048584, "learning_rate": 0.0002, "loss": 1.5459, "step": 50150 }, { "epoch": 0.2, "grad_norm": 4.5607829093933105, "learning_rate": 0.0002, "loss": 1.5102, "step": 50160 }, { "epoch": 0.2, "grad_norm": 1.577681541442871, "learning_rate": 0.0002, "loss": 1.499, "step": 50170 }, { "epoch": 0.2, "grad_norm": 4.7367987632751465, "learning_rate": 0.0002, "loss": 1.5262, "step": 50180 }, { "epoch": 0.2, "grad_norm": 3.3382210731506348, "learning_rate": 0.0002, "loss": 1.4088, "step": 50190 }, { "epoch": 0.2, "grad_norm": 3.1945064067840576, "learning_rate": 0.0002, "loss": 1.2578, "step": 50200 }, { "epoch": 0.2, "grad_norm": 4.588756084442139, "learning_rate": 0.0002, "loss": 1.5447, "step": 50210 }, { "epoch": 0.2, "grad_norm": 1.9353364706039429, "learning_rate": 0.0002, "loss": 1.5636, "step": 50220 }, { "epoch": 0.2, "grad_norm": 1.3963377475738525, "learning_rate": 0.0002, "loss": 1.5636, "step": 50230 }, { "epoch": 0.2, "grad_norm": 3.1842570304870605, "learning_rate": 0.0002, "loss": 1.4474, "step": 50240 }, { "epoch": 0.2, "grad_norm": 3.5288896560668945, "learning_rate": 0.0002, "loss": 1.7559, "step": 50250 }, { "epoch": 0.2, "grad_norm": 4.466775894165039, "learning_rate": 0.0002, "loss": 1.7641, "step": 50260 }, { "epoch": 0.2, "grad_norm": 2.6660218238830566, "learning_rate": 0.0002, "loss": 1.6272, "step": 50270 }, { "epoch": 0.2, "grad_norm": 4.487199306488037, "learning_rate": 0.0002, "loss": 1.7588, "step": 50280 }, { "epoch": 0.2, "grad_norm": 2.4537501335144043, "learning_rate": 0.0002, "loss": 1.6962, "step": 50290 }, { "epoch": 0.2, "grad_norm": 1.9743520021438599, "learning_rate": 0.0002, "loss": 1.5353, "step": 50300 }, { "epoch": 0.2, "grad_norm": 3.8131706714630127, "learning_rate": 0.0002, "loss": 1.1925, "step": 50310 }, { "epoch": 0.2, "grad_norm": 6.127547264099121, "learning_rate": 0.0002, "loss": 1.5772, "step": 50320 }, { "epoch": 0.2, "grad_norm": 2.9481704235076904, "learning_rate": 0.0002, "loss": 1.4595, "step": 50330 }, { "epoch": 0.2, "grad_norm": 1.7869541645050049, "learning_rate": 0.0002, "loss": 1.5348, "step": 50340 }, { "epoch": 0.2, "grad_norm": 2.8745031356811523, "learning_rate": 0.0002, "loss": 1.6689, "step": 50350 }, { "epoch": 0.21, "grad_norm": 3.078998565673828, "learning_rate": 0.0002, "loss": 1.3739, "step": 50360 }, { "epoch": 0.21, "grad_norm": 4.132401943206787, "learning_rate": 0.0002, "loss": 1.4126, "step": 50370 }, { "epoch": 0.21, "grad_norm": 3.886589288711548, "learning_rate": 0.0002, "loss": 1.8569, "step": 50380 }, { "epoch": 0.21, "grad_norm": 3.835153818130493, "learning_rate": 0.0002, "loss": 1.7952, "step": 50390 }, { "epoch": 0.21, "grad_norm": 2.229534864425659, "learning_rate": 0.0002, "loss": 1.9586, "step": 50400 }, { "epoch": 0.21, "grad_norm": 3.4396369457244873, "learning_rate": 0.0002, "loss": 1.817, "step": 50410 }, { "epoch": 0.21, "grad_norm": 3.5373616218566895, "learning_rate": 0.0002, "loss": 1.5911, "step": 50420 }, { "epoch": 0.21, "grad_norm": 3.458209753036499, "learning_rate": 0.0002, "loss": 1.5141, "step": 50430 }, { "epoch": 0.21, "grad_norm": 2.6940813064575195, "learning_rate": 0.0002, "loss": 1.6036, "step": 50440 }, { "epoch": 0.21, "grad_norm": 1.7310768365859985, "learning_rate": 0.0002, "loss": 1.5104, "step": 50450 }, { "epoch": 0.21, "grad_norm": 2.2768900394439697, "learning_rate": 0.0002, "loss": 1.4927, "step": 50460 }, { "epoch": 0.21, "grad_norm": 2.993788719177246, "learning_rate": 0.0002, "loss": 1.7719, "step": 50470 }, { "epoch": 0.21, "grad_norm": 1.7877166271209717, "learning_rate": 0.0002, "loss": 1.4623, "step": 50480 }, { "epoch": 0.21, "grad_norm": 2.842618227005005, "learning_rate": 0.0002, "loss": 1.6514, "step": 50490 }, { "epoch": 0.21, "grad_norm": 2.3062641620635986, "learning_rate": 0.0002, "loss": 1.3395, "step": 50500 }, { "epoch": 0.21, "grad_norm": 1.6718525886535645, "learning_rate": 0.0002, "loss": 1.4546, "step": 50510 }, { "epoch": 0.21, "grad_norm": 2.463925361633301, "learning_rate": 0.0002, "loss": 1.7827, "step": 50520 }, { "epoch": 0.21, "grad_norm": 2.9468841552734375, "learning_rate": 0.0002, "loss": 1.5301, "step": 50530 }, { "epoch": 0.21, "grad_norm": 2.7575244903564453, "learning_rate": 0.0002, "loss": 1.698, "step": 50540 }, { "epoch": 0.21, "grad_norm": 2.669404983520508, "learning_rate": 0.0002, "loss": 1.565, "step": 50550 }, { "epoch": 0.21, "grad_norm": 3.1858267784118652, "learning_rate": 0.0002, "loss": 1.4543, "step": 50560 }, { "epoch": 0.21, "grad_norm": 3.95893931388855, "learning_rate": 0.0002, "loss": 1.559, "step": 50570 }, { "epoch": 0.21, "grad_norm": 1.6986937522888184, "learning_rate": 0.0002, "loss": 1.4917, "step": 50580 }, { "epoch": 0.21, "grad_norm": 4.831020832061768, "learning_rate": 0.0002, "loss": 1.4395, "step": 50590 }, { "epoch": 0.21, "grad_norm": 2.196377754211426, "learning_rate": 0.0002, "loss": 1.5822, "step": 50600 }, { "epoch": 0.21, "grad_norm": 1.790490984916687, "learning_rate": 0.0002, "loss": 1.5569, "step": 50610 }, { "epoch": 0.21, "grad_norm": 4.255134582519531, "learning_rate": 0.0002, "loss": 1.5946, "step": 50620 }, { "epoch": 0.21, "grad_norm": 3.04095458984375, "learning_rate": 0.0002, "loss": 1.3962, "step": 50630 }, { "epoch": 0.21, "grad_norm": 5.018227577209473, "learning_rate": 0.0002, "loss": 1.7283, "step": 50640 }, { "epoch": 0.21, "grad_norm": 2.871917486190796, "learning_rate": 0.0002, "loss": 1.7073, "step": 50650 }, { "epoch": 0.21, "grad_norm": 3.130122661590576, "learning_rate": 0.0002, "loss": 1.3998, "step": 50660 }, { "epoch": 0.21, "grad_norm": 3.6532576084136963, "learning_rate": 0.0002, "loss": 1.9503, "step": 50670 }, { "epoch": 0.21, "grad_norm": 2.633493423461914, "learning_rate": 0.0002, "loss": 1.4711, "step": 50680 }, { "epoch": 0.21, "grad_norm": 3.022477865219116, "learning_rate": 0.0002, "loss": 1.6828, "step": 50690 }, { "epoch": 0.21, "grad_norm": 2.2778778076171875, "learning_rate": 0.0002, "loss": 1.5744, "step": 50700 }, { "epoch": 0.21, "grad_norm": 2.4110679626464844, "learning_rate": 0.0002, "loss": 1.5598, "step": 50710 }, { "epoch": 0.21, "grad_norm": 4.000602722167969, "learning_rate": 0.0002, "loss": 1.7796, "step": 50720 }, { "epoch": 0.21, "grad_norm": 2.9416866302490234, "learning_rate": 0.0002, "loss": 1.434, "step": 50730 }, { "epoch": 0.21, "grad_norm": 2.3495683670043945, "learning_rate": 0.0002, "loss": 1.4671, "step": 50740 }, { "epoch": 0.21, "grad_norm": 4.472649097442627, "learning_rate": 0.0002, "loss": 1.478, "step": 50750 }, { "epoch": 0.21, "grad_norm": 2.793088912963867, "learning_rate": 0.0002, "loss": 1.6014, "step": 50760 }, { "epoch": 0.21, "grad_norm": 2.2375361919403076, "learning_rate": 0.0002, "loss": 1.7152, "step": 50770 }, { "epoch": 0.21, "grad_norm": 1.9556714296340942, "learning_rate": 0.0002, "loss": 1.4746, "step": 50780 }, { "epoch": 0.21, "grad_norm": 6.590212345123291, "learning_rate": 0.0002, "loss": 1.5588, "step": 50790 }, { "epoch": 0.21, "grad_norm": 2.049025058746338, "learning_rate": 0.0002, "loss": 1.5689, "step": 50800 }, { "epoch": 0.21, "grad_norm": 4.746274948120117, "learning_rate": 0.0002, "loss": 1.6491, "step": 50810 }, { "epoch": 0.21, "grad_norm": 2.038860559463501, "learning_rate": 0.0002, "loss": 1.2451, "step": 50820 }, { "epoch": 0.21, "grad_norm": 2.1553328037261963, "learning_rate": 0.0002, "loss": 1.6197, "step": 50830 }, { "epoch": 0.21, "grad_norm": 3.160654306411743, "learning_rate": 0.0002, "loss": 1.4726, "step": 50840 }, { "epoch": 0.21, "grad_norm": 3.594935178756714, "learning_rate": 0.0002, "loss": 1.5004, "step": 50850 }, { "epoch": 0.21, "grad_norm": 4.163917064666748, "learning_rate": 0.0002, "loss": 1.7034, "step": 50860 }, { "epoch": 0.21, "grad_norm": 2.5912606716156006, "learning_rate": 0.0002, "loss": 1.5089, "step": 50870 }, { "epoch": 0.21, "grad_norm": 4.47160005569458, "learning_rate": 0.0002, "loss": 1.6297, "step": 50880 }, { "epoch": 0.21, "grad_norm": 3.3649704456329346, "learning_rate": 0.0002, "loss": 1.5985, "step": 50890 }, { "epoch": 0.21, "grad_norm": 2.8505115509033203, "learning_rate": 0.0002, "loss": 1.7063, "step": 50900 }, { "epoch": 0.21, "grad_norm": 3.532043695449829, "learning_rate": 0.0002, "loss": 1.7284, "step": 50910 }, { "epoch": 0.21, "grad_norm": 1.7056622505187988, "learning_rate": 0.0002, "loss": 1.5053, "step": 50920 }, { "epoch": 0.21, "grad_norm": 3.7382924556732178, "learning_rate": 0.0002, "loss": 1.4031, "step": 50930 }, { "epoch": 0.21, "grad_norm": 2.656370162963867, "learning_rate": 0.0002, "loss": 1.7489, "step": 50940 }, { "epoch": 0.21, "grad_norm": 3.622101068496704, "learning_rate": 0.0002, "loss": 1.4914, "step": 50950 }, { "epoch": 0.21, "grad_norm": 4.459443092346191, "learning_rate": 0.0002, "loss": 1.4151, "step": 50960 }, { "epoch": 0.21, "grad_norm": 3.60895037651062, "learning_rate": 0.0002, "loss": 1.678, "step": 50970 }, { "epoch": 0.21, "grad_norm": 3.689246416091919, "learning_rate": 0.0002, "loss": 1.4552, "step": 50980 }, { "epoch": 0.21, "grad_norm": 3.424765110015869, "learning_rate": 0.0002, "loss": 1.5835, "step": 50990 }, { "epoch": 0.21, "grad_norm": 3.7667036056518555, "learning_rate": 0.0002, "loss": 1.54, "step": 51000 }, { "epoch": 0.21, "grad_norm": 1.7105339765548706, "learning_rate": 0.0002, "loss": 1.6908, "step": 51010 }, { "epoch": 0.21, "grad_norm": 2.328618049621582, "learning_rate": 0.0002, "loss": 1.7367, "step": 51020 }, { "epoch": 0.21, "grad_norm": 2.5913054943084717, "learning_rate": 0.0002, "loss": 1.4796, "step": 51030 }, { "epoch": 0.21, "grad_norm": 1.9534968137741089, "learning_rate": 0.0002, "loss": 1.6129, "step": 51040 }, { "epoch": 0.21, "grad_norm": 3.1632275581359863, "learning_rate": 0.0002, "loss": 1.4458, "step": 51050 }, { "epoch": 0.21, "grad_norm": 3.630046844482422, "learning_rate": 0.0002, "loss": 1.6246, "step": 51060 }, { "epoch": 0.21, "grad_norm": 3.555570363998413, "learning_rate": 0.0002, "loss": 1.6033, "step": 51070 }, { "epoch": 0.21, "grad_norm": 4.216043949127197, "learning_rate": 0.0002, "loss": 1.623, "step": 51080 }, { "epoch": 0.21, "grad_norm": 2.3997817039489746, "learning_rate": 0.0002, "loss": 1.3937, "step": 51090 }, { "epoch": 0.21, "grad_norm": 1.8726962804794312, "learning_rate": 0.0002, "loss": 1.6322, "step": 51100 }, { "epoch": 0.21, "grad_norm": 5.287158012390137, "learning_rate": 0.0002, "loss": 1.4864, "step": 51110 }, { "epoch": 0.21, "grad_norm": 4.1753926277160645, "learning_rate": 0.0002, "loss": 1.5364, "step": 51120 }, { "epoch": 0.21, "grad_norm": 2.6700985431671143, "learning_rate": 0.0002, "loss": 1.3626, "step": 51130 }, { "epoch": 0.21, "grad_norm": 0.9699505567550659, "learning_rate": 0.0002, "loss": 1.4183, "step": 51140 }, { "epoch": 0.21, "grad_norm": 4.259189605712891, "learning_rate": 0.0002, "loss": 1.5653, "step": 51150 }, { "epoch": 0.21, "grad_norm": 3.8134515285491943, "learning_rate": 0.0002, "loss": 1.4479, "step": 51160 }, { "epoch": 0.21, "grad_norm": 5.694037914276123, "learning_rate": 0.0002, "loss": 1.649, "step": 51170 }, { "epoch": 0.21, "grad_norm": 3.9019501209259033, "learning_rate": 0.0002, "loss": 1.6215, "step": 51180 }, { "epoch": 0.21, "grad_norm": 2.994638681411743, "learning_rate": 0.0002, "loss": 1.6066, "step": 51190 }, { "epoch": 0.21, "grad_norm": 3.547281265258789, "learning_rate": 0.0002, "loss": 1.6879, "step": 51200 }, { "epoch": 0.21, "grad_norm": 2.1523735523223877, "learning_rate": 0.0002, "loss": 1.6552, "step": 51210 }, { "epoch": 0.21, "grad_norm": 3.6704835891723633, "learning_rate": 0.0002, "loss": 1.6088, "step": 51220 }, { "epoch": 0.21, "grad_norm": 3.918412446975708, "learning_rate": 0.0002, "loss": 1.5683, "step": 51230 }, { "epoch": 0.21, "grad_norm": 2.6515629291534424, "learning_rate": 0.0002, "loss": 1.6106, "step": 51240 }, { "epoch": 0.21, "grad_norm": 3.9414937496185303, "learning_rate": 0.0002, "loss": 1.6441, "step": 51250 }, { "epoch": 0.21, "grad_norm": 3.2143361568450928, "learning_rate": 0.0002, "loss": 1.6969, "step": 51260 }, { "epoch": 0.21, "grad_norm": 3.02909517288208, "learning_rate": 0.0002, "loss": 1.5257, "step": 51270 }, { "epoch": 0.21, "grad_norm": 2.205890655517578, "learning_rate": 0.0002, "loss": 1.3361, "step": 51280 }, { "epoch": 0.21, "grad_norm": 2.4539456367492676, "learning_rate": 0.0002, "loss": 1.4705, "step": 51290 }, { "epoch": 0.21, "grad_norm": 1.5626310110092163, "learning_rate": 0.0002, "loss": 1.8396, "step": 51300 }, { "epoch": 0.21, "grad_norm": 3.134016990661621, "learning_rate": 0.0002, "loss": 1.4078, "step": 51310 }, { "epoch": 0.21, "grad_norm": 2.658602714538574, "learning_rate": 0.0002, "loss": 1.5884, "step": 51320 }, { "epoch": 0.21, "grad_norm": 6.475766181945801, "learning_rate": 0.0002, "loss": 1.7041, "step": 51330 }, { "epoch": 0.21, "grad_norm": 5.242370128631592, "learning_rate": 0.0002, "loss": 1.4574, "step": 51340 }, { "epoch": 0.21, "grad_norm": 3.2809152603149414, "learning_rate": 0.0002, "loss": 1.4597, "step": 51350 }, { "epoch": 0.21, "grad_norm": 2.194262981414795, "learning_rate": 0.0002, "loss": 1.4566, "step": 51360 }, { "epoch": 0.21, "grad_norm": 1.8016115427017212, "learning_rate": 0.0002, "loss": 1.478, "step": 51370 }, { "epoch": 0.21, "grad_norm": 4.6012725830078125, "learning_rate": 0.0002, "loss": 1.6374, "step": 51380 }, { "epoch": 0.21, "grad_norm": 3.112295150756836, "learning_rate": 0.0002, "loss": 1.5425, "step": 51390 }, { "epoch": 0.21, "grad_norm": 1.8321573734283447, "learning_rate": 0.0002, "loss": 1.3242, "step": 51400 }, { "epoch": 0.21, "grad_norm": 2.4752390384674072, "learning_rate": 0.0002, "loss": 1.5229, "step": 51410 }, { "epoch": 0.21, "grad_norm": 2.497201681137085, "learning_rate": 0.0002, "loss": 1.3742, "step": 51420 }, { "epoch": 0.21, "grad_norm": 3.152944803237915, "learning_rate": 0.0002, "loss": 1.4789, "step": 51430 }, { "epoch": 0.21, "grad_norm": 3.0517311096191406, "learning_rate": 0.0002, "loss": 1.6453, "step": 51440 }, { "epoch": 0.21, "grad_norm": 2.7155745029449463, "learning_rate": 0.0002, "loss": 1.8827, "step": 51450 }, { "epoch": 0.21, "grad_norm": 2.6319353580474854, "learning_rate": 0.0002, "loss": 1.7321, "step": 51460 }, { "epoch": 0.21, "grad_norm": 2.351330518722534, "learning_rate": 0.0002, "loss": 1.3603, "step": 51470 }, { "epoch": 0.21, "grad_norm": 2.9398088455200195, "learning_rate": 0.0002, "loss": 1.6076, "step": 51480 }, { "epoch": 0.21, "grad_norm": 2.8323824405670166, "learning_rate": 0.0002, "loss": 1.6002, "step": 51490 }, { "epoch": 0.21, "grad_norm": 2.8120713233947754, "learning_rate": 0.0002, "loss": 1.6041, "step": 51500 }, { "epoch": 0.21, "grad_norm": 1.4675936698913574, "learning_rate": 0.0002, "loss": 1.3941, "step": 51510 }, { "epoch": 0.21, "grad_norm": 2.076627731323242, "learning_rate": 0.0002, "loss": 1.5308, "step": 51520 }, { "epoch": 0.21, "grad_norm": 1.9117261171340942, "learning_rate": 0.0002, "loss": 1.6961, "step": 51530 }, { "epoch": 0.21, "grad_norm": 2.0526974201202393, "learning_rate": 0.0002, "loss": 1.4444, "step": 51540 }, { "epoch": 0.21, "grad_norm": 2.449827194213867, "learning_rate": 0.0002, "loss": 1.6696, "step": 51550 }, { "epoch": 0.21, "grad_norm": 3.952338695526123, "learning_rate": 0.0002, "loss": 1.3682, "step": 51560 }, { "epoch": 0.21, "grad_norm": 2.0553064346313477, "learning_rate": 0.0002, "loss": 1.6218, "step": 51570 }, { "epoch": 0.21, "grad_norm": 3.6849634647369385, "learning_rate": 0.0002, "loss": 1.8566, "step": 51580 }, { "epoch": 0.21, "grad_norm": 3.1659982204437256, "learning_rate": 0.0002, "loss": 1.6331, "step": 51590 }, { "epoch": 0.21, "grad_norm": 3.52934193611145, "learning_rate": 0.0002, "loss": 1.6034, "step": 51600 }, { "epoch": 0.21, "grad_norm": 3.110183000564575, "learning_rate": 0.0002, "loss": 1.57, "step": 51610 }, { "epoch": 0.21, "grad_norm": 2.6525700092315674, "learning_rate": 0.0002, "loss": 1.6276, "step": 51620 }, { "epoch": 0.21, "grad_norm": 3.1875813007354736, "learning_rate": 0.0002, "loss": 1.6855, "step": 51630 }, { "epoch": 0.21, "grad_norm": 2.7269978523254395, "learning_rate": 0.0002, "loss": 1.5639, "step": 51640 }, { "epoch": 0.21, "grad_norm": 2.3968546390533447, "learning_rate": 0.0002, "loss": 1.5633, "step": 51650 }, { "epoch": 0.21, "grad_norm": 2.317363977432251, "learning_rate": 0.0002, "loss": 1.5523, "step": 51660 }, { "epoch": 0.21, "grad_norm": 5.281731605529785, "learning_rate": 0.0002, "loss": 1.6945, "step": 51670 }, { "epoch": 0.21, "grad_norm": 4.636981010437012, "learning_rate": 0.0002, "loss": 1.6854, "step": 51680 }, { "epoch": 0.21, "grad_norm": 2.1548783779144287, "learning_rate": 0.0002, "loss": 1.3868, "step": 51690 }, { "epoch": 0.21, "grad_norm": 3.328071355819702, "learning_rate": 0.0002, "loss": 1.4526, "step": 51700 }, { "epoch": 0.21, "grad_norm": 3.446880340576172, "learning_rate": 0.0002, "loss": 1.4462, "step": 51710 }, { "epoch": 0.21, "grad_norm": 3.490086078643799, "learning_rate": 0.0002, "loss": 1.5141, "step": 51720 }, { "epoch": 0.21, "grad_norm": 3.1074373722076416, "learning_rate": 0.0002, "loss": 1.5572, "step": 51730 }, { "epoch": 0.21, "grad_norm": 3.1575281620025635, "learning_rate": 0.0002, "loss": 1.5936, "step": 51740 }, { "epoch": 0.21, "grad_norm": 1.839003086090088, "learning_rate": 0.0002, "loss": 1.5917, "step": 51750 }, { "epoch": 0.21, "grad_norm": 2.9507620334625244, "learning_rate": 0.0002, "loss": 1.6961, "step": 51760 }, { "epoch": 0.21, "grad_norm": 3.7484793663024902, "learning_rate": 0.0002, "loss": 1.6811, "step": 51770 }, { "epoch": 0.21, "grad_norm": 8.609795570373535, "learning_rate": 0.0002, "loss": 1.5256, "step": 51780 }, { "epoch": 0.21, "grad_norm": 3.3210830688476562, "learning_rate": 0.0002, "loss": 1.6086, "step": 51790 }, { "epoch": 0.21, "grad_norm": 4.7609148025512695, "learning_rate": 0.0002, "loss": 1.9174, "step": 51800 }, { "epoch": 0.21, "grad_norm": 2.058537006378174, "learning_rate": 0.0002, "loss": 1.4703, "step": 51810 }, { "epoch": 0.21, "grad_norm": 2.976276159286499, "learning_rate": 0.0002, "loss": 1.4807, "step": 51820 }, { "epoch": 0.21, "grad_norm": 2.8004941940307617, "learning_rate": 0.0002, "loss": 1.3682, "step": 51830 }, { "epoch": 0.21, "grad_norm": 3.671644926071167, "learning_rate": 0.0002, "loss": 1.4706, "step": 51840 }, { "epoch": 0.21, "grad_norm": 2.1249711513519287, "learning_rate": 0.0002, "loss": 1.4172, "step": 51850 }, { "epoch": 0.21, "grad_norm": 3.3213624954223633, "learning_rate": 0.0002, "loss": 1.4641, "step": 51860 }, { "epoch": 0.21, "grad_norm": 3.8532602787017822, "learning_rate": 0.0002, "loss": 1.3838, "step": 51870 }, { "epoch": 0.21, "grad_norm": 2.6396453380584717, "learning_rate": 0.0002, "loss": 1.7701, "step": 51880 }, { "epoch": 0.21, "grad_norm": 2.8670942783355713, "learning_rate": 0.0002, "loss": 1.6866, "step": 51890 }, { "epoch": 0.21, "grad_norm": 2.422879695892334, "learning_rate": 0.0002, "loss": 1.3764, "step": 51900 }, { "epoch": 0.21, "grad_norm": 2.410552978515625, "learning_rate": 0.0002, "loss": 1.473, "step": 51910 }, { "epoch": 0.21, "grad_norm": 2.1363604068756104, "learning_rate": 0.0002, "loss": 1.4813, "step": 51920 }, { "epoch": 0.21, "grad_norm": 3.338682174682617, "learning_rate": 0.0002, "loss": 1.8202, "step": 51930 }, { "epoch": 0.21, "grad_norm": 1.9050755500793457, "learning_rate": 0.0002, "loss": 1.551, "step": 51940 }, { "epoch": 0.21, "grad_norm": 3.142883062362671, "learning_rate": 0.0002, "loss": 1.5191, "step": 51950 }, { "epoch": 0.21, "grad_norm": 2.1503024101257324, "learning_rate": 0.0002, "loss": 1.3249, "step": 51960 }, { "epoch": 0.21, "grad_norm": 2.353353977203369, "learning_rate": 0.0002, "loss": 1.5434, "step": 51970 }, { "epoch": 0.21, "grad_norm": 2.9846079349517822, "learning_rate": 0.0002, "loss": 1.5263, "step": 51980 }, { "epoch": 0.21, "grad_norm": 3.9089744091033936, "learning_rate": 0.0002, "loss": 1.2648, "step": 51990 }, { "epoch": 0.21, "grad_norm": 3.2966413497924805, "learning_rate": 0.0002, "loss": 1.5597, "step": 52000 }, { "epoch": 0.21, "grad_norm": 2.2271814346313477, "learning_rate": 0.0002, "loss": 1.5395, "step": 52010 }, { "epoch": 0.21, "grad_norm": 2.9259743690490723, "learning_rate": 0.0002, "loss": 1.4549, "step": 52020 }, { "epoch": 0.21, "grad_norm": 4.481688499450684, "learning_rate": 0.0002, "loss": 1.2606, "step": 52030 }, { "epoch": 0.21, "grad_norm": 2.383234739303589, "learning_rate": 0.0002, "loss": 1.6636, "step": 52040 }, { "epoch": 0.21, "grad_norm": 3.4274089336395264, "learning_rate": 0.0002, "loss": 1.6535, "step": 52050 }, { "epoch": 0.21, "grad_norm": 3.7079966068267822, "learning_rate": 0.0002, "loss": 1.4941, "step": 52060 }, { "epoch": 0.21, "grad_norm": 3.4001200199127197, "learning_rate": 0.0002, "loss": 1.5013, "step": 52070 }, { "epoch": 0.21, "grad_norm": 4.290552616119385, "learning_rate": 0.0002, "loss": 1.6553, "step": 52080 }, { "epoch": 0.21, "grad_norm": 2.5609514713287354, "learning_rate": 0.0002, "loss": 1.3823, "step": 52090 }, { "epoch": 0.21, "grad_norm": 3.602372646331787, "learning_rate": 0.0002, "loss": 1.3675, "step": 52100 }, { "epoch": 0.21, "grad_norm": 2.4133405685424805, "learning_rate": 0.0002, "loss": 1.648, "step": 52110 }, { "epoch": 0.21, "grad_norm": 4.515824794769287, "learning_rate": 0.0002, "loss": 1.5977, "step": 52120 }, { "epoch": 0.21, "grad_norm": 5.348160266876221, "learning_rate": 0.0002, "loss": 1.4524, "step": 52130 }, { "epoch": 0.21, "grad_norm": 2.798945903778076, "learning_rate": 0.0002, "loss": 1.4499, "step": 52140 }, { "epoch": 0.21, "grad_norm": 2.3221633434295654, "learning_rate": 0.0002, "loss": 1.8105, "step": 52150 }, { "epoch": 0.21, "grad_norm": 2.6161751747131348, "learning_rate": 0.0002, "loss": 1.7743, "step": 52160 }, { "epoch": 0.21, "grad_norm": 3.483473539352417, "learning_rate": 0.0002, "loss": 1.5424, "step": 52170 }, { "epoch": 0.21, "grad_norm": 2.0519397258758545, "learning_rate": 0.0002, "loss": 1.6395, "step": 52180 }, { "epoch": 0.21, "grad_norm": 2.7499940395355225, "learning_rate": 0.0002, "loss": 1.4812, "step": 52190 }, { "epoch": 0.21, "grad_norm": 4.603516101837158, "learning_rate": 0.0002, "loss": 1.6728, "step": 52200 }, { "epoch": 0.21, "grad_norm": 2.3930695056915283, "learning_rate": 0.0002, "loss": 1.7235, "step": 52210 }, { "epoch": 0.21, "grad_norm": 1.486292839050293, "learning_rate": 0.0002, "loss": 1.3043, "step": 52220 }, { "epoch": 0.21, "grad_norm": 2.4369912147521973, "learning_rate": 0.0002, "loss": 1.7301, "step": 52230 }, { "epoch": 0.21, "grad_norm": 2.4511940479278564, "learning_rate": 0.0002, "loss": 1.5715, "step": 52240 }, { "epoch": 0.21, "grad_norm": 1.6612517833709717, "learning_rate": 0.0002, "loss": 1.3156, "step": 52250 }, { "epoch": 0.21, "grad_norm": 3.255976438522339, "learning_rate": 0.0002, "loss": 1.6922, "step": 52260 }, { "epoch": 0.21, "grad_norm": 2.5401082038879395, "learning_rate": 0.0002, "loss": 1.4059, "step": 52270 }, { "epoch": 0.21, "grad_norm": 3.424952507019043, "learning_rate": 0.0002, "loss": 1.6661, "step": 52280 }, { "epoch": 0.21, "grad_norm": 2.659365177154541, "learning_rate": 0.0002, "loss": 1.4072, "step": 52290 }, { "epoch": 0.21, "grad_norm": 3.9017913341522217, "learning_rate": 0.0002, "loss": 1.6942, "step": 52300 }, { "epoch": 0.21, "grad_norm": 4.430959701538086, "learning_rate": 0.0002, "loss": 1.4494, "step": 52310 }, { "epoch": 0.21, "grad_norm": 2.8117599487304688, "learning_rate": 0.0002, "loss": 1.5726, "step": 52320 }, { "epoch": 0.21, "grad_norm": 2.421480417251587, "learning_rate": 0.0002, "loss": 1.5995, "step": 52330 }, { "epoch": 0.21, "grad_norm": 2.9508895874023438, "learning_rate": 0.0002, "loss": 1.6509, "step": 52340 }, { "epoch": 0.21, "grad_norm": 2.291107654571533, "learning_rate": 0.0002, "loss": 1.5496, "step": 52350 }, { "epoch": 0.21, "grad_norm": 3.285543441772461, "learning_rate": 0.0002, "loss": 1.5653, "step": 52360 }, { "epoch": 0.21, "grad_norm": 2.882330894470215, "learning_rate": 0.0002, "loss": 1.3127, "step": 52370 }, { "epoch": 0.21, "grad_norm": 4.999882698059082, "learning_rate": 0.0002, "loss": 1.635, "step": 52380 }, { "epoch": 0.21, "grad_norm": 2.9360523223876953, "learning_rate": 0.0002, "loss": 1.425, "step": 52390 }, { "epoch": 0.21, "grad_norm": 4.662642002105713, "learning_rate": 0.0002, "loss": 1.3397, "step": 52400 }, { "epoch": 0.21, "grad_norm": 3.1639652252197266, "learning_rate": 0.0002, "loss": 1.5543, "step": 52410 }, { "epoch": 0.21, "grad_norm": 3.1834018230438232, "learning_rate": 0.0002, "loss": 1.5471, "step": 52420 }, { "epoch": 0.21, "grad_norm": 3.368659496307373, "learning_rate": 0.0002, "loss": 1.4812, "step": 52430 }, { "epoch": 0.21, "grad_norm": 3.1557693481445312, "learning_rate": 0.0002, "loss": 1.5991, "step": 52440 }, { "epoch": 0.21, "grad_norm": 2.43485689163208, "learning_rate": 0.0002, "loss": 1.3796, "step": 52450 }, { "epoch": 0.21, "grad_norm": 3.696058750152588, "learning_rate": 0.0002, "loss": 1.5478, "step": 52460 }, { "epoch": 0.21, "grad_norm": 3.619586706161499, "learning_rate": 0.0002, "loss": 1.6612, "step": 52470 }, { "epoch": 0.21, "grad_norm": 2.1317858695983887, "learning_rate": 0.0002, "loss": 1.6893, "step": 52480 }, { "epoch": 0.21, "grad_norm": 3.552629232406616, "learning_rate": 0.0002, "loss": 1.6283, "step": 52490 }, { "epoch": 0.21, "grad_norm": 2.423487663269043, "learning_rate": 0.0002, "loss": 1.5933, "step": 52500 }, { "epoch": 0.21, "grad_norm": 3.134504556655884, "learning_rate": 0.0002, "loss": 1.4273, "step": 52510 }, { "epoch": 0.21, "grad_norm": 2.186915874481201, "learning_rate": 0.0002, "loss": 1.603, "step": 52520 }, { "epoch": 0.21, "grad_norm": 2.5189743041992188, "learning_rate": 0.0002, "loss": 1.5165, "step": 52530 }, { "epoch": 0.21, "grad_norm": 1.5677179098129272, "learning_rate": 0.0002, "loss": 1.7096, "step": 52540 }, { "epoch": 0.21, "grad_norm": 2.1259515285491943, "learning_rate": 0.0002, "loss": 1.6184, "step": 52550 }, { "epoch": 0.21, "grad_norm": 3.184692859649658, "learning_rate": 0.0002, "loss": 1.552, "step": 52560 }, { "epoch": 0.21, "grad_norm": 8.362890243530273, "learning_rate": 0.0002, "loss": 1.6569, "step": 52570 }, { "epoch": 0.21, "grad_norm": 2.7438113689422607, "learning_rate": 0.0002, "loss": 1.6362, "step": 52580 }, { "epoch": 0.21, "grad_norm": 2.3739633560180664, "learning_rate": 0.0002, "loss": 1.4853, "step": 52590 }, { "epoch": 0.21, "grad_norm": 6.3877177238464355, "learning_rate": 0.0002, "loss": 1.5284, "step": 52600 }, { "epoch": 0.21, "grad_norm": 1.7463760375976562, "learning_rate": 0.0002, "loss": 1.24, "step": 52610 }, { "epoch": 0.21, "grad_norm": 2.7482550144195557, "learning_rate": 0.0002, "loss": 1.4723, "step": 52620 }, { "epoch": 0.21, "grad_norm": 3.749450922012329, "learning_rate": 0.0002, "loss": 1.5196, "step": 52630 }, { "epoch": 0.21, "grad_norm": 2.292757034301758, "learning_rate": 0.0002, "loss": 1.5397, "step": 52640 }, { "epoch": 0.21, "grad_norm": 4.9205322265625, "learning_rate": 0.0002, "loss": 1.5122, "step": 52650 }, { "epoch": 0.21, "grad_norm": 4.525017738342285, "learning_rate": 0.0002, "loss": 1.6799, "step": 52660 }, { "epoch": 0.21, "grad_norm": 2.616286039352417, "learning_rate": 0.0002, "loss": 1.5263, "step": 52670 }, { "epoch": 0.21, "grad_norm": 3.8297393321990967, "learning_rate": 0.0002, "loss": 1.5849, "step": 52680 }, { "epoch": 0.21, "grad_norm": 2.874948024749756, "learning_rate": 0.0002, "loss": 1.5759, "step": 52690 }, { "epoch": 0.21, "grad_norm": 4.415337085723877, "learning_rate": 0.0002, "loss": 1.7535, "step": 52700 }, { "epoch": 0.21, "grad_norm": 3.247697114944458, "learning_rate": 0.0002, "loss": 1.5984, "step": 52710 }, { "epoch": 0.21, "grad_norm": 1.9552838802337646, "learning_rate": 0.0002, "loss": 1.6752, "step": 52720 }, { "epoch": 0.21, "grad_norm": 3.6676342487335205, "learning_rate": 0.0002, "loss": 1.6922, "step": 52730 }, { "epoch": 0.21, "grad_norm": 3.70951771736145, "learning_rate": 0.0002, "loss": 1.6703, "step": 52740 }, { "epoch": 0.21, "grad_norm": 4.902263641357422, "learning_rate": 0.0002, "loss": 1.5558, "step": 52750 }, { "epoch": 0.21, "grad_norm": 4.098974227905273, "learning_rate": 0.0002, "loss": 1.6124, "step": 52760 }, { "epoch": 0.21, "grad_norm": 2.3930094242095947, "learning_rate": 0.0002, "loss": 1.388, "step": 52770 }, { "epoch": 0.21, "grad_norm": 2.1575703620910645, "learning_rate": 0.0002, "loss": 1.3931, "step": 52780 }, { "epoch": 0.21, "grad_norm": 4.715050220489502, "learning_rate": 0.0002, "loss": 1.6471, "step": 52790 }, { "epoch": 0.21, "grad_norm": 1.935158371925354, "learning_rate": 0.0002, "loss": 1.4359, "step": 52800 }, { "epoch": 0.21, "grad_norm": 3.669484853744507, "learning_rate": 0.0002, "loss": 1.7195, "step": 52810 }, { "epoch": 0.22, "grad_norm": 2.0352487564086914, "learning_rate": 0.0002, "loss": 1.4385, "step": 52820 }, { "epoch": 0.22, "grad_norm": 2.5902223587036133, "learning_rate": 0.0002, "loss": 1.3717, "step": 52830 }, { "epoch": 0.22, "grad_norm": 4.0021071434021, "learning_rate": 0.0002, "loss": 1.6677, "step": 52840 }, { "epoch": 0.22, "grad_norm": 3.1469838619232178, "learning_rate": 0.0002, "loss": 1.6445, "step": 52850 }, { "epoch": 0.22, "grad_norm": 3.105280876159668, "learning_rate": 0.0002, "loss": 1.5142, "step": 52860 }, { "epoch": 0.22, "grad_norm": 2.669703722000122, "learning_rate": 0.0002, "loss": 1.6116, "step": 52870 }, { "epoch": 0.22, "grad_norm": 2.1515088081359863, "learning_rate": 0.0002, "loss": 1.7209, "step": 52880 }, { "epoch": 0.22, "grad_norm": 2.801898717880249, "learning_rate": 0.0002, "loss": 1.5248, "step": 52890 }, { "epoch": 0.22, "grad_norm": 3.4039852619171143, "learning_rate": 0.0002, "loss": 1.5541, "step": 52900 }, { "epoch": 0.22, "grad_norm": 3.5143771171569824, "learning_rate": 0.0002, "loss": 1.4682, "step": 52910 }, { "epoch": 0.22, "grad_norm": 2.7027082443237305, "learning_rate": 0.0002, "loss": 1.4323, "step": 52920 }, { "epoch": 0.22, "grad_norm": 3.332808256149292, "learning_rate": 0.0002, "loss": 1.5973, "step": 52930 }, { "epoch": 0.22, "grad_norm": 3.3437108993530273, "learning_rate": 0.0002, "loss": 1.5464, "step": 52940 }, { "epoch": 0.22, "grad_norm": 2.635622024536133, "learning_rate": 0.0002, "loss": 1.5474, "step": 52950 }, { "epoch": 0.22, "grad_norm": 5.01202392578125, "learning_rate": 0.0002, "loss": 1.6454, "step": 52960 }, { "epoch": 0.22, "grad_norm": 2.460721492767334, "learning_rate": 0.0002, "loss": 1.5092, "step": 52970 }, { "epoch": 0.22, "grad_norm": 3.8015739917755127, "learning_rate": 0.0002, "loss": 1.59, "step": 52980 }, { "epoch": 0.22, "grad_norm": 2.550391912460327, "learning_rate": 0.0002, "loss": 1.4262, "step": 52990 }, { "epoch": 0.22, "grad_norm": 2.9912025928497314, "learning_rate": 0.0002, "loss": 1.5626, "step": 53000 }, { "epoch": 0.22, "grad_norm": 2.6705920696258545, "learning_rate": 0.0002, "loss": 1.6394, "step": 53010 }, { "epoch": 0.22, "grad_norm": 2.483105421066284, "learning_rate": 0.0002, "loss": 1.4621, "step": 53020 }, { "epoch": 0.22, "grad_norm": 3.1543846130371094, "learning_rate": 0.0002, "loss": 1.6528, "step": 53030 }, { "epoch": 0.22, "grad_norm": 3.4618382453918457, "learning_rate": 0.0002, "loss": 1.6267, "step": 53040 }, { "epoch": 0.22, "grad_norm": 3.0843746662139893, "learning_rate": 0.0002, "loss": 1.6136, "step": 53050 }, { "epoch": 0.22, "grad_norm": 1.9045217037200928, "learning_rate": 0.0002, "loss": 1.5812, "step": 53060 }, { "epoch": 0.22, "grad_norm": 3.101384401321411, "learning_rate": 0.0002, "loss": 1.4486, "step": 53070 }, { "epoch": 0.22, "grad_norm": 2.194232225418091, "learning_rate": 0.0002, "loss": 1.423, "step": 53080 }, { "epoch": 0.22, "grad_norm": 3.4317829608917236, "learning_rate": 0.0002, "loss": 1.499, "step": 53090 }, { "epoch": 0.22, "grad_norm": 3.947246789932251, "learning_rate": 0.0002, "loss": 1.372, "step": 53100 }, { "epoch": 0.22, "grad_norm": 2.16117262840271, "learning_rate": 0.0002, "loss": 1.4772, "step": 53110 }, { "epoch": 0.22, "grad_norm": 4.286478042602539, "learning_rate": 0.0002, "loss": 1.7639, "step": 53120 }, { "epoch": 0.22, "grad_norm": 2.4942376613616943, "learning_rate": 0.0002, "loss": 1.6451, "step": 53130 }, { "epoch": 0.22, "grad_norm": 3.823716402053833, "learning_rate": 0.0002, "loss": 1.3242, "step": 53140 }, { "epoch": 0.22, "grad_norm": 3.54188871383667, "learning_rate": 0.0002, "loss": 1.4311, "step": 53150 }, { "epoch": 0.22, "grad_norm": 10.608240127563477, "learning_rate": 0.0002, "loss": 1.4342, "step": 53160 }, { "epoch": 0.22, "grad_norm": 2.526970624923706, "learning_rate": 0.0002, "loss": 1.522, "step": 53170 }, { "epoch": 0.22, "grad_norm": 3.290677070617676, "learning_rate": 0.0002, "loss": 1.6509, "step": 53180 }, { "epoch": 0.22, "grad_norm": 1.8396999835968018, "learning_rate": 0.0002, "loss": 1.5951, "step": 53190 }, { "epoch": 0.22, "grad_norm": 1.967778205871582, "learning_rate": 0.0002, "loss": 1.6125, "step": 53200 }, { "epoch": 0.22, "grad_norm": 3.807506561279297, "learning_rate": 0.0002, "loss": 1.346, "step": 53210 }, { "epoch": 0.22, "grad_norm": 1.3523969650268555, "learning_rate": 0.0002, "loss": 1.7675, "step": 53220 }, { "epoch": 0.22, "grad_norm": 3.609008550643921, "learning_rate": 0.0002, "loss": 1.6406, "step": 53230 }, { "epoch": 0.22, "grad_norm": 2.7108356952667236, "learning_rate": 0.0002, "loss": 1.6256, "step": 53240 }, { "epoch": 0.22, "grad_norm": 2.6764333248138428, "learning_rate": 0.0002, "loss": 1.5004, "step": 53250 }, { "epoch": 0.22, "grad_norm": 3.0582199096679688, "learning_rate": 0.0002, "loss": 1.7402, "step": 53260 }, { "epoch": 0.22, "grad_norm": 2.0878498554229736, "learning_rate": 0.0002, "loss": 1.4654, "step": 53270 }, { "epoch": 0.22, "grad_norm": 2.259721279144287, "learning_rate": 0.0002, "loss": 1.5547, "step": 53280 }, { "epoch": 0.22, "grad_norm": 2.4716126918792725, "learning_rate": 0.0002, "loss": 1.5063, "step": 53290 }, { "epoch": 0.22, "grad_norm": 3.154296875, "learning_rate": 0.0002, "loss": 1.4568, "step": 53300 }, { "epoch": 0.22, "grad_norm": 1.9047051668167114, "learning_rate": 0.0002, "loss": 1.8184, "step": 53310 }, { "epoch": 0.22, "grad_norm": 2.582631826400757, "learning_rate": 0.0002, "loss": 1.7458, "step": 53320 }, { "epoch": 0.22, "grad_norm": 2.6772732734680176, "learning_rate": 0.0002, "loss": 1.89, "step": 53330 }, { "epoch": 0.22, "grad_norm": 2.2239766120910645, "learning_rate": 0.0002, "loss": 1.7739, "step": 53340 }, { "epoch": 0.22, "grad_norm": 2.076770544052124, "learning_rate": 0.0002, "loss": 1.592, "step": 53350 }, { "epoch": 0.22, "grad_norm": 1.3237112760543823, "learning_rate": 0.0002, "loss": 1.5487, "step": 53360 }, { "epoch": 0.22, "grad_norm": 2.449730634689331, "learning_rate": 0.0002, "loss": 1.3291, "step": 53370 }, { "epoch": 0.22, "grad_norm": 3.2709691524505615, "learning_rate": 0.0002, "loss": 1.5646, "step": 53380 }, { "epoch": 0.22, "grad_norm": 2.3998730182647705, "learning_rate": 0.0002, "loss": 1.4393, "step": 53390 }, { "epoch": 0.22, "grad_norm": 2.3063530921936035, "learning_rate": 0.0002, "loss": 1.7462, "step": 53400 }, { "epoch": 0.22, "grad_norm": 2.6383020877838135, "learning_rate": 0.0002, "loss": 1.8038, "step": 53410 }, { "epoch": 0.22, "grad_norm": 1.9138067960739136, "learning_rate": 0.0002, "loss": 1.3342, "step": 53420 }, { "epoch": 0.22, "grad_norm": 2.8902225494384766, "learning_rate": 0.0002, "loss": 1.6481, "step": 53430 }, { "epoch": 0.22, "grad_norm": 2.247789144515991, "learning_rate": 0.0002, "loss": 1.7758, "step": 53440 }, { "epoch": 0.22, "grad_norm": 1.825437307357788, "learning_rate": 0.0002, "loss": 1.7833, "step": 53450 }, { "epoch": 0.22, "grad_norm": 2.827852487564087, "learning_rate": 0.0002, "loss": 1.4887, "step": 53460 }, { "epoch": 0.22, "grad_norm": 2.9332098960876465, "learning_rate": 0.0002, "loss": 1.5365, "step": 53470 }, { "epoch": 0.22, "grad_norm": 2.732025623321533, "learning_rate": 0.0002, "loss": 1.5906, "step": 53480 }, { "epoch": 0.22, "grad_norm": 5.541977882385254, "learning_rate": 0.0002, "loss": 1.6167, "step": 53490 }, { "epoch": 0.22, "grad_norm": 5.618814945220947, "learning_rate": 0.0002, "loss": 1.6749, "step": 53500 }, { "epoch": 0.22, "grad_norm": 4.82464075088501, "learning_rate": 0.0002, "loss": 1.5907, "step": 53510 }, { "epoch": 0.22, "grad_norm": 3.701801300048828, "learning_rate": 0.0002, "loss": 1.3225, "step": 53520 }, { "epoch": 0.22, "grad_norm": 2.71803879737854, "learning_rate": 0.0002, "loss": 1.7598, "step": 53530 }, { "epoch": 0.22, "grad_norm": 1.8890509605407715, "learning_rate": 0.0002, "loss": 1.5706, "step": 53540 }, { "epoch": 0.22, "grad_norm": 2.95121431350708, "learning_rate": 0.0002, "loss": 1.7614, "step": 53550 }, { "epoch": 0.22, "grad_norm": 4.323433876037598, "learning_rate": 0.0002, "loss": 1.48, "step": 53560 }, { "epoch": 0.22, "grad_norm": 5.194177627563477, "learning_rate": 0.0002, "loss": 1.647, "step": 53570 }, { "epoch": 0.22, "grad_norm": 2.712451934814453, "learning_rate": 0.0002, "loss": 1.5797, "step": 53580 }, { "epoch": 0.22, "grad_norm": 2.7993052005767822, "learning_rate": 0.0002, "loss": 1.4975, "step": 53590 }, { "epoch": 0.22, "grad_norm": 3.3280279636383057, "learning_rate": 0.0002, "loss": 1.7118, "step": 53600 }, { "epoch": 0.22, "grad_norm": 4.072790145874023, "learning_rate": 0.0002, "loss": 1.3526, "step": 53610 }, { "epoch": 0.22, "grad_norm": 2.228006601333618, "learning_rate": 0.0002, "loss": 1.6681, "step": 53620 }, { "epoch": 0.22, "grad_norm": 1.857018232345581, "learning_rate": 0.0002, "loss": 1.6232, "step": 53630 }, { "epoch": 0.22, "grad_norm": 2.5094497203826904, "learning_rate": 0.0002, "loss": 1.6527, "step": 53640 }, { "epoch": 0.22, "grad_norm": 2.6481988430023193, "learning_rate": 0.0002, "loss": 1.5895, "step": 53650 }, { "epoch": 0.22, "grad_norm": 1.7914056777954102, "learning_rate": 0.0002, "loss": 1.4732, "step": 53660 }, { "epoch": 0.22, "grad_norm": 2.6717586517333984, "learning_rate": 0.0002, "loss": 1.4621, "step": 53670 }, { "epoch": 0.22, "grad_norm": 2.7784767150878906, "learning_rate": 0.0002, "loss": 1.5869, "step": 53680 }, { "epoch": 0.22, "grad_norm": 3.686690330505371, "learning_rate": 0.0002, "loss": 1.859, "step": 53690 }, { "epoch": 0.22, "grad_norm": 2.5303075313568115, "learning_rate": 0.0002, "loss": 1.4605, "step": 53700 }, { "epoch": 0.22, "grad_norm": 4.156418323516846, "learning_rate": 0.0002, "loss": 1.5832, "step": 53710 }, { "epoch": 0.22, "grad_norm": 2.724773406982422, "learning_rate": 0.0002, "loss": 1.4205, "step": 53720 }, { "epoch": 0.22, "grad_norm": 3.8427019119262695, "learning_rate": 0.0002, "loss": 1.6097, "step": 53730 }, { "epoch": 0.22, "grad_norm": 3.2052156925201416, "learning_rate": 0.0002, "loss": 1.7903, "step": 53740 }, { "epoch": 0.22, "grad_norm": 2.454782485961914, "learning_rate": 0.0002, "loss": 1.3485, "step": 53750 }, { "epoch": 0.22, "grad_norm": 2.5568935871124268, "learning_rate": 0.0002, "loss": 1.6684, "step": 53760 }, { "epoch": 0.22, "grad_norm": 3.2451119422912598, "learning_rate": 0.0002, "loss": 1.909, "step": 53770 }, { "epoch": 0.22, "grad_norm": 2.347379684448242, "learning_rate": 0.0002, "loss": 1.4984, "step": 53780 }, { "epoch": 0.22, "grad_norm": 3.6565616130828857, "learning_rate": 0.0002, "loss": 1.6434, "step": 53790 }, { "epoch": 0.22, "grad_norm": 3.078735589981079, "learning_rate": 0.0002, "loss": 1.7222, "step": 53800 }, { "epoch": 0.22, "grad_norm": 2.962949275970459, "learning_rate": 0.0002, "loss": 1.6403, "step": 53810 }, { "epoch": 0.22, "grad_norm": 4.034688472747803, "learning_rate": 0.0002, "loss": 1.5185, "step": 53820 }, { "epoch": 0.22, "grad_norm": 3.713468313217163, "learning_rate": 0.0002, "loss": 1.7001, "step": 53830 }, { "epoch": 0.22, "grad_norm": 4.7779436111450195, "learning_rate": 0.0002, "loss": 1.6505, "step": 53840 }, { "epoch": 0.22, "grad_norm": 2.995652437210083, "learning_rate": 0.0002, "loss": 1.5046, "step": 53850 }, { "epoch": 0.22, "grad_norm": 1.923836588859558, "learning_rate": 0.0002, "loss": 1.5946, "step": 53860 }, { "epoch": 0.22, "grad_norm": 3.4579498767852783, "learning_rate": 0.0002, "loss": 1.4361, "step": 53870 }, { "epoch": 0.22, "grad_norm": 3.798602342605591, "learning_rate": 0.0002, "loss": 1.6031, "step": 53880 }, { "epoch": 0.22, "grad_norm": 7.041793346405029, "learning_rate": 0.0002, "loss": 1.4658, "step": 53890 }, { "epoch": 0.22, "grad_norm": 3.9367167949676514, "learning_rate": 0.0002, "loss": 1.7322, "step": 53900 }, { "epoch": 0.22, "grad_norm": 3.191432237625122, "learning_rate": 0.0002, "loss": 1.5908, "step": 53910 }, { "epoch": 0.22, "grad_norm": 2.5839626789093018, "learning_rate": 0.0002, "loss": 1.6457, "step": 53920 }, { "epoch": 0.22, "grad_norm": 2.752927541732788, "learning_rate": 0.0002, "loss": 1.4592, "step": 53930 }, { "epoch": 0.22, "grad_norm": 3.0074048042297363, "learning_rate": 0.0002, "loss": 1.5378, "step": 53940 }, { "epoch": 0.22, "grad_norm": 2.5588572025299072, "learning_rate": 0.0002, "loss": 1.2537, "step": 53950 }, { "epoch": 0.22, "grad_norm": 3.172793388366699, "learning_rate": 0.0002, "loss": 1.5744, "step": 53960 }, { "epoch": 0.22, "grad_norm": 4.530451774597168, "learning_rate": 0.0002, "loss": 1.562, "step": 53970 }, { "epoch": 0.22, "grad_norm": 2.2061588764190674, "learning_rate": 0.0002, "loss": 1.4821, "step": 53980 }, { "epoch": 0.22, "grad_norm": 3.1496527194976807, "learning_rate": 0.0002, "loss": 1.7345, "step": 53990 }, { "epoch": 0.22, "grad_norm": 1.5055207014083862, "learning_rate": 0.0002, "loss": 1.4826, "step": 54000 }, { "epoch": 0.22, "grad_norm": 6.218966007232666, "learning_rate": 0.0002, "loss": 1.3704, "step": 54010 }, { "epoch": 0.22, "grad_norm": 2.270256757736206, "learning_rate": 0.0002, "loss": 1.6103, "step": 54020 }, { "epoch": 0.22, "grad_norm": 2.5944607257843018, "learning_rate": 0.0002, "loss": 1.6253, "step": 54030 }, { "epoch": 0.22, "grad_norm": 2.9952361583709717, "learning_rate": 0.0002, "loss": 1.6211, "step": 54040 }, { "epoch": 0.22, "grad_norm": 2.788609027862549, "learning_rate": 0.0002, "loss": 1.4599, "step": 54050 }, { "epoch": 0.22, "grad_norm": 2.181264638900757, "learning_rate": 0.0002, "loss": 1.6434, "step": 54060 }, { "epoch": 0.22, "grad_norm": 1.932898998260498, "learning_rate": 0.0002, "loss": 1.3841, "step": 54070 }, { "epoch": 0.22, "grad_norm": 8.103865623474121, "learning_rate": 0.0002, "loss": 1.4434, "step": 54080 }, { "epoch": 0.22, "grad_norm": 3.9054672718048096, "learning_rate": 0.0002, "loss": 1.3648, "step": 54090 }, { "epoch": 0.22, "grad_norm": 2.668936252593994, "learning_rate": 0.0002, "loss": 1.4132, "step": 54100 }, { "epoch": 0.22, "grad_norm": 2.921921730041504, "learning_rate": 0.0002, "loss": 1.7219, "step": 54110 }, { "epoch": 0.22, "grad_norm": 4.403744697570801, "learning_rate": 0.0002, "loss": 1.6672, "step": 54120 }, { "epoch": 0.22, "grad_norm": 2.384885549545288, "learning_rate": 0.0002, "loss": 1.2512, "step": 54130 }, { "epoch": 0.22, "grad_norm": 3.677736759185791, "learning_rate": 0.0002, "loss": 1.65, "step": 54140 }, { "epoch": 0.22, "grad_norm": 2.4802122116088867, "learning_rate": 0.0002, "loss": 1.4614, "step": 54150 }, { "epoch": 0.22, "grad_norm": 2.791545867919922, "learning_rate": 0.0002, "loss": 1.3759, "step": 54160 }, { "epoch": 0.22, "grad_norm": 5.133837699890137, "learning_rate": 0.0002, "loss": 1.7925, "step": 54170 }, { "epoch": 0.22, "grad_norm": 3.8926830291748047, "learning_rate": 0.0002, "loss": 1.4887, "step": 54180 }, { "epoch": 0.22, "grad_norm": 2.2965879440307617, "learning_rate": 0.0002, "loss": 1.5844, "step": 54190 }, { "epoch": 0.22, "grad_norm": 3.189345121383667, "learning_rate": 0.0002, "loss": 1.8877, "step": 54200 }, { "epoch": 0.22, "grad_norm": 3.6949663162231445, "learning_rate": 0.0002, "loss": 1.6049, "step": 54210 }, { "epoch": 0.22, "grad_norm": 2.2969484329223633, "learning_rate": 0.0002, "loss": 1.7018, "step": 54220 }, { "epoch": 0.22, "grad_norm": 2.5914952754974365, "learning_rate": 0.0002, "loss": 1.5594, "step": 54230 }, { "epoch": 0.22, "grad_norm": 3.3645408153533936, "learning_rate": 0.0002, "loss": 1.5142, "step": 54240 }, { "epoch": 0.22, "grad_norm": 3.5512444972991943, "learning_rate": 0.0002, "loss": 1.7229, "step": 54250 }, { "epoch": 0.22, "grad_norm": 4.615589141845703, "learning_rate": 0.0002, "loss": 1.6058, "step": 54260 }, { "epoch": 0.22, "grad_norm": 1.743614912033081, "learning_rate": 0.0002, "loss": 1.5271, "step": 54270 }, { "epoch": 0.22, "grad_norm": 2.0413460731506348, "learning_rate": 0.0002, "loss": 1.539, "step": 54280 }, { "epoch": 0.22, "grad_norm": 2.242875576019287, "learning_rate": 0.0002, "loss": 1.8616, "step": 54290 }, { "epoch": 0.22, "grad_norm": 2.3194448947906494, "learning_rate": 0.0002, "loss": 1.8093, "step": 54300 }, { "epoch": 0.22, "grad_norm": 4.4066596031188965, "learning_rate": 0.0002, "loss": 1.8447, "step": 54310 }, { "epoch": 0.22, "grad_norm": 2.9674549102783203, "learning_rate": 0.0002, "loss": 1.4893, "step": 54320 }, { "epoch": 0.22, "grad_norm": 2.3389480113983154, "learning_rate": 0.0002, "loss": 1.4061, "step": 54330 }, { "epoch": 0.22, "grad_norm": 2.195998191833496, "learning_rate": 0.0002, "loss": 1.5732, "step": 54340 }, { "epoch": 0.22, "grad_norm": 4.033536911010742, "learning_rate": 0.0002, "loss": 1.5664, "step": 54350 }, { "epoch": 0.22, "grad_norm": 5.055319786071777, "learning_rate": 0.0002, "loss": 1.6015, "step": 54360 }, { "epoch": 0.22, "grad_norm": 4.803711891174316, "learning_rate": 0.0002, "loss": 1.4806, "step": 54370 }, { "epoch": 0.22, "grad_norm": 2.9258759021759033, "learning_rate": 0.0002, "loss": 1.6163, "step": 54380 }, { "epoch": 0.22, "grad_norm": 2.7045578956604004, "learning_rate": 0.0002, "loss": 1.6056, "step": 54390 }, { "epoch": 0.22, "grad_norm": 3.6370413303375244, "learning_rate": 0.0002, "loss": 1.4744, "step": 54400 }, { "epoch": 0.22, "grad_norm": 4.095394134521484, "learning_rate": 0.0002, "loss": 1.6222, "step": 54410 }, { "epoch": 0.22, "grad_norm": 2.52006459236145, "learning_rate": 0.0002, "loss": 1.368, "step": 54420 }, { "epoch": 0.22, "grad_norm": 2.5656778812408447, "learning_rate": 0.0002, "loss": 1.3721, "step": 54430 }, { "epoch": 0.22, "grad_norm": 2.5482535362243652, "learning_rate": 0.0002, "loss": 1.6864, "step": 54440 }, { "epoch": 0.22, "grad_norm": 2.5983994007110596, "learning_rate": 0.0002, "loss": 1.5646, "step": 54450 }, { "epoch": 0.22, "grad_norm": 3.050020217895508, "learning_rate": 0.0002, "loss": 1.4493, "step": 54460 }, { "epoch": 0.22, "grad_norm": 2.764943838119507, "learning_rate": 0.0002, "loss": 1.5438, "step": 54470 }, { "epoch": 0.22, "grad_norm": 2.353487253189087, "learning_rate": 0.0002, "loss": 1.3864, "step": 54480 }, { "epoch": 0.22, "grad_norm": 2.721684694290161, "learning_rate": 0.0002, "loss": 1.5371, "step": 54490 }, { "epoch": 0.22, "grad_norm": 3.673034429550171, "learning_rate": 0.0002, "loss": 1.7124, "step": 54500 }, { "epoch": 0.22, "grad_norm": 3.9177334308624268, "learning_rate": 0.0002, "loss": 1.7516, "step": 54510 }, { "epoch": 0.22, "grad_norm": 2.7440264225006104, "learning_rate": 0.0002, "loss": 1.617, "step": 54520 }, { "epoch": 0.22, "grad_norm": 2.439772129058838, "learning_rate": 0.0002, "loss": 1.5792, "step": 54530 }, { "epoch": 0.22, "grad_norm": 3.194795608520508, "learning_rate": 0.0002, "loss": 1.7713, "step": 54540 }, { "epoch": 0.22, "grad_norm": 2.9816837310791016, "learning_rate": 0.0002, "loss": 1.7831, "step": 54550 }, { "epoch": 0.22, "grad_norm": 1.9504815340042114, "learning_rate": 0.0002, "loss": 1.6419, "step": 54560 }, { "epoch": 0.22, "grad_norm": 2.476625919342041, "learning_rate": 0.0002, "loss": 1.6154, "step": 54570 }, { "epoch": 0.22, "grad_norm": 3.0062718391418457, "learning_rate": 0.0002, "loss": 1.5825, "step": 54580 }, { "epoch": 0.22, "grad_norm": 2.3384344577789307, "learning_rate": 0.0002, "loss": 1.6107, "step": 54590 }, { "epoch": 0.22, "grad_norm": 2.3206660747528076, "learning_rate": 0.0002, "loss": 1.7576, "step": 54600 }, { "epoch": 0.22, "grad_norm": 3.3578941822052, "learning_rate": 0.0002, "loss": 1.6477, "step": 54610 }, { "epoch": 0.22, "grad_norm": 2.034635066986084, "learning_rate": 0.0002, "loss": 1.5009, "step": 54620 }, { "epoch": 0.22, "grad_norm": 2.176945447921753, "learning_rate": 0.0002, "loss": 1.459, "step": 54630 }, { "epoch": 0.22, "grad_norm": 4.642062187194824, "learning_rate": 0.0002, "loss": 1.5938, "step": 54640 }, { "epoch": 0.22, "grad_norm": 2.7354588508605957, "learning_rate": 0.0002, "loss": 1.6472, "step": 54650 }, { "epoch": 0.22, "grad_norm": 2.021998167037964, "learning_rate": 0.0002, "loss": 1.7712, "step": 54660 }, { "epoch": 0.22, "grad_norm": 3.3703010082244873, "learning_rate": 0.0002, "loss": 1.8869, "step": 54670 }, { "epoch": 0.22, "grad_norm": 4.593804836273193, "learning_rate": 0.0002, "loss": 1.859, "step": 54680 }, { "epoch": 0.22, "grad_norm": 2.293778896331787, "learning_rate": 0.0002, "loss": 1.5567, "step": 54690 }, { "epoch": 0.22, "grad_norm": 3.004307270050049, "learning_rate": 0.0002, "loss": 1.4647, "step": 54700 }, { "epoch": 0.22, "grad_norm": 3.2636475563049316, "learning_rate": 0.0002, "loss": 1.624, "step": 54710 }, { "epoch": 0.22, "grad_norm": 4.572479248046875, "learning_rate": 0.0002, "loss": 1.4402, "step": 54720 }, { "epoch": 0.22, "grad_norm": 3.112762689590454, "learning_rate": 0.0002, "loss": 1.4291, "step": 54730 }, { "epoch": 0.22, "grad_norm": 2.2866709232330322, "learning_rate": 0.0002, "loss": 1.6429, "step": 54740 }, { "epoch": 0.22, "grad_norm": 1.71864652633667, "learning_rate": 0.0002, "loss": 1.7155, "step": 54750 }, { "epoch": 0.22, "grad_norm": 2.9282169342041016, "learning_rate": 0.0002, "loss": 1.3568, "step": 54760 }, { "epoch": 0.22, "grad_norm": 2.880725383758545, "learning_rate": 0.0002, "loss": 1.3873, "step": 54770 }, { "epoch": 0.22, "grad_norm": 7.331480026245117, "learning_rate": 0.0002, "loss": 1.6631, "step": 54780 }, { "epoch": 0.22, "grad_norm": 1.7693345546722412, "learning_rate": 0.0002, "loss": 1.7629, "step": 54790 }, { "epoch": 0.22, "grad_norm": 2.9361774921417236, "learning_rate": 0.0002, "loss": 1.774, "step": 54800 }, { "epoch": 0.22, "grad_norm": 2.6847469806671143, "learning_rate": 0.0002, "loss": 1.5463, "step": 54810 }, { "epoch": 0.22, "grad_norm": 3.634248733520508, "learning_rate": 0.0002, "loss": 1.4661, "step": 54820 }, { "epoch": 0.22, "grad_norm": 3.2563750743865967, "learning_rate": 0.0002, "loss": 1.5092, "step": 54830 }, { "epoch": 0.22, "grad_norm": 4.108393669128418, "learning_rate": 0.0002, "loss": 1.5779, "step": 54840 }, { "epoch": 0.22, "grad_norm": 2.1696832180023193, "learning_rate": 0.0002, "loss": 1.5856, "step": 54850 }, { "epoch": 0.22, "grad_norm": 2.31565260887146, "learning_rate": 0.0002, "loss": 1.5401, "step": 54860 }, { "epoch": 0.22, "grad_norm": 2.855741500854492, "learning_rate": 0.0002, "loss": 1.5524, "step": 54870 }, { "epoch": 0.22, "grad_norm": 3.4358668327331543, "learning_rate": 0.0002, "loss": 1.8108, "step": 54880 }, { "epoch": 0.22, "grad_norm": 2.672626495361328, "learning_rate": 0.0002, "loss": 1.5929, "step": 54890 }, { "epoch": 0.22, "grad_norm": 2.4551639556884766, "learning_rate": 0.0002, "loss": 1.6078, "step": 54900 }, { "epoch": 0.22, "grad_norm": 1.7716864347457886, "learning_rate": 0.0002, "loss": 1.4337, "step": 54910 }, { "epoch": 0.22, "grad_norm": 2.261579990386963, "learning_rate": 0.0002, "loss": 1.548, "step": 54920 }, { "epoch": 0.22, "grad_norm": 2.946720600128174, "learning_rate": 0.0002, "loss": 1.6598, "step": 54930 }, { "epoch": 0.22, "grad_norm": 1.4740500450134277, "learning_rate": 0.0002, "loss": 1.5286, "step": 54940 }, { "epoch": 0.22, "grad_norm": 3.4996583461761475, "learning_rate": 0.0002, "loss": 1.6728, "step": 54950 }, { "epoch": 0.22, "grad_norm": 7.4832940101623535, "learning_rate": 0.0002, "loss": 1.2786, "step": 54960 }, { "epoch": 0.22, "grad_norm": 2.688987970352173, "learning_rate": 0.0002, "loss": 1.9045, "step": 54970 }, { "epoch": 0.22, "grad_norm": 4.090775489807129, "learning_rate": 0.0002, "loss": 1.3659, "step": 54980 }, { "epoch": 0.22, "grad_norm": 1.3764286041259766, "learning_rate": 0.0002, "loss": 1.7563, "step": 54990 }, { "epoch": 0.22, "grad_norm": 3.1448047161102295, "learning_rate": 0.0002, "loss": 1.7059, "step": 55000 }, { "epoch": 0.22, "grad_norm": 2.5384888648986816, "learning_rate": 0.0002, "loss": 1.8145, "step": 55010 }, { "epoch": 0.22, "grad_norm": 2.1288928985595703, "learning_rate": 0.0002, "loss": 1.5562, "step": 55020 }, { "epoch": 0.22, "grad_norm": 2.419285297393799, "learning_rate": 0.0002, "loss": 1.7195, "step": 55030 }, { "epoch": 0.22, "grad_norm": 2.8943657875061035, "learning_rate": 0.0002, "loss": 1.4429, "step": 55040 }, { "epoch": 0.22, "grad_norm": 4.069022178649902, "learning_rate": 0.0002, "loss": 1.5324, "step": 55050 }, { "epoch": 0.22, "grad_norm": 1.8443344831466675, "learning_rate": 0.0002, "loss": 1.7665, "step": 55060 }, { "epoch": 0.22, "grad_norm": 2.903672933578491, "learning_rate": 0.0002, "loss": 1.5633, "step": 55070 }, { "epoch": 0.22, "grad_norm": 3.1308162212371826, "learning_rate": 0.0002, "loss": 1.726, "step": 55080 }, { "epoch": 0.22, "grad_norm": 3.0904576778411865, "learning_rate": 0.0002, "loss": 1.677, "step": 55090 }, { "epoch": 0.22, "grad_norm": 2.88753604888916, "learning_rate": 0.0002, "loss": 1.605, "step": 55100 }, { "epoch": 0.22, "grad_norm": 3.122593879699707, "learning_rate": 0.0002, "loss": 1.6075, "step": 55110 }, { "epoch": 0.22, "grad_norm": 3.1019909381866455, "learning_rate": 0.0002, "loss": 1.8207, "step": 55120 }, { "epoch": 0.22, "grad_norm": 2.951420545578003, "learning_rate": 0.0002, "loss": 1.6998, "step": 55130 }, { "epoch": 0.22, "grad_norm": 2.930957555770874, "learning_rate": 0.0002, "loss": 1.4737, "step": 55140 }, { "epoch": 0.22, "grad_norm": 2.53916335105896, "learning_rate": 0.0002, "loss": 1.3297, "step": 55150 }, { "epoch": 0.22, "grad_norm": 3.406348943710327, "learning_rate": 0.0002, "loss": 1.4124, "step": 55160 }, { "epoch": 0.22, "grad_norm": 2.922147512435913, "learning_rate": 0.0002, "loss": 1.4696, "step": 55170 }, { "epoch": 0.22, "grad_norm": 2.149125337600708, "learning_rate": 0.0002, "loss": 1.3981, "step": 55180 }, { "epoch": 0.22, "grad_norm": 3.1135082244873047, "learning_rate": 0.0002, "loss": 1.4151, "step": 55190 }, { "epoch": 0.22, "grad_norm": 2.9941158294677734, "learning_rate": 0.0002, "loss": 1.7512, "step": 55200 }, { "epoch": 0.22, "grad_norm": 4.499266624450684, "learning_rate": 0.0002, "loss": 1.6759, "step": 55210 }, { "epoch": 0.22, "grad_norm": 2.4122912883758545, "learning_rate": 0.0002, "loss": 1.8158, "step": 55220 }, { "epoch": 0.22, "grad_norm": 2.7444746494293213, "learning_rate": 0.0002, "loss": 1.5924, "step": 55230 }, { "epoch": 0.22, "grad_norm": 2.1634161472320557, "learning_rate": 0.0002, "loss": 1.3174, "step": 55240 }, { "epoch": 0.22, "grad_norm": 3.7167601585388184, "learning_rate": 0.0002, "loss": 1.6197, "step": 55250 }, { "epoch": 0.22, "grad_norm": 2.3488738536834717, "learning_rate": 0.0002, "loss": 1.5967, "step": 55260 }, { "epoch": 0.23, "grad_norm": 3.1431288719177246, "learning_rate": 0.0002, "loss": 1.3143, "step": 55270 }, { "epoch": 0.23, "grad_norm": 3.2678253650665283, "learning_rate": 0.0002, "loss": 1.6552, "step": 55280 }, { "epoch": 0.23, "grad_norm": 2.513244152069092, "learning_rate": 0.0002, "loss": 1.7528, "step": 55290 }, { "epoch": 0.23, "grad_norm": 3.35451340675354, "learning_rate": 0.0002, "loss": 1.6383, "step": 55300 }, { "epoch": 0.23, "grad_norm": 1.726981282234192, "learning_rate": 0.0002, "loss": 1.4634, "step": 55310 }, { "epoch": 0.23, "grad_norm": 2.7187228202819824, "learning_rate": 0.0002, "loss": 1.5314, "step": 55320 }, { "epoch": 0.23, "grad_norm": 3.1673202514648438, "learning_rate": 0.0002, "loss": 1.5634, "step": 55330 }, { "epoch": 0.23, "grad_norm": 2.6677439212799072, "learning_rate": 0.0002, "loss": 1.5609, "step": 55340 }, { "epoch": 0.23, "grad_norm": 2.243567705154419, "learning_rate": 0.0002, "loss": 1.7267, "step": 55350 }, { "epoch": 0.23, "grad_norm": 2.63323974609375, "learning_rate": 0.0002, "loss": 1.3451, "step": 55360 }, { "epoch": 0.23, "grad_norm": 3.0393776893615723, "learning_rate": 0.0002, "loss": 1.4718, "step": 55370 }, { "epoch": 0.23, "grad_norm": 2.5094313621520996, "learning_rate": 0.0002, "loss": 1.3426, "step": 55380 }, { "epoch": 0.23, "grad_norm": 3.3934555053710938, "learning_rate": 0.0002, "loss": 1.4843, "step": 55390 }, { "epoch": 0.23, "grad_norm": 3.1450119018554688, "learning_rate": 0.0002, "loss": 1.7323, "step": 55400 }, { "epoch": 0.23, "grad_norm": 1.7889271974563599, "learning_rate": 0.0002, "loss": 1.5261, "step": 55410 }, { "epoch": 0.23, "grad_norm": 9.141748428344727, "learning_rate": 0.0002, "loss": 1.5717, "step": 55420 }, { "epoch": 0.23, "grad_norm": 4.227161407470703, "learning_rate": 0.0002, "loss": 1.5914, "step": 55430 }, { "epoch": 0.23, "grad_norm": 2.955000877380371, "learning_rate": 0.0002, "loss": 1.6047, "step": 55440 }, { "epoch": 0.23, "grad_norm": 2.4789018630981445, "learning_rate": 0.0002, "loss": 1.5033, "step": 55450 }, { "epoch": 0.23, "grad_norm": 5.104526519775391, "learning_rate": 0.0002, "loss": 1.6768, "step": 55460 }, { "epoch": 0.23, "grad_norm": 2.681847333908081, "learning_rate": 0.0002, "loss": 1.3252, "step": 55470 }, { "epoch": 0.23, "grad_norm": 1.6886749267578125, "learning_rate": 0.0002, "loss": 1.4676, "step": 55480 }, { "epoch": 0.23, "grad_norm": 2.4832377433776855, "learning_rate": 0.0002, "loss": 1.5186, "step": 55490 }, { "epoch": 0.23, "grad_norm": 3.394508123397827, "learning_rate": 0.0002, "loss": 1.5634, "step": 55500 }, { "epoch": 0.23, "grad_norm": 4.262568950653076, "learning_rate": 0.0002, "loss": 1.2647, "step": 55510 }, { "epoch": 0.23, "grad_norm": 2.197260856628418, "learning_rate": 0.0002, "loss": 1.4052, "step": 55520 }, { "epoch": 0.23, "grad_norm": 5.981957912445068, "learning_rate": 0.0002, "loss": 1.4402, "step": 55530 }, { "epoch": 0.23, "grad_norm": 1.3632395267486572, "learning_rate": 0.0002, "loss": 1.7927, "step": 55540 }, { "epoch": 0.23, "grad_norm": 2.6454625129699707, "learning_rate": 0.0002, "loss": 1.3794, "step": 55550 }, { "epoch": 0.23, "grad_norm": 2.5007832050323486, "learning_rate": 0.0002, "loss": 1.6186, "step": 55560 }, { "epoch": 0.23, "grad_norm": 4.843434810638428, "learning_rate": 0.0002, "loss": 1.5473, "step": 55570 }, { "epoch": 0.23, "grad_norm": 1.9761865139007568, "learning_rate": 0.0002, "loss": 1.4048, "step": 55580 }, { "epoch": 0.23, "grad_norm": 2.137632369995117, "learning_rate": 0.0002, "loss": 1.5768, "step": 55590 }, { "epoch": 0.23, "grad_norm": 3.2368581295013428, "learning_rate": 0.0002, "loss": 1.6396, "step": 55600 }, { "epoch": 0.23, "grad_norm": 4.597871780395508, "learning_rate": 0.0002, "loss": 1.5615, "step": 55610 }, { "epoch": 0.23, "grad_norm": 2.0338428020477295, "learning_rate": 0.0002, "loss": 1.5855, "step": 55620 }, { "epoch": 0.23, "grad_norm": 2.7715728282928467, "learning_rate": 0.0002, "loss": 1.5548, "step": 55630 }, { "epoch": 0.23, "grad_norm": 5.387057781219482, "learning_rate": 0.0002, "loss": 1.4782, "step": 55640 }, { "epoch": 0.23, "grad_norm": 3.078925132751465, "learning_rate": 0.0002, "loss": 1.4877, "step": 55650 }, { "epoch": 0.23, "grad_norm": 3.7865259647369385, "learning_rate": 0.0002, "loss": 1.6067, "step": 55660 }, { "epoch": 0.23, "grad_norm": 2.222717523574829, "learning_rate": 0.0002, "loss": 1.6599, "step": 55670 }, { "epoch": 0.23, "grad_norm": 2.250188112258911, "learning_rate": 0.0002, "loss": 1.6142, "step": 55680 }, { "epoch": 0.23, "grad_norm": 2.5102529525756836, "learning_rate": 0.0002, "loss": 1.5616, "step": 55690 }, { "epoch": 0.23, "grad_norm": 1.897107720375061, "learning_rate": 0.0002, "loss": 1.6507, "step": 55700 }, { "epoch": 0.23, "grad_norm": 4.261868476867676, "learning_rate": 0.0002, "loss": 1.5301, "step": 55710 }, { "epoch": 0.23, "grad_norm": 4.6430182456970215, "learning_rate": 0.0002, "loss": 1.3781, "step": 55720 }, { "epoch": 0.23, "grad_norm": 2.626943349838257, "learning_rate": 0.0002, "loss": 1.6758, "step": 55730 }, { "epoch": 0.23, "grad_norm": 2.895376205444336, "learning_rate": 0.0002, "loss": 1.4888, "step": 55740 }, { "epoch": 0.23, "grad_norm": 3.088524580001831, "learning_rate": 0.0002, "loss": 1.6544, "step": 55750 }, { "epoch": 0.23, "grad_norm": 4.115562915802002, "learning_rate": 0.0002, "loss": 1.5484, "step": 55760 }, { "epoch": 0.23, "grad_norm": 1.7963693141937256, "learning_rate": 0.0002, "loss": 1.5591, "step": 55770 }, { "epoch": 0.23, "grad_norm": 2.5968408584594727, "learning_rate": 0.0002, "loss": 1.5036, "step": 55780 }, { "epoch": 0.23, "grad_norm": 2.5111496448516846, "learning_rate": 0.0002, "loss": 1.5577, "step": 55790 }, { "epoch": 0.23, "grad_norm": 2.4834258556365967, "learning_rate": 0.0002, "loss": 1.4527, "step": 55800 }, { "epoch": 0.23, "grad_norm": 2.804229736328125, "learning_rate": 0.0002, "loss": 1.3651, "step": 55810 }, { "epoch": 0.23, "grad_norm": 2.479114294052124, "learning_rate": 0.0002, "loss": 1.5053, "step": 55820 }, { "epoch": 0.23, "grad_norm": 3.295644521713257, "learning_rate": 0.0002, "loss": 1.6467, "step": 55830 }, { "epoch": 0.23, "grad_norm": 3.0979561805725098, "learning_rate": 0.0002, "loss": 1.6569, "step": 55840 }, { "epoch": 0.23, "grad_norm": 2.346496820449829, "learning_rate": 0.0002, "loss": 1.6968, "step": 55850 }, { "epoch": 0.23, "grad_norm": 1.8028905391693115, "learning_rate": 0.0002, "loss": 1.6135, "step": 55860 }, { "epoch": 0.23, "grad_norm": 2.370445966720581, "learning_rate": 0.0002, "loss": 1.5838, "step": 55870 }, { "epoch": 0.23, "grad_norm": 2.3496127128601074, "learning_rate": 0.0002, "loss": 1.5314, "step": 55880 }, { "epoch": 0.23, "grad_norm": 2.431722402572632, "learning_rate": 0.0002, "loss": 1.5957, "step": 55890 }, { "epoch": 0.23, "grad_norm": 2.5762670040130615, "learning_rate": 0.0002, "loss": 1.3342, "step": 55900 }, { "epoch": 0.23, "grad_norm": 4.222506523132324, "learning_rate": 0.0002, "loss": 1.713, "step": 55910 }, { "epoch": 0.23, "grad_norm": 1.8379372358322144, "learning_rate": 0.0002, "loss": 1.5127, "step": 55920 }, { "epoch": 0.23, "grad_norm": 6.080053806304932, "learning_rate": 0.0002, "loss": 1.728, "step": 55930 }, { "epoch": 0.23, "grad_norm": 3.374260902404785, "learning_rate": 0.0002, "loss": 1.6456, "step": 55940 }, { "epoch": 0.23, "grad_norm": 2.537684440612793, "learning_rate": 0.0002, "loss": 1.4024, "step": 55950 }, { "epoch": 0.23, "grad_norm": 3.0458905696868896, "learning_rate": 0.0002, "loss": 1.5846, "step": 55960 }, { "epoch": 0.23, "grad_norm": 2.606487989425659, "learning_rate": 0.0002, "loss": 1.6569, "step": 55970 }, { "epoch": 0.23, "grad_norm": 2.9945292472839355, "learning_rate": 0.0002, "loss": 1.2649, "step": 55980 }, { "epoch": 0.23, "grad_norm": 3.22680926322937, "learning_rate": 0.0002, "loss": 1.5252, "step": 55990 }, { "epoch": 0.23, "grad_norm": 4.525209426879883, "learning_rate": 0.0002, "loss": 1.5066, "step": 56000 }, { "epoch": 0.23, "grad_norm": 3.1065404415130615, "learning_rate": 0.0002, "loss": 1.5538, "step": 56010 }, { "epoch": 0.23, "grad_norm": 3.6811938285827637, "learning_rate": 0.0002, "loss": 1.5408, "step": 56020 }, { "epoch": 0.23, "grad_norm": 4.113527297973633, "learning_rate": 0.0002, "loss": 1.5919, "step": 56030 }, { "epoch": 0.23, "grad_norm": 3.373101234436035, "learning_rate": 0.0002, "loss": 1.5895, "step": 56040 }, { "epoch": 0.23, "grad_norm": 2.394564628601074, "learning_rate": 0.0002, "loss": 1.5513, "step": 56050 }, { "epoch": 0.23, "grad_norm": 3.9792513847351074, "learning_rate": 0.0002, "loss": 1.46, "step": 56060 }, { "epoch": 0.23, "grad_norm": 2.670098304748535, "learning_rate": 0.0002, "loss": 1.5952, "step": 56070 }, { "epoch": 0.23, "grad_norm": 2.994285821914673, "learning_rate": 0.0002, "loss": 1.7732, "step": 56080 }, { "epoch": 0.23, "grad_norm": 3.3411946296691895, "learning_rate": 0.0002, "loss": 1.6258, "step": 56090 }, { "epoch": 0.23, "grad_norm": 3.4050445556640625, "learning_rate": 0.0002, "loss": 1.5944, "step": 56100 }, { "epoch": 0.23, "grad_norm": 3.5078635215759277, "learning_rate": 0.0002, "loss": 1.6854, "step": 56110 }, { "epoch": 0.23, "grad_norm": 3.9151673316955566, "learning_rate": 0.0002, "loss": 1.482, "step": 56120 }, { "epoch": 0.23, "grad_norm": 4.741184234619141, "learning_rate": 0.0002, "loss": 1.7564, "step": 56130 }, { "epoch": 0.23, "grad_norm": 3.5712168216705322, "learning_rate": 0.0002, "loss": 1.4926, "step": 56140 }, { "epoch": 0.23, "grad_norm": 3.213812828063965, "learning_rate": 0.0002, "loss": 1.5719, "step": 56150 }, { "epoch": 0.23, "grad_norm": 2.217061758041382, "learning_rate": 0.0002, "loss": 1.586, "step": 56160 }, { "epoch": 0.23, "grad_norm": 4.224521636962891, "learning_rate": 0.0002, "loss": 1.4399, "step": 56170 }, { "epoch": 0.23, "grad_norm": 3.4003705978393555, "learning_rate": 0.0002, "loss": 1.6646, "step": 56180 }, { "epoch": 0.23, "grad_norm": 2.079646110534668, "learning_rate": 0.0002, "loss": 1.4738, "step": 56190 }, { "epoch": 0.23, "grad_norm": 2.647019863128662, "learning_rate": 0.0002, "loss": 1.612, "step": 56200 }, { "epoch": 0.23, "grad_norm": 1.5151653289794922, "learning_rate": 0.0002, "loss": 1.4063, "step": 56210 }, { "epoch": 0.23, "grad_norm": 3.720211982727051, "learning_rate": 0.0002, "loss": 1.7419, "step": 56220 }, { "epoch": 0.23, "grad_norm": 3.4962387084960938, "learning_rate": 0.0002, "loss": 1.4711, "step": 56230 }, { "epoch": 0.23, "grad_norm": 2.5062942504882812, "learning_rate": 0.0002, "loss": 1.5081, "step": 56240 }, { "epoch": 0.23, "grad_norm": 2.4767603874206543, "learning_rate": 0.0002, "loss": 1.6755, "step": 56250 }, { "epoch": 0.23, "grad_norm": 4.40393590927124, "learning_rate": 0.0002, "loss": 1.4243, "step": 56260 }, { "epoch": 0.23, "grad_norm": 4.090254306793213, "learning_rate": 0.0002, "loss": 1.5353, "step": 56270 }, { "epoch": 0.23, "grad_norm": 3.198617696762085, "learning_rate": 0.0002, "loss": 1.3618, "step": 56280 }, { "epoch": 0.23, "grad_norm": 2.3354554176330566, "learning_rate": 0.0002, "loss": 1.5987, "step": 56290 }, { "epoch": 0.23, "grad_norm": 1.9634957313537598, "learning_rate": 0.0002, "loss": 1.5788, "step": 56300 }, { "epoch": 0.23, "grad_norm": 2.23412823677063, "learning_rate": 0.0002, "loss": 1.5439, "step": 56310 }, { "epoch": 0.23, "grad_norm": 2.3972346782684326, "learning_rate": 0.0002, "loss": 1.3506, "step": 56320 }, { "epoch": 0.23, "grad_norm": 3.4178428649902344, "learning_rate": 0.0002, "loss": 1.4927, "step": 56330 }, { "epoch": 0.23, "grad_norm": 2.2928972244262695, "learning_rate": 0.0002, "loss": 1.5245, "step": 56340 }, { "epoch": 0.23, "grad_norm": 3.3140709400177, "learning_rate": 0.0002, "loss": 1.5793, "step": 56350 }, { "epoch": 0.23, "grad_norm": 2.7450449466705322, "learning_rate": 0.0002, "loss": 1.4519, "step": 56360 }, { "epoch": 0.23, "grad_norm": 1.9450842142105103, "learning_rate": 0.0002, "loss": 1.7039, "step": 56370 }, { "epoch": 0.23, "grad_norm": 3.973451852798462, "learning_rate": 0.0002, "loss": 1.8327, "step": 56380 }, { "epoch": 0.23, "grad_norm": 3.3211870193481445, "learning_rate": 0.0002, "loss": 1.7246, "step": 56390 }, { "epoch": 0.23, "grad_norm": 1.2383828163146973, "learning_rate": 0.0002, "loss": 1.4965, "step": 56400 }, { "epoch": 0.23, "grad_norm": 3.7137560844421387, "learning_rate": 0.0002, "loss": 1.8216, "step": 56410 }, { "epoch": 0.23, "grad_norm": 1.9683059453964233, "learning_rate": 0.0002, "loss": 1.6709, "step": 56420 }, { "epoch": 0.23, "grad_norm": 2.523381471633911, "learning_rate": 0.0002, "loss": 1.5896, "step": 56430 }, { "epoch": 0.23, "grad_norm": 3.2850704193115234, "learning_rate": 0.0002, "loss": 1.5363, "step": 56440 }, { "epoch": 0.23, "grad_norm": 3.5641098022460938, "learning_rate": 0.0002, "loss": 1.5536, "step": 56450 }, { "epoch": 0.23, "grad_norm": 3.133606195449829, "learning_rate": 0.0002, "loss": 1.4789, "step": 56460 }, { "epoch": 0.23, "grad_norm": 3.3109655380249023, "learning_rate": 0.0002, "loss": 1.7063, "step": 56470 }, { "epoch": 0.23, "grad_norm": 2.3829565048217773, "learning_rate": 0.0002, "loss": 1.5349, "step": 56480 }, { "epoch": 0.23, "grad_norm": 2.3555593490600586, "learning_rate": 0.0002, "loss": 1.5342, "step": 56490 }, { "epoch": 0.23, "grad_norm": 2.520493745803833, "learning_rate": 0.0002, "loss": 1.61, "step": 56500 }, { "epoch": 0.23, "grad_norm": 3.099442958831787, "learning_rate": 0.0002, "loss": 1.5026, "step": 56510 }, { "epoch": 0.23, "grad_norm": 3.6840438842773438, "learning_rate": 0.0002, "loss": 1.3883, "step": 56520 }, { "epoch": 0.23, "grad_norm": 2.269319772720337, "learning_rate": 0.0002, "loss": 1.5965, "step": 56530 }, { "epoch": 0.23, "grad_norm": 7.533777713775635, "learning_rate": 0.0002, "loss": 1.5612, "step": 56540 }, { "epoch": 0.23, "grad_norm": 4.281620502471924, "learning_rate": 0.0002, "loss": 1.5755, "step": 56550 }, { "epoch": 0.23, "grad_norm": 5.101626396179199, "learning_rate": 0.0002, "loss": 1.4438, "step": 56560 }, { "epoch": 0.23, "grad_norm": 2.223619222640991, "learning_rate": 0.0002, "loss": 1.7542, "step": 56570 }, { "epoch": 0.23, "grad_norm": 2.8087828159332275, "learning_rate": 0.0002, "loss": 1.7632, "step": 56580 }, { "epoch": 0.23, "grad_norm": 2.7306480407714844, "learning_rate": 0.0002, "loss": 1.3755, "step": 56590 }, { "epoch": 0.23, "grad_norm": 3.4258222579956055, "learning_rate": 0.0002, "loss": 1.4431, "step": 56600 }, { "epoch": 0.23, "grad_norm": 2.5188980102539062, "learning_rate": 0.0002, "loss": 1.6951, "step": 56610 }, { "epoch": 0.23, "grad_norm": 1.8072757720947266, "learning_rate": 0.0002, "loss": 1.6968, "step": 56620 }, { "epoch": 0.23, "grad_norm": 2.401061534881592, "learning_rate": 0.0002, "loss": 1.5393, "step": 56630 }, { "epoch": 0.23, "grad_norm": 3.018749475479126, "learning_rate": 0.0002, "loss": 1.5116, "step": 56640 }, { "epoch": 0.23, "grad_norm": 2.8498001098632812, "learning_rate": 0.0002, "loss": 2.0145, "step": 56650 }, { "epoch": 0.23, "grad_norm": 4.644864082336426, "learning_rate": 0.0002, "loss": 1.5869, "step": 56660 }, { "epoch": 0.23, "grad_norm": 2.516209363937378, "learning_rate": 0.0002, "loss": 1.6927, "step": 56670 }, { "epoch": 0.23, "grad_norm": 2.3863015174865723, "learning_rate": 0.0002, "loss": 1.7472, "step": 56680 }, { "epoch": 0.23, "grad_norm": 2.377157211303711, "learning_rate": 0.0002, "loss": 1.6472, "step": 56690 }, { "epoch": 0.23, "grad_norm": 3.369295597076416, "learning_rate": 0.0002, "loss": 1.5894, "step": 56700 }, { "epoch": 0.23, "grad_norm": 2.073577404022217, "learning_rate": 0.0002, "loss": 1.496, "step": 56710 }, { "epoch": 0.23, "grad_norm": 3.053353786468506, "learning_rate": 0.0002, "loss": 1.5889, "step": 56720 }, { "epoch": 0.23, "grad_norm": 6.098090171813965, "learning_rate": 0.0002, "loss": 1.6697, "step": 56730 }, { "epoch": 0.23, "grad_norm": 3.1674156188964844, "learning_rate": 0.0002, "loss": 1.7933, "step": 56740 }, { "epoch": 0.23, "grad_norm": 1.8888678550720215, "learning_rate": 0.0002, "loss": 1.539, "step": 56750 }, { "epoch": 0.23, "grad_norm": 2.635772466659546, "learning_rate": 0.0002, "loss": 1.5854, "step": 56760 }, { "epoch": 0.23, "grad_norm": 2.0715057849884033, "learning_rate": 0.0002, "loss": 1.6015, "step": 56770 }, { "epoch": 0.23, "grad_norm": 3.405913829803467, "learning_rate": 0.0002, "loss": 1.5868, "step": 56780 }, { "epoch": 0.23, "grad_norm": 5.79306173324585, "learning_rate": 0.0002, "loss": 1.6746, "step": 56790 }, { "epoch": 0.23, "grad_norm": 2.490936756134033, "learning_rate": 0.0002, "loss": 1.6211, "step": 56800 }, { "epoch": 0.23, "grad_norm": 3.992957353591919, "learning_rate": 0.0002, "loss": 1.643, "step": 56810 }, { "epoch": 0.23, "grad_norm": 3.0080618858337402, "learning_rate": 0.0002, "loss": 1.4515, "step": 56820 }, { "epoch": 0.23, "grad_norm": 4.238487720489502, "learning_rate": 0.0002, "loss": 1.6119, "step": 56830 }, { "epoch": 0.23, "grad_norm": 2.6563198566436768, "learning_rate": 0.0002, "loss": 1.6842, "step": 56840 }, { "epoch": 0.23, "grad_norm": 3.3759539127349854, "learning_rate": 0.0002, "loss": 1.5272, "step": 56850 }, { "epoch": 0.23, "grad_norm": 3.95695161819458, "learning_rate": 0.0002, "loss": 1.7267, "step": 56860 }, { "epoch": 0.23, "grad_norm": 3.099194049835205, "learning_rate": 0.0002, "loss": 1.5698, "step": 56870 }, { "epoch": 0.23, "grad_norm": 2.2525739669799805, "learning_rate": 0.0002, "loss": 1.6149, "step": 56880 }, { "epoch": 0.23, "grad_norm": 3.8289647102355957, "learning_rate": 0.0002, "loss": 1.3159, "step": 56890 }, { "epoch": 0.23, "grad_norm": 3.175677537918091, "learning_rate": 0.0002, "loss": 1.5342, "step": 56900 }, { "epoch": 0.23, "grad_norm": 5.202762126922607, "learning_rate": 0.0002, "loss": 1.6193, "step": 56910 }, { "epoch": 0.23, "grad_norm": 3.1599349975585938, "learning_rate": 0.0002, "loss": 1.7487, "step": 56920 }, { "epoch": 0.23, "grad_norm": 2.2000350952148438, "learning_rate": 0.0002, "loss": 1.6917, "step": 56930 }, { "epoch": 0.23, "grad_norm": 2.790475368499756, "learning_rate": 0.0002, "loss": 1.7444, "step": 56940 }, { "epoch": 0.23, "grad_norm": 3.0361807346343994, "learning_rate": 0.0002, "loss": 1.6637, "step": 56950 }, { "epoch": 0.23, "grad_norm": 3.710448741912842, "learning_rate": 0.0002, "loss": 1.5259, "step": 56960 }, { "epoch": 0.23, "grad_norm": 3.058638334274292, "learning_rate": 0.0002, "loss": 1.6605, "step": 56970 }, { "epoch": 0.23, "grad_norm": 2.210482120513916, "learning_rate": 0.0002, "loss": 1.3812, "step": 56980 }, { "epoch": 0.23, "grad_norm": 2.204115629196167, "learning_rate": 0.0002, "loss": 1.4271, "step": 56990 }, { "epoch": 0.23, "grad_norm": 2.0815041065216064, "learning_rate": 0.0002, "loss": 1.4085, "step": 57000 }, { "epoch": 0.23, "grad_norm": 2.7147347927093506, "learning_rate": 0.0002, "loss": 1.242, "step": 57010 }, { "epoch": 0.23, "grad_norm": 2.963526964187622, "learning_rate": 0.0002, "loss": 1.552, "step": 57020 }, { "epoch": 0.23, "grad_norm": 3.9526641368865967, "learning_rate": 0.0002, "loss": 1.4984, "step": 57030 }, { "epoch": 0.23, "grad_norm": 3.7736423015594482, "learning_rate": 0.0002, "loss": 1.6672, "step": 57040 }, { "epoch": 0.23, "grad_norm": 2.0203347206115723, "learning_rate": 0.0002, "loss": 1.6299, "step": 57050 }, { "epoch": 0.23, "grad_norm": 2.5450961589813232, "learning_rate": 0.0002, "loss": 1.3932, "step": 57060 }, { "epoch": 0.23, "grad_norm": 3.1424813270568848, "learning_rate": 0.0002, "loss": 1.6853, "step": 57070 }, { "epoch": 0.23, "grad_norm": 2.704362630844116, "learning_rate": 0.0002, "loss": 1.6728, "step": 57080 }, { "epoch": 0.23, "grad_norm": 3.593714475631714, "learning_rate": 0.0002, "loss": 1.5722, "step": 57090 }, { "epoch": 0.23, "grad_norm": 1.8894331455230713, "learning_rate": 0.0002, "loss": 1.8097, "step": 57100 }, { "epoch": 0.23, "grad_norm": 2.6395070552825928, "learning_rate": 0.0002, "loss": 1.4479, "step": 57110 }, { "epoch": 0.23, "grad_norm": 2.7876338958740234, "learning_rate": 0.0002, "loss": 1.1881, "step": 57120 }, { "epoch": 0.23, "grad_norm": 4.8138041496276855, "learning_rate": 0.0002, "loss": 1.3225, "step": 57130 }, { "epoch": 0.23, "grad_norm": 3.8709442615509033, "learning_rate": 0.0002, "loss": 1.7334, "step": 57140 }, { "epoch": 0.23, "grad_norm": 5.249161720275879, "learning_rate": 0.0002, "loss": 1.8352, "step": 57150 }, { "epoch": 0.23, "grad_norm": 3.1981518268585205, "learning_rate": 0.0002, "loss": 1.594, "step": 57160 }, { "epoch": 0.23, "grad_norm": 2.3852028846740723, "learning_rate": 0.0002, "loss": 1.5802, "step": 57170 }, { "epoch": 0.23, "grad_norm": 2.862795829772949, "learning_rate": 0.0002, "loss": 1.5533, "step": 57180 }, { "epoch": 0.23, "grad_norm": 2.980463743209839, "learning_rate": 0.0002, "loss": 1.4956, "step": 57190 }, { "epoch": 0.23, "grad_norm": 3.2124416828155518, "learning_rate": 0.0002, "loss": 1.4611, "step": 57200 }, { "epoch": 0.23, "grad_norm": 2.6711831092834473, "learning_rate": 0.0002, "loss": 1.5101, "step": 57210 }, { "epoch": 0.23, "grad_norm": 2.3646697998046875, "learning_rate": 0.0002, "loss": 1.3055, "step": 57220 }, { "epoch": 0.23, "grad_norm": 3.019496440887451, "learning_rate": 0.0002, "loss": 1.5117, "step": 57230 }, { "epoch": 0.23, "grad_norm": 2.8107872009277344, "learning_rate": 0.0002, "loss": 1.4239, "step": 57240 }, { "epoch": 0.23, "grad_norm": 2.1213104724884033, "learning_rate": 0.0002, "loss": 1.3249, "step": 57250 }, { "epoch": 0.23, "grad_norm": 2.2647879123687744, "learning_rate": 0.0002, "loss": 1.546, "step": 57260 }, { "epoch": 0.23, "grad_norm": 2.9984495639801025, "learning_rate": 0.0002, "loss": 1.7017, "step": 57270 }, { "epoch": 0.23, "grad_norm": 2.48276948928833, "learning_rate": 0.0002, "loss": 1.5501, "step": 57280 }, { "epoch": 0.23, "grad_norm": 2.6724488735198975, "learning_rate": 0.0002, "loss": 1.4522, "step": 57290 }, { "epoch": 0.23, "grad_norm": 1.8788014650344849, "learning_rate": 0.0002, "loss": 1.4865, "step": 57300 }, { "epoch": 0.23, "grad_norm": 4.963150978088379, "learning_rate": 0.0002, "loss": 1.6083, "step": 57310 }, { "epoch": 0.23, "grad_norm": 3.6502301692962646, "learning_rate": 0.0002, "loss": 1.1754, "step": 57320 }, { "epoch": 0.23, "grad_norm": 4.709157466888428, "learning_rate": 0.0002, "loss": 1.6537, "step": 57330 }, { "epoch": 0.23, "grad_norm": 2.3478214740753174, "learning_rate": 0.0002, "loss": 1.7305, "step": 57340 }, { "epoch": 0.23, "grad_norm": 3.6139354705810547, "learning_rate": 0.0002, "loss": 1.6893, "step": 57350 }, { "epoch": 0.23, "grad_norm": 3.0408642292022705, "learning_rate": 0.0002, "loss": 1.5092, "step": 57360 }, { "epoch": 0.23, "grad_norm": 2.3191776275634766, "learning_rate": 0.0002, "loss": 1.4216, "step": 57370 }, { "epoch": 0.23, "grad_norm": 2.037513017654419, "learning_rate": 0.0002, "loss": 1.7297, "step": 57380 }, { "epoch": 0.23, "grad_norm": 2.830510139465332, "learning_rate": 0.0002, "loss": 1.575, "step": 57390 }, { "epoch": 0.23, "grad_norm": 3.1832950115203857, "learning_rate": 0.0002, "loss": 1.5838, "step": 57400 }, { "epoch": 0.23, "grad_norm": 2.7634708881378174, "learning_rate": 0.0002, "loss": 1.3519, "step": 57410 }, { "epoch": 0.23, "grad_norm": 3.199127197265625, "learning_rate": 0.0002, "loss": 1.657, "step": 57420 }, { "epoch": 0.23, "grad_norm": 3.514909029006958, "learning_rate": 0.0002, "loss": 1.4389, "step": 57430 }, { "epoch": 0.23, "grad_norm": 1.9411250352859497, "learning_rate": 0.0002, "loss": 1.3122, "step": 57440 }, { "epoch": 0.23, "grad_norm": 4.195519924163818, "learning_rate": 0.0002, "loss": 1.4804, "step": 57450 }, { "epoch": 0.23, "grad_norm": 3.49816632270813, "learning_rate": 0.0002, "loss": 1.5471, "step": 57460 }, { "epoch": 0.23, "grad_norm": 4.5113372802734375, "learning_rate": 0.0002, "loss": 1.5626, "step": 57470 }, { "epoch": 0.23, "grad_norm": 2.519932746887207, "learning_rate": 0.0002, "loss": 1.279, "step": 57480 }, { "epoch": 0.23, "grad_norm": 4.241296291351318, "learning_rate": 0.0002, "loss": 1.7401, "step": 57490 }, { "epoch": 0.23, "grad_norm": 3.7489280700683594, "learning_rate": 0.0002, "loss": 1.3354, "step": 57500 }, { "epoch": 0.23, "grad_norm": 1.4718750715255737, "learning_rate": 0.0002, "loss": 1.8325, "step": 57510 }, { "epoch": 0.23, "grad_norm": 2.753049850463867, "learning_rate": 0.0002, "loss": 1.4385, "step": 57520 }, { "epoch": 0.23, "grad_norm": 2.667874336242676, "learning_rate": 0.0002, "loss": 1.682, "step": 57530 }, { "epoch": 0.23, "grad_norm": 3.368579387664795, "learning_rate": 0.0002, "loss": 1.493, "step": 57540 }, { "epoch": 0.23, "grad_norm": 3.1095166206359863, "learning_rate": 0.0002, "loss": 1.5315, "step": 57550 }, { "epoch": 0.23, "grad_norm": 3.249204158782959, "learning_rate": 0.0002, "loss": 1.8125, "step": 57560 }, { "epoch": 0.23, "grad_norm": 2.4462060928344727, "learning_rate": 0.0002, "loss": 1.5489, "step": 57570 }, { "epoch": 0.23, "grad_norm": 2.8800604343414307, "learning_rate": 0.0002, "loss": 1.5827, "step": 57580 }, { "epoch": 0.23, "grad_norm": 2.9803624153137207, "learning_rate": 0.0002, "loss": 1.4206, "step": 57590 }, { "epoch": 0.23, "grad_norm": 3.718355417251587, "learning_rate": 0.0002, "loss": 1.5053, "step": 57600 }, { "epoch": 0.23, "grad_norm": 2.8217577934265137, "learning_rate": 0.0002, "loss": 1.7494, "step": 57610 }, { "epoch": 0.23, "grad_norm": 3.063140869140625, "learning_rate": 0.0002, "loss": 1.4824, "step": 57620 }, { "epoch": 0.23, "grad_norm": 2.412580728530884, "learning_rate": 0.0002, "loss": 1.4413, "step": 57630 }, { "epoch": 0.23, "grad_norm": 3.2386608123779297, "learning_rate": 0.0002, "loss": 1.6394, "step": 57640 }, { "epoch": 0.23, "grad_norm": 3.2877144813537598, "learning_rate": 0.0002, "loss": 1.5954, "step": 57650 }, { "epoch": 0.23, "grad_norm": 4.277543067932129, "learning_rate": 0.0002, "loss": 1.5656, "step": 57660 }, { "epoch": 0.23, "grad_norm": 9.790483474731445, "learning_rate": 0.0002, "loss": 1.5895, "step": 57670 }, { "epoch": 0.23, "grad_norm": 2.684183359146118, "learning_rate": 0.0002, "loss": 1.1365, "step": 57680 }, { "epoch": 0.23, "grad_norm": 3.9073169231414795, "learning_rate": 0.0002, "loss": 1.5472, "step": 57690 }, { "epoch": 0.23, "grad_norm": 4.088244915008545, "learning_rate": 0.0002, "loss": 1.5514, "step": 57700 }, { "epoch": 0.23, "grad_norm": 12.522252082824707, "learning_rate": 0.0002, "loss": 1.7709, "step": 57710 }, { "epoch": 0.23, "grad_norm": 3.3073768615722656, "learning_rate": 0.0002, "loss": 1.4314, "step": 57720 }, { "epoch": 0.24, "grad_norm": 6.938363552093506, "learning_rate": 0.0002, "loss": 1.6255, "step": 57730 }, { "epoch": 0.24, "grad_norm": 3.2572784423828125, "learning_rate": 0.0002, "loss": 1.3546, "step": 57740 }, { "epoch": 0.24, "grad_norm": 2.7468717098236084, "learning_rate": 0.0002, "loss": 1.5289, "step": 57750 }, { "epoch": 0.24, "grad_norm": 4.530308246612549, "learning_rate": 0.0002, "loss": 1.641, "step": 57760 }, { "epoch": 0.24, "grad_norm": 1.6989128589630127, "learning_rate": 0.0002, "loss": 1.5979, "step": 57770 }, { "epoch": 0.24, "grad_norm": 2.905250072479248, "learning_rate": 0.0002, "loss": 1.5977, "step": 57780 }, { "epoch": 0.24, "grad_norm": 3.1434264183044434, "learning_rate": 0.0002, "loss": 1.8612, "step": 57790 }, { "epoch": 0.24, "grad_norm": 3.0895180702209473, "learning_rate": 0.0002, "loss": 1.4549, "step": 57800 }, { "epoch": 0.24, "grad_norm": 3.3077120780944824, "learning_rate": 0.0002, "loss": 1.5559, "step": 57810 }, { "epoch": 0.24, "grad_norm": 3.768815755844116, "learning_rate": 0.0002, "loss": 1.6933, "step": 57820 }, { "epoch": 0.24, "grad_norm": 3.014707088470459, "learning_rate": 0.0002, "loss": 1.929, "step": 57830 }, { "epoch": 0.24, "grad_norm": 2.958756923675537, "learning_rate": 0.0002, "loss": 1.4746, "step": 57840 }, { "epoch": 0.24, "grad_norm": 2.3005099296569824, "learning_rate": 0.0002, "loss": 1.5868, "step": 57850 }, { "epoch": 0.24, "grad_norm": 1.8394591808319092, "learning_rate": 0.0002, "loss": 1.6262, "step": 57860 }, { "epoch": 0.24, "grad_norm": 3.310525894165039, "learning_rate": 0.0002, "loss": 1.6755, "step": 57870 }, { "epoch": 0.24, "grad_norm": 1.4455595016479492, "learning_rate": 0.0002, "loss": 1.7004, "step": 57880 }, { "epoch": 0.24, "grad_norm": 2.449561357498169, "learning_rate": 0.0002, "loss": 1.5755, "step": 57890 }, { "epoch": 0.24, "grad_norm": 2.8121912479400635, "learning_rate": 0.0002, "loss": 1.5198, "step": 57900 }, { "epoch": 0.24, "grad_norm": 3.3951873779296875, "learning_rate": 0.0002, "loss": 1.5891, "step": 57910 }, { "epoch": 0.24, "grad_norm": 2.8740687370300293, "learning_rate": 0.0002, "loss": 1.6778, "step": 57920 }, { "epoch": 0.24, "grad_norm": 3.590283155441284, "learning_rate": 0.0002, "loss": 1.5468, "step": 57930 }, { "epoch": 0.24, "grad_norm": 2.228633165359497, "learning_rate": 0.0002, "loss": 1.6981, "step": 57940 }, { "epoch": 0.24, "grad_norm": 2.9779212474823, "learning_rate": 0.0002, "loss": 1.5563, "step": 57950 }, { "epoch": 0.24, "grad_norm": 2.2817487716674805, "learning_rate": 0.0002, "loss": 1.6506, "step": 57960 }, { "epoch": 0.24, "grad_norm": 2.1613025665283203, "learning_rate": 0.0002, "loss": 1.5594, "step": 57970 }, { "epoch": 0.24, "grad_norm": 2.1570918560028076, "learning_rate": 0.0002, "loss": 1.4262, "step": 57980 }, { "epoch": 0.24, "grad_norm": 3.833483934402466, "learning_rate": 0.0002, "loss": 1.2728, "step": 57990 }, { "epoch": 0.24, "grad_norm": 1.8983885049819946, "learning_rate": 0.0002, "loss": 1.4819, "step": 58000 }, { "epoch": 0.24, "grad_norm": 2.234528064727783, "learning_rate": 0.0002, "loss": 1.5197, "step": 58010 }, { "epoch": 0.24, "grad_norm": 3.7478978633880615, "learning_rate": 0.0002, "loss": 1.1236, "step": 58020 }, { "epoch": 0.24, "grad_norm": 2.5250654220581055, "learning_rate": 0.0002, "loss": 1.606, "step": 58030 }, { "epoch": 0.24, "grad_norm": 2.4918179512023926, "learning_rate": 0.0002, "loss": 1.48, "step": 58040 }, { "epoch": 0.24, "grad_norm": 3.6836600303649902, "learning_rate": 0.0002, "loss": 1.5367, "step": 58050 }, { "epoch": 0.24, "grad_norm": 3.51171875, "learning_rate": 0.0002, "loss": 1.5264, "step": 58060 }, { "epoch": 0.24, "grad_norm": 2.834094762802124, "learning_rate": 0.0002, "loss": 1.5414, "step": 58070 }, { "epoch": 0.24, "grad_norm": 3.9791905879974365, "learning_rate": 0.0002, "loss": 1.1543, "step": 58080 }, { "epoch": 0.24, "grad_norm": 2.658318281173706, "learning_rate": 0.0002, "loss": 1.5964, "step": 58090 }, { "epoch": 0.24, "grad_norm": 3.754504919052124, "learning_rate": 0.0002, "loss": 1.6945, "step": 58100 }, { "epoch": 0.24, "grad_norm": 3.2644448280334473, "learning_rate": 0.0002, "loss": 1.4087, "step": 58110 }, { "epoch": 0.24, "grad_norm": 2.419217586517334, "learning_rate": 0.0002, "loss": 1.4904, "step": 58120 }, { "epoch": 0.24, "grad_norm": 2.232595920562744, "learning_rate": 0.0002, "loss": 1.7234, "step": 58130 }, { "epoch": 0.24, "grad_norm": 1.9469696283340454, "learning_rate": 0.0002, "loss": 1.7465, "step": 58140 }, { "epoch": 0.24, "grad_norm": 3.058110237121582, "learning_rate": 0.0002, "loss": 1.4685, "step": 58150 }, { "epoch": 0.24, "grad_norm": 2.6818418502807617, "learning_rate": 0.0002, "loss": 1.581, "step": 58160 }, { "epoch": 0.24, "grad_norm": 2.62099552154541, "learning_rate": 0.0002, "loss": 1.5325, "step": 58170 }, { "epoch": 0.24, "grad_norm": 2.6273181438446045, "learning_rate": 0.0002, "loss": 1.6386, "step": 58180 }, { "epoch": 0.24, "grad_norm": 2.3931660652160645, "learning_rate": 0.0002, "loss": 1.4685, "step": 58190 }, { "epoch": 0.24, "grad_norm": 1.8791850805282593, "learning_rate": 0.0002, "loss": 1.6113, "step": 58200 }, { "epoch": 0.24, "grad_norm": 3.1967527866363525, "learning_rate": 0.0002, "loss": 1.5546, "step": 58210 }, { "epoch": 0.24, "grad_norm": 3.724782943725586, "learning_rate": 0.0002, "loss": 1.581, "step": 58220 }, { "epoch": 0.24, "grad_norm": 6.582754611968994, "learning_rate": 0.0002, "loss": 1.4412, "step": 58230 }, { "epoch": 0.24, "grad_norm": 1.9295367002487183, "learning_rate": 0.0002, "loss": 1.6604, "step": 58240 }, { "epoch": 0.24, "grad_norm": 2.6944422721862793, "learning_rate": 0.0002, "loss": 1.5725, "step": 58250 }, { "epoch": 0.24, "grad_norm": 3.424532890319824, "learning_rate": 0.0002, "loss": 1.5227, "step": 58260 }, { "epoch": 0.24, "grad_norm": 2.9215643405914307, "learning_rate": 0.0002, "loss": 1.4876, "step": 58270 }, { "epoch": 0.24, "grad_norm": 3.3373265266418457, "learning_rate": 0.0002, "loss": 1.5135, "step": 58280 }, { "epoch": 0.24, "grad_norm": 2.0872642993927, "learning_rate": 0.0002, "loss": 1.5511, "step": 58290 }, { "epoch": 0.24, "grad_norm": 2.171527147293091, "learning_rate": 0.0002, "loss": 1.4123, "step": 58300 }, { "epoch": 0.24, "grad_norm": 4.658817768096924, "learning_rate": 0.0002, "loss": 1.6343, "step": 58310 }, { "epoch": 0.24, "grad_norm": 7.577494144439697, "learning_rate": 0.0002, "loss": 1.7274, "step": 58320 }, { "epoch": 0.24, "grad_norm": 2.199617624282837, "learning_rate": 0.0002, "loss": 1.6085, "step": 58330 }, { "epoch": 0.24, "grad_norm": 8.506613731384277, "learning_rate": 0.0002, "loss": 1.8156, "step": 58340 }, { "epoch": 0.24, "grad_norm": 3.4648897647857666, "learning_rate": 0.0002, "loss": 1.5108, "step": 58350 }, { "epoch": 0.24, "grad_norm": 2.2794222831726074, "learning_rate": 0.0002, "loss": 1.2657, "step": 58360 }, { "epoch": 0.24, "grad_norm": 2.3731625080108643, "learning_rate": 0.0002, "loss": 1.5567, "step": 58370 }, { "epoch": 0.24, "grad_norm": 3.3470613956451416, "learning_rate": 0.0002, "loss": 1.4966, "step": 58380 }, { "epoch": 0.24, "grad_norm": 4.021883964538574, "learning_rate": 0.0002, "loss": 1.569, "step": 58390 }, { "epoch": 0.24, "grad_norm": 1.9409761428833008, "learning_rate": 0.0002, "loss": 1.5521, "step": 58400 }, { "epoch": 0.24, "grad_norm": 2.493623971939087, "learning_rate": 0.0002, "loss": 1.4299, "step": 58410 }, { "epoch": 0.24, "grad_norm": 2.0740177631378174, "learning_rate": 0.0002, "loss": 1.6028, "step": 58420 }, { "epoch": 0.24, "grad_norm": 3.173072099685669, "learning_rate": 0.0002, "loss": 1.5778, "step": 58430 }, { "epoch": 0.24, "grad_norm": 3.0682404041290283, "learning_rate": 0.0002, "loss": 1.7196, "step": 58440 }, { "epoch": 0.24, "grad_norm": 4.023421287536621, "learning_rate": 0.0002, "loss": 1.5482, "step": 58450 }, { "epoch": 0.24, "grad_norm": 2.295816421508789, "learning_rate": 0.0002, "loss": 1.6511, "step": 58460 }, { "epoch": 0.24, "grad_norm": 2.5912482738494873, "learning_rate": 0.0002, "loss": 1.5073, "step": 58470 }, { "epoch": 0.24, "grad_norm": 3.284269094467163, "learning_rate": 0.0002, "loss": 1.6144, "step": 58480 }, { "epoch": 0.24, "grad_norm": 2.9029200077056885, "learning_rate": 0.0002, "loss": 1.7033, "step": 58490 }, { "epoch": 0.24, "grad_norm": 3.523685932159424, "learning_rate": 0.0002, "loss": 1.5504, "step": 58500 }, { "epoch": 0.24, "grad_norm": 3.319295883178711, "learning_rate": 0.0002, "loss": 1.6557, "step": 58510 }, { "epoch": 0.24, "grad_norm": 2.671572685241699, "learning_rate": 0.0002, "loss": 1.6134, "step": 58520 }, { "epoch": 0.24, "grad_norm": 2.3621702194213867, "learning_rate": 0.0002, "loss": 1.5787, "step": 58530 }, { "epoch": 0.24, "grad_norm": 2.9134902954101562, "learning_rate": 0.0002, "loss": 1.6615, "step": 58540 }, { "epoch": 0.24, "grad_norm": 4.228507041931152, "learning_rate": 0.0002, "loss": 1.6605, "step": 58550 }, { "epoch": 0.24, "grad_norm": 2.6870336532592773, "learning_rate": 0.0002, "loss": 1.5345, "step": 58560 }, { "epoch": 0.24, "grad_norm": 2.299332618713379, "learning_rate": 0.0002, "loss": 1.4188, "step": 58570 }, { "epoch": 0.24, "grad_norm": 4.621550559997559, "learning_rate": 0.0002, "loss": 1.4426, "step": 58580 }, { "epoch": 0.24, "grad_norm": 4.75216817855835, "learning_rate": 0.0002, "loss": 1.8532, "step": 58590 }, { "epoch": 0.24, "grad_norm": 2.968073844909668, "learning_rate": 0.0002, "loss": 1.4661, "step": 58600 }, { "epoch": 0.24, "grad_norm": 3.9304676055908203, "learning_rate": 0.0002, "loss": 1.3318, "step": 58610 }, { "epoch": 0.24, "grad_norm": 1.7401745319366455, "learning_rate": 0.0002, "loss": 1.6987, "step": 58620 }, { "epoch": 0.24, "grad_norm": 5.079002380371094, "learning_rate": 0.0002, "loss": 1.5646, "step": 58630 }, { "epoch": 0.24, "grad_norm": 2.7894446849823, "learning_rate": 0.0002, "loss": 1.4699, "step": 58640 }, { "epoch": 0.24, "grad_norm": 3.0745794773101807, "learning_rate": 0.0002, "loss": 1.8075, "step": 58650 }, { "epoch": 0.24, "grad_norm": 2.607436180114746, "learning_rate": 0.0002, "loss": 1.4142, "step": 58660 }, { "epoch": 0.24, "grad_norm": 2.7091245651245117, "learning_rate": 0.0002, "loss": 1.3976, "step": 58670 }, { "epoch": 0.24, "grad_norm": 2.9347169399261475, "learning_rate": 0.0002, "loss": 1.8524, "step": 58680 }, { "epoch": 0.24, "grad_norm": 5.883279323577881, "learning_rate": 0.0002, "loss": 1.4583, "step": 58690 }, { "epoch": 0.24, "grad_norm": 2.422603130340576, "learning_rate": 0.0002, "loss": 1.7061, "step": 58700 }, { "epoch": 0.24, "grad_norm": 3.56866717338562, "learning_rate": 0.0002, "loss": 1.3601, "step": 58710 }, { "epoch": 0.24, "grad_norm": 2.106955051422119, "learning_rate": 0.0002, "loss": 1.3247, "step": 58720 }, { "epoch": 0.24, "grad_norm": 2.5824592113494873, "learning_rate": 0.0002, "loss": 1.5584, "step": 58730 }, { "epoch": 0.24, "grad_norm": 3.8124101161956787, "learning_rate": 0.0002, "loss": 1.9489, "step": 58740 }, { "epoch": 0.24, "grad_norm": 2.998718738555908, "learning_rate": 0.0002, "loss": 1.5778, "step": 58750 }, { "epoch": 0.24, "grad_norm": 2.670991897583008, "learning_rate": 0.0002, "loss": 1.6956, "step": 58760 }, { "epoch": 0.24, "grad_norm": 1.728931188583374, "learning_rate": 0.0002, "loss": 1.7843, "step": 58770 }, { "epoch": 0.24, "grad_norm": 2.935877799987793, "learning_rate": 0.0002, "loss": 1.7268, "step": 58780 }, { "epoch": 0.24, "grad_norm": 2.0077908039093018, "learning_rate": 0.0002, "loss": 1.4274, "step": 58790 }, { "epoch": 0.24, "grad_norm": 2.4587864875793457, "learning_rate": 0.0002, "loss": 1.5341, "step": 58800 }, { "epoch": 0.24, "grad_norm": 2.3794100284576416, "learning_rate": 0.0002, "loss": 1.556, "step": 58810 }, { "epoch": 0.24, "grad_norm": 3.2444519996643066, "learning_rate": 0.0002, "loss": 1.5168, "step": 58820 }, { "epoch": 0.24, "grad_norm": 1.8667302131652832, "learning_rate": 0.0002, "loss": 1.6122, "step": 58830 }, { "epoch": 0.24, "grad_norm": 1.8326665163040161, "learning_rate": 0.0002, "loss": 1.5532, "step": 58840 }, { "epoch": 0.24, "grad_norm": 2.4630143642425537, "learning_rate": 0.0002, "loss": 1.7974, "step": 58850 }, { "epoch": 0.24, "grad_norm": 3.023425579071045, "learning_rate": 0.0002, "loss": 1.7301, "step": 58860 }, { "epoch": 0.24, "grad_norm": 2.7754504680633545, "learning_rate": 0.0002, "loss": 1.3703, "step": 58870 }, { "epoch": 0.24, "grad_norm": 2.5647120475769043, "learning_rate": 0.0002, "loss": 1.7864, "step": 58880 }, { "epoch": 0.24, "grad_norm": 2.6308233737945557, "learning_rate": 0.0002, "loss": 1.7538, "step": 58890 }, { "epoch": 0.24, "grad_norm": 3.3314263820648193, "learning_rate": 0.0002, "loss": 1.7696, "step": 58900 }, { "epoch": 0.24, "grad_norm": 3.5712571144104004, "learning_rate": 0.0002, "loss": 1.6177, "step": 58910 }, { "epoch": 0.24, "grad_norm": 1.677738904953003, "learning_rate": 0.0002, "loss": 1.5428, "step": 58920 }, { "epoch": 0.24, "grad_norm": 1.8764318227767944, "learning_rate": 0.0002, "loss": 1.5916, "step": 58930 }, { "epoch": 0.24, "grad_norm": 2.8376471996307373, "learning_rate": 0.0002, "loss": 1.4094, "step": 58940 }, { "epoch": 0.24, "grad_norm": 3.632274627685547, "learning_rate": 0.0002, "loss": 1.6342, "step": 58950 }, { "epoch": 0.24, "grad_norm": 3.308422327041626, "learning_rate": 0.0002, "loss": 1.643, "step": 58960 }, { "epoch": 0.24, "grad_norm": 3.9825289249420166, "learning_rate": 0.0002, "loss": 1.3977, "step": 58970 }, { "epoch": 0.24, "grad_norm": 3.051950216293335, "learning_rate": 0.0002, "loss": 1.6055, "step": 58980 }, { "epoch": 0.24, "grad_norm": 3.3296144008636475, "learning_rate": 0.0002, "loss": 1.7176, "step": 58990 }, { "epoch": 0.24, "grad_norm": 5.905830383300781, "learning_rate": 0.0002, "loss": 1.5685, "step": 59000 }, { "epoch": 0.24, "grad_norm": 2.2509520053863525, "learning_rate": 0.0002, "loss": 1.6016, "step": 59010 }, { "epoch": 0.24, "grad_norm": 1.9389265775680542, "learning_rate": 0.0002, "loss": 1.549, "step": 59020 }, { "epoch": 0.24, "grad_norm": 3.1626760959625244, "learning_rate": 0.0002, "loss": 1.4998, "step": 59030 }, { "epoch": 0.24, "grad_norm": 2.7136127948760986, "learning_rate": 0.0002, "loss": 1.4879, "step": 59040 }, { "epoch": 0.24, "grad_norm": 3.867948293685913, "learning_rate": 0.0002, "loss": 1.5115, "step": 59050 }, { "epoch": 0.24, "grad_norm": 3.54948091506958, "learning_rate": 0.0002, "loss": 1.3559, "step": 59060 }, { "epoch": 0.24, "grad_norm": 3.8337409496307373, "learning_rate": 0.0002, "loss": 1.578, "step": 59070 }, { "epoch": 0.24, "grad_norm": 3.5183029174804688, "learning_rate": 0.0002, "loss": 1.4279, "step": 59080 }, { "epoch": 0.24, "grad_norm": 2.7138595581054688, "learning_rate": 0.0002, "loss": 1.4651, "step": 59090 }, { "epoch": 0.24, "grad_norm": 3.004624366760254, "learning_rate": 0.0002, "loss": 1.4033, "step": 59100 }, { "epoch": 0.24, "grad_norm": 2.6858420372009277, "learning_rate": 0.0002, "loss": 1.4802, "step": 59110 }, { "epoch": 0.24, "grad_norm": 1.8151216506958008, "learning_rate": 0.0002, "loss": 1.4553, "step": 59120 }, { "epoch": 0.24, "grad_norm": 3.181339740753174, "learning_rate": 0.0002, "loss": 1.5854, "step": 59130 }, { "epoch": 0.24, "grad_norm": 3.3863484859466553, "learning_rate": 0.0002, "loss": 1.672, "step": 59140 }, { "epoch": 0.24, "grad_norm": 2.4051544666290283, "learning_rate": 0.0002, "loss": 1.4661, "step": 59150 }, { "epoch": 0.24, "grad_norm": 3.1293933391571045, "learning_rate": 0.0002, "loss": 1.4247, "step": 59160 }, { "epoch": 0.24, "grad_norm": 3.5269155502319336, "learning_rate": 0.0002, "loss": 1.6616, "step": 59170 }, { "epoch": 0.24, "grad_norm": 2.5027565956115723, "learning_rate": 0.0002, "loss": 1.6335, "step": 59180 }, { "epoch": 0.24, "grad_norm": 4.0896992683410645, "learning_rate": 0.0002, "loss": 1.5592, "step": 59190 }, { "epoch": 0.24, "grad_norm": 3.37620210647583, "learning_rate": 0.0002, "loss": 1.598, "step": 59200 }, { "epoch": 0.24, "grad_norm": 2.903038501739502, "learning_rate": 0.0002, "loss": 1.5913, "step": 59210 }, { "epoch": 0.24, "grad_norm": 4.2117791175842285, "learning_rate": 0.0002, "loss": 1.5219, "step": 59220 }, { "epoch": 0.24, "grad_norm": 3.226567029953003, "learning_rate": 0.0002, "loss": 1.5768, "step": 59230 }, { "epoch": 0.24, "grad_norm": 4.364389896392822, "learning_rate": 0.0002, "loss": 1.719, "step": 59240 }, { "epoch": 0.24, "grad_norm": 1.6657631397247314, "learning_rate": 0.0002, "loss": 1.5992, "step": 59250 }, { "epoch": 0.24, "grad_norm": 2.3270981311798096, "learning_rate": 0.0002, "loss": 1.5305, "step": 59260 }, { "epoch": 0.24, "grad_norm": 3.4393563270568848, "learning_rate": 0.0002, "loss": 1.4797, "step": 59270 }, { "epoch": 0.24, "grad_norm": 6.304102897644043, "learning_rate": 0.0002, "loss": 1.4349, "step": 59280 }, { "epoch": 0.24, "grad_norm": 2.6034209728240967, "learning_rate": 0.0002, "loss": 1.4494, "step": 59290 }, { "epoch": 0.24, "grad_norm": 3.360471248626709, "learning_rate": 0.0002, "loss": 1.423, "step": 59300 }, { "epoch": 0.24, "grad_norm": 2.3205864429473877, "learning_rate": 0.0002, "loss": 1.5255, "step": 59310 }, { "epoch": 0.24, "grad_norm": 2.269812822341919, "learning_rate": 0.0002, "loss": 1.496, "step": 59320 }, { "epoch": 0.24, "grad_norm": 3.2418174743652344, "learning_rate": 0.0002, "loss": 1.6351, "step": 59330 }, { "epoch": 0.24, "grad_norm": 1.9045120477676392, "learning_rate": 0.0002, "loss": 1.7277, "step": 59340 }, { "epoch": 0.24, "grad_norm": 4.3058762550354, "learning_rate": 0.0002, "loss": 1.4773, "step": 59350 }, { "epoch": 0.24, "grad_norm": 2.1012892723083496, "learning_rate": 0.0002, "loss": 1.5804, "step": 59360 }, { "epoch": 0.24, "grad_norm": 2.082690954208374, "learning_rate": 0.0002, "loss": 1.7323, "step": 59370 }, { "epoch": 0.24, "grad_norm": 2.7266290187835693, "learning_rate": 0.0002, "loss": 1.4524, "step": 59380 }, { "epoch": 0.24, "grad_norm": 1.7708741426467896, "learning_rate": 0.0002, "loss": 1.4397, "step": 59390 }, { "epoch": 0.24, "grad_norm": 2.7867414951324463, "learning_rate": 0.0002, "loss": 1.4378, "step": 59400 }, { "epoch": 0.24, "grad_norm": 4.572828769683838, "learning_rate": 0.0002, "loss": 1.4743, "step": 59410 }, { "epoch": 0.24, "grad_norm": 1.5430126190185547, "learning_rate": 0.0002, "loss": 1.6687, "step": 59420 }, { "epoch": 0.24, "grad_norm": 2.1693601608276367, "learning_rate": 0.0002, "loss": 1.2746, "step": 59430 }, { "epoch": 0.24, "grad_norm": 1.8127518892288208, "learning_rate": 0.0002, "loss": 1.6336, "step": 59440 }, { "epoch": 0.24, "grad_norm": 1.4979832172393799, "learning_rate": 0.0002, "loss": 1.6107, "step": 59450 }, { "epoch": 0.24, "grad_norm": 2.4810519218444824, "learning_rate": 0.0002, "loss": 1.4078, "step": 59460 }, { "epoch": 0.24, "grad_norm": 2.186389923095703, "learning_rate": 0.0002, "loss": 1.5162, "step": 59470 }, { "epoch": 0.24, "grad_norm": 5.014943599700928, "learning_rate": 0.0002, "loss": 1.7654, "step": 59480 }, { "epoch": 0.24, "grad_norm": 3.4003429412841797, "learning_rate": 0.0002, "loss": 1.3137, "step": 59490 }, { "epoch": 0.24, "grad_norm": 4.39351224899292, "learning_rate": 0.0002, "loss": 1.4465, "step": 59500 }, { "epoch": 0.24, "grad_norm": 2.9453938007354736, "learning_rate": 0.0002, "loss": 1.5932, "step": 59510 }, { "epoch": 0.24, "grad_norm": 2.6392180919647217, "learning_rate": 0.0002, "loss": 1.5987, "step": 59520 }, { "epoch": 0.24, "grad_norm": 4.1774516105651855, "learning_rate": 0.0002, "loss": 1.3616, "step": 59530 }, { "epoch": 0.24, "grad_norm": 2.531536340713501, "learning_rate": 0.0002, "loss": 1.7099, "step": 59540 }, { "epoch": 0.24, "grad_norm": 3.0633974075317383, "learning_rate": 0.0002, "loss": 1.788, "step": 59550 }, { "epoch": 0.24, "grad_norm": 3.7954022884368896, "learning_rate": 0.0002, "loss": 1.624, "step": 59560 }, { "epoch": 0.24, "grad_norm": 2.5678422451019287, "learning_rate": 0.0002, "loss": 1.5159, "step": 59570 }, { "epoch": 0.24, "grad_norm": 2.4496400356292725, "learning_rate": 0.0002, "loss": 1.3504, "step": 59580 }, { "epoch": 0.24, "grad_norm": 4.543359756469727, "learning_rate": 0.0002, "loss": 1.3616, "step": 59590 }, { "epoch": 0.24, "grad_norm": 2.9768784046173096, "learning_rate": 0.0002, "loss": 1.5586, "step": 59600 }, { "epoch": 0.24, "grad_norm": 3.389608383178711, "learning_rate": 0.0002, "loss": 1.4527, "step": 59610 }, { "epoch": 0.24, "grad_norm": 3.846195697784424, "learning_rate": 0.0002, "loss": 1.6846, "step": 59620 }, { "epoch": 0.24, "grad_norm": 2.2497265338897705, "learning_rate": 0.0002, "loss": 1.4464, "step": 59630 }, { "epoch": 0.24, "grad_norm": 3.030228853225708, "learning_rate": 0.0002, "loss": 1.4474, "step": 59640 }, { "epoch": 0.24, "grad_norm": 1.3093876838684082, "learning_rate": 0.0002, "loss": 1.6952, "step": 59650 }, { "epoch": 0.24, "grad_norm": 2.170811414718628, "learning_rate": 0.0002, "loss": 1.2814, "step": 59660 }, { "epoch": 0.24, "grad_norm": 2.548356294631958, "learning_rate": 0.0002, "loss": 1.4058, "step": 59670 }, { "epoch": 0.24, "grad_norm": 1.3876025676727295, "learning_rate": 0.0002, "loss": 1.7077, "step": 59680 }, { "epoch": 0.24, "grad_norm": 6.742061138153076, "learning_rate": 0.0002, "loss": 1.5407, "step": 59690 }, { "epoch": 0.24, "grad_norm": 6.065975666046143, "learning_rate": 0.0002, "loss": 1.5319, "step": 59700 }, { "epoch": 0.24, "grad_norm": 3.4384846687316895, "learning_rate": 0.0002, "loss": 1.4836, "step": 59710 }, { "epoch": 0.24, "grad_norm": 2.7356581687927246, "learning_rate": 0.0002, "loss": 1.6102, "step": 59720 }, { "epoch": 0.24, "grad_norm": 2.3670809268951416, "learning_rate": 0.0002, "loss": 1.4617, "step": 59730 }, { "epoch": 0.24, "grad_norm": 2.6713175773620605, "learning_rate": 0.0002, "loss": 1.7835, "step": 59740 }, { "epoch": 0.24, "grad_norm": 3.4057095050811768, "learning_rate": 0.0002, "loss": 1.6798, "step": 59750 }, { "epoch": 0.24, "grad_norm": 2.8224992752075195, "learning_rate": 0.0002, "loss": 1.6307, "step": 59760 }, { "epoch": 0.24, "grad_norm": 2.912750244140625, "learning_rate": 0.0002, "loss": 1.6252, "step": 59770 }, { "epoch": 0.24, "grad_norm": 2.41760516166687, "learning_rate": 0.0002, "loss": 1.4645, "step": 59780 }, { "epoch": 0.24, "grad_norm": 2.648756742477417, "learning_rate": 0.0002, "loss": 1.6912, "step": 59790 }, { "epoch": 0.24, "grad_norm": 2.6015021800994873, "learning_rate": 0.0002, "loss": 1.4501, "step": 59800 }, { "epoch": 0.24, "grad_norm": 2.5391108989715576, "learning_rate": 0.0002, "loss": 1.5048, "step": 59810 }, { "epoch": 0.24, "grad_norm": 3.126727819442749, "learning_rate": 0.0002, "loss": 1.4939, "step": 59820 }, { "epoch": 0.24, "grad_norm": 3.759584426879883, "learning_rate": 0.0002, "loss": 1.5518, "step": 59830 }, { "epoch": 0.24, "grad_norm": 3.0355281829833984, "learning_rate": 0.0002, "loss": 1.519, "step": 59840 }, { "epoch": 0.24, "grad_norm": 2.350409746170044, "learning_rate": 0.0002, "loss": 1.5304, "step": 59850 }, { "epoch": 0.24, "grad_norm": 2.349745750427246, "learning_rate": 0.0002, "loss": 1.3989, "step": 59860 }, { "epoch": 0.24, "grad_norm": 1.5499038696289062, "learning_rate": 0.0002, "loss": 1.5496, "step": 59870 }, { "epoch": 0.24, "grad_norm": 3.3231019973754883, "learning_rate": 0.0002, "loss": 1.5762, "step": 59880 }, { "epoch": 0.24, "grad_norm": 2.4791924953460693, "learning_rate": 0.0002, "loss": 1.6116, "step": 59890 }, { "epoch": 0.24, "grad_norm": 2.7979609966278076, "learning_rate": 0.0002, "loss": 1.8409, "step": 59900 }, { "epoch": 0.24, "grad_norm": 1.8928074836730957, "learning_rate": 0.0002, "loss": 1.6593, "step": 59910 }, { "epoch": 0.24, "grad_norm": 2.393435478210449, "learning_rate": 0.0002, "loss": 1.5627, "step": 59920 }, { "epoch": 0.24, "grad_norm": 3.2497026920318604, "learning_rate": 0.0002, "loss": 1.6148, "step": 59930 }, { "epoch": 0.24, "grad_norm": 1.911492943763733, "learning_rate": 0.0002, "loss": 1.5632, "step": 59940 }, { "epoch": 0.24, "grad_norm": 3.171273708343506, "learning_rate": 0.0002, "loss": 1.4563, "step": 59950 }, { "epoch": 0.24, "grad_norm": 3.091115713119507, "learning_rate": 0.0002, "loss": 1.5856, "step": 59960 }, { "epoch": 0.24, "grad_norm": 4.023662090301514, "learning_rate": 0.0002, "loss": 1.6523, "step": 59970 }, { "epoch": 0.24, "grad_norm": 6.821052551269531, "learning_rate": 0.0002, "loss": 1.7904, "step": 59980 }, { "epoch": 0.24, "grad_norm": 2.5457987785339355, "learning_rate": 0.0002, "loss": 1.8058, "step": 59990 }, { "epoch": 0.24, "grad_norm": 2.956148862838745, "learning_rate": 0.0002, "loss": 1.6023, "step": 60000 }, { "epoch": 0.24, "grad_norm": 3.817948818206787, "learning_rate": 0.0002, "loss": 1.7109, "step": 60010 }, { "epoch": 0.24, "grad_norm": 4.776464462280273, "learning_rate": 0.0002, "loss": 1.5095, "step": 60020 }, { "epoch": 0.24, "grad_norm": 2.4598135948181152, "learning_rate": 0.0002, "loss": 1.4001, "step": 60030 }, { "epoch": 0.24, "grad_norm": 3.026157855987549, "learning_rate": 0.0002, "loss": 1.3315, "step": 60040 }, { "epoch": 0.24, "grad_norm": 2.9843339920043945, "learning_rate": 0.0002, "loss": 1.5981, "step": 60050 }, { "epoch": 0.24, "grad_norm": 2.5936412811279297, "learning_rate": 0.0002, "loss": 1.401, "step": 60060 }, { "epoch": 0.24, "grad_norm": 2.9016308784484863, "learning_rate": 0.0002, "loss": 1.355, "step": 60070 }, { "epoch": 0.24, "grad_norm": 2.517824411392212, "learning_rate": 0.0002, "loss": 1.5312, "step": 60080 }, { "epoch": 0.24, "grad_norm": 3.0843329429626465, "learning_rate": 0.0002, "loss": 1.4106, "step": 60090 }, { "epoch": 0.24, "grad_norm": 3.4148471355438232, "learning_rate": 0.0002, "loss": 1.4916, "step": 60100 }, { "epoch": 0.24, "grad_norm": 1.322930097579956, "learning_rate": 0.0002, "loss": 1.4793, "step": 60110 }, { "epoch": 0.24, "grad_norm": 2.834442615509033, "learning_rate": 0.0002, "loss": 1.6283, "step": 60120 }, { "epoch": 0.24, "grad_norm": 2.9427757263183594, "learning_rate": 0.0002, "loss": 1.5875, "step": 60130 }, { "epoch": 0.24, "grad_norm": 2.691179037094116, "learning_rate": 0.0002, "loss": 1.492, "step": 60140 }, { "epoch": 0.24, "grad_norm": 4.022714614868164, "learning_rate": 0.0002, "loss": 1.5932, "step": 60150 }, { "epoch": 0.24, "grad_norm": 2.5078890323638916, "learning_rate": 0.0002, "loss": 1.4761, "step": 60160 }, { "epoch": 0.24, "grad_norm": 3.346761465072632, "learning_rate": 0.0002, "loss": 1.7516, "step": 60170 }, { "epoch": 0.24, "grad_norm": 5.578930377960205, "learning_rate": 0.0002, "loss": 1.5832, "step": 60180 }, { "epoch": 0.25, "grad_norm": 3.212830066680908, "learning_rate": 0.0002, "loss": 1.4303, "step": 60190 }, { "epoch": 0.25, "grad_norm": 1.6534364223480225, "learning_rate": 0.0002, "loss": 1.2419, "step": 60200 }, { "epoch": 0.25, "grad_norm": 4.242516040802002, "learning_rate": 0.0002, "loss": 1.5902, "step": 60210 }, { "epoch": 0.25, "grad_norm": 6.754825592041016, "learning_rate": 0.0002, "loss": 2.0019, "step": 60220 }, { "epoch": 0.25, "grad_norm": 2.9240992069244385, "learning_rate": 0.0002, "loss": 1.8328, "step": 60230 }, { "epoch": 0.25, "grad_norm": 1.7442470788955688, "learning_rate": 0.0002, "loss": 1.6471, "step": 60240 }, { "epoch": 0.25, "grad_norm": 3.5520565509796143, "learning_rate": 0.0002, "loss": 1.4935, "step": 60250 }, { "epoch": 0.25, "grad_norm": 1.4444198608398438, "learning_rate": 0.0002, "loss": 1.6237, "step": 60260 }, { "epoch": 0.25, "grad_norm": 2.7869133949279785, "learning_rate": 0.0002, "loss": 1.6384, "step": 60270 }, { "epoch": 0.25, "grad_norm": 4.050931453704834, "learning_rate": 0.0002, "loss": 1.6894, "step": 60280 }, { "epoch": 0.25, "grad_norm": 2.1045820713043213, "learning_rate": 0.0002, "loss": 1.6191, "step": 60290 }, { "epoch": 0.25, "grad_norm": 2.312004327774048, "learning_rate": 0.0002, "loss": 1.6011, "step": 60300 }, { "epoch": 0.25, "grad_norm": 2.138887643814087, "learning_rate": 0.0002, "loss": 1.6624, "step": 60310 }, { "epoch": 0.25, "grad_norm": 9.247352600097656, "learning_rate": 0.0002, "loss": 1.6424, "step": 60320 }, { "epoch": 0.25, "grad_norm": 2.6933507919311523, "learning_rate": 0.0002, "loss": 1.526, "step": 60330 }, { "epoch": 0.25, "grad_norm": 2.902883291244507, "learning_rate": 0.0002, "loss": 1.6446, "step": 60340 }, { "epoch": 0.25, "grad_norm": 2.4460415840148926, "learning_rate": 0.0002, "loss": 1.5041, "step": 60350 }, { "epoch": 0.25, "grad_norm": 7.142252445220947, "learning_rate": 0.0002, "loss": 1.7096, "step": 60360 }, { "epoch": 0.25, "grad_norm": 3.422773838043213, "learning_rate": 0.0002, "loss": 1.6046, "step": 60370 }, { "epoch": 0.25, "grad_norm": 2.629213809967041, "learning_rate": 0.0002, "loss": 1.6756, "step": 60380 }, { "epoch": 0.25, "grad_norm": 3.8096349239349365, "learning_rate": 0.0002, "loss": 1.5633, "step": 60390 }, { "epoch": 0.25, "grad_norm": 3.7722227573394775, "learning_rate": 0.0002, "loss": 1.7286, "step": 60400 }, { "epoch": 0.25, "grad_norm": 4.647707939147949, "learning_rate": 0.0002, "loss": 1.7041, "step": 60410 }, { "epoch": 0.25, "grad_norm": 4.217264175415039, "learning_rate": 0.0002, "loss": 1.6746, "step": 60420 }, { "epoch": 0.25, "grad_norm": 2.723374605178833, "learning_rate": 0.0002, "loss": 1.5196, "step": 60430 }, { "epoch": 0.25, "grad_norm": 3.3664777278900146, "learning_rate": 0.0002, "loss": 1.7519, "step": 60440 }, { "epoch": 0.25, "grad_norm": 2.9333457946777344, "learning_rate": 0.0002, "loss": 1.8362, "step": 60450 }, { "epoch": 0.25, "grad_norm": 2.3793375492095947, "learning_rate": 0.0002, "loss": 1.7028, "step": 60460 }, { "epoch": 0.25, "grad_norm": 2.53336501121521, "learning_rate": 0.0002, "loss": 1.474, "step": 60470 }, { "epoch": 0.25, "grad_norm": 1.6342427730560303, "learning_rate": 0.0002, "loss": 1.4565, "step": 60480 }, { "epoch": 0.25, "grad_norm": 3.2870185375213623, "learning_rate": 0.0002, "loss": 1.5376, "step": 60490 }, { "epoch": 0.25, "grad_norm": 2.8747239112854004, "learning_rate": 0.0002, "loss": 1.5075, "step": 60500 }, { "epoch": 0.25, "grad_norm": 2.6122264862060547, "learning_rate": 0.0002, "loss": 1.5322, "step": 60510 }, { "epoch": 0.25, "grad_norm": 2.7038028240203857, "learning_rate": 0.0002, "loss": 1.4074, "step": 60520 }, { "epoch": 0.25, "grad_norm": 3.132668972015381, "learning_rate": 0.0002, "loss": 1.3201, "step": 60530 }, { "epoch": 0.25, "grad_norm": 2.3739941120147705, "learning_rate": 0.0002, "loss": 1.6984, "step": 60540 }, { "epoch": 0.25, "grad_norm": 1.9549494981765747, "learning_rate": 0.0002, "loss": 1.433, "step": 60550 }, { "epoch": 0.25, "grad_norm": 3.503443479537964, "learning_rate": 0.0002, "loss": 1.6969, "step": 60560 }, { "epoch": 0.25, "grad_norm": 2.8893210887908936, "learning_rate": 0.0002, "loss": 1.4844, "step": 60570 }, { "epoch": 0.25, "grad_norm": 2.7751169204711914, "learning_rate": 0.0002, "loss": 1.5279, "step": 60580 }, { "epoch": 0.25, "grad_norm": 1.2864177227020264, "learning_rate": 0.0002, "loss": 1.5761, "step": 60590 }, { "epoch": 0.25, "grad_norm": 2.8190951347351074, "learning_rate": 0.0002, "loss": 1.813, "step": 60600 }, { "epoch": 0.25, "grad_norm": 2.9938173294067383, "learning_rate": 0.0002, "loss": 1.5192, "step": 60610 }, { "epoch": 0.25, "grad_norm": 3.4240047931671143, "learning_rate": 0.0002, "loss": 1.4828, "step": 60620 }, { "epoch": 0.25, "grad_norm": 1.2043315172195435, "learning_rate": 0.0002, "loss": 1.384, "step": 60630 }, { "epoch": 0.25, "grad_norm": 2.770468235015869, "learning_rate": 0.0002, "loss": 1.7425, "step": 60640 }, { "epoch": 0.25, "grad_norm": 2.899566650390625, "learning_rate": 0.0002, "loss": 1.344, "step": 60650 }, { "epoch": 0.25, "grad_norm": 3.004946708679199, "learning_rate": 0.0002, "loss": 1.4973, "step": 60660 }, { "epoch": 0.25, "grad_norm": 2.1528942584991455, "learning_rate": 0.0002, "loss": 1.6558, "step": 60670 }, { "epoch": 0.25, "grad_norm": 1.8051457405090332, "learning_rate": 0.0002, "loss": 1.4777, "step": 60680 }, { "epoch": 0.25, "grad_norm": 2.5242607593536377, "learning_rate": 0.0002, "loss": 1.6024, "step": 60690 }, { "epoch": 0.25, "grad_norm": 2.7724335193634033, "learning_rate": 0.0002, "loss": 1.4922, "step": 60700 }, { "epoch": 0.25, "grad_norm": 1.892212152481079, "learning_rate": 0.0002, "loss": 1.6789, "step": 60710 }, { "epoch": 0.25, "grad_norm": 2.0714566707611084, "learning_rate": 0.0002, "loss": 1.6662, "step": 60720 }, { "epoch": 0.25, "grad_norm": 3.7757480144500732, "learning_rate": 0.0002, "loss": 1.4078, "step": 60730 }, { "epoch": 0.25, "grad_norm": 2.6324808597564697, "learning_rate": 0.0002, "loss": 1.5583, "step": 60740 }, { "epoch": 0.25, "grad_norm": 2.399366617202759, "learning_rate": 0.0002, "loss": 1.5534, "step": 60750 }, { "epoch": 0.25, "grad_norm": 5.052277565002441, "learning_rate": 0.0002, "loss": 1.6077, "step": 60760 }, { "epoch": 0.25, "grad_norm": 2.5695242881774902, "learning_rate": 0.0002, "loss": 1.5693, "step": 60770 }, { "epoch": 0.25, "grad_norm": 2.961447238922119, "learning_rate": 0.0002, "loss": 1.5802, "step": 60780 }, { "epoch": 0.25, "grad_norm": 14.014458656311035, "learning_rate": 0.0002, "loss": 1.6749, "step": 60790 }, { "epoch": 0.25, "grad_norm": 2.4716100692749023, "learning_rate": 0.0002, "loss": 1.5082, "step": 60800 }, { "epoch": 0.25, "grad_norm": 4.812938690185547, "learning_rate": 0.0002, "loss": 1.4493, "step": 60810 }, { "epoch": 0.25, "grad_norm": 3.745121479034424, "learning_rate": 0.0002, "loss": 1.4895, "step": 60820 }, { "epoch": 0.25, "grad_norm": 2.75360369682312, "learning_rate": 0.0002, "loss": 1.9095, "step": 60830 }, { "epoch": 0.25, "grad_norm": 1.8791956901550293, "learning_rate": 0.0002, "loss": 1.4581, "step": 60840 }, { "epoch": 0.25, "grad_norm": 3.1932196617126465, "learning_rate": 0.0002, "loss": 1.6715, "step": 60850 }, { "epoch": 0.25, "grad_norm": 5.4947028160095215, "learning_rate": 0.0002, "loss": 1.5479, "step": 60860 }, { "epoch": 0.25, "grad_norm": 1.9106669425964355, "learning_rate": 0.0002, "loss": 1.6719, "step": 60870 }, { "epoch": 0.25, "grad_norm": 2.6836812496185303, "learning_rate": 0.0002, "loss": 1.4331, "step": 60880 }, { "epoch": 0.25, "grad_norm": 3.2710952758789062, "learning_rate": 0.0002, "loss": 1.7207, "step": 60890 }, { "epoch": 0.25, "grad_norm": 3.428582191467285, "learning_rate": 0.0002, "loss": 1.4412, "step": 60900 }, { "epoch": 0.25, "grad_norm": 2.48915958404541, "learning_rate": 0.0002, "loss": 1.4758, "step": 60910 }, { "epoch": 0.25, "grad_norm": 1.689548134803772, "learning_rate": 0.0002, "loss": 1.6007, "step": 60920 }, { "epoch": 0.25, "grad_norm": 2.71173095703125, "learning_rate": 0.0002, "loss": 1.65, "step": 60930 }, { "epoch": 0.25, "grad_norm": 3.0211923122406006, "learning_rate": 0.0002, "loss": 1.753, "step": 60940 }, { "epoch": 0.25, "grad_norm": 3.6414387226104736, "learning_rate": 0.0002, "loss": 1.5413, "step": 60950 }, { "epoch": 0.25, "grad_norm": 3.9745357036590576, "learning_rate": 0.0002, "loss": 1.5373, "step": 60960 }, { "epoch": 0.25, "grad_norm": 2.4085352420806885, "learning_rate": 0.0002, "loss": 1.5895, "step": 60970 }, { "epoch": 0.25, "grad_norm": 3.068279981613159, "learning_rate": 0.0002, "loss": 1.2793, "step": 60980 }, { "epoch": 0.25, "grad_norm": 2.960329294204712, "learning_rate": 0.0002, "loss": 1.769, "step": 60990 }, { "epoch": 0.25, "grad_norm": 3.5505385398864746, "learning_rate": 0.0002, "loss": 1.4101, "step": 61000 }, { "epoch": 0.25, "grad_norm": 3.2808144092559814, "learning_rate": 0.0002, "loss": 1.4683, "step": 61010 }, { "epoch": 0.25, "grad_norm": 3.6044678688049316, "learning_rate": 0.0002, "loss": 1.5994, "step": 61020 }, { "epoch": 0.25, "grad_norm": 2.5816500186920166, "learning_rate": 0.0002, "loss": 1.6875, "step": 61030 }, { "epoch": 0.25, "grad_norm": 1.5184396505355835, "learning_rate": 0.0002, "loss": 1.5028, "step": 61040 }, { "epoch": 0.25, "grad_norm": 2.0432662963867188, "learning_rate": 0.0002, "loss": 1.4573, "step": 61050 }, { "epoch": 0.25, "grad_norm": 2.2421886920928955, "learning_rate": 0.0002, "loss": 1.5062, "step": 61060 }, { "epoch": 0.25, "grad_norm": 1.7323648929595947, "learning_rate": 0.0002, "loss": 1.5632, "step": 61070 }, { "epoch": 0.25, "grad_norm": 2.9903032779693604, "learning_rate": 0.0002, "loss": 1.65, "step": 61080 }, { "epoch": 0.25, "grad_norm": 2.377476930618286, "learning_rate": 0.0002, "loss": 1.6872, "step": 61090 }, { "epoch": 0.25, "grad_norm": 1.9026137590408325, "learning_rate": 0.0002, "loss": 1.6532, "step": 61100 }, { "epoch": 0.25, "grad_norm": 4.464083671569824, "learning_rate": 0.0002, "loss": 1.5427, "step": 61110 }, { "epoch": 0.25, "grad_norm": 2.480351209640503, "learning_rate": 0.0002, "loss": 1.5913, "step": 61120 }, { "epoch": 0.25, "grad_norm": 3.2891781330108643, "learning_rate": 0.0002, "loss": 1.7281, "step": 61130 }, { "epoch": 0.25, "grad_norm": 2.470158815383911, "learning_rate": 0.0002, "loss": 1.8975, "step": 61140 }, { "epoch": 0.25, "grad_norm": 4.975458145141602, "learning_rate": 0.0002, "loss": 1.5504, "step": 61150 }, { "epoch": 0.25, "grad_norm": 2.5640735626220703, "learning_rate": 0.0002, "loss": 1.5763, "step": 61160 }, { "epoch": 0.25, "grad_norm": 1.5360339879989624, "learning_rate": 0.0002, "loss": 1.4922, "step": 61170 }, { "epoch": 0.25, "grad_norm": 3.5853657722473145, "learning_rate": 0.0002, "loss": 1.5863, "step": 61180 }, { "epoch": 0.25, "grad_norm": 3.1819515228271484, "learning_rate": 0.0002, "loss": 1.4588, "step": 61190 }, { "epoch": 0.25, "grad_norm": 2.551579475402832, "learning_rate": 0.0002, "loss": 1.6727, "step": 61200 }, { "epoch": 0.25, "grad_norm": 3.2933547496795654, "learning_rate": 0.0002, "loss": 1.48, "step": 61210 }, { "epoch": 0.25, "grad_norm": 10.53423023223877, "learning_rate": 0.0002, "loss": 1.5028, "step": 61220 }, { "epoch": 0.25, "grad_norm": 4.62026834487915, "learning_rate": 0.0002, "loss": 1.4535, "step": 61230 }, { "epoch": 0.25, "grad_norm": 3.657978057861328, "learning_rate": 0.0002, "loss": 1.5195, "step": 61240 }, { "epoch": 0.25, "grad_norm": 3.3926050662994385, "learning_rate": 0.0002, "loss": 1.5293, "step": 61250 }, { "epoch": 0.25, "grad_norm": 5.324586868286133, "learning_rate": 0.0002, "loss": 1.5402, "step": 61260 }, { "epoch": 0.25, "grad_norm": 3.380418300628662, "learning_rate": 0.0002, "loss": 1.5022, "step": 61270 }, { "epoch": 0.25, "grad_norm": 2.6945550441741943, "learning_rate": 0.0002, "loss": 1.4586, "step": 61280 }, { "epoch": 0.25, "grad_norm": 2.0606706142425537, "learning_rate": 0.0002, "loss": 1.9819, "step": 61290 }, { "epoch": 0.25, "grad_norm": 3.4173223972320557, "learning_rate": 0.0002, "loss": 1.5803, "step": 61300 }, { "epoch": 0.25, "grad_norm": 2.073336601257324, "learning_rate": 0.0002, "loss": 1.7446, "step": 61310 }, { "epoch": 0.25, "grad_norm": 3.9509005546569824, "learning_rate": 0.0002, "loss": 1.5598, "step": 61320 }, { "epoch": 0.25, "grad_norm": 3.1828291416168213, "learning_rate": 0.0002, "loss": 1.6338, "step": 61330 }, { "epoch": 0.25, "grad_norm": 2.553701400756836, "learning_rate": 0.0002, "loss": 1.5958, "step": 61340 }, { "epoch": 0.25, "grad_norm": 3.1589083671569824, "learning_rate": 0.0002, "loss": 1.8033, "step": 61350 }, { "epoch": 0.25, "grad_norm": 3.9667351245880127, "learning_rate": 0.0002, "loss": 1.5352, "step": 61360 }, { "epoch": 0.25, "grad_norm": 3.075002670288086, "learning_rate": 0.0002, "loss": 1.7351, "step": 61370 }, { "epoch": 0.25, "grad_norm": 2.7095701694488525, "learning_rate": 0.0002, "loss": 1.5282, "step": 61380 }, { "epoch": 0.25, "grad_norm": 2.691687822341919, "learning_rate": 0.0002, "loss": 1.4871, "step": 61390 }, { "epoch": 0.25, "grad_norm": 2.677055835723877, "learning_rate": 0.0002, "loss": 1.4966, "step": 61400 }, { "epoch": 0.25, "grad_norm": 1.5708271265029907, "learning_rate": 0.0002, "loss": 1.5999, "step": 61410 }, { "epoch": 0.25, "grad_norm": 2.2305970191955566, "learning_rate": 0.0002, "loss": 1.4877, "step": 61420 }, { "epoch": 0.25, "grad_norm": 1.4471787214279175, "learning_rate": 0.0002, "loss": 1.8115, "step": 61430 }, { "epoch": 0.25, "grad_norm": 2.5663487911224365, "learning_rate": 0.0002, "loss": 1.5484, "step": 61440 }, { "epoch": 0.25, "grad_norm": 2.821059226989746, "learning_rate": 0.0002, "loss": 1.3461, "step": 61450 }, { "epoch": 0.25, "grad_norm": 5.637833595275879, "learning_rate": 0.0002, "loss": 1.5797, "step": 61460 }, { "epoch": 0.25, "grad_norm": 3.464695930480957, "learning_rate": 0.0002, "loss": 1.6152, "step": 61470 }, { "epoch": 0.25, "grad_norm": 1.8199316263198853, "learning_rate": 0.0002, "loss": 1.6881, "step": 61480 }, { "epoch": 0.25, "grad_norm": 3.3487977981567383, "learning_rate": 0.0002, "loss": 1.8063, "step": 61490 }, { "epoch": 0.25, "grad_norm": 2.4005470275878906, "learning_rate": 0.0002, "loss": 1.8791, "step": 61500 }, { "epoch": 0.25, "grad_norm": 5.5244927406311035, "learning_rate": 0.0002, "loss": 1.5614, "step": 61510 }, { "epoch": 0.25, "grad_norm": 2.6003220081329346, "learning_rate": 0.0002, "loss": 1.5823, "step": 61520 }, { "epoch": 0.25, "grad_norm": 4.039042949676514, "learning_rate": 0.0002, "loss": 1.8305, "step": 61530 }, { "epoch": 0.25, "grad_norm": 3.1148648262023926, "learning_rate": 0.0002, "loss": 1.4468, "step": 61540 }, { "epoch": 0.25, "grad_norm": 3.565066337585449, "learning_rate": 0.0002, "loss": 1.6438, "step": 61550 }, { "epoch": 0.25, "grad_norm": 2.536468505859375, "learning_rate": 0.0002, "loss": 1.4679, "step": 61560 }, { "epoch": 0.25, "grad_norm": 2.445772647857666, "learning_rate": 0.0002, "loss": 1.5152, "step": 61570 }, { "epoch": 0.25, "grad_norm": 4.0490922927856445, "learning_rate": 0.0002, "loss": 1.83, "step": 61580 }, { "epoch": 0.25, "grad_norm": 1.981791615486145, "learning_rate": 0.0002, "loss": 1.6947, "step": 61590 }, { "epoch": 0.25, "grad_norm": 2.882615327835083, "learning_rate": 0.0002, "loss": 1.3822, "step": 61600 }, { "epoch": 0.25, "grad_norm": 3.2704336643218994, "learning_rate": 0.0002, "loss": 1.5606, "step": 61610 }, { "epoch": 0.25, "grad_norm": 1.7796021699905396, "learning_rate": 0.0002, "loss": 1.7646, "step": 61620 }, { "epoch": 0.25, "grad_norm": 3.396132230758667, "learning_rate": 0.0002, "loss": 1.5494, "step": 61630 }, { "epoch": 0.25, "grad_norm": 2.795389413833618, "learning_rate": 0.0002, "loss": 1.7482, "step": 61640 }, { "epoch": 0.25, "grad_norm": 2.8493869304656982, "learning_rate": 0.0002, "loss": 1.6017, "step": 61650 }, { "epoch": 0.25, "grad_norm": 3.1622188091278076, "learning_rate": 0.0002, "loss": 1.5705, "step": 61660 }, { "epoch": 0.25, "grad_norm": 2.434967517852783, "learning_rate": 0.0002, "loss": 1.4773, "step": 61670 }, { "epoch": 0.25, "grad_norm": 3.0914995670318604, "learning_rate": 0.0002, "loss": 1.7196, "step": 61680 }, { "epoch": 0.25, "grad_norm": 2.4695661067962646, "learning_rate": 0.0002, "loss": 1.6907, "step": 61690 }, { "epoch": 0.25, "grad_norm": 1.8297007083892822, "learning_rate": 0.0002, "loss": 1.7228, "step": 61700 }, { "epoch": 0.25, "grad_norm": 3.284109354019165, "learning_rate": 0.0002, "loss": 1.5431, "step": 61710 }, { "epoch": 0.25, "grad_norm": 1.8561676740646362, "learning_rate": 0.0002, "loss": 1.5099, "step": 61720 }, { "epoch": 0.25, "grad_norm": 6.529781341552734, "learning_rate": 0.0002, "loss": 1.3991, "step": 61730 }, { "epoch": 0.25, "grad_norm": 2.023899555206299, "learning_rate": 0.0002, "loss": 1.619, "step": 61740 }, { "epoch": 0.25, "grad_norm": 2.317275285720825, "learning_rate": 0.0002, "loss": 1.3738, "step": 61750 }, { "epoch": 0.25, "grad_norm": 4.094508171081543, "learning_rate": 0.0002, "loss": 1.505, "step": 61760 }, { "epoch": 0.25, "grad_norm": 3.092139959335327, "learning_rate": 0.0002, "loss": 1.6085, "step": 61770 }, { "epoch": 0.25, "grad_norm": 2.1180427074432373, "learning_rate": 0.0002, "loss": 1.4371, "step": 61780 }, { "epoch": 0.25, "grad_norm": 2.7097065448760986, "learning_rate": 0.0002, "loss": 1.378, "step": 61790 }, { "epoch": 0.25, "grad_norm": 3.751323699951172, "learning_rate": 0.0002, "loss": 1.4926, "step": 61800 }, { "epoch": 0.25, "grad_norm": 1.6493507623672485, "learning_rate": 0.0002, "loss": 1.4935, "step": 61810 }, { "epoch": 0.25, "grad_norm": 2.854637622833252, "learning_rate": 0.0002, "loss": 1.7281, "step": 61820 }, { "epoch": 0.25, "grad_norm": 4.193182945251465, "learning_rate": 0.0002, "loss": 1.7703, "step": 61830 }, { "epoch": 0.25, "grad_norm": 2.7865636348724365, "learning_rate": 0.0002, "loss": 1.3418, "step": 61840 }, { "epoch": 0.25, "grad_norm": 3.493854284286499, "learning_rate": 0.0002, "loss": 1.3242, "step": 61850 }, { "epoch": 0.25, "grad_norm": 1.8146791458129883, "learning_rate": 0.0002, "loss": 1.5538, "step": 61860 }, { "epoch": 0.25, "grad_norm": 3.0378127098083496, "learning_rate": 0.0002, "loss": 1.5848, "step": 61870 }, { "epoch": 0.25, "grad_norm": 3.652871608734131, "learning_rate": 0.0002, "loss": 1.516, "step": 61880 }, { "epoch": 0.25, "grad_norm": 1.957237958908081, "learning_rate": 0.0002, "loss": 1.6093, "step": 61890 }, { "epoch": 0.25, "grad_norm": 3.1336240768432617, "learning_rate": 0.0002, "loss": 1.4772, "step": 61900 }, { "epoch": 0.25, "grad_norm": 3.330343008041382, "learning_rate": 0.0002, "loss": 1.6904, "step": 61910 }, { "epoch": 0.25, "grad_norm": 1.7265573740005493, "learning_rate": 0.0002, "loss": 1.506, "step": 61920 }, { "epoch": 0.25, "grad_norm": 4.757582187652588, "learning_rate": 0.0002, "loss": 1.4372, "step": 61930 }, { "epoch": 0.25, "grad_norm": 2.911498785018921, "learning_rate": 0.0002, "loss": 1.4151, "step": 61940 }, { "epoch": 0.25, "grad_norm": 2.865440845489502, "learning_rate": 0.0002, "loss": 1.5937, "step": 61950 }, { "epoch": 0.25, "grad_norm": 1.9901539087295532, "learning_rate": 0.0002, "loss": 1.5751, "step": 61960 }, { "epoch": 0.25, "grad_norm": 3.2222964763641357, "learning_rate": 0.0002, "loss": 1.3998, "step": 61970 }, { "epoch": 0.25, "grad_norm": 3.6480460166931152, "learning_rate": 0.0002, "loss": 1.7225, "step": 61980 }, { "epoch": 0.25, "grad_norm": 3.184082508087158, "learning_rate": 0.0002, "loss": 1.4661, "step": 61990 }, { "epoch": 0.25, "grad_norm": 3.0813019275665283, "learning_rate": 0.0002, "loss": 1.6652, "step": 62000 }, { "epoch": 0.25, "grad_norm": 3.2468786239624023, "learning_rate": 0.0002, "loss": 1.7147, "step": 62010 }, { "epoch": 0.25, "grad_norm": 2.941420555114746, "learning_rate": 0.0002, "loss": 1.7722, "step": 62020 }, { "epoch": 0.25, "grad_norm": 3.2005953788757324, "learning_rate": 0.0002, "loss": 1.5484, "step": 62030 }, { "epoch": 0.25, "grad_norm": 3.9853131771087646, "learning_rate": 0.0002, "loss": 1.5568, "step": 62040 }, { "epoch": 0.25, "grad_norm": 2.8599812984466553, "learning_rate": 0.0002, "loss": 1.6894, "step": 62050 }, { "epoch": 0.25, "grad_norm": 2.2173008918762207, "learning_rate": 0.0002, "loss": 1.6811, "step": 62060 }, { "epoch": 0.25, "grad_norm": 3.905184745788574, "learning_rate": 0.0002, "loss": 1.5664, "step": 62070 }, { "epoch": 0.25, "grad_norm": 1.6615408658981323, "learning_rate": 0.0002, "loss": 1.4722, "step": 62080 }, { "epoch": 0.25, "grad_norm": 3.001887798309326, "learning_rate": 0.0002, "loss": 1.4554, "step": 62090 }, { "epoch": 0.25, "grad_norm": 6.163793087005615, "learning_rate": 0.0002, "loss": 1.461, "step": 62100 }, { "epoch": 0.25, "grad_norm": 2.88757061958313, "learning_rate": 0.0002, "loss": 1.5543, "step": 62110 }, { "epoch": 0.25, "grad_norm": 2.462759494781494, "learning_rate": 0.0002, "loss": 1.604, "step": 62120 }, { "epoch": 0.25, "grad_norm": 2.3679120540618896, "learning_rate": 0.0002, "loss": 1.147, "step": 62130 }, { "epoch": 0.25, "grad_norm": 4.496320724487305, "learning_rate": 0.0002, "loss": 1.4673, "step": 62140 }, { "epoch": 0.25, "grad_norm": 2.4594454765319824, "learning_rate": 0.0002, "loss": 1.5652, "step": 62150 }, { "epoch": 0.25, "grad_norm": 2.360295295715332, "learning_rate": 0.0002, "loss": 1.4828, "step": 62160 }, { "epoch": 0.25, "grad_norm": 2.8165149688720703, "learning_rate": 0.0002, "loss": 1.491, "step": 62170 }, { "epoch": 0.25, "grad_norm": 2.688225746154785, "learning_rate": 0.0002, "loss": 1.5704, "step": 62180 }, { "epoch": 0.25, "grad_norm": 3.169868230819702, "learning_rate": 0.0002, "loss": 1.8913, "step": 62190 }, { "epoch": 0.25, "grad_norm": 2.899853467941284, "learning_rate": 0.0002, "loss": 1.5307, "step": 62200 }, { "epoch": 0.25, "grad_norm": 2.8195905685424805, "learning_rate": 0.0002, "loss": 1.4528, "step": 62210 }, { "epoch": 0.25, "grad_norm": 2.8074002265930176, "learning_rate": 0.0002, "loss": 1.6573, "step": 62220 }, { "epoch": 0.25, "grad_norm": 2.934948682785034, "learning_rate": 0.0002, "loss": 1.7902, "step": 62230 }, { "epoch": 0.25, "grad_norm": 2.2361338138580322, "learning_rate": 0.0002, "loss": 1.4493, "step": 62240 }, { "epoch": 0.25, "grad_norm": 2.939396619796753, "learning_rate": 0.0002, "loss": 1.6607, "step": 62250 }, { "epoch": 0.25, "grad_norm": 3.6417434215545654, "learning_rate": 0.0002, "loss": 1.3437, "step": 62260 }, { "epoch": 0.25, "grad_norm": 4.116551399230957, "learning_rate": 0.0002, "loss": 1.6836, "step": 62270 }, { "epoch": 0.25, "grad_norm": 2.901564598083496, "learning_rate": 0.0002, "loss": 1.5192, "step": 62280 }, { "epoch": 0.25, "grad_norm": 3.2157208919525146, "learning_rate": 0.0002, "loss": 1.332, "step": 62290 }, { "epoch": 0.25, "grad_norm": 2.2283565998077393, "learning_rate": 0.0002, "loss": 1.6837, "step": 62300 }, { "epoch": 0.25, "grad_norm": 6.637249946594238, "learning_rate": 0.0002, "loss": 1.533, "step": 62310 }, { "epoch": 0.25, "grad_norm": 1.5890365839004517, "learning_rate": 0.0002, "loss": 1.509, "step": 62320 }, { "epoch": 0.25, "grad_norm": 2.386486291885376, "learning_rate": 0.0002, "loss": 1.4479, "step": 62330 }, { "epoch": 0.25, "grad_norm": 3.336117744445801, "learning_rate": 0.0002, "loss": 1.6065, "step": 62340 }, { "epoch": 0.25, "grad_norm": 3.8387486934661865, "learning_rate": 0.0002, "loss": 1.6605, "step": 62350 }, { "epoch": 0.25, "grad_norm": 2.9705278873443604, "learning_rate": 0.0002, "loss": 1.8695, "step": 62360 }, { "epoch": 0.25, "grad_norm": 3.197608470916748, "learning_rate": 0.0002, "loss": 1.7041, "step": 62370 }, { "epoch": 0.25, "grad_norm": 2.0880420207977295, "learning_rate": 0.0002, "loss": 1.4227, "step": 62380 }, { "epoch": 0.25, "grad_norm": 1.9914233684539795, "learning_rate": 0.0002, "loss": 1.4449, "step": 62390 }, { "epoch": 0.25, "grad_norm": 2.3231210708618164, "learning_rate": 0.0002, "loss": 1.5477, "step": 62400 }, { "epoch": 0.25, "grad_norm": 2.7374749183654785, "learning_rate": 0.0002, "loss": 1.7118, "step": 62410 }, { "epoch": 0.25, "grad_norm": 2.786740779876709, "learning_rate": 0.0002, "loss": 1.6764, "step": 62420 }, { "epoch": 0.25, "grad_norm": 4.056027412414551, "learning_rate": 0.0002, "loss": 1.5851, "step": 62430 }, { "epoch": 0.25, "grad_norm": 2.842735767364502, "learning_rate": 0.0002, "loss": 1.5694, "step": 62440 }, { "epoch": 0.25, "grad_norm": 2.3023884296417236, "learning_rate": 0.0002, "loss": 1.3023, "step": 62450 }, { "epoch": 0.25, "grad_norm": 3.193434953689575, "learning_rate": 0.0002, "loss": 1.7128, "step": 62460 }, { "epoch": 0.25, "grad_norm": 4.224456310272217, "learning_rate": 0.0002, "loss": 1.5411, "step": 62470 }, { "epoch": 0.25, "grad_norm": 2.4632723331451416, "learning_rate": 0.0002, "loss": 1.5637, "step": 62480 }, { "epoch": 0.25, "grad_norm": 13.789681434631348, "learning_rate": 0.0002, "loss": 1.6326, "step": 62490 }, { "epoch": 0.25, "grad_norm": 3.61348557472229, "learning_rate": 0.0002, "loss": 1.5589, "step": 62500 }, { "epoch": 0.25, "grad_norm": 2.701244592666626, "learning_rate": 0.0002, "loss": 1.6985, "step": 62510 }, { "epoch": 0.25, "grad_norm": 4.971467018127441, "learning_rate": 0.0002, "loss": 1.5575, "step": 62520 }, { "epoch": 0.25, "grad_norm": 3.2259066104888916, "learning_rate": 0.0002, "loss": 1.6333, "step": 62530 }, { "epoch": 0.25, "grad_norm": 3.2550971508026123, "learning_rate": 0.0002, "loss": 1.4977, "step": 62540 }, { "epoch": 0.25, "grad_norm": 2.0394046306610107, "learning_rate": 0.0002, "loss": 1.7478, "step": 62550 }, { "epoch": 0.25, "grad_norm": 2.208173990249634, "learning_rate": 0.0002, "loss": 1.6559, "step": 62560 }, { "epoch": 0.25, "grad_norm": 2.0513598918914795, "learning_rate": 0.0002, "loss": 1.6586, "step": 62570 }, { "epoch": 0.25, "grad_norm": 2.924215316772461, "learning_rate": 0.0002, "loss": 1.4705, "step": 62580 }, { "epoch": 0.25, "grad_norm": 3.3729677200317383, "learning_rate": 0.0002, "loss": 1.6004, "step": 62590 }, { "epoch": 0.25, "grad_norm": 2.085275173187256, "learning_rate": 0.0002, "loss": 1.5295, "step": 62600 }, { "epoch": 0.25, "grad_norm": 3.4249448776245117, "learning_rate": 0.0002, "loss": 1.5138, "step": 62610 }, { "epoch": 0.25, "grad_norm": 2.3086130619049072, "learning_rate": 0.0002, "loss": 1.5551, "step": 62620 }, { "epoch": 0.25, "grad_norm": 3.6928555965423584, "learning_rate": 0.0002, "loss": 1.4904, "step": 62630 }, { "epoch": 0.26, "grad_norm": 2.46098256111145, "learning_rate": 0.0002, "loss": 1.4833, "step": 62640 }, { "epoch": 0.26, "grad_norm": 2.2749125957489014, "learning_rate": 0.0002, "loss": 1.5809, "step": 62650 }, { "epoch": 0.26, "grad_norm": 10.673904418945312, "learning_rate": 0.0002, "loss": 1.6883, "step": 62660 }, { "epoch": 0.26, "grad_norm": 3.3239283561706543, "learning_rate": 0.0002, "loss": 1.7318, "step": 62670 }, { "epoch": 0.26, "grad_norm": 2.429765224456787, "learning_rate": 0.0002, "loss": 1.6358, "step": 62680 }, { "epoch": 0.26, "grad_norm": 2.566296339035034, "learning_rate": 0.0002, "loss": 1.6967, "step": 62690 }, { "epoch": 0.26, "grad_norm": 3.5690829753875732, "learning_rate": 0.0002, "loss": 1.4117, "step": 62700 }, { "epoch": 0.26, "grad_norm": 3.0465404987335205, "learning_rate": 0.0002, "loss": 1.5334, "step": 62710 }, { "epoch": 0.26, "grad_norm": 2.9913551807403564, "learning_rate": 0.0002, "loss": 1.563, "step": 62720 }, { "epoch": 0.26, "grad_norm": 2.6597630977630615, "learning_rate": 0.0002, "loss": 1.4517, "step": 62730 }, { "epoch": 0.26, "grad_norm": 2.862215995788574, "learning_rate": 0.0002, "loss": 1.5512, "step": 62740 }, { "epoch": 0.26, "grad_norm": 4.514556884765625, "learning_rate": 0.0002, "loss": 1.3967, "step": 62750 }, { "epoch": 0.26, "grad_norm": 2.0056934356689453, "learning_rate": 0.0002, "loss": 1.4632, "step": 62760 }, { "epoch": 0.26, "grad_norm": 2.7721405029296875, "learning_rate": 0.0002, "loss": 1.4907, "step": 62770 }, { "epoch": 0.26, "grad_norm": 2.9736781120300293, "learning_rate": 0.0002, "loss": 1.6137, "step": 62780 }, { "epoch": 0.26, "grad_norm": 3.0888595581054688, "learning_rate": 0.0002, "loss": 1.4853, "step": 62790 }, { "epoch": 0.26, "grad_norm": 2.2537779808044434, "learning_rate": 0.0002, "loss": 1.9806, "step": 62800 }, { "epoch": 0.26, "grad_norm": 2.4597129821777344, "learning_rate": 0.0002, "loss": 1.3391, "step": 62810 }, { "epoch": 0.26, "grad_norm": 2.4600489139556885, "learning_rate": 0.0002, "loss": 1.5317, "step": 62820 }, { "epoch": 0.26, "grad_norm": 3.7022109031677246, "learning_rate": 0.0002, "loss": 1.6126, "step": 62830 }, { "epoch": 0.26, "grad_norm": 3.1724061965942383, "learning_rate": 0.0002, "loss": 1.5853, "step": 62840 }, { "epoch": 0.26, "grad_norm": 1.716683030128479, "learning_rate": 0.0002, "loss": 1.5193, "step": 62850 }, { "epoch": 0.26, "grad_norm": 3.347825288772583, "learning_rate": 0.0002, "loss": 1.5061, "step": 62860 }, { "epoch": 0.26, "grad_norm": 3.3296494483947754, "learning_rate": 0.0002, "loss": 1.4712, "step": 62870 }, { "epoch": 0.26, "grad_norm": 3.032672643661499, "learning_rate": 0.0002, "loss": 1.4984, "step": 62880 }, { "epoch": 0.26, "grad_norm": 2.5729596614837646, "learning_rate": 0.0002, "loss": 1.3836, "step": 62890 }, { "epoch": 0.26, "grad_norm": 1.9870712757110596, "learning_rate": 0.0002, "loss": 1.5056, "step": 62900 }, { "epoch": 0.26, "grad_norm": 3.3074967861175537, "learning_rate": 0.0002, "loss": 1.4551, "step": 62910 }, { "epoch": 0.26, "grad_norm": 3.0837032794952393, "learning_rate": 0.0002, "loss": 1.4119, "step": 62920 }, { "epoch": 0.26, "grad_norm": 3.713611364364624, "learning_rate": 0.0002, "loss": 1.7571, "step": 62930 }, { "epoch": 0.26, "grad_norm": 3.3750076293945312, "learning_rate": 0.0002, "loss": 1.4684, "step": 62940 }, { "epoch": 0.26, "grad_norm": 1.8606573343276978, "learning_rate": 0.0002, "loss": 1.4521, "step": 62950 }, { "epoch": 0.26, "grad_norm": 3.085843086242676, "learning_rate": 0.0002, "loss": 1.6603, "step": 62960 }, { "epoch": 0.26, "grad_norm": 2.8692455291748047, "learning_rate": 0.0002, "loss": 1.8388, "step": 62970 }, { "epoch": 0.26, "grad_norm": 3.901750326156616, "learning_rate": 0.0002, "loss": 1.6315, "step": 62980 }, { "epoch": 0.26, "grad_norm": 2.0941097736358643, "learning_rate": 0.0002, "loss": 1.5138, "step": 62990 }, { "epoch": 0.26, "grad_norm": 2.5228962898254395, "learning_rate": 0.0002, "loss": 1.7379, "step": 63000 }, { "epoch": 0.26, "grad_norm": 1.7907239198684692, "learning_rate": 0.0002, "loss": 1.4934, "step": 63010 }, { "epoch": 0.26, "grad_norm": 4.107914924621582, "learning_rate": 0.0002, "loss": 1.7137, "step": 63020 }, { "epoch": 0.26, "grad_norm": 2.829941749572754, "learning_rate": 0.0002, "loss": 1.5803, "step": 63030 }, { "epoch": 0.26, "grad_norm": 3.086027145385742, "learning_rate": 0.0002, "loss": 1.4895, "step": 63040 }, { "epoch": 0.26, "grad_norm": 1.9437899589538574, "learning_rate": 0.0002, "loss": 1.5222, "step": 63050 }, { "epoch": 0.26, "grad_norm": 4.099078178405762, "learning_rate": 0.0002, "loss": 1.7351, "step": 63060 }, { "epoch": 0.26, "grad_norm": 3.0558652877807617, "learning_rate": 0.0002, "loss": 1.5787, "step": 63070 }, { "epoch": 0.26, "grad_norm": 3.7017478942871094, "learning_rate": 0.0002, "loss": 1.593, "step": 63080 }, { "epoch": 0.26, "grad_norm": 2.759050130844116, "learning_rate": 0.0002, "loss": 1.5471, "step": 63090 }, { "epoch": 0.26, "grad_norm": 5.146360874176025, "learning_rate": 0.0002, "loss": 1.5102, "step": 63100 }, { "epoch": 0.26, "grad_norm": 4.137115478515625, "learning_rate": 0.0002, "loss": 1.6523, "step": 63110 }, { "epoch": 0.26, "grad_norm": 2.8107783794403076, "learning_rate": 0.0002, "loss": 1.5937, "step": 63120 }, { "epoch": 0.26, "grad_norm": 2.0286405086517334, "learning_rate": 0.0002, "loss": 1.8038, "step": 63130 }, { "epoch": 0.26, "grad_norm": 4.544497489929199, "learning_rate": 0.0002, "loss": 1.7503, "step": 63140 }, { "epoch": 0.26, "grad_norm": 2.2241766452789307, "learning_rate": 0.0002, "loss": 1.4718, "step": 63150 }, { "epoch": 0.26, "grad_norm": 2.8990845680236816, "learning_rate": 0.0002, "loss": 1.3556, "step": 63160 }, { "epoch": 0.26, "grad_norm": 2.997459888458252, "learning_rate": 0.0002, "loss": 1.6114, "step": 63170 }, { "epoch": 0.26, "grad_norm": 2.05078125, "learning_rate": 0.0002, "loss": 1.4182, "step": 63180 }, { "epoch": 0.26, "grad_norm": 3.086947441101074, "learning_rate": 0.0002, "loss": 1.6366, "step": 63190 }, { "epoch": 0.26, "grad_norm": 1.9101868867874146, "learning_rate": 0.0002, "loss": 1.6085, "step": 63200 }, { "epoch": 0.26, "grad_norm": 2.8230550289154053, "learning_rate": 0.0002, "loss": 1.5343, "step": 63210 }, { "epoch": 0.26, "grad_norm": 3.08058500289917, "learning_rate": 0.0002, "loss": 1.4461, "step": 63220 }, { "epoch": 0.26, "grad_norm": 2.0557029247283936, "learning_rate": 0.0002, "loss": 1.7125, "step": 63230 }, { "epoch": 0.26, "grad_norm": 2.3921096324920654, "learning_rate": 0.0002, "loss": 1.8312, "step": 63240 }, { "epoch": 0.26, "grad_norm": 3.161677598953247, "learning_rate": 0.0002, "loss": 1.3523, "step": 63250 }, { "epoch": 0.26, "grad_norm": 1.8589428663253784, "learning_rate": 0.0002, "loss": 1.5536, "step": 63260 }, { "epoch": 0.26, "grad_norm": 2.5079963207244873, "learning_rate": 0.0002, "loss": 1.4396, "step": 63270 }, { "epoch": 0.26, "grad_norm": 2.973569869995117, "learning_rate": 0.0002, "loss": 1.3924, "step": 63280 }, { "epoch": 0.26, "grad_norm": 4.299533843994141, "learning_rate": 0.0002, "loss": 1.6555, "step": 63290 }, { "epoch": 0.26, "grad_norm": 2.9770331382751465, "learning_rate": 0.0002, "loss": 1.6986, "step": 63300 }, { "epoch": 0.26, "grad_norm": 2.657517910003662, "learning_rate": 0.0002, "loss": 1.5763, "step": 63310 }, { "epoch": 0.26, "grad_norm": 3.168354034423828, "learning_rate": 0.0002, "loss": 1.6282, "step": 63320 }, { "epoch": 0.26, "grad_norm": 1.9498846530914307, "learning_rate": 0.0002, "loss": 1.4263, "step": 63330 }, { "epoch": 0.26, "grad_norm": 5.178638458251953, "learning_rate": 0.0002, "loss": 1.4352, "step": 63340 }, { "epoch": 0.26, "grad_norm": 3.0861213207244873, "learning_rate": 0.0002, "loss": 1.8111, "step": 63350 }, { "epoch": 0.26, "grad_norm": 1.8261823654174805, "learning_rate": 0.0002, "loss": 1.3052, "step": 63360 }, { "epoch": 0.26, "grad_norm": 2.06613826751709, "learning_rate": 0.0002, "loss": 1.7704, "step": 63370 }, { "epoch": 0.26, "grad_norm": 2.227029323577881, "learning_rate": 0.0002, "loss": 1.6265, "step": 63380 }, { "epoch": 0.26, "grad_norm": 2.5950796604156494, "learning_rate": 0.0002, "loss": 1.4195, "step": 63390 }, { "epoch": 0.26, "grad_norm": 3.4075348377227783, "learning_rate": 0.0002, "loss": 1.5658, "step": 63400 }, { "epoch": 0.26, "grad_norm": 4.187744140625, "learning_rate": 0.0002, "loss": 1.5866, "step": 63410 }, { "epoch": 0.26, "grad_norm": 3.107438087463379, "learning_rate": 0.0002, "loss": 1.1533, "step": 63420 }, { "epoch": 0.26, "grad_norm": 2.9914212226867676, "learning_rate": 0.0002, "loss": 1.6012, "step": 63430 }, { "epoch": 0.26, "grad_norm": 2.635214328765869, "learning_rate": 0.0002, "loss": 1.5544, "step": 63440 }, { "epoch": 0.26, "grad_norm": 3.2783596515655518, "learning_rate": 0.0002, "loss": 1.3985, "step": 63450 }, { "epoch": 0.26, "grad_norm": 4.189993381500244, "learning_rate": 0.0002, "loss": 1.4969, "step": 63460 }, { "epoch": 0.26, "grad_norm": 1.3384164571762085, "learning_rate": 0.0002, "loss": 1.6381, "step": 63470 }, { "epoch": 0.26, "grad_norm": 3.2541310787200928, "learning_rate": 0.0002, "loss": 1.5799, "step": 63480 }, { "epoch": 0.26, "grad_norm": 3.8639485836029053, "learning_rate": 0.0002, "loss": 1.5998, "step": 63490 }, { "epoch": 0.26, "grad_norm": 1.7379578351974487, "learning_rate": 0.0002, "loss": 1.5489, "step": 63500 }, { "epoch": 0.26, "grad_norm": 3.5555567741394043, "learning_rate": 0.0002, "loss": 1.4656, "step": 63510 }, { "epoch": 0.26, "grad_norm": 1.5710182189941406, "learning_rate": 0.0002, "loss": 1.6491, "step": 63520 }, { "epoch": 0.26, "grad_norm": 2.068012237548828, "learning_rate": 0.0002, "loss": 1.7327, "step": 63530 }, { "epoch": 0.26, "grad_norm": 3.0427193641662598, "learning_rate": 0.0002, "loss": 1.7547, "step": 63540 }, { "epoch": 0.26, "grad_norm": 2.5033411979675293, "learning_rate": 0.0002, "loss": 1.5542, "step": 63550 }, { "epoch": 0.26, "grad_norm": 3.413140296936035, "learning_rate": 0.0002, "loss": 1.5406, "step": 63560 }, { "epoch": 0.26, "grad_norm": 3.524292230606079, "learning_rate": 0.0002, "loss": 1.6036, "step": 63570 }, { "epoch": 0.26, "grad_norm": 3.0758588314056396, "learning_rate": 0.0002, "loss": 1.635, "step": 63580 }, { "epoch": 0.26, "grad_norm": 2.630471706390381, "learning_rate": 0.0002, "loss": 1.6528, "step": 63590 }, { "epoch": 0.26, "grad_norm": 2.3368418216705322, "learning_rate": 0.0002, "loss": 1.6452, "step": 63600 }, { "epoch": 0.26, "grad_norm": 2.6126856803894043, "learning_rate": 0.0002, "loss": 1.8591, "step": 63610 }, { "epoch": 0.26, "grad_norm": 2.564927577972412, "learning_rate": 0.0002, "loss": 1.5485, "step": 63620 }, { "epoch": 0.26, "grad_norm": 4.525590896606445, "learning_rate": 0.0002, "loss": 1.5321, "step": 63630 }, { "epoch": 0.26, "grad_norm": 2.850301742553711, "learning_rate": 0.0002, "loss": 1.543, "step": 63640 }, { "epoch": 0.26, "grad_norm": 2.5689287185668945, "learning_rate": 0.0002, "loss": 1.6143, "step": 63650 }, { "epoch": 0.26, "grad_norm": 2.09171724319458, "learning_rate": 0.0002, "loss": 1.7493, "step": 63660 }, { "epoch": 0.26, "grad_norm": 2.196863889694214, "learning_rate": 0.0002, "loss": 1.5221, "step": 63670 }, { "epoch": 0.26, "grad_norm": 3.9282357692718506, "learning_rate": 0.0002, "loss": 1.5017, "step": 63680 }, { "epoch": 0.26, "grad_norm": 2.7753098011016846, "learning_rate": 0.0002, "loss": 1.7338, "step": 63690 }, { "epoch": 0.26, "grad_norm": 3.5941295623779297, "learning_rate": 0.0002, "loss": 1.7509, "step": 63700 }, { "epoch": 0.26, "grad_norm": 3.1683919429779053, "learning_rate": 0.0002, "loss": 1.4406, "step": 63710 }, { "epoch": 0.26, "grad_norm": 2.86624813079834, "learning_rate": 0.0002, "loss": 1.8241, "step": 63720 }, { "epoch": 0.26, "grad_norm": 3.5389251708984375, "learning_rate": 0.0002, "loss": 1.5045, "step": 63730 }, { "epoch": 0.26, "grad_norm": 2.760200023651123, "learning_rate": 0.0002, "loss": 1.541, "step": 63740 }, { "epoch": 0.26, "grad_norm": 2.918687343597412, "learning_rate": 0.0002, "loss": 1.35, "step": 63750 }, { "epoch": 0.26, "grad_norm": 4.0476765632629395, "learning_rate": 0.0002, "loss": 1.5345, "step": 63760 }, { "epoch": 0.26, "grad_norm": 2.072120189666748, "learning_rate": 0.0002, "loss": 1.5709, "step": 63770 }, { "epoch": 0.26, "grad_norm": 1.8664566278457642, "learning_rate": 0.0002, "loss": 1.5009, "step": 63780 }, { "epoch": 0.26, "grad_norm": 2.797748327255249, "learning_rate": 0.0002, "loss": 1.6063, "step": 63790 }, { "epoch": 0.26, "grad_norm": 3.5940122604370117, "learning_rate": 0.0002, "loss": 1.575, "step": 63800 }, { "epoch": 0.26, "grad_norm": 4.163031101226807, "learning_rate": 0.0002, "loss": 1.4577, "step": 63810 }, { "epoch": 0.26, "grad_norm": 2.2163031101226807, "learning_rate": 0.0002, "loss": 1.3542, "step": 63820 }, { "epoch": 0.26, "grad_norm": 3.3615593910217285, "learning_rate": 0.0002, "loss": 1.5594, "step": 63830 }, { "epoch": 0.26, "grad_norm": 2.5538387298583984, "learning_rate": 0.0002, "loss": 1.7982, "step": 63840 }, { "epoch": 0.26, "grad_norm": 1.8588811159133911, "learning_rate": 0.0002, "loss": 1.3965, "step": 63850 }, { "epoch": 0.26, "grad_norm": 2.4213218688964844, "learning_rate": 0.0002, "loss": 1.4608, "step": 63860 }, { "epoch": 0.26, "grad_norm": 2.7771289348602295, "learning_rate": 0.0002, "loss": 1.4808, "step": 63870 }, { "epoch": 0.26, "grad_norm": 3.006495714187622, "learning_rate": 0.0002, "loss": 1.4294, "step": 63880 }, { "epoch": 0.26, "grad_norm": 2.8495888710021973, "learning_rate": 0.0002, "loss": 1.6316, "step": 63890 }, { "epoch": 0.26, "grad_norm": 4.284115314483643, "learning_rate": 0.0002, "loss": 1.5887, "step": 63900 }, { "epoch": 0.26, "grad_norm": 3.370060920715332, "learning_rate": 0.0002, "loss": 1.6396, "step": 63910 }, { "epoch": 0.26, "grad_norm": 2.707848310470581, "learning_rate": 0.0002, "loss": 1.458, "step": 63920 }, { "epoch": 0.26, "grad_norm": 2.2868099212646484, "learning_rate": 0.0002, "loss": 1.6089, "step": 63930 }, { "epoch": 0.26, "grad_norm": 3.1005194187164307, "learning_rate": 0.0002, "loss": 1.7383, "step": 63940 }, { "epoch": 0.26, "grad_norm": 3.6150615215301514, "learning_rate": 0.0002, "loss": 1.6706, "step": 63950 }, { "epoch": 0.26, "grad_norm": 3.4432120323181152, "learning_rate": 0.0002, "loss": 1.3298, "step": 63960 }, { "epoch": 0.26, "grad_norm": 2.876406192779541, "learning_rate": 0.0002, "loss": 1.4999, "step": 63970 }, { "epoch": 0.26, "grad_norm": 2.0569140911102295, "learning_rate": 0.0002, "loss": 1.5081, "step": 63980 }, { "epoch": 0.26, "grad_norm": 4.520412445068359, "learning_rate": 0.0002, "loss": 1.3644, "step": 63990 }, { "epoch": 0.26, "grad_norm": 3.8046624660491943, "learning_rate": 0.0002, "loss": 1.6831, "step": 64000 }, { "epoch": 0.26, "grad_norm": 2.7498037815093994, "learning_rate": 0.0002, "loss": 1.8574, "step": 64010 }, { "epoch": 0.26, "grad_norm": 3.077562093734741, "learning_rate": 0.0002, "loss": 1.7901, "step": 64020 }, { "epoch": 0.26, "grad_norm": 3.1176586151123047, "learning_rate": 0.0002, "loss": 1.5851, "step": 64030 }, { "epoch": 0.26, "grad_norm": 2.447798728942871, "learning_rate": 0.0002, "loss": 1.4506, "step": 64040 }, { "epoch": 0.26, "grad_norm": 1.8157775402069092, "learning_rate": 0.0002, "loss": 1.494, "step": 64050 }, { "epoch": 0.26, "grad_norm": 2.48661208152771, "learning_rate": 0.0002, "loss": 1.7244, "step": 64060 }, { "epoch": 0.26, "grad_norm": 1.9119471311569214, "learning_rate": 0.0002, "loss": 1.7092, "step": 64070 }, { "epoch": 0.26, "grad_norm": 3.3229193687438965, "learning_rate": 0.0002, "loss": 1.542, "step": 64080 }, { "epoch": 0.26, "grad_norm": 1.6980618238449097, "learning_rate": 0.0002, "loss": 1.463, "step": 64090 }, { "epoch": 0.26, "grad_norm": 2.512129068374634, "learning_rate": 0.0002, "loss": 1.4793, "step": 64100 }, { "epoch": 0.26, "grad_norm": 2.2808384895324707, "learning_rate": 0.0002, "loss": 1.7399, "step": 64110 }, { "epoch": 0.26, "grad_norm": 4.542921543121338, "learning_rate": 0.0002, "loss": 1.7473, "step": 64120 }, { "epoch": 0.26, "grad_norm": 2.224628448486328, "learning_rate": 0.0002, "loss": 1.6781, "step": 64130 }, { "epoch": 0.26, "grad_norm": 4.99875020980835, "learning_rate": 0.0002, "loss": 1.7217, "step": 64140 }, { "epoch": 0.26, "grad_norm": 3.156714916229248, "learning_rate": 0.0002, "loss": 1.6812, "step": 64150 }, { "epoch": 0.26, "grad_norm": 2.2667644023895264, "learning_rate": 0.0002, "loss": 1.4471, "step": 64160 }, { "epoch": 0.26, "grad_norm": 1.488801121711731, "learning_rate": 0.0002, "loss": 1.3637, "step": 64170 }, { "epoch": 0.26, "grad_norm": 2.203162908554077, "learning_rate": 0.0002, "loss": 1.6038, "step": 64180 }, { "epoch": 0.26, "grad_norm": 2.197720527648926, "learning_rate": 0.0002, "loss": 1.6785, "step": 64190 }, { "epoch": 0.26, "grad_norm": 3.1995913982391357, "learning_rate": 0.0002, "loss": 1.4811, "step": 64200 }, { "epoch": 0.26, "grad_norm": 2.233346700668335, "learning_rate": 0.0002, "loss": 1.5683, "step": 64210 }, { "epoch": 0.26, "grad_norm": 3.3276448249816895, "learning_rate": 0.0002, "loss": 1.6937, "step": 64220 }, { "epoch": 0.26, "grad_norm": 3.3362162113189697, "learning_rate": 0.0002, "loss": 1.5664, "step": 64230 }, { "epoch": 0.26, "grad_norm": 2.381376028060913, "learning_rate": 0.0002, "loss": 1.5793, "step": 64240 }, { "epoch": 0.26, "grad_norm": 2.740236759185791, "learning_rate": 0.0002, "loss": 1.5798, "step": 64250 }, { "epoch": 0.26, "grad_norm": 3.346539258956909, "learning_rate": 0.0002, "loss": 1.3624, "step": 64260 }, { "epoch": 0.26, "grad_norm": 3.5655405521392822, "learning_rate": 0.0002, "loss": 1.6346, "step": 64270 }, { "epoch": 0.26, "grad_norm": 4.258228778839111, "learning_rate": 0.0002, "loss": 1.5481, "step": 64280 }, { "epoch": 0.26, "grad_norm": 3.2236616611480713, "learning_rate": 0.0002, "loss": 1.4493, "step": 64290 }, { "epoch": 0.26, "grad_norm": 5.239583969116211, "learning_rate": 0.0002, "loss": 1.6429, "step": 64300 }, { "epoch": 0.26, "grad_norm": 3.373607873916626, "learning_rate": 0.0002, "loss": 1.4024, "step": 64310 }, { "epoch": 0.26, "grad_norm": 2.7246687412261963, "learning_rate": 0.0002, "loss": 1.3838, "step": 64320 }, { "epoch": 0.26, "grad_norm": 3.3567051887512207, "learning_rate": 0.0002, "loss": 1.4858, "step": 64330 }, { "epoch": 0.26, "grad_norm": 3.777830123901367, "learning_rate": 0.0002, "loss": 1.4056, "step": 64340 }, { "epoch": 0.26, "grad_norm": 2.7168962955474854, "learning_rate": 0.0002, "loss": 1.6167, "step": 64350 }, { "epoch": 0.26, "grad_norm": 4.3101067543029785, "learning_rate": 0.0002, "loss": 1.2844, "step": 64360 }, { "epoch": 0.26, "grad_norm": 2.216871976852417, "learning_rate": 0.0002, "loss": 1.4017, "step": 64370 }, { "epoch": 0.26, "grad_norm": 1.9544812440872192, "learning_rate": 0.0002, "loss": 1.5713, "step": 64380 }, { "epoch": 0.26, "grad_norm": 4.061714172363281, "learning_rate": 0.0002, "loss": 1.3499, "step": 64390 }, { "epoch": 0.26, "grad_norm": 2.799420118331909, "learning_rate": 0.0002, "loss": 1.4398, "step": 64400 }, { "epoch": 0.26, "grad_norm": 3.086028575897217, "learning_rate": 0.0002, "loss": 1.4869, "step": 64410 }, { "epoch": 0.26, "grad_norm": 3.2798590660095215, "learning_rate": 0.0002, "loss": 1.3882, "step": 64420 }, { "epoch": 0.26, "grad_norm": 3.5027079582214355, "learning_rate": 0.0002, "loss": 1.5877, "step": 64430 }, { "epoch": 0.26, "grad_norm": 2.7311301231384277, "learning_rate": 0.0002, "loss": 1.2598, "step": 64440 }, { "epoch": 0.26, "grad_norm": 2.7512130737304688, "learning_rate": 0.0002, "loss": 1.727, "step": 64450 }, { "epoch": 0.26, "grad_norm": 4.879913330078125, "learning_rate": 0.0002, "loss": 1.2442, "step": 64460 }, { "epoch": 0.26, "grad_norm": 2.2204480171203613, "learning_rate": 0.0002, "loss": 1.4709, "step": 64470 }, { "epoch": 0.26, "grad_norm": 3.6552045345306396, "learning_rate": 0.0002, "loss": 1.5682, "step": 64480 }, { "epoch": 0.26, "grad_norm": 2.271838903427124, "learning_rate": 0.0002, "loss": 1.584, "step": 64490 }, { "epoch": 0.26, "grad_norm": 2.234663248062134, "learning_rate": 0.0002, "loss": 1.8259, "step": 64500 }, { "epoch": 0.26, "grad_norm": 2.392810821533203, "learning_rate": 0.0002, "loss": 1.6933, "step": 64510 }, { "epoch": 0.26, "grad_norm": 2.5745887756347656, "learning_rate": 0.0002, "loss": 1.7174, "step": 64520 }, { "epoch": 0.26, "grad_norm": 1.8996355533599854, "learning_rate": 0.0002, "loss": 1.2696, "step": 64530 }, { "epoch": 0.26, "grad_norm": 3.543470621109009, "learning_rate": 0.0002, "loss": 1.6063, "step": 64540 }, { "epoch": 0.26, "grad_norm": 3.156820058822632, "learning_rate": 0.0002, "loss": 1.4984, "step": 64550 }, { "epoch": 0.26, "grad_norm": 3.375598192214966, "learning_rate": 0.0002, "loss": 1.5375, "step": 64560 }, { "epoch": 0.26, "grad_norm": 2.472235918045044, "learning_rate": 0.0002, "loss": 1.5989, "step": 64570 }, { "epoch": 0.26, "grad_norm": 2.446469306945801, "learning_rate": 0.0002, "loss": 1.5595, "step": 64580 }, { "epoch": 0.26, "grad_norm": 3.9772186279296875, "learning_rate": 0.0002, "loss": 1.4042, "step": 64590 }, { "epoch": 0.26, "grad_norm": 2.8644065856933594, "learning_rate": 0.0002, "loss": 1.4548, "step": 64600 }, { "epoch": 0.26, "grad_norm": 2.632337808609009, "learning_rate": 0.0002, "loss": 1.3355, "step": 64610 }, { "epoch": 0.26, "grad_norm": 2.2982985973358154, "learning_rate": 0.0002, "loss": 1.5177, "step": 64620 }, { "epoch": 0.26, "grad_norm": 1.9173624515533447, "learning_rate": 0.0002, "loss": 1.6944, "step": 64630 }, { "epoch": 0.26, "grad_norm": 2.9920411109924316, "learning_rate": 0.0002, "loss": 1.4545, "step": 64640 }, { "epoch": 0.26, "grad_norm": 5.6900482177734375, "learning_rate": 0.0002, "loss": 1.2931, "step": 64650 }, { "epoch": 0.26, "grad_norm": 2.9343771934509277, "learning_rate": 0.0002, "loss": 1.6495, "step": 64660 }, { "epoch": 0.26, "grad_norm": 2.78898286819458, "learning_rate": 0.0002, "loss": 1.5755, "step": 64670 }, { "epoch": 0.26, "grad_norm": 3.29539155960083, "learning_rate": 0.0002, "loss": 1.5619, "step": 64680 }, { "epoch": 0.26, "grad_norm": 2.409728527069092, "learning_rate": 0.0002, "loss": 1.6581, "step": 64690 }, { "epoch": 0.26, "grad_norm": 1.9319368600845337, "learning_rate": 0.0002, "loss": 1.7627, "step": 64700 }, { "epoch": 0.26, "grad_norm": 1.6542484760284424, "learning_rate": 0.0002, "loss": 1.5276, "step": 64710 }, { "epoch": 0.26, "grad_norm": 2.726475238800049, "learning_rate": 0.0002, "loss": 1.3424, "step": 64720 }, { "epoch": 0.26, "grad_norm": 2.7895028591156006, "learning_rate": 0.0002, "loss": 1.4288, "step": 64730 }, { "epoch": 0.26, "grad_norm": 4.180301189422607, "learning_rate": 0.0002, "loss": 1.5489, "step": 64740 }, { "epoch": 0.26, "grad_norm": 3.8282840251922607, "learning_rate": 0.0002, "loss": 1.4357, "step": 64750 }, { "epoch": 0.26, "grad_norm": 2.2244222164154053, "learning_rate": 0.0002, "loss": 1.6684, "step": 64760 }, { "epoch": 0.26, "grad_norm": 2.586303949356079, "learning_rate": 0.0002, "loss": 1.8788, "step": 64770 }, { "epoch": 0.26, "grad_norm": 3.141008138656616, "learning_rate": 0.0002, "loss": 1.6214, "step": 64780 }, { "epoch": 0.26, "grad_norm": 3.3709025382995605, "learning_rate": 0.0002, "loss": 1.5966, "step": 64790 }, { "epoch": 0.26, "grad_norm": 2.996516227722168, "learning_rate": 0.0002, "loss": 1.4631, "step": 64800 }, { "epoch": 0.26, "grad_norm": 2.959601879119873, "learning_rate": 0.0002, "loss": 1.6235, "step": 64810 }, { "epoch": 0.26, "grad_norm": 2.9214107990264893, "learning_rate": 0.0002, "loss": 1.3163, "step": 64820 }, { "epoch": 0.26, "grad_norm": 2.475050687789917, "learning_rate": 0.0002, "loss": 1.6252, "step": 64830 }, { "epoch": 0.26, "grad_norm": 2.6907594203948975, "learning_rate": 0.0002, "loss": 1.4871, "step": 64840 }, { "epoch": 0.26, "grad_norm": 3.291923761367798, "learning_rate": 0.0002, "loss": 1.367, "step": 64850 }, { "epoch": 0.26, "grad_norm": 2.7381670475006104, "learning_rate": 0.0002, "loss": 1.5394, "step": 64860 }, { "epoch": 0.26, "grad_norm": 4.6759748458862305, "learning_rate": 0.0002, "loss": 1.4837, "step": 64870 }, { "epoch": 0.26, "grad_norm": 2.260469675064087, "learning_rate": 0.0002, "loss": 1.5432, "step": 64880 }, { "epoch": 0.26, "grad_norm": 2.80678653717041, "learning_rate": 0.0002, "loss": 1.4161, "step": 64890 }, { "epoch": 0.26, "grad_norm": 3.965959072113037, "learning_rate": 0.0002, "loss": 1.735, "step": 64900 }, { "epoch": 0.26, "grad_norm": 4.082244873046875, "learning_rate": 0.0002, "loss": 1.6295, "step": 64910 }, { "epoch": 0.26, "grad_norm": 3.181875228881836, "learning_rate": 0.0002, "loss": 1.8546, "step": 64920 }, { "epoch": 0.26, "grad_norm": 5.446808815002441, "learning_rate": 0.0002, "loss": 1.674, "step": 64930 }, { "epoch": 0.26, "grad_norm": 2.180877447128296, "learning_rate": 0.0002, "loss": 1.5115, "step": 64940 }, { "epoch": 0.26, "grad_norm": 2.6075422763824463, "learning_rate": 0.0002, "loss": 1.5895, "step": 64950 }, { "epoch": 0.26, "grad_norm": 2.2976951599121094, "learning_rate": 0.0002, "loss": 1.5447, "step": 64960 }, { "epoch": 0.26, "grad_norm": 2.832249164581299, "learning_rate": 0.0002, "loss": 1.5613, "step": 64970 }, { "epoch": 0.26, "grad_norm": 5.9630937576293945, "learning_rate": 0.0002, "loss": 1.6375, "step": 64980 }, { "epoch": 0.26, "grad_norm": 2.1846413612365723, "learning_rate": 0.0002, "loss": 1.5833, "step": 64990 }, { "epoch": 0.26, "grad_norm": 3.7831969261169434, "learning_rate": 0.0002, "loss": 1.3642, "step": 65000 }, { "epoch": 0.26, "grad_norm": 2.414379835128784, "learning_rate": 0.0002, "loss": 1.4913, "step": 65010 }, { "epoch": 0.26, "grad_norm": 3.759835720062256, "learning_rate": 0.0002, "loss": 1.641, "step": 65020 }, { "epoch": 0.26, "grad_norm": 4.180841445922852, "learning_rate": 0.0002, "loss": 1.6217, "step": 65030 }, { "epoch": 0.26, "grad_norm": 2.2655577659606934, "learning_rate": 0.0002, "loss": 1.6272, "step": 65040 }, { "epoch": 0.26, "grad_norm": 2.5248334407806396, "learning_rate": 0.0002, "loss": 1.5705, "step": 65050 }, { "epoch": 0.26, "grad_norm": 2.5260396003723145, "learning_rate": 0.0002, "loss": 1.5888, "step": 65060 }, { "epoch": 0.26, "grad_norm": 2.472245931625366, "learning_rate": 0.0002, "loss": 1.3427, "step": 65070 }, { "epoch": 0.26, "grad_norm": 2.073808431625366, "learning_rate": 0.0002, "loss": 1.614, "step": 65080 }, { "epoch": 0.26, "grad_norm": 2.397325277328491, "learning_rate": 0.0002, "loss": 1.6185, "step": 65090 }, { "epoch": 0.27, "grad_norm": 2.866027355194092, "learning_rate": 0.0002, "loss": 1.4046, "step": 65100 }, { "epoch": 0.27, "grad_norm": 3.440859079360962, "learning_rate": 0.0002, "loss": 1.4303, "step": 65110 }, { "epoch": 0.27, "grad_norm": 2.45481276512146, "learning_rate": 0.0002, "loss": 1.4625, "step": 65120 }, { "epoch": 0.27, "grad_norm": 3.0740911960601807, "learning_rate": 0.0002, "loss": 1.7677, "step": 65130 }, { "epoch": 0.27, "grad_norm": 2.844310760498047, "learning_rate": 0.0002, "loss": 1.6373, "step": 65140 }, { "epoch": 0.27, "grad_norm": 1.9685240983963013, "learning_rate": 0.0002, "loss": 1.3797, "step": 65150 }, { "epoch": 0.27, "grad_norm": 3.137688398361206, "learning_rate": 0.0002, "loss": 1.4634, "step": 65160 }, { "epoch": 0.27, "grad_norm": 3.764817714691162, "learning_rate": 0.0002, "loss": 1.7441, "step": 65170 }, { "epoch": 0.27, "grad_norm": 1.9562828540802002, "learning_rate": 0.0002, "loss": 1.582, "step": 65180 }, { "epoch": 0.27, "grad_norm": 3.0400900840759277, "learning_rate": 0.0002, "loss": 1.744, "step": 65190 }, { "epoch": 0.27, "grad_norm": 2.956557512283325, "learning_rate": 0.0002, "loss": 1.7641, "step": 65200 }, { "epoch": 0.27, "grad_norm": 2.6674563884735107, "learning_rate": 0.0002, "loss": 1.6287, "step": 65210 }, { "epoch": 0.27, "grad_norm": 3.3414697647094727, "learning_rate": 0.0002, "loss": 1.3885, "step": 65220 }, { "epoch": 0.27, "grad_norm": 2.662442207336426, "learning_rate": 0.0002, "loss": 1.6814, "step": 65230 }, { "epoch": 0.27, "grad_norm": 3.3327958583831787, "learning_rate": 0.0002, "loss": 1.3691, "step": 65240 }, { "epoch": 0.27, "grad_norm": 2.541321039199829, "learning_rate": 0.0002, "loss": 1.6816, "step": 65250 }, { "epoch": 0.27, "grad_norm": 2.4739198684692383, "learning_rate": 0.0002, "loss": 1.6835, "step": 65260 }, { "epoch": 0.27, "grad_norm": 2.526315450668335, "learning_rate": 0.0002, "loss": 1.8139, "step": 65270 }, { "epoch": 0.27, "grad_norm": 3.4809226989746094, "learning_rate": 0.0002, "loss": 1.637, "step": 65280 }, { "epoch": 0.27, "grad_norm": 1.9849168062210083, "learning_rate": 0.0002, "loss": 1.5747, "step": 65290 }, { "epoch": 0.27, "grad_norm": 1.9265450239181519, "learning_rate": 0.0002, "loss": 1.7416, "step": 65300 }, { "epoch": 0.27, "grad_norm": 1.5032227039337158, "learning_rate": 0.0002, "loss": 1.3015, "step": 65310 }, { "epoch": 0.27, "grad_norm": 3.0806503295898438, "learning_rate": 0.0002, "loss": 1.5453, "step": 65320 }, { "epoch": 0.27, "grad_norm": 2.2646517753601074, "learning_rate": 0.0002, "loss": 1.6206, "step": 65330 }, { "epoch": 0.27, "grad_norm": 3.5646300315856934, "learning_rate": 0.0002, "loss": 1.4421, "step": 65340 }, { "epoch": 0.27, "grad_norm": 2.9704785346984863, "learning_rate": 0.0002, "loss": 1.4973, "step": 65350 }, { "epoch": 0.27, "grad_norm": 2.7509360313415527, "learning_rate": 0.0002, "loss": 1.5755, "step": 65360 }, { "epoch": 0.27, "grad_norm": 2.0438122749328613, "learning_rate": 0.0002, "loss": 1.581, "step": 65370 }, { "epoch": 0.27, "grad_norm": 2.5901577472686768, "learning_rate": 0.0002, "loss": 1.7363, "step": 65380 }, { "epoch": 0.27, "grad_norm": 2.9084956645965576, "learning_rate": 0.0002, "loss": 1.4981, "step": 65390 }, { "epoch": 0.27, "grad_norm": 4.475045204162598, "learning_rate": 0.0002, "loss": 1.6013, "step": 65400 }, { "epoch": 0.27, "grad_norm": 2.9534482955932617, "learning_rate": 0.0002, "loss": 1.5577, "step": 65410 }, { "epoch": 0.27, "grad_norm": 2.523155927658081, "learning_rate": 0.0002, "loss": 1.4946, "step": 65420 }, { "epoch": 0.27, "grad_norm": 3.156325578689575, "learning_rate": 0.0002, "loss": 1.3509, "step": 65430 }, { "epoch": 0.27, "grad_norm": 6.398499965667725, "learning_rate": 0.0002, "loss": 1.6426, "step": 65440 }, { "epoch": 0.27, "grad_norm": 2.4594738483428955, "learning_rate": 0.0002, "loss": 1.7449, "step": 65450 }, { "epoch": 0.27, "grad_norm": 2.370314121246338, "learning_rate": 0.0002, "loss": 1.7012, "step": 65460 }, { "epoch": 0.27, "grad_norm": 2.5657153129577637, "learning_rate": 0.0002, "loss": 1.4797, "step": 65470 }, { "epoch": 0.27, "grad_norm": 1.9820282459259033, "learning_rate": 0.0002, "loss": 1.601, "step": 65480 }, { "epoch": 0.27, "grad_norm": 3.482633590698242, "learning_rate": 0.0002, "loss": 1.496, "step": 65490 }, { "epoch": 0.27, "grad_norm": 5.178989410400391, "learning_rate": 0.0002, "loss": 1.5801, "step": 65500 }, { "epoch": 0.27, "grad_norm": 2.8788275718688965, "learning_rate": 0.0002, "loss": 1.5495, "step": 65510 }, { "epoch": 0.27, "grad_norm": 2.487567901611328, "learning_rate": 0.0002, "loss": 1.6393, "step": 65520 }, { "epoch": 0.27, "grad_norm": 2.137748956680298, "learning_rate": 0.0002, "loss": 1.7485, "step": 65530 }, { "epoch": 0.27, "grad_norm": 3.7202014923095703, "learning_rate": 0.0002, "loss": 1.4037, "step": 65540 }, { "epoch": 0.27, "grad_norm": 3.3271262645721436, "learning_rate": 0.0002, "loss": 1.5508, "step": 65550 }, { "epoch": 0.27, "grad_norm": 2.898189067840576, "learning_rate": 0.0002, "loss": 1.5354, "step": 65560 }, { "epoch": 0.27, "grad_norm": 3.3975110054016113, "learning_rate": 0.0002, "loss": 1.2373, "step": 65570 }, { "epoch": 0.27, "grad_norm": 3.469923496246338, "learning_rate": 0.0002, "loss": 1.5279, "step": 65580 }, { "epoch": 0.27, "grad_norm": 2.879551887512207, "learning_rate": 0.0002, "loss": 1.6479, "step": 65590 }, { "epoch": 0.27, "grad_norm": 2.397451639175415, "learning_rate": 0.0002, "loss": 1.2376, "step": 65600 }, { "epoch": 0.27, "grad_norm": 3.404995918273926, "learning_rate": 0.0002, "loss": 1.4015, "step": 65610 }, { "epoch": 0.27, "grad_norm": 3.037989616394043, "learning_rate": 0.0002, "loss": 1.4801, "step": 65620 }, { "epoch": 0.27, "grad_norm": 3.8609490394592285, "learning_rate": 0.0002, "loss": 1.6325, "step": 65630 }, { "epoch": 0.27, "grad_norm": 2.984380006790161, "learning_rate": 0.0002, "loss": 1.7447, "step": 65640 }, { "epoch": 0.27, "grad_norm": 6.644119739532471, "learning_rate": 0.0002, "loss": 1.4929, "step": 65650 }, { "epoch": 0.27, "grad_norm": 2.5326058864593506, "learning_rate": 0.0002, "loss": 1.4505, "step": 65660 }, { "epoch": 0.27, "grad_norm": 2.1407108306884766, "learning_rate": 0.0002, "loss": 1.5996, "step": 65670 }, { "epoch": 0.27, "grad_norm": 5.189515590667725, "learning_rate": 0.0002, "loss": 1.4994, "step": 65680 }, { "epoch": 0.27, "grad_norm": 2.1827404499053955, "learning_rate": 0.0002, "loss": 1.4886, "step": 65690 }, { "epoch": 0.27, "grad_norm": 4.376789569854736, "learning_rate": 0.0002, "loss": 1.4516, "step": 65700 }, { "epoch": 0.27, "grad_norm": 3.2580926418304443, "learning_rate": 0.0002, "loss": 1.6658, "step": 65710 }, { "epoch": 0.27, "grad_norm": 5.539267539978027, "learning_rate": 0.0002, "loss": 1.5245, "step": 65720 }, { "epoch": 0.27, "grad_norm": 3.0956766605377197, "learning_rate": 0.0002, "loss": 1.5443, "step": 65730 }, { "epoch": 0.27, "grad_norm": 4.576757907867432, "learning_rate": 0.0002, "loss": 1.663, "step": 65740 }, { "epoch": 0.27, "grad_norm": 4.161850452423096, "learning_rate": 0.0002, "loss": 1.5761, "step": 65750 }, { "epoch": 0.27, "grad_norm": 1.9450725317001343, "learning_rate": 0.0002, "loss": 1.6892, "step": 65760 }, { "epoch": 0.27, "grad_norm": 2.8656537532806396, "learning_rate": 0.0002, "loss": 1.3215, "step": 65770 }, { "epoch": 0.27, "grad_norm": 3.1103246212005615, "learning_rate": 0.0002, "loss": 1.4225, "step": 65780 }, { "epoch": 0.27, "grad_norm": 6.001457214355469, "learning_rate": 0.0002, "loss": 1.6025, "step": 65790 }, { "epoch": 0.27, "grad_norm": 2.48659348487854, "learning_rate": 0.0002, "loss": 1.7249, "step": 65800 }, { "epoch": 0.27, "grad_norm": 3.3419651985168457, "learning_rate": 0.0002, "loss": 1.7932, "step": 65810 }, { "epoch": 0.27, "grad_norm": 2.471928834915161, "learning_rate": 0.0002, "loss": 1.5753, "step": 65820 }, { "epoch": 0.27, "grad_norm": 3.648390531539917, "learning_rate": 0.0002, "loss": 1.7352, "step": 65830 }, { "epoch": 0.27, "grad_norm": 3.4547629356384277, "learning_rate": 0.0002, "loss": 1.5302, "step": 65840 }, { "epoch": 0.27, "grad_norm": 3.1307709217071533, "learning_rate": 0.0002, "loss": 1.5064, "step": 65850 }, { "epoch": 0.27, "grad_norm": 1.2697505950927734, "learning_rate": 0.0002, "loss": 1.6386, "step": 65860 }, { "epoch": 0.27, "grad_norm": 2.7075021266937256, "learning_rate": 0.0002, "loss": 1.4209, "step": 65870 }, { "epoch": 0.27, "grad_norm": 3.3194127082824707, "learning_rate": 0.0002, "loss": 1.3545, "step": 65880 }, { "epoch": 0.27, "grad_norm": 3.4632060527801514, "learning_rate": 0.0002, "loss": 1.7381, "step": 65890 }, { "epoch": 0.27, "grad_norm": 3.1136679649353027, "learning_rate": 0.0002, "loss": 1.5016, "step": 65900 }, { "epoch": 0.27, "grad_norm": 2.2105376720428467, "learning_rate": 0.0002, "loss": 2.0166, "step": 65910 }, { "epoch": 0.27, "grad_norm": 1.4601366519927979, "learning_rate": 0.0002, "loss": 1.2583, "step": 65920 }, { "epoch": 0.27, "grad_norm": 3.1476192474365234, "learning_rate": 0.0002, "loss": 1.8104, "step": 65930 }, { "epoch": 0.27, "grad_norm": 3.410250663757324, "learning_rate": 0.0002, "loss": 1.5008, "step": 65940 }, { "epoch": 0.27, "grad_norm": 2.814772129058838, "learning_rate": 0.0002, "loss": 1.4855, "step": 65950 }, { "epoch": 0.27, "grad_norm": 3.0968384742736816, "learning_rate": 0.0002, "loss": 1.6976, "step": 65960 }, { "epoch": 0.27, "grad_norm": 1.87226140499115, "learning_rate": 0.0002, "loss": 1.6564, "step": 65970 }, { "epoch": 0.27, "grad_norm": 2.5760786533355713, "learning_rate": 0.0002, "loss": 1.4733, "step": 65980 }, { "epoch": 0.27, "grad_norm": 1.5350993871688843, "learning_rate": 0.0002, "loss": 1.5422, "step": 65990 }, { "epoch": 0.27, "grad_norm": 1.523711085319519, "learning_rate": 0.0002, "loss": 1.4111, "step": 66000 }, { "epoch": 0.27, "grad_norm": 2.801412343978882, "learning_rate": 0.0002, "loss": 1.697, "step": 66010 }, { "epoch": 0.27, "grad_norm": 3.109250545501709, "learning_rate": 0.0002, "loss": 1.774, "step": 66020 }, { "epoch": 0.27, "grad_norm": 3.6139750480651855, "learning_rate": 0.0002, "loss": 1.4474, "step": 66030 }, { "epoch": 0.27, "grad_norm": 3.813915491104126, "learning_rate": 0.0002, "loss": 1.3597, "step": 66040 }, { "epoch": 0.27, "grad_norm": 4.8892693519592285, "learning_rate": 0.0002, "loss": 1.557, "step": 66050 }, { "epoch": 0.27, "grad_norm": 1.7384825944900513, "learning_rate": 0.0002, "loss": 1.7097, "step": 66060 }, { "epoch": 0.27, "grad_norm": 3.3982505798339844, "learning_rate": 0.0002, "loss": 1.7324, "step": 66070 }, { "epoch": 0.27, "grad_norm": 3.473170280456543, "learning_rate": 0.0002, "loss": 1.3894, "step": 66080 }, { "epoch": 0.27, "grad_norm": 5.11995267868042, "learning_rate": 0.0002, "loss": 1.5506, "step": 66090 }, { "epoch": 0.27, "grad_norm": 3.205592632293701, "learning_rate": 0.0002, "loss": 1.7344, "step": 66100 }, { "epoch": 0.27, "grad_norm": 3.986126184463501, "learning_rate": 0.0002, "loss": 1.2883, "step": 66110 }, { "epoch": 0.27, "grad_norm": 2.485429048538208, "learning_rate": 0.0002, "loss": 1.4322, "step": 66120 }, { "epoch": 0.27, "grad_norm": 2.911484956741333, "learning_rate": 0.0002, "loss": 1.5892, "step": 66130 }, { "epoch": 0.27, "grad_norm": 2.6948299407958984, "learning_rate": 0.0002, "loss": 1.5838, "step": 66140 }, { "epoch": 0.27, "grad_norm": 2.7209458351135254, "learning_rate": 0.0002, "loss": 1.6314, "step": 66150 }, { "epoch": 0.27, "grad_norm": 3.0756685733795166, "learning_rate": 0.0002, "loss": 1.4237, "step": 66160 }, { "epoch": 0.27, "grad_norm": 2.884610176086426, "learning_rate": 0.0002, "loss": 1.653, "step": 66170 }, { "epoch": 0.27, "grad_norm": 3.0806081295013428, "learning_rate": 0.0002, "loss": 1.3162, "step": 66180 }, { "epoch": 0.27, "grad_norm": 3.561464309692383, "learning_rate": 0.0002, "loss": 1.4933, "step": 66190 }, { "epoch": 0.27, "grad_norm": 2.763998031616211, "learning_rate": 0.0002, "loss": 1.6063, "step": 66200 }, { "epoch": 0.27, "grad_norm": 4.146770477294922, "learning_rate": 0.0002, "loss": 1.4911, "step": 66210 }, { "epoch": 0.27, "grad_norm": 3.5138843059539795, "learning_rate": 0.0002, "loss": 1.3956, "step": 66220 }, { "epoch": 0.27, "grad_norm": 2.870089530944824, "learning_rate": 0.0002, "loss": 1.3311, "step": 66230 }, { "epoch": 0.27, "grad_norm": 3.3521337509155273, "learning_rate": 0.0002, "loss": 1.4965, "step": 66240 }, { "epoch": 0.27, "grad_norm": 2.68355131149292, "learning_rate": 0.0002, "loss": 1.5138, "step": 66250 }, { "epoch": 0.27, "grad_norm": 9.052323341369629, "learning_rate": 0.0002, "loss": 1.3955, "step": 66260 }, { "epoch": 0.27, "grad_norm": 2.7879021167755127, "learning_rate": 0.0002, "loss": 1.724, "step": 66270 }, { "epoch": 0.27, "grad_norm": 3.3372812271118164, "learning_rate": 0.0002, "loss": 1.581, "step": 66280 }, { "epoch": 0.27, "grad_norm": 2.6829094886779785, "learning_rate": 0.0002, "loss": 1.3992, "step": 66290 }, { "epoch": 0.27, "grad_norm": 3.6331560611724854, "learning_rate": 0.0002, "loss": 1.6249, "step": 66300 }, { "epoch": 0.27, "grad_norm": 4.108199119567871, "learning_rate": 0.0002, "loss": 1.5122, "step": 66310 }, { "epoch": 0.27, "grad_norm": 3.1453044414520264, "learning_rate": 0.0002, "loss": 1.3684, "step": 66320 }, { "epoch": 0.27, "grad_norm": 3.106062412261963, "learning_rate": 0.0002, "loss": 1.5077, "step": 66330 }, { "epoch": 0.27, "grad_norm": 6.773281574249268, "learning_rate": 0.0002, "loss": 1.3891, "step": 66340 }, { "epoch": 0.27, "grad_norm": 3.1403775215148926, "learning_rate": 0.0002, "loss": 1.4187, "step": 66350 }, { "epoch": 0.27, "grad_norm": 4.100008487701416, "learning_rate": 0.0002, "loss": 1.2784, "step": 66360 }, { "epoch": 0.27, "grad_norm": 2.564404010772705, "learning_rate": 0.0002, "loss": 1.5484, "step": 66370 }, { "epoch": 0.27, "grad_norm": 3.0423662662506104, "learning_rate": 0.0002, "loss": 1.5427, "step": 66380 }, { "epoch": 0.27, "grad_norm": 2.9776227474212646, "learning_rate": 0.0002, "loss": 1.7485, "step": 66390 }, { "epoch": 0.27, "grad_norm": 4.726428985595703, "learning_rate": 0.0002, "loss": 1.5218, "step": 66400 }, { "epoch": 0.27, "grad_norm": 2.621244430541992, "learning_rate": 0.0002, "loss": 1.425, "step": 66410 }, { "epoch": 0.27, "grad_norm": 2.919712781906128, "learning_rate": 0.0002, "loss": 1.5251, "step": 66420 }, { "epoch": 0.27, "grad_norm": 2.130737543106079, "learning_rate": 0.0002, "loss": 1.4577, "step": 66430 }, { "epoch": 0.27, "grad_norm": 3.630830764770508, "learning_rate": 0.0002, "loss": 1.5238, "step": 66440 }, { "epoch": 0.27, "grad_norm": 2.556926727294922, "learning_rate": 0.0002, "loss": 1.735, "step": 66450 }, { "epoch": 0.27, "grad_norm": 3.3954854011535645, "learning_rate": 0.0002, "loss": 1.7159, "step": 66460 }, { "epoch": 0.27, "grad_norm": 2.4498822689056396, "learning_rate": 0.0002, "loss": 1.4042, "step": 66470 }, { "epoch": 0.27, "grad_norm": 3.144036293029785, "learning_rate": 0.0002, "loss": 1.6284, "step": 66480 }, { "epoch": 0.27, "grad_norm": 3.2544922828674316, "learning_rate": 0.0002, "loss": 1.5314, "step": 66490 }, { "epoch": 0.27, "grad_norm": 4.195065498352051, "learning_rate": 0.0002, "loss": 1.8955, "step": 66500 }, { "epoch": 0.27, "grad_norm": 2.2840166091918945, "learning_rate": 0.0002, "loss": 1.542, "step": 66510 }, { "epoch": 0.27, "grad_norm": 1.9521340131759644, "learning_rate": 0.0002, "loss": 1.4073, "step": 66520 }, { "epoch": 0.27, "grad_norm": 2.3680098056793213, "learning_rate": 0.0002, "loss": 1.5518, "step": 66530 }, { "epoch": 0.27, "grad_norm": 3.551133394241333, "learning_rate": 0.0002, "loss": 1.5163, "step": 66540 }, { "epoch": 0.27, "grad_norm": 2.524678945541382, "learning_rate": 0.0002, "loss": 1.6889, "step": 66550 }, { "epoch": 0.27, "grad_norm": 2.5670900344848633, "learning_rate": 0.0002, "loss": 1.4804, "step": 66560 }, { "epoch": 0.27, "grad_norm": 3.6194207668304443, "learning_rate": 0.0002, "loss": 1.4494, "step": 66570 }, { "epoch": 0.27, "grad_norm": 3.115696668624878, "learning_rate": 0.0002, "loss": 1.809, "step": 66580 }, { "epoch": 0.27, "grad_norm": 4.054843902587891, "learning_rate": 0.0002, "loss": 1.8044, "step": 66590 }, { "epoch": 0.27, "grad_norm": 4.157585620880127, "learning_rate": 0.0002, "loss": 1.6486, "step": 66600 }, { "epoch": 0.27, "grad_norm": 2.7307543754577637, "learning_rate": 0.0002, "loss": 1.3468, "step": 66610 }, { "epoch": 0.27, "grad_norm": 2.245248556137085, "learning_rate": 0.0002, "loss": 1.5516, "step": 66620 }, { "epoch": 0.27, "grad_norm": 1.7594332695007324, "learning_rate": 0.0002, "loss": 1.4585, "step": 66630 }, { "epoch": 0.27, "grad_norm": 3.2224905490875244, "learning_rate": 0.0002, "loss": 1.7072, "step": 66640 }, { "epoch": 0.27, "grad_norm": 3.568005084991455, "learning_rate": 0.0002, "loss": 1.4328, "step": 66650 }, { "epoch": 0.27, "grad_norm": 3.239528179168701, "learning_rate": 0.0002, "loss": 1.5054, "step": 66660 }, { "epoch": 0.27, "grad_norm": 1.8254083395004272, "learning_rate": 0.0002, "loss": 1.6205, "step": 66670 }, { "epoch": 0.27, "grad_norm": 2.6624293327331543, "learning_rate": 0.0002, "loss": 1.7959, "step": 66680 }, { "epoch": 0.27, "grad_norm": 2.700819253921509, "learning_rate": 0.0002, "loss": 1.6427, "step": 66690 }, { "epoch": 0.27, "grad_norm": 2.1836843490600586, "learning_rate": 0.0002, "loss": 1.7881, "step": 66700 }, { "epoch": 0.27, "grad_norm": 2.248955726623535, "learning_rate": 0.0002, "loss": 1.6558, "step": 66710 }, { "epoch": 0.27, "grad_norm": 2.9341018199920654, "learning_rate": 0.0002, "loss": 1.6742, "step": 66720 }, { "epoch": 0.27, "grad_norm": 4.0316362380981445, "learning_rate": 0.0002, "loss": 1.5857, "step": 66730 }, { "epoch": 0.27, "grad_norm": 2.5805320739746094, "learning_rate": 0.0002, "loss": 1.556, "step": 66740 }, { "epoch": 0.27, "grad_norm": 3.5853734016418457, "learning_rate": 0.0002, "loss": 1.6741, "step": 66750 }, { "epoch": 0.27, "grad_norm": 3.551715850830078, "learning_rate": 0.0002, "loss": 1.4493, "step": 66760 }, { "epoch": 0.27, "grad_norm": 2.0938093662261963, "learning_rate": 0.0002, "loss": 1.4325, "step": 66770 }, { "epoch": 0.27, "grad_norm": 3.932709217071533, "learning_rate": 0.0002, "loss": 1.5447, "step": 66780 }, { "epoch": 0.27, "grad_norm": 2.2000179290771484, "learning_rate": 0.0002, "loss": 1.6525, "step": 66790 }, { "epoch": 0.27, "grad_norm": 3.0867533683776855, "learning_rate": 0.0002, "loss": 1.6847, "step": 66800 }, { "epoch": 0.27, "grad_norm": 3.8326683044433594, "learning_rate": 0.0002, "loss": 1.7329, "step": 66810 }, { "epoch": 0.27, "grad_norm": 2.8720285892486572, "learning_rate": 0.0002, "loss": 1.5164, "step": 66820 }, { "epoch": 0.27, "grad_norm": 2.775959014892578, "learning_rate": 0.0002, "loss": 1.6771, "step": 66830 }, { "epoch": 0.27, "grad_norm": 3.192431688308716, "learning_rate": 0.0002, "loss": 1.5361, "step": 66840 }, { "epoch": 0.27, "grad_norm": 3.251875638961792, "learning_rate": 0.0002, "loss": 1.5391, "step": 66850 }, { "epoch": 0.27, "grad_norm": 3.7742865085601807, "learning_rate": 0.0002, "loss": 1.8864, "step": 66860 }, { "epoch": 0.27, "grad_norm": 2.8884317874908447, "learning_rate": 0.0002, "loss": 1.6276, "step": 66870 }, { "epoch": 0.27, "grad_norm": 2.4793708324432373, "learning_rate": 0.0002, "loss": 1.5846, "step": 66880 }, { "epoch": 0.27, "grad_norm": 2.059624433517456, "learning_rate": 0.0002, "loss": 1.8764, "step": 66890 }, { "epoch": 0.27, "grad_norm": 1.9907087087631226, "learning_rate": 0.0002, "loss": 1.5806, "step": 66900 }, { "epoch": 0.27, "grad_norm": 2.375537633895874, "learning_rate": 0.0002, "loss": 1.4548, "step": 66910 }, { "epoch": 0.27, "grad_norm": 4.646564960479736, "learning_rate": 0.0002, "loss": 1.5591, "step": 66920 }, { "epoch": 0.27, "grad_norm": 2.3721797466278076, "learning_rate": 0.0002, "loss": 1.6759, "step": 66930 }, { "epoch": 0.27, "grad_norm": 3.2470526695251465, "learning_rate": 0.0002, "loss": 1.5048, "step": 66940 }, { "epoch": 0.27, "grad_norm": 4.179333686828613, "learning_rate": 0.0002, "loss": 1.4116, "step": 66950 }, { "epoch": 0.27, "grad_norm": 2.6777143478393555, "learning_rate": 0.0002, "loss": 1.6119, "step": 66960 }, { "epoch": 0.27, "grad_norm": 3.833712100982666, "learning_rate": 0.0002, "loss": 1.5842, "step": 66970 }, { "epoch": 0.27, "grad_norm": 2.6009838581085205, "learning_rate": 0.0002, "loss": 1.6398, "step": 66980 }, { "epoch": 0.27, "grad_norm": 2.681295871734619, "learning_rate": 0.0002, "loss": 1.7986, "step": 66990 }, { "epoch": 0.27, "grad_norm": 1.6856235265731812, "learning_rate": 0.0002, "loss": 1.4315, "step": 67000 }, { "epoch": 0.27, "grad_norm": 2.994852066040039, "learning_rate": 0.0002, "loss": 1.5476, "step": 67010 }, { "epoch": 0.27, "grad_norm": 4.402631759643555, "learning_rate": 0.0002, "loss": 1.7342, "step": 67020 }, { "epoch": 0.27, "grad_norm": 3.7073452472686768, "learning_rate": 0.0002, "loss": 1.6513, "step": 67030 }, { "epoch": 0.27, "grad_norm": 2.129173755645752, "learning_rate": 0.0002, "loss": 1.5883, "step": 67040 }, { "epoch": 0.27, "grad_norm": 4.119531154632568, "learning_rate": 0.0002, "loss": 1.5203, "step": 67050 }, { "epoch": 0.27, "grad_norm": 3.578123092651367, "learning_rate": 0.0002, "loss": 1.3541, "step": 67060 }, { "epoch": 0.27, "grad_norm": 2.4682161808013916, "learning_rate": 0.0002, "loss": 1.6549, "step": 67070 }, { "epoch": 0.27, "grad_norm": 4.504716873168945, "learning_rate": 0.0002, "loss": 1.7667, "step": 67080 }, { "epoch": 0.27, "grad_norm": 2.3753817081451416, "learning_rate": 0.0002, "loss": 1.4932, "step": 67090 }, { "epoch": 0.27, "grad_norm": 2.518909215927124, "learning_rate": 0.0002, "loss": 1.6477, "step": 67100 }, { "epoch": 0.27, "grad_norm": 2.7610297203063965, "learning_rate": 0.0002, "loss": 1.7693, "step": 67110 }, { "epoch": 0.27, "grad_norm": 3.3350179195404053, "learning_rate": 0.0002, "loss": 1.587, "step": 67120 }, { "epoch": 0.27, "grad_norm": 2.127856492996216, "learning_rate": 0.0002, "loss": 1.5324, "step": 67130 }, { "epoch": 0.27, "grad_norm": 1.8460180759429932, "learning_rate": 0.0002, "loss": 1.8897, "step": 67140 }, { "epoch": 0.27, "grad_norm": 2.665705919265747, "learning_rate": 0.0002, "loss": 1.6996, "step": 67150 }, { "epoch": 0.27, "grad_norm": 2.2483906745910645, "learning_rate": 0.0002, "loss": 1.6001, "step": 67160 }, { "epoch": 0.27, "grad_norm": 2.6966543197631836, "learning_rate": 0.0002, "loss": 1.8107, "step": 67170 }, { "epoch": 0.27, "grad_norm": 2.4105305671691895, "learning_rate": 0.0002, "loss": 1.3594, "step": 67180 }, { "epoch": 0.27, "grad_norm": 3.3860740661621094, "learning_rate": 0.0002, "loss": 1.634, "step": 67190 }, { "epoch": 0.27, "grad_norm": 3.099823474884033, "learning_rate": 0.0002, "loss": 1.5864, "step": 67200 }, { "epoch": 0.27, "grad_norm": 2.167163372039795, "learning_rate": 0.0002, "loss": 1.5461, "step": 67210 }, { "epoch": 0.27, "grad_norm": 3.0791215896606445, "learning_rate": 0.0002, "loss": 1.6979, "step": 67220 }, { "epoch": 0.27, "grad_norm": 3.474747896194458, "learning_rate": 0.0002, "loss": 1.434, "step": 67230 }, { "epoch": 0.27, "grad_norm": 2.9369757175445557, "learning_rate": 0.0002, "loss": 1.5351, "step": 67240 }, { "epoch": 0.27, "grad_norm": 3.6354188919067383, "learning_rate": 0.0002, "loss": 1.7071, "step": 67250 }, { "epoch": 0.27, "grad_norm": 3.1675775051116943, "learning_rate": 0.0002, "loss": 1.4784, "step": 67260 }, { "epoch": 0.27, "grad_norm": 2.0014257431030273, "learning_rate": 0.0002, "loss": 1.5761, "step": 67270 }, { "epoch": 0.27, "grad_norm": 2.9136030673980713, "learning_rate": 0.0002, "loss": 1.6225, "step": 67280 }, { "epoch": 0.27, "grad_norm": 2.893514394760132, "learning_rate": 0.0002, "loss": 1.4119, "step": 67290 }, { "epoch": 0.27, "grad_norm": 1.7763924598693848, "learning_rate": 0.0002, "loss": 1.3763, "step": 67300 }, { "epoch": 0.27, "grad_norm": 2.633315324783325, "learning_rate": 0.0002, "loss": 1.6243, "step": 67310 }, { "epoch": 0.27, "grad_norm": 3.0721216201782227, "learning_rate": 0.0002, "loss": 1.6678, "step": 67320 }, { "epoch": 0.27, "grad_norm": 3.360506057739258, "learning_rate": 0.0002, "loss": 1.8215, "step": 67330 }, { "epoch": 0.27, "grad_norm": 3.674057960510254, "learning_rate": 0.0002, "loss": 1.6999, "step": 67340 }, { "epoch": 0.27, "grad_norm": 5.195867538452148, "learning_rate": 0.0002, "loss": 1.6915, "step": 67350 }, { "epoch": 0.27, "grad_norm": 3.293114423751831, "learning_rate": 0.0002, "loss": 1.616, "step": 67360 }, { "epoch": 0.27, "grad_norm": 2.9315414428710938, "learning_rate": 0.0002, "loss": 1.4916, "step": 67370 }, { "epoch": 0.27, "grad_norm": 3.2248027324676514, "learning_rate": 0.0002, "loss": 1.5746, "step": 67380 }, { "epoch": 0.27, "grad_norm": 5.786603927612305, "learning_rate": 0.0002, "loss": 1.5028, "step": 67390 }, { "epoch": 0.27, "grad_norm": 4.345974922180176, "learning_rate": 0.0002, "loss": 1.6239, "step": 67400 }, { "epoch": 0.27, "grad_norm": 2.8659348487854004, "learning_rate": 0.0002, "loss": 1.586, "step": 67410 }, { "epoch": 0.27, "grad_norm": 2.161736488342285, "learning_rate": 0.0002, "loss": 1.3152, "step": 67420 }, { "epoch": 0.27, "grad_norm": 3.1936228275299072, "learning_rate": 0.0002, "loss": 1.279, "step": 67430 }, { "epoch": 0.27, "grad_norm": 4.767535209655762, "learning_rate": 0.0002, "loss": 1.5681, "step": 67440 }, { "epoch": 0.27, "grad_norm": 4.638792037963867, "learning_rate": 0.0002, "loss": 1.8757, "step": 67450 }, { "epoch": 0.27, "grad_norm": 5.060746192932129, "learning_rate": 0.0002, "loss": 1.7391, "step": 67460 }, { "epoch": 0.27, "grad_norm": 1.897297739982605, "learning_rate": 0.0002, "loss": 1.4168, "step": 67470 }, { "epoch": 0.27, "grad_norm": 2.5903074741363525, "learning_rate": 0.0002, "loss": 1.5269, "step": 67480 }, { "epoch": 0.27, "grad_norm": 3.284862995147705, "learning_rate": 0.0002, "loss": 1.628, "step": 67490 }, { "epoch": 0.27, "grad_norm": 3.031662940979004, "learning_rate": 0.0002, "loss": 1.5133, "step": 67500 }, { "epoch": 0.27, "grad_norm": 3.0585179328918457, "learning_rate": 0.0002, "loss": 1.6665, "step": 67510 }, { "epoch": 0.27, "grad_norm": 3.147427797317505, "learning_rate": 0.0002, "loss": 1.7055, "step": 67520 }, { "epoch": 0.27, "grad_norm": 3.117306709289551, "learning_rate": 0.0002, "loss": 1.6352, "step": 67530 }, { "epoch": 0.27, "grad_norm": 3.046409845352173, "learning_rate": 0.0002, "loss": 1.3965, "step": 67540 }, { "epoch": 0.27, "grad_norm": 2.0940613746643066, "learning_rate": 0.0002, "loss": 1.6365, "step": 67550 }, { "epoch": 0.28, "grad_norm": 2.445981740951538, "learning_rate": 0.0002, "loss": 1.4997, "step": 67560 }, { "epoch": 0.28, "grad_norm": 3.538051128387451, "learning_rate": 0.0002, "loss": 1.4876, "step": 67570 }, { "epoch": 0.28, "grad_norm": 2.841731548309326, "learning_rate": 0.0002, "loss": 1.7078, "step": 67580 }, { "epoch": 0.28, "grad_norm": 3.8132309913635254, "learning_rate": 0.0002, "loss": 1.5132, "step": 67590 }, { "epoch": 0.28, "grad_norm": 3.271960496902466, "learning_rate": 0.0002, "loss": 1.5462, "step": 67600 }, { "epoch": 0.28, "grad_norm": 3.327958345413208, "learning_rate": 0.0002, "loss": 1.6819, "step": 67610 }, { "epoch": 0.28, "grad_norm": 2.688385248184204, "learning_rate": 0.0002, "loss": 1.449, "step": 67620 }, { "epoch": 0.28, "grad_norm": 2.4922878742218018, "learning_rate": 0.0002, "loss": 1.6281, "step": 67630 }, { "epoch": 0.28, "grad_norm": 2.545626401901245, "learning_rate": 0.0002, "loss": 1.6233, "step": 67640 }, { "epoch": 0.28, "grad_norm": 2.4092156887054443, "learning_rate": 0.0002, "loss": 1.374, "step": 67650 }, { "epoch": 0.28, "grad_norm": 2.8083271980285645, "learning_rate": 0.0002, "loss": 1.565, "step": 67660 }, { "epoch": 0.28, "grad_norm": 1.6273798942565918, "learning_rate": 0.0002, "loss": 1.4353, "step": 67670 }, { "epoch": 0.28, "grad_norm": 2.423539161682129, "learning_rate": 0.0002, "loss": 1.3472, "step": 67680 }, { "epoch": 0.28, "grad_norm": 3.0401573181152344, "learning_rate": 0.0002, "loss": 1.2851, "step": 67690 }, { "epoch": 0.28, "grad_norm": 3.2959117889404297, "learning_rate": 0.0002, "loss": 1.6508, "step": 67700 }, { "epoch": 0.28, "grad_norm": 2.40523099899292, "learning_rate": 0.0002, "loss": 1.7759, "step": 67710 }, { "epoch": 0.28, "grad_norm": 3.3385472297668457, "learning_rate": 0.0002, "loss": 1.4982, "step": 67720 }, { "epoch": 0.28, "grad_norm": 4.7095866203308105, "learning_rate": 0.0002, "loss": 1.535, "step": 67730 }, { "epoch": 0.28, "grad_norm": 5.186809539794922, "learning_rate": 0.0002, "loss": 1.7505, "step": 67740 }, { "epoch": 0.28, "grad_norm": 2.129236936569214, "learning_rate": 0.0002, "loss": 1.466, "step": 67750 }, { "epoch": 0.28, "grad_norm": 3.2989187240600586, "learning_rate": 0.0002, "loss": 1.6273, "step": 67760 }, { "epoch": 0.28, "grad_norm": 2.315490484237671, "learning_rate": 0.0002, "loss": 1.6124, "step": 67770 }, { "epoch": 0.28, "grad_norm": 3.0378592014312744, "learning_rate": 0.0002, "loss": 1.5242, "step": 67780 }, { "epoch": 0.28, "grad_norm": 2.185793876647949, "learning_rate": 0.0002, "loss": 1.6564, "step": 67790 }, { "epoch": 0.28, "grad_norm": 3.6300222873687744, "learning_rate": 0.0002, "loss": 1.6469, "step": 67800 }, { "epoch": 0.28, "grad_norm": 2.6102802753448486, "learning_rate": 0.0002, "loss": 1.7747, "step": 67810 }, { "epoch": 0.28, "grad_norm": 3.9192895889282227, "learning_rate": 0.0002, "loss": 1.4356, "step": 67820 }, { "epoch": 0.28, "grad_norm": 3.893634796142578, "learning_rate": 0.0002, "loss": 1.8835, "step": 67830 }, { "epoch": 0.28, "grad_norm": 2.5522241592407227, "learning_rate": 0.0002, "loss": 1.6831, "step": 67840 }, { "epoch": 0.28, "grad_norm": 3.186086416244507, "learning_rate": 0.0002, "loss": 1.5573, "step": 67850 }, { "epoch": 0.28, "grad_norm": 3.4228246212005615, "learning_rate": 0.0002, "loss": 1.846, "step": 67860 }, { "epoch": 0.28, "grad_norm": 3.2421019077301025, "learning_rate": 0.0002, "loss": 1.5842, "step": 67870 }, { "epoch": 0.28, "grad_norm": 2.316330671310425, "learning_rate": 0.0002, "loss": 1.5802, "step": 67880 }, { "epoch": 0.28, "grad_norm": 2.9496774673461914, "learning_rate": 0.0002, "loss": 1.7392, "step": 67890 }, { "epoch": 0.28, "grad_norm": 2.073452949523926, "learning_rate": 0.0002, "loss": 1.6204, "step": 67900 }, { "epoch": 0.28, "grad_norm": 1.7176485061645508, "learning_rate": 0.0002, "loss": 1.419, "step": 67910 }, { "epoch": 0.28, "grad_norm": 2.983206033706665, "learning_rate": 0.0002, "loss": 1.6574, "step": 67920 }, { "epoch": 0.28, "grad_norm": 2.830005168914795, "learning_rate": 0.0002, "loss": 1.5364, "step": 67930 }, { "epoch": 0.28, "grad_norm": 3.611997127532959, "learning_rate": 0.0002, "loss": 1.442, "step": 67940 }, { "epoch": 0.28, "grad_norm": 2.1322457790374756, "learning_rate": 0.0002, "loss": 1.4031, "step": 67950 }, { "epoch": 0.28, "grad_norm": 4.179970741271973, "learning_rate": 0.0002, "loss": 1.8458, "step": 67960 }, { "epoch": 0.28, "grad_norm": 1.4306355714797974, "learning_rate": 0.0002, "loss": 1.3374, "step": 67970 }, { "epoch": 0.28, "grad_norm": 2.5331215858459473, "learning_rate": 0.0002, "loss": 1.4291, "step": 67980 }, { "epoch": 0.28, "grad_norm": 3.2195117473602295, "learning_rate": 0.0002, "loss": 1.6831, "step": 67990 }, { "epoch": 0.28, "grad_norm": 2.9210047721862793, "learning_rate": 0.0002, "loss": 1.6433, "step": 68000 }, { "epoch": 0.28, "grad_norm": 4.717226982116699, "learning_rate": 0.0002, "loss": 1.6832, "step": 68010 }, { "epoch": 0.28, "grad_norm": 1.8180787563323975, "learning_rate": 0.0002, "loss": 1.655, "step": 68020 }, { "epoch": 0.28, "grad_norm": 2.853703737258911, "learning_rate": 0.0002, "loss": 1.511, "step": 68030 }, { "epoch": 0.28, "grad_norm": 5.805026054382324, "learning_rate": 0.0002, "loss": 1.2358, "step": 68040 }, { "epoch": 0.28, "grad_norm": 3.140889883041382, "learning_rate": 0.0002, "loss": 1.6262, "step": 68050 }, { "epoch": 0.28, "grad_norm": 2.710618734359741, "learning_rate": 0.0002, "loss": 1.5276, "step": 68060 }, { "epoch": 0.28, "grad_norm": 3.0442209243774414, "learning_rate": 0.0002, "loss": 1.4087, "step": 68070 }, { "epoch": 0.28, "grad_norm": 3.011648416519165, "learning_rate": 0.0002, "loss": 1.5038, "step": 68080 }, { "epoch": 0.28, "grad_norm": 3.428203821182251, "learning_rate": 0.0002, "loss": 1.6689, "step": 68090 }, { "epoch": 0.28, "grad_norm": 3.593226432800293, "learning_rate": 0.0002, "loss": 1.7545, "step": 68100 }, { "epoch": 0.28, "grad_norm": 3.549844741821289, "learning_rate": 0.0002, "loss": 1.7481, "step": 68110 }, { "epoch": 0.28, "grad_norm": 3.5765461921691895, "learning_rate": 0.0002, "loss": 1.8441, "step": 68120 }, { "epoch": 0.28, "grad_norm": 3.705186605453491, "learning_rate": 0.0002, "loss": 1.5724, "step": 68130 }, { "epoch": 0.28, "grad_norm": 5.481603145599365, "learning_rate": 0.0002, "loss": 1.7509, "step": 68140 }, { "epoch": 0.28, "grad_norm": 3.0995423793792725, "learning_rate": 0.0002, "loss": 1.6005, "step": 68150 }, { "epoch": 0.28, "grad_norm": 3.06638503074646, "learning_rate": 0.0002, "loss": 1.5917, "step": 68160 }, { "epoch": 0.28, "grad_norm": 2.711489200592041, "learning_rate": 0.0002, "loss": 1.7864, "step": 68170 }, { "epoch": 0.28, "grad_norm": 2.3162152767181396, "learning_rate": 0.0002, "loss": 1.416, "step": 68180 }, { "epoch": 0.28, "grad_norm": 2.1954874992370605, "learning_rate": 0.0002, "loss": 1.4968, "step": 68190 }, { "epoch": 0.28, "grad_norm": 3.237644672393799, "learning_rate": 0.0002, "loss": 1.4825, "step": 68200 }, { "epoch": 0.28, "grad_norm": 3.3829212188720703, "learning_rate": 0.0002, "loss": 1.5355, "step": 68210 }, { "epoch": 0.28, "grad_norm": 3.9492852687835693, "learning_rate": 0.0002, "loss": 1.678, "step": 68220 }, { "epoch": 0.28, "grad_norm": 3.0339910984039307, "learning_rate": 0.0002, "loss": 1.6117, "step": 68230 }, { "epoch": 0.28, "grad_norm": 1.9557080268859863, "learning_rate": 0.0002, "loss": 1.5351, "step": 68240 }, { "epoch": 0.28, "grad_norm": 3.505234479904175, "learning_rate": 0.0002, "loss": 1.7581, "step": 68250 }, { "epoch": 0.28, "grad_norm": 2.1667709350585938, "learning_rate": 0.0002, "loss": 1.482, "step": 68260 }, { "epoch": 0.28, "grad_norm": 3.974470376968384, "learning_rate": 0.0002, "loss": 1.6621, "step": 68270 }, { "epoch": 0.28, "grad_norm": 1.4019676446914673, "learning_rate": 0.0002, "loss": 1.6231, "step": 68280 }, { "epoch": 0.28, "grad_norm": 3.3113465309143066, "learning_rate": 0.0002, "loss": 1.4301, "step": 68290 }, { "epoch": 0.28, "grad_norm": 2.5013480186462402, "learning_rate": 0.0002, "loss": 1.5569, "step": 68300 }, { "epoch": 0.28, "grad_norm": 3.3558430671691895, "learning_rate": 0.0002, "loss": 1.6078, "step": 68310 }, { "epoch": 0.28, "grad_norm": 4.045673847198486, "learning_rate": 0.0002, "loss": 1.4506, "step": 68320 }, { "epoch": 0.28, "grad_norm": 2.7837440967559814, "learning_rate": 0.0002, "loss": 1.7713, "step": 68330 }, { "epoch": 0.28, "grad_norm": 3.9708070755004883, "learning_rate": 0.0002, "loss": 1.7986, "step": 68340 }, { "epoch": 0.28, "grad_norm": 2.227102756500244, "learning_rate": 0.0002, "loss": 1.3611, "step": 68350 }, { "epoch": 0.28, "grad_norm": 2.947596311569214, "learning_rate": 0.0002, "loss": 1.6283, "step": 68360 }, { "epoch": 0.28, "grad_norm": 3.335547924041748, "learning_rate": 0.0002, "loss": 1.6081, "step": 68370 }, { "epoch": 0.28, "grad_norm": 4.589605331420898, "learning_rate": 0.0002, "loss": 1.2424, "step": 68380 }, { "epoch": 0.28, "grad_norm": 2.542894124984741, "learning_rate": 0.0002, "loss": 1.4013, "step": 68390 }, { "epoch": 0.28, "grad_norm": 3.010521650314331, "learning_rate": 0.0002, "loss": 1.4192, "step": 68400 }, { "epoch": 0.28, "grad_norm": 2.580794095993042, "learning_rate": 0.0002, "loss": 1.777, "step": 68410 }, { "epoch": 0.28, "grad_norm": 2.3382153511047363, "learning_rate": 0.0002, "loss": 1.5137, "step": 68420 }, { "epoch": 0.28, "grad_norm": 2.5737476348876953, "learning_rate": 0.0002, "loss": 1.9531, "step": 68430 }, { "epoch": 0.28, "grad_norm": 3.3253631591796875, "learning_rate": 0.0002, "loss": 1.6192, "step": 68440 }, { "epoch": 0.28, "grad_norm": 3.3629825115203857, "learning_rate": 0.0002, "loss": 1.6167, "step": 68450 }, { "epoch": 0.28, "grad_norm": 1.8377329111099243, "learning_rate": 0.0002, "loss": 1.5203, "step": 68460 }, { "epoch": 0.28, "grad_norm": 2.3410112857818604, "learning_rate": 0.0002, "loss": 1.7802, "step": 68470 }, { "epoch": 0.28, "grad_norm": 3.5654332637786865, "learning_rate": 0.0002, "loss": 1.7164, "step": 68480 }, { "epoch": 0.28, "grad_norm": 4.992631435394287, "learning_rate": 0.0002, "loss": 1.4114, "step": 68490 }, { "epoch": 0.28, "grad_norm": 2.6423120498657227, "learning_rate": 0.0002, "loss": 1.4873, "step": 68500 }, { "epoch": 0.28, "grad_norm": 3.0352721214294434, "learning_rate": 0.0002, "loss": 1.4826, "step": 68510 }, { "epoch": 0.28, "grad_norm": 3.62459659576416, "learning_rate": 0.0002, "loss": 1.7224, "step": 68520 }, { "epoch": 0.28, "grad_norm": 3.492384910583496, "learning_rate": 0.0002, "loss": 1.4884, "step": 68530 }, { "epoch": 0.28, "grad_norm": 2.85286283493042, "learning_rate": 0.0002, "loss": 1.6166, "step": 68540 }, { "epoch": 0.28, "grad_norm": 6.547679901123047, "learning_rate": 0.0002, "loss": 1.5414, "step": 68550 }, { "epoch": 0.28, "grad_norm": 5.235560894012451, "learning_rate": 0.0002, "loss": 1.6054, "step": 68560 }, { "epoch": 0.28, "grad_norm": 3.296252489089966, "learning_rate": 0.0002, "loss": 1.5706, "step": 68570 }, { "epoch": 0.28, "grad_norm": 3.9079394340515137, "learning_rate": 0.0002, "loss": 1.4381, "step": 68580 }, { "epoch": 0.28, "grad_norm": 4.498558044433594, "learning_rate": 0.0002, "loss": 1.6089, "step": 68590 }, { "epoch": 0.28, "grad_norm": 2.9600729942321777, "learning_rate": 0.0002, "loss": 1.7393, "step": 68600 }, { "epoch": 0.28, "grad_norm": 3.0778653621673584, "learning_rate": 0.0002, "loss": 1.6215, "step": 68610 }, { "epoch": 0.28, "grad_norm": 1.8958734273910522, "learning_rate": 0.0002, "loss": 1.5028, "step": 68620 }, { "epoch": 0.28, "grad_norm": 2.8030388355255127, "learning_rate": 0.0002, "loss": 1.5937, "step": 68630 }, { "epoch": 0.28, "grad_norm": 2.7582476139068604, "learning_rate": 0.0002, "loss": 1.3696, "step": 68640 }, { "epoch": 0.28, "grad_norm": 1.991922378540039, "learning_rate": 0.0002, "loss": 1.6231, "step": 68650 }, { "epoch": 0.28, "grad_norm": 3.6889736652374268, "learning_rate": 0.0002, "loss": 1.4619, "step": 68660 }, { "epoch": 0.28, "grad_norm": 3.643745183944702, "learning_rate": 0.0002, "loss": 1.6878, "step": 68670 }, { "epoch": 0.28, "grad_norm": 2.1355955600738525, "learning_rate": 0.0002, "loss": 1.5095, "step": 68680 }, { "epoch": 0.28, "grad_norm": 1.978060007095337, "learning_rate": 0.0002, "loss": 1.3862, "step": 68690 }, { "epoch": 0.28, "grad_norm": 3.000035285949707, "learning_rate": 0.0002, "loss": 1.5205, "step": 68700 }, { "epoch": 0.28, "grad_norm": 4.083842754364014, "learning_rate": 0.0002, "loss": 1.738, "step": 68710 }, { "epoch": 0.28, "grad_norm": 3.7002484798431396, "learning_rate": 0.0002, "loss": 1.4837, "step": 68720 }, { "epoch": 0.28, "grad_norm": 3.9474222660064697, "learning_rate": 0.0002, "loss": 1.6537, "step": 68730 }, { "epoch": 0.28, "grad_norm": 3.5112528800964355, "learning_rate": 0.0002, "loss": 1.6899, "step": 68740 }, { "epoch": 0.28, "grad_norm": 2.258695602416992, "learning_rate": 0.0002, "loss": 1.5033, "step": 68750 }, { "epoch": 0.28, "grad_norm": 1.6823545694351196, "learning_rate": 0.0002, "loss": 1.4951, "step": 68760 }, { "epoch": 0.28, "grad_norm": 3.2126762866973877, "learning_rate": 0.0002, "loss": 1.6431, "step": 68770 }, { "epoch": 0.28, "grad_norm": 3.0893394947052, "learning_rate": 0.0002, "loss": 1.165, "step": 68780 }, { "epoch": 0.28, "grad_norm": 2.4186248779296875, "learning_rate": 0.0002, "loss": 1.4896, "step": 68790 }, { "epoch": 0.28, "grad_norm": 9.333301544189453, "learning_rate": 0.0002, "loss": 1.6283, "step": 68800 }, { "epoch": 0.28, "grad_norm": 2.186408519744873, "learning_rate": 0.0002, "loss": 1.61, "step": 68810 }, { "epoch": 0.28, "grad_norm": 2.4981374740600586, "learning_rate": 0.0002, "loss": 1.8771, "step": 68820 }, { "epoch": 0.28, "grad_norm": 2.2412643432617188, "learning_rate": 0.0002, "loss": 1.6556, "step": 68830 }, { "epoch": 0.28, "grad_norm": 2.672948122024536, "learning_rate": 0.0002, "loss": 1.5255, "step": 68840 }, { "epoch": 0.28, "grad_norm": 2.4392447471618652, "learning_rate": 0.0002, "loss": 1.666, "step": 68850 }, { "epoch": 0.28, "grad_norm": 3.4958627223968506, "learning_rate": 0.0002, "loss": 1.5484, "step": 68860 }, { "epoch": 0.28, "grad_norm": 2.885354995727539, "learning_rate": 0.0002, "loss": 1.7821, "step": 68870 }, { "epoch": 0.28, "grad_norm": 3.0669713020324707, "learning_rate": 0.0002, "loss": 1.5961, "step": 68880 }, { "epoch": 0.28, "grad_norm": 3.0429928302764893, "learning_rate": 0.0002, "loss": 1.6972, "step": 68890 }, { "epoch": 0.28, "grad_norm": 2.6279401779174805, "learning_rate": 0.0002, "loss": 1.4897, "step": 68900 }, { "epoch": 0.28, "grad_norm": 2.4802963733673096, "learning_rate": 0.0002, "loss": 1.827, "step": 68910 }, { "epoch": 0.28, "grad_norm": 3.1263740062713623, "learning_rate": 0.0002, "loss": 1.2667, "step": 68920 }, { "epoch": 0.28, "grad_norm": 3.5248029232025146, "learning_rate": 0.0002, "loss": 1.4347, "step": 68930 }, { "epoch": 0.28, "grad_norm": 2.8246912956237793, "learning_rate": 0.0002, "loss": 1.6732, "step": 68940 }, { "epoch": 0.28, "grad_norm": 3.569091320037842, "learning_rate": 0.0002, "loss": 1.4624, "step": 68950 }, { "epoch": 0.28, "grad_norm": 1.8310673236846924, "learning_rate": 0.0002, "loss": 1.6152, "step": 68960 }, { "epoch": 0.28, "grad_norm": 2.6721580028533936, "learning_rate": 0.0002, "loss": 1.4668, "step": 68970 }, { "epoch": 0.28, "grad_norm": 2.83591890335083, "learning_rate": 0.0002, "loss": 1.841, "step": 68980 }, { "epoch": 0.28, "grad_norm": 2.883424997329712, "learning_rate": 0.0002, "loss": 1.4471, "step": 68990 }, { "epoch": 0.28, "grad_norm": 3.3684029579162598, "learning_rate": 0.0002, "loss": 1.4579, "step": 69000 }, { "epoch": 0.28, "grad_norm": 4.09816837310791, "learning_rate": 0.0002, "loss": 1.5347, "step": 69010 }, { "epoch": 0.28, "grad_norm": 2.27292799949646, "learning_rate": 0.0002, "loss": 1.6398, "step": 69020 }, { "epoch": 0.28, "grad_norm": 2.3807811737060547, "learning_rate": 0.0002, "loss": 1.8206, "step": 69030 }, { "epoch": 0.28, "grad_norm": 2.484636068344116, "learning_rate": 0.0002, "loss": 1.4626, "step": 69040 }, { "epoch": 0.28, "grad_norm": 2.7663228511810303, "learning_rate": 0.0002, "loss": 1.7071, "step": 69050 }, { "epoch": 0.28, "grad_norm": 2.2634437084198, "learning_rate": 0.0002, "loss": 1.7315, "step": 69060 }, { "epoch": 0.28, "grad_norm": 2.3402841091156006, "learning_rate": 0.0002, "loss": 1.5936, "step": 69070 }, { "epoch": 0.28, "grad_norm": 2.673327684402466, "learning_rate": 0.0002, "loss": 1.456, "step": 69080 }, { "epoch": 0.28, "grad_norm": 3.570038080215454, "learning_rate": 0.0002, "loss": 1.6632, "step": 69090 }, { "epoch": 0.28, "grad_norm": 2.403996229171753, "learning_rate": 0.0002, "loss": 1.7711, "step": 69100 }, { "epoch": 0.28, "grad_norm": 2.074012279510498, "learning_rate": 0.0002, "loss": 1.444, "step": 69110 }, { "epoch": 0.28, "grad_norm": 2.7865512371063232, "learning_rate": 0.0002, "loss": 1.4873, "step": 69120 }, { "epoch": 0.28, "grad_norm": 2.7067267894744873, "learning_rate": 0.0002, "loss": 1.7676, "step": 69130 }, { "epoch": 0.28, "grad_norm": 2.876897096633911, "learning_rate": 0.0002, "loss": 1.4598, "step": 69140 }, { "epoch": 0.28, "grad_norm": 3.7360336780548096, "learning_rate": 0.0002, "loss": 1.568, "step": 69150 }, { "epoch": 0.28, "grad_norm": 2.5575742721557617, "learning_rate": 0.0002, "loss": 1.6555, "step": 69160 }, { "epoch": 0.28, "grad_norm": 3.860919952392578, "learning_rate": 0.0002, "loss": 1.6396, "step": 69170 }, { "epoch": 0.28, "grad_norm": 1.942491054534912, "learning_rate": 0.0002, "loss": 1.4736, "step": 69180 }, { "epoch": 0.28, "grad_norm": 2.613398551940918, "learning_rate": 0.0002, "loss": 1.5673, "step": 69190 }, { "epoch": 0.28, "grad_norm": 3.306546211242676, "learning_rate": 0.0002, "loss": 1.4551, "step": 69200 }, { "epoch": 0.28, "grad_norm": 4.301321029663086, "learning_rate": 0.0002, "loss": 1.5544, "step": 69210 }, { "epoch": 0.28, "grad_norm": 2.66549015045166, "learning_rate": 0.0002, "loss": 1.5422, "step": 69220 }, { "epoch": 0.28, "grad_norm": 2.7241921424865723, "learning_rate": 0.0002, "loss": 1.518, "step": 69230 }, { "epoch": 0.28, "grad_norm": 3.049057960510254, "learning_rate": 0.0002, "loss": 1.4768, "step": 69240 }, { "epoch": 0.28, "grad_norm": 2.548546552658081, "learning_rate": 0.0002, "loss": 1.8333, "step": 69250 }, { "epoch": 0.28, "grad_norm": 3.024811267852783, "learning_rate": 0.0002, "loss": 1.5442, "step": 69260 }, { "epoch": 0.28, "grad_norm": 3.0888497829437256, "learning_rate": 0.0002, "loss": 1.5079, "step": 69270 }, { "epoch": 0.28, "grad_norm": 3.1907615661621094, "learning_rate": 0.0002, "loss": 1.3856, "step": 69280 }, { "epoch": 0.28, "grad_norm": 2.6924729347229004, "learning_rate": 0.0002, "loss": 1.691, "step": 69290 }, { "epoch": 0.28, "grad_norm": 3.962596893310547, "learning_rate": 0.0002, "loss": 1.6832, "step": 69300 }, { "epoch": 0.28, "grad_norm": 2.826601982116699, "learning_rate": 0.0002, "loss": 1.5244, "step": 69310 }, { "epoch": 0.28, "grad_norm": 2.6163952350616455, "learning_rate": 0.0002, "loss": 1.3239, "step": 69320 }, { "epoch": 0.28, "grad_norm": 2.7408483028411865, "learning_rate": 0.0002, "loss": 1.5575, "step": 69330 }, { "epoch": 0.28, "grad_norm": 2.6793360710144043, "learning_rate": 0.0002, "loss": 1.5062, "step": 69340 }, { "epoch": 0.28, "grad_norm": 2.0977280139923096, "learning_rate": 0.0002, "loss": 1.609, "step": 69350 }, { "epoch": 0.28, "grad_norm": 1.82844877243042, "learning_rate": 0.0002, "loss": 1.6157, "step": 69360 }, { "epoch": 0.28, "grad_norm": 1.9852315187454224, "learning_rate": 0.0002, "loss": 1.8351, "step": 69370 }, { "epoch": 0.28, "grad_norm": 2.5999929904937744, "learning_rate": 0.0002, "loss": 1.3281, "step": 69380 }, { "epoch": 0.28, "grad_norm": 3.676835298538208, "learning_rate": 0.0002, "loss": 1.584, "step": 69390 }, { "epoch": 0.28, "grad_norm": 3.0039148330688477, "learning_rate": 0.0002, "loss": 1.5396, "step": 69400 }, { "epoch": 0.28, "grad_norm": 1.9199533462524414, "learning_rate": 0.0002, "loss": 1.5725, "step": 69410 }, { "epoch": 0.28, "grad_norm": 2.862330436706543, "learning_rate": 0.0002, "loss": 1.565, "step": 69420 }, { "epoch": 0.28, "grad_norm": 2.599417209625244, "learning_rate": 0.0002, "loss": 1.5143, "step": 69430 }, { "epoch": 0.28, "grad_norm": 2.283545732498169, "learning_rate": 0.0002, "loss": 1.6965, "step": 69440 }, { "epoch": 0.28, "grad_norm": 3.574753999710083, "learning_rate": 0.0002, "loss": 1.5728, "step": 69450 }, { "epoch": 0.28, "grad_norm": 2.37652850151062, "learning_rate": 0.0002, "loss": 1.434, "step": 69460 }, { "epoch": 0.28, "grad_norm": 4.124129772186279, "learning_rate": 0.0002, "loss": 1.3994, "step": 69470 }, { "epoch": 0.28, "grad_norm": 2.9146416187286377, "learning_rate": 0.0002, "loss": 1.6132, "step": 69480 }, { "epoch": 0.28, "grad_norm": 2.3750648498535156, "learning_rate": 0.0002, "loss": 1.7242, "step": 69490 }, { "epoch": 0.28, "grad_norm": 2.7942676544189453, "learning_rate": 0.0002, "loss": 1.6369, "step": 69500 }, { "epoch": 0.28, "grad_norm": 2.4088728427886963, "learning_rate": 0.0002, "loss": 1.5344, "step": 69510 }, { "epoch": 0.28, "grad_norm": 3.1194939613342285, "learning_rate": 0.0002, "loss": 1.5366, "step": 69520 }, { "epoch": 0.28, "grad_norm": 3.736149311065674, "learning_rate": 0.0002, "loss": 1.5585, "step": 69530 }, { "epoch": 0.28, "grad_norm": 3.1598148345947266, "learning_rate": 0.0002, "loss": 1.6642, "step": 69540 }, { "epoch": 0.28, "grad_norm": 3.3456714153289795, "learning_rate": 0.0002, "loss": 1.5626, "step": 69550 }, { "epoch": 0.28, "grad_norm": 1.929316520690918, "learning_rate": 0.0002, "loss": 1.6698, "step": 69560 }, { "epoch": 0.28, "grad_norm": 1.9572376012802124, "learning_rate": 0.0002, "loss": 1.5886, "step": 69570 }, { "epoch": 0.28, "grad_norm": 3.3605599403381348, "learning_rate": 0.0002, "loss": 1.5773, "step": 69580 }, { "epoch": 0.28, "grad_norm": 2.8365273475646973, "learning_rate": 0.0002, "loss": 1.7625, "step": 69590 }, { "epoch": 0.28, "grad_norm": 1.8699617385864258, "learning_rate": 0.0002, "loss": 1.8528, "step": 69600 }, { "epoch": 0.28, "grad_norm": 2.44106125831604, "learning_rate": 0.0002, "loss": 1.3748, "step": 69610 }, { "epoch": 0.28, "grad_norm": 1.1173862218856812, "learning_rate": 0.0002, "loss": 1.4618, "step": 69620 }, { "epoch": 0.28, "grad_norm": 2.231724262237549, "learning_rate": 0.0002, "loss": 1.518, "step": 69630 }, { "epoch": 0.28, "grad_norm": 1.9407378435134888, "learning_rate": 0.0002, "loss": 1.4856, "step": 69640 }, { "epoch": 0.28, "grad_norm": 2.736311197280884, "learning_rate": 0.0002, "loss": 1.7297, "step": 69650 }, { "epoch": 0.28, "grad_norm": 3.1019394397735596, "learning_rate": 0.0002, "loss": 1.572, "step": 69660 }, { "epoch": 0.28, "grad_norm": 3.918574094772339, "learning_rate": 0.0002, "loss": 1.6511, "step": 69670 }, { "epoch": 0.28, "grad_norm": 3.8788609504699707, "learning_rate": 0.0002, "loss": 1.8005, "step": 69680 }, { "epoch": 0.28, "grad_norm": 3.0405092239379883, "learning_rate": 0.0002, "loss": 1.7675, "step": 69690 }, { "epoch": 0.28, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.4194, "step": 69700 }, { "epoch": 0.28, "grad_norm": 2.7639846801757812, "learning_rate": 0.0002, "loss": 1.5928, "step": 69710 }, { "epoch": 0.28, "grad_norm": 5.004994869232178, "learning_rate": 0.0002, "loss": 1.4654, "step": 69720 }, { "epoch": 0.28, "grad_norm": 4.080597877502441, "learning_rate": 0.0002, "loss": 1.5818, "step": 69730 }, { "epoch": 0.28, "grad_norm": 2.94486403465271, "learning_rate": 0.0002, "loss": 1.6018, "step": 69740 }, { "epoch": 0.28, "grad_norm": 2.333455801010132, "learning_rate": 0.0002, "loss": 1.4651, "step": 69750 }, { "epoch": 0.28, "grad_norm": 2.9259603023529053, "learning_rate": 0.0002, "loss": 1.5274, "step": 69760 }, { "epoch": 0.28, "grad_norm": 2.9479715824127197, "learning_rate": 0.0002, "loss": 1.5714, "step": 69770 }, { "epoch": 0.28, "grad_norm": 5.445416450500488, "learning_rate": 0.0002, "loss": 1.6146, "step": 69780 }, { "epoch": 0.28, "grad_norm": 2.8534202575683594, "learning_rate": 0.0002, "loss": 1.3908, "step": 69790 }, { "epoch": 0.28, "grad_norm": 3.7972850799560547, "learning_rate": 0.0002, "loss": 1.6518, "step": 69800 }, { "epoch": 0.28, "grad_norm": 3.010453224182129, "learning_rate": 0.0002, "loss": 1.3286, "step": 69810 }, { "epoch": 0.28, "grad_norm": 4.462553024291992, "learning_rate": 0.0002, "loss": 1.6103, "step": 69820 }, { "epoch": 0.28, "grad_norm": 3.5382723808288574, "learning_rate": 0.0002, "loss": 1.774, "step": 69830 }, { "epoch": 0.28, "grad_norm": 1.9496639966964722, "learning_rate": 0.0002, "loss": 1.6604, "step": 69840 }, { "epoch": 0.28, "grad_norm": 1.8032346963882446, "learning_rate": 0.0002, "loss": 1.4392, "step": 69850 }, { "epoch": 0.28, "grad_norm": 1.7209125757217407, "learning_rate": 0.0002, "loss": 1.6937, "step": 69860 }, { "epoch": 0.28, "grad_norm": 3.05755615234375, "learning_rate": 0.0002, "loss": 1.4807, "step": 69870 }, { "epoch": 0.28, "grad_norm": 4.886841773986816, "learning_rate": 0.0002, "loss": 1.785, "step": 69880 }, { "epoch": 0.28, "grad_norm": 3.297773599624634, "learning_rate": 0.0002, "loss": 1.3492, "step": 69890 }, { "epoch": 0.28, "grad_norm": 2.609867572784424, "learning_rate": 0.0002, "loss": 1.6277, "step": 69900 }, { "epoch": 0.28, "grad_norm": 2.573716163635254, "learning_rate": 0.0002, "loss": 1.3814, "step": 69910 }, { "epoch": 0.28, "grad_norm": 3.60123610496521, "learning_rate": 0.0002, "loss": 1.5839, "step": 69920 }, { "epoch": 0.28, "grad_norm": 2.460649251937866, "learning_rate": 0.0002, "loss": 1.5211, "step": 69930 }, { "epoch": 0.28, "grad_norm": 3.8016858100891113, "learning_rate": 0.0002, "loss": 1.4393, "step": 69940 }, { "epoch": 0.28, "grad_norm": 2.403744697570801, "learning_rate": 0.0002, "loss": 1.6448, "step": 69950 }, { "epoch": 0.28, "grad_norm": 2.351194381713867, "learning_rate": 0.0002, "loss": 1.4812, "step": 69960 }, { "epoch": 0.28, "grad_norm": 2.6788902282714844, "learning_rate": 0.0002, "loss": 1.5682, "step": 69970 }, { "epoch": 0.28, "grad_norm": 1.8279591798782349, "learning_rate": 0.0002, "loss": 1.2937, "step": 69980 }, { "epoch": 0.28, "grad_norm": 3.3961918354034424, "learning_rate": 0.0002, "loss": 1.4672, "step": 69990 }, { "epoch": 0.28, "grad_norm": 2.685248851776123, "learning_rate": 0.0002, "loss": 1.5083, "step": 70000 }, { "epoch": 0.29, "grad_norm": 2.1856932640075684, "learning_rate": 0.0002, "loss": 1.4447, "step": 70010 }, { "epoch": 0.29, "grad_norm": 5.124725818634033, "learning_rate": 0.0002, "loss": 1.5217, "step": 70020 }, { "epoch": 0.29, "grad_norm": 3.5725841522216797, "learning_rate": 0.0002, "loss": 1.7558, "step": 70030 }, { "epoch": 0.29, "grad_norm": 2.3015968799591064, "learning_rate": 0.0002, "loss": 1.5383, "step": 70040 }, { "epoch": 0.29, "grad_norm": 3.019097328186035, "learning_rate": 0.0002, "loss": 1.4703, "step": 70050 }, { "epoch": 0.29, "grad_norm": 2.5644967555999756, "learning_rate": 0.0002, "loss": 1.6238, "step": 70060 }, { "epoch": 0.29, "grad_norm": 2.8182332515716553, "learning_rate": 0.0002, "loss": 1.5512, "step": 70070 }, { "epoch": 0.29, "grad_norm": 3.429710865020752, "learning_rate": 0.0002, "loss": 1.3402, "step": 70080 }, { "epoch": 0.29, "grad_norm": 4.517882823944092, "learning_rate": 0.0002, "loss": 1.4426, "step": 70090 }, { "epoch": 0.29, "grad_norm": 2.2777040004730225, "learning_rate": 0.0002, "loss": 1.6052, "step": 70100 }, { "epoch": 0.29, "grad_norm": 2.617518424987793, "learning_rate": 0.0002, "loss": 1.6049, "step": 70110 }, { "epoch": 0.29, "grad_norm": 1.1290555000305176, "learning_rate": 0.0002, "loss": 1.645, "step": 70120 }, { "epoch": 0.29, "grad_norm": 2.8746607303619385, "learning_rate": 0.0002, "loss": 1.5701, "step": 70130 }, { "epoch": 0.29, "grad_norm": 2.8717257976531982, "learning_rate": 0.0002, "loss": 1.8539, "step": 70140 }, { "epoch": 0.29, "grad_norm": 2.2601399421691895, "learning_rate": 0.0002, "loss": 1.4636, "step": 70150 }, { "epoch": 0.29, "grad_norm": 2.388652801513672, "learning_rate": 0.0002, "loss": 1.7019, "step": 70160 }, { "epoch": 0.29, "grad_norm": 3.252215623855591, "learning_rate": 0.0002, "loss": 1.5567, "step": 70170 }, { "epoch": 0.29, "grad_norm": 4.242118835449219, "learning_rate": 0.0002, "loss": 1.5917, "step": 70180 }, { "epoch": 0.29, "grad_norm": 2.616832733154297, "learning_rate": 0.0002, "loss": 1.5001, "step": 70190 }, { "epoch": 0.29, "grad_norm": 3.1939306259155273, "learning_rate": 0.0002, "loss": 1.4094, "step": 70200 }, { "epoch": 0.29, "grad_norm": 2.198505401611328, "learning_rate": 0.0002, "loss": 1.6334, "step": 70210 }, { "epoch": 0.29, "grad_norm": 3.326580762863159, "learning_rate": 0.0002, "loss": 1.6288, "step": 70220 }, { "epoch": 0.29, "grad_norm": 2.072871208190918, "learning_rate": 0.0002, "loss": 1.4223, "step": 70230 }, { "epoch": 0.29, "grad_norm": 2.070228099822998, "learning_rate": 0.0002, "loss": 1.7882, "step": 70240 }, { "epoch": 0.29, "grad_norm": 2.827277660369873, "learning_rate": 0.0002, "loss": 1.5059, "step": 70250 }, { "epoch": 0.29, "grad_norm": 2.3233962059020996, "learning_rate": 0.0002, "loss": 1.7254, "step": 70260 }, { "epoch": 0.29, "grad_norm": 2.4373326301574707, "learning_rate": 0.0002, "loss": 1.5487, "step": 70270 }, { "epoch": 0.29, "grad_norm": 2.5071897506713867, "learning_rate": 0.0002, "loss": 1.4687, "step": 70280 }, { "epoch": 0.29, "grad_norm": 3.3315863609313965, "learning_rate": 0.0002, "loss": 1.7535, "step": 70290 }, { "epoch": 0.29, "grad_norm": 3.774963140487671, "learning_rate": 0.0002, "loss": 1.5532, "step": 70300 }, { "epoch": 0.29, "grad_norm": 3.466625452041626, "learning_rate": 0.0002, "loss": 1.3472, "step": 70310 }, { "epoch": 0.29, "grad_norm": 1.734036922454834, "learning_rate": 0.0002, "loss": 1.6486, "step": 70320 }, { "epoch": 0.29, "grad_norm": 4.824047088623047, "learning_rate": 0.0002, "loss": 1.735, "step": 70330 }, { "epoch": 0.29, "grad_norm": 2.0664823055267334, "learning_rate": 0.0002, "loss": 1.7267, "step": 70340 }, { "epoch": 0.29, "grad_norm": 3.2074527740478516, "learning_rate": 0.0002, "loss": 1.5678, "step": 70350 }, { "epoch": 0.29, "grad_norm": 2.3971266746520996, "learning_rate": 0.0002, "loss": 1.4165, "step": 70360 }, { "epoch": 0.29, "grad_norm": 7.830455780029297, "learning_rate": 0.0002, "loss": 1.5994, "step": 70370 }, { "epoch": 0.29, "grad_norm": 3.416457176208496, "learning_rate": 0.0002, "loss": 1.556, "step": 70380 }, { "epoch": 0.29, "grad_norm": 8.13358211517334, "learning_rate": 0.0002, "loss": 1.6704, "step": 70390 }, { "epoch": 0.29, "grad_norm": 3.707852840423584, "learning_rate": 0.0002, "loss": 1.4238, "step": 70400 }, { "epoch": 0.29, "grad_norm": 4.849029064178467, "learning_rate": 0.0002, "loss": 1.6284, "step": 70410 }, { "epoch": 0.29, "grad_norm": 3.555778741836548, "learning_rate": 0.0002, "loss": 1.445, "step": 70420 }, { "epoch": 0.29, "grad_norm": 6.473235607147217, "learning_rate": 0.0002, "loss": 1.6665, "step": 70430 }, { "epoch": 0.29, "grad_norm": 4.8142900466918945, "learning_rate": 0.0002, "loss": 1.4925, "step": 70440 }, { "epoch": 0.29, "grad_norm": 2.076070785522461, "learning_rate": 0.0002, "loss": 1.785, "step": 70450 }, { "epoch": 0.29, "grad_norm": 4.080598831176758, "learning_rate": 0.0002, "loss": 1.3572, "step": 70460 }, { "epoch": 0.29, "grad_norm": 1.6032021045684814, "learning_rate": 0.0002, "loss": 1.7654, "step": 70470 }, { "epoch": 0.29, "grad_norm": 2.32959246635437, "learning_rate": 0.0002, "loss": 1.5398, "step": 70480 }, { "epoch": 0.29, "grad_norm": 3.774437427520752, "learning_rate": 0.0002, "loss": 1.5881, "step": 70490 }, { "epoch": 0.29, "grad_norm": 1.593152642250061, "learning_rate": 0.0002, "loss": 1.7721, "step": 70500 }, { "epoch": 0.29, "grad_norm": 5.782285690307617, "learning_rate": 0.0002, "loss": 1.5534, "step": 70510 }, { "epoch": 0.29, "grad_norm": 2.3231749534606934, "learning_rate": 0.0002, "loss": 1.6223, "step": 70520 }, { "epoch": 0.29, "grad_norm": 2.9733753204345703, "learning_rate": 0.0002, "loss": 1.8178, "step": 70530 }, { "epoch": 0.29, "grad_norm": 5.5623650550842285, "learning_rate": 0.0002, "loss": 1.458, "step": 70540 }, { "epoch": 0.29, "grad_norm": 2.584956407546997, "learning_rate": 0.0002, "loss": 1.4183, "step": 70550 }, { "epoch": 0.29, "grad_norm": 3.0882368087768555, "learning_rate": 0.0002, "loss": 1.5444, "step": 70560 }, { "epoch": 0.29, "grad_norm": 2.9889557361602783, "learning_rate": 0.0002, "loss": 1.5974, "step": 70570 }, { "epoch": 0.29, "grad_norm": 1.5325591564178467, "learning_rate": 0.0002, "loss": 1.6886, "step": 70580 }, { "epoch": 0.29, "grad_norm": 2.7416210174560547, "learning_rate": 0.0002, "loss": 1.6862, "step": 70590 }, { "epoch": 0.29, "grad_norm": 1.5132734775543213, "learning_rate": 0.0002, "loss": 1.5938, "step": 70600 }, { "epoch": 0.29, "grad_norm": 2.9838383197784424, "learning_rate": 0.0002, "loss": 1.75, "step": 70610 }, { "epoch": 0.29, "grad_norm": 2.793883800506592, "learning_rate": 0.0002, "loss": 1.7905, "step": 70620 }, { "epoch": 0.29, "grad_norm": 2.4923555850982666, "learning_rate": 0.0002, "loss": 1.5635, "step": 70630 }, { "epoch": 0.29, "grad_norm": 5.018949031829834, "learning_rate": 0.0002, "loss": 1.7469, "step": 70640 }, { "epoch": 0.29, "grad_norm": 2.0351309776306152, "learning_rate": 0.0002, "loss": 1.5101, "step": 70650 }, { "epoch": 0.29, "grad_norm": 3.372972249984741, "learning_rate": 0.0002, "loss": 1.5795, "step": 70660 }, { "epoch": 0.29, "grad_norm": 8.768430709838867, "learning_rate": 0.0002, "loss": 1.5228, "step": 70670 }, { "epoch": 0.29, "grad_norm": 2.9584474563598633, "learning_rate": 0.0002, "loss": 1.7446, "step": 70680 }, { "epoch": 0.29, "grad_norm": 3.5819568634033203, "learning_rate": 0.0002, "loss": 1.3635, "step": 70690 }, { "epoch": 0.29, "grad_norm": 3.893738031387329, "learning_rate": 0.0002, "loss": 1.6938, "step": 70700 }, { "epoch": 0.29, "grad_norm": 3.1393983364105225, "learning_rate": 0.0002, "loss": 1.563, "step": 70710 }, { "epoch": 0.29, "grad_norm": 3.6116943359375, "learning_rate": 0.0002, "loss": 1.4796, "step": 70720 }, { "epoch": 0.29, "grad_norm": 2.3568434715270996, "learning_rate": 0.0002, "loss": 1.8078, "step": 70730 }, { "epoch": 0.29, "grad_norm": 2.9541375637054443, "learning_rate": 0.0002, "loss": 1.4418, "step": 70740 }, { "epoch": 0.29, "grad_norm": 4.160953521728516, "learning_rate": 0.0002, "loss": 1.4113, "step": 70750 }, { "epoch": 0.29, "grad_norm": 3.2509796619415283, "learning_rate": 0.0002, "loss": 1.4846, "step": 70760 }, { "epoch": 0.29, "grad_norm": 5.08464241027832, "learning_rate": 0.0002, "loss": 1.6212, "step": 70770 }, { "epoch": 0.29, "grad_norm": 4.8816657066345215, "learning_rate": 0.0002, "loss": 1.5804, "step": 70780 }, { "epoch": 0.29, "grad_norm": 5.02716064453125, "learning_rate": 0.0002, "loss": 1.6823, "step": 70790 }, { "epoch": 0.29, "grad_norm": 2.6141746044158936, "learning_rate": 0.0002, "loss": 1.7225, "step": 70800 }, { "epoch": 0.29, "grad_norm": 4.009904861450195, "learning_rate": 0.0002, "loss": 1.6084, "step": 70810 }, { "epoch": 0.29, "grad_norm": 4.342108249664307, "learning_rate": 0.0002, "loss": 1.7886, "step": 70820 }, { "epoch": 0.29, "grad_norm": 1.685933232307434, "learning_rate": 0.0002, "loss": 1.5896, "step": 70830 }, { "epoch": 0.29, "grad_norm": 3.098151206970215, "learning_rate": 0.0002, "loss": 1.4307, "step": 70840 }, { "epoch": 0.29, "grad_norm": 3.2764601707458496, "learning_rate": 0.0002, "loss": 1.4776, "step": 70850 }, { "epoch": 0.29, "grad_norm": 9.97590446472168, "learning_rate": 0.0002, "loss": 1.7288, "step": 70860 }, { "epoch": 0.29, "grad_norm": 5.326457500457764, "learning_rate": 0.0002, "loss": 1.4222, "step": 70870 }, { "epoch": 0.29, "grad_norm": 3.1710546016693115, "learning_rate": 0.0002, "loss": 1.5351, "step": 70880 }, { "epoch": 0.29, "grad_norm": 2.153041124343872, "learning_rate": 0.0002, "loss": 1.6017, "step": 70890 }, { "epoch": 0.29, "grad_norm": 2.56404185295105, "learning_rate": 0.0002, "loss": 1.3734, "step": 70900 }, { "epoch": 0.29, "grad_norm": 8.508578300476074, "learning_rate": 0.0002, "loss": 1.7316, "step": 70910 }, { "epoch": 0.29, "grad_norm": 3.093573570251465, "learning_rate": 0.0002, "loss": 1.6802, "step": 70920 }, { "epoch": 0.29, "grad_norm": 2.658935546875, "learning_rate": 0.0002, "loss": 1.5822, "step": 70930 }, { "epoch": 0.29, "grad_norm": 3.1174263954162598, "learning_rate": 0.0002, "loss": 1.307, "step": 70940 }, { "epoch": 0.29, "grad_norm": 2.99316143989563, "learning_rate": 0.0002, "loss": 1.641, "step": 70950 }, { "epoch": 0.29, "grad_norm": 2.478912115097046, "learning_rate": 0.0002, "loss": 1.29, "step": 70960 }, { "epoch": 0.29, "grad_norm": 2.35866379737854, "learning_rate": 0.0002, "loss": 1.4694, "step": 70970 }, { "epoch": 0.29, "grad_norm": 2.707488536834717, "learning_rate": 0.0002, "loss": 1.5859, "step": 70980 }, { "epoch": 0.29, "grad_norm": 3.3884341716766357, "learning_rate": 0.0002, "loss": 1.8943, "step": 70990 }, { "epoch": 0.29, "grad_norm": 1.7574822902679443, "learning_rate": 0.0002, "loss": 1.6001, "step": 71000 }, { "epoch": 0.29, "grad_norm": 3.1350514888763428, "learning_rate": 0.0002, "loss": 1.6809, "step": 71010 }, { "epoch": 0.29, "grad_norm": 2.920494318008423, "learning_rate": 0.0002, "loss": 1.4813, "step": 71020 }, { "epoch": 0.29, "grad_norm": 3.4602673053741455, "learning_rate": 0.0002, "loss": 1.5416, "step": 71030 }, { "epoch": 0.29, "grad_norm": 3.5700957775115967, "learning_rate": 0.0002, "loss": 1.7148, "step": 71040 }, { "epoch": 0.29, "grad_norm": 2.218773365020752, "learning_rate": 0.0002, "loss": 1.5809, "step": 71050 }, { "epoch": 0.29, "grad_norm": 2.869035482406616, "learning_rate": 0.0002, "loss": 1.5461, "step": 71060 }, { "epoch": 0.29, "grad_norm": 2.4143998622894287, "learning_rate": 0.0002, "loss": 1.6696, "step": 71070 }, { "epoch": 0.29, "grad_norm": 3.3191959857940674, "learning_rate": 0.0002, "loss": 1.6039, "step": 71080 }, { "epoch": 0.29, "grad_norm": 3.0453696250915527, "learning_rate": 0.0002, "loss": 1.6351, "step": 71090 }, { "epoch": 0.29, "grad_norm": 2.547473669052124, "learning_rate": 0.0002, "loss": 1.694, "step": 71100 }, { "epoch": 0.29, "grad_norm": 3.0861105918884277, "learning_rate": 0.0002, "loss": 1.7985, "step": 71110 }, { "epoch": 0.29, "grad_norm": 3.011765718460083, "learning_rate": 0.0002, "loss": 1.4969, "step": 71120 }, { "epoch": 0.29, "grad_norm": 3.5839991569519043, "learning_rate": 0.0002, "loss": 1.5569, "step": 71130 }, { "epoch": 0.29, "grad_norm": 3.190246343612671, "learning_rate": 0.0002, "loss": 1.6239, "step": 71140 }, { "epoch": 0.29, "grad_norm": 4.636346817016602, "learning_rate": 0.0002, "loss": 1.8005, "step": 71150 }, { "epoch": 0.29, "grad_norm": 2.57833194732666, "learning_rate": 0.0002, "loss": 1.6304, "step": 71160 }, { "epoch": 0.29, "grad_norm": 2.5823943614959717, "learning_rate": 0.0002, "loss": 1.4445, "step": 71170 }, { "epoch": 0.29, "grad_norm": 3.861086130142212, "learning_rate": 0.0002, "loss": 1.8684, "step": 71180 }, { "epoch": 0.29, "grad_norm": 2.891153335571289, "learning_rate": 0.0002, "loss": 1.8365, "step": 71190 }, { "epoch": 0.29, "grad_norm": 9.688650131225586, "learning_rate": 0.0002, "loss": 1.4942, "step": 71200 }, { "epoch": 0.29, "grad_norm": 3.31793475151062, "learning_rate": 0.0002, "loss": 1.4092, "step": 71210 }, { "epoch": 0.29, "grad_norm": 3.50569486618042, "learning_rate": 0.0002, "loss": 1.659, "step": 71220 }, { "epoch": 0.29, "grad_norm": 6.005074501037598, "learning_rate": 0.0002, "loss": 1.4941, "step": 71230 }, { "epoch": 0.29, "grad_norm": 4.776493072509766, "learning_rate": 0.0002, "loss": 1.6676, "step": 71240 }, { "epoch": 0.29, "grad_norm": 3.7635788917541504, "learning_rate": 0.0002, "loss": 1.4811, "step": 71250 }, { "epoch": 0.29, "grad_norm": 2.030545473098755, "learning_rate": 0.0002, "loss": 1.6458, "step": 71260 }, { "epoch": 0.29, "grad_norm": 2.123655319213867, "learning_rate": 0.0002, "loss": 1.4385, "step": 71270 }, { "epoch": 0.29, "grad_norm": 3.2644762992858887, "learning_rate": 0.0002, "loss": 1.6973, "step": 71280 }, { "epoch": 0.29, "grad_norm": 3.2300162315368652, "learning_rate": 0.0002, "loss": 1.6244, "step": 71290 }, { "epoch": 0.29, "grad_norm": 2.813969612121582, "learning_rate": 0.0002, "loss": 1.5423, "step": 71300 }, { "epoch": 0.29, "grad_norm": 5.502201557159424, "learning_rate": 0.0002, "loss": 1.4623, "step": 71310 }, { "epoch": 0.29, "grad_norm": 3.17699933052063, "learning_rate": 0.0002, "loss": 1.5553, "step": 71320 }, { "epoch": 0.29, "grad_norm": 6.280470848083496, "learning_rate": 0.0002, "loss": 1.6455, "step": 71330 }, { "epoch": 0.29, "grad_norm": 2.2347846031188965, "learning_rate": 0.0002, "loss": 1.7131, "step": 71340 }, { "epoch": 0.29, "grad_norm": 3.5127768516540527, "learning_rate": 0.0002, "loss": 1.7291, "step": 71350 }, { "epoch": 0.29, "grad_norm": 3.6017045974731445, "learning_rate": 0.0002, "loss": 1.4711, "step": 71360 }, { "epoch": 0.29, "grad_norm": 2.6691901683807373, "learning_rate": 0.0002, "loss": 1.475, "step": 71370 }, { "epoch": 0.29, "grad_norm": 3.3484110832214355, "learning_rate": 0.0002, "loss": 1.4878, "step": 71380 }, { "epoch": 0.29, "grad_norm": 2.196164846420288, "learning_rate": 0.0002, "loss": 1.895, "step": 71390 }, { "epoch": 0.29, "grad_norm": 3.1190812587738037, "learning_rate": 0.0002, "loss": 1.3497, "step": 71400 }, { "epoch": 0.29, "grad_norm": 1.6445292234420776, "learning_rate": 0.0002, "loss": 1.4758, "step": 71410 }, { "epoch": 0.29, "grad_norm": 3.3253531455993652, "learning_rate": 0.0002, "loss": 1.5617, "step": 71420 }, { "epoch": 0.29, "grad_norm": 3.6354804039001465, "learning_rate": 0.0002, "loss": 1.7202, "step": 71430 }, { "epoch": 0.29, "grad_norm": 2.551908254623413, "learning_rate": 0.0002, "loss": 1.6491, "step": 71440 }, { "epoch": 0.29, "grad_norm": 2.5667521953582764, "learning_rate": 0.0002, "loss": 1.6317, "step": 71450 }, { "epoch": 0.29, "grad_norm": 2.2371938228607178, "learning_rate": 0.0002, "loss": 1.5952, "step": 71460 }, { "epoch": 0.29, "grad_norm": 3.868208885192871, "learning_rate": 0.0002, "loss": 1.4231, "step": 71470 }, { "epoch": 0.29, "grad_norm": 6.411454677581787, "learning_rate": 0.0002, "loss": 1.4614, "step": 71480 }, { "epoch": 0.29, "grad_norm": 2.795851707458496, "learning_rate": 0.0002, "loss": 1.7045, "step": 71490 }, { "epoch": 0.29, "grad_norm": 2.5273172855377197, "learning_rate": 0.0002, "loss": 1.9456, "step": 71500 }, { "epoch": 0.29, "grad_norm": 3.515766143798828, "learning_rate": 0.0002, "loss": 1.6505, "step": 71510 }, { "epoch": 0.29, "grad_norm": 1.6319727897644043, "learning_rate": 0.0002, "loss": 1.7349, "step": 71520 }, { "epoch": 0.29, "grad_norm": 3.910428285598755, "learning_rate": 0.0002, "loss": 1.7, "step": 71530 }, { "epoch": 0.29, "grad_norm": 2.014413833618164, "learning_rate": 0.0002, "loss": 1.5169, "step": 71540 }, { "epoch": 0.29, "grad_norm": 1.8086400032043457, "learning_rate": 0.0002, "loss": 1.5498, "step": 71550 }, { "epoch": 0.29, "grad_norm": 2.9648146629333496, "learning_rate": 0.0002, "loss": 1.605, "step": 71560 }, { "epoch": 0.29, "grad_norm": 2.127490758895874, "learning_rate": 0.0002, "loss": 1.5414, "step": 71570 }, { "epoch": 0.29, "grad_norm": 2.174873113632202, "learning_rate": 0.0002, "loss": 1.5223, "step": 71580 }, { "epoch": 0.29, "grad_norm": 2.556199312210083, "learning_rate": 0.0002, "loss": 1.679, "step": 71590 }, { "epoch": 0.29, "grad_norm": 4.232773780822754, "learning_rate": 0.0002, "loss": 1.7144, "step": 71600 }, { "epoch": 0.29, "grad_norm": 3.594045639038086, "learning_rate": 0.0002, "loss": 1.8226, "step": 71610 }, { "epoch": 0.29, "grad_norm": 2.424145221710205, "learning_rate": 0.0002, "loss": 1.6289, "step": 71620 }, { "epoch": 0.29, "grad_norm": 2.394366502761841, "learning_rate": 0.0002, "loss": 1.5226, "step": 71630 }, { "epoch": 0.29, "grad_norm": 2.5495731830596924, "learning_rate": 0.0002, "loss": 1.5255, "step": 71640 }, { "epoch": 0.29, "grad_norm": 2.259629249572754, "learning_rate": 0.0002, "loss": 1.5004, "step": 71650 }, { "epoch": 0.29, "grad_norm": 3.0151498317718506, "learning_rate": 0.0002, "loss": 1.5152, "step": 71660 }, { "epoch": 0.29, "grad_norm": 3.8953020572662354, "learning_rate": 0.0002, "loss": 1.4978, "step": 71670 }, { "epoch": 0.29, "grad_norm": 2.2988274097442627, "learning_rate": 0.0002, "loss": 1.3668, "step": 71680 }, { "epoch": 0.29, "grad_norm": 5.013329982757568, "learning_rate": 0.0002, "loss": 1.5918, "step": 71690 }, { "epoch": 0.29, "grad_norm": 2.019015073776245, "learning_rate": 0.0002, "loss": 1.6872, "step": 71700 }, { "epoch": 0.29, "grad_norm": 1.8250679969787598, "learning_rate": 0.0002, "loss": 1.591, "step": 71710 }, { "epoch": 0.29, "grad_norm": 2.7854931354522705, "learning_rate": 0.0002, "loss": 1.5072, "step": 71720 }, { "epoch": 0.29, "grad_norm": 2.361025810241699, "learning_rate": 0.0002, "loss": 1.8488, "step": 71730 }, { "epoch": 0.29, "grad_norm": 5.559101104736328, "learning_rate": 0.0002, "loss": 1.7127, "step": 71740 }, { "epoch": 0.29, "grad_norm": 2.838230609893799, "learning_rate": 0.0002, "loss": 1.6981, "step": 71750 }, { "epoch": 0.29, "grad_norm": 2.775139093399048, "learning_rate": 0.0002, "loss": 1.9024, "step": 71760 }, { "epoch": 0.29, "grad_norm": 2.750924825668335, "learning_rate": 0.0002, "loss": 1.6267, "step": 71770 }, { "epoch": 0.29, "grad_norm": 2.5946083068847656, "learning_rate": 0.0002, "loss": 1.562, "step": 71780 }, { "epoch": 0.29, "grad_norm": 2.8679819107055664, "learning_rate": 0.0002, "loss": 1.4624, "step": 71790 }, { "epoch": 0.29, "grad_norm": 4.465829372406006, "learning_rate": 0.0002, "loss": 1.6586, "step": 71800 }, { "epoch": 0.29, "grad_norm": 2.7434332370758057, "learning_rate": 0.0002, "loss": 1.424, "step": 71810 }, { "epoch": 0.29, "grad_norm": 3.9866979122161865, "learning_rate": 0.0002, "loss": 1.6562, "step": 71820 }, { "epoch": 0.29, "grad_norm": 2.0910074710845947, "learning_rate": 0.0002, "loss": 1.5334, "step": 71830 }, { "epoch": 0.29, "grad_norm": 2.5862576961517334, "learning_rate": 0.0002, "loss": 1.7107, "step": 71840 }, { "epoch": 0.29, "grad_norm": 3.9358816146850586, "learning_rate": 0.0002, "loss": 1.5359, "step": 71850 }, { "epoch": 0.29, "grad_norm": 2.300452470779419, "learning_rate": 0.0002, "loss": 1.4586, "step": 71860 }, { "epoch": 0.29, "grad_norm": 1.5422029495239258, "learning_rate": 0.0002, "loss": 1.6376, "step": 71870 }, { "epoch": 0.29, "grad_norm": 3.6514151096343994, "learning_rate": 0.0002, "loss": 1.626, "step": 71880 }, { "epoch": 0.29, "grad_norm": 3.149372100830078, "learning_rate": 0.0002, "loss": 1.3615, "step": 71890 }, { "epoch": 0.29, "grad_norm": 1.7284462451934814, "learning_rate": 0.0002, "loss": 1.5877, "step": 71900 }, { "epoch": 0.29, "grad_norm": 5.3501482009887695, "learning_rate": 0.0002, "loss": 1.6636, "step": 71910 }, { "epoch": 0.29, "grad_norm": 2.4698681831359863, "learning_rate": 0.0002, "loss": 1.551, "step": 71920 }, { "epoch": 0.29, "grad_norm": 3.558263063430786, "learning_rate": 0.0002, "loss": 1.4078, "step": 71930 }, { "epoch": 0.29, "grad_norm": 3.615831136703491, "learning_rate": 0.0002, "loss": 1.4138, "step": 71940 }, { "epoch": 0.29, "grad_norm": 2.340155601501465, "learning_rate": 0.0002, "loss": 1.6296, "step": 71950 }, { "epoch": 0.29, "grad_norm": 4.13592004776001, "learning_rate": 0.0002, "loss": 1.5914, "step": 71960 }, { "epoch": 0.29, "grad_norm": 3.527449369430542, "learning_rate": 0.0002, "loss": 1.4393, "step": 71970 }, { "epoch": 0.29, "grad_norm": 4.340115547180176, "learning_rate": 0.0002, "loss": 1.4554, "step": 71980 }, { "epoch": 0.29, "grad_norm": 3.485819101333618, "learning_rate": 0.0002, "loss": 1.4552, "step": 71990 }, { "epoch": 0.29, "grad_norm": 3.9417288303375244, "learning_rate": 0.0002, "loss": 1.4687, "step": 72000 }, { "epoch": 0.29, "grad_norm": 4.0184502601623535, "learning_rate": 0.0002, "loss": 1.4284, "step": 72010 }, { "epoch": 0.29, "grad_norm": 2.0012168884277344, "learning_rate": 0.0002, "loss": 1.5334, "step": 72020 }, { "epoch": 0.29, "grad_norm": 2.892256736755371, "learning_rate": 0.0002, "loss": 1.3462, "step": 72030 }, { "epoch": 0.29, "grad_norm": 14.545550346374512, "learning_rate": 0.0002, "loss": 1.5292, "step": 72040 }, { "epoch": 0.29, "grad_norm": 2.558382749557495, "learning_rate": 0.0002, "loss": 1.5043, "step": 72050 }, { "epoch": 0.29, "grad_norm": 3.211981773376465, "learning_rate": 0.0002, "loss": 1.5187, "step": 72060 }, { "epoch": 0.29, "grad_norm": 3.170754909515381, "learning_rate": 0.0002, "loss": 1.6124, "step": 72070 }, { "epoch": 0.29, "grad_norm": 3.3288445472717285, "learning_rate": 0.0002, "loss": 1.5802, "step": 72080 }, { "epoch": 0.29, "grad_norm": 3.113337993621826, "learning_rate": 0.0002, "loss": 1.5252, "step": 72090 }, { "epoch": 0.29, "grad_norm": 3.626422882080078, "learning_rate": 0.0002, "loss": 1.4355, "step": 72100 }, { "epoch": 0.29, "grad_norm": 3.334606409072876, "learning_rate": 0.0002, "loss": 1.653, "step": 72110 }, { "epoch": 0.29, "grad_norm": 3.2895538806915283, "learning_rate": 0.0002, "loss": 1.4928, "step": 72120 }, { "epoch": 0.29, "grad_norm": 2.65061092376709, "learning_rate": 0.0002, "loss": 1.8277, "step": 72130 }, { "epoch": 0.29, "grad_norm": 2.628563165664673, "learning_rate": 0.0002, "loss": 1.6498, "step": 72140 }, { "epoch": 0.29, "grad_norm": 3.569000005722046, "learning_rate": 0.0002, "loss": 1.3705, "step": 72150 }, { "epoch": 0.29, "grad_norm": 3.35032057762146, "learning_rate": 0.0002, "loss": 1.5427, "step": 72160 }, { "epoch": 0.29, "grad_norm": 3.0424838066101074, "learning_rate": 0.0002, "loss": 1.7807, "step": 72170 }, { "epoch": 0.29, "grad_norm": 2.5089187622070312, "learning_rate": 0.0002, "loss": 1.3837, "step": 72180 }, { "epoch": 0.29, "grad_norm": 1.8638324737548828, "learning_rate": 0.0002, "loss": 1.6163, "step": 72190 }, { "epoch": 0.29, "grad_norm": 3.3725593090057373, "learning_rate": 0.0002, "loss": 1.7744, "step": 72200 }, { "epoch": 0.29, "grad_norm": 4.355192184448242, "learning_rate": 0.0002, "loss": 1.7473, "step": 72210 }, { "epoch": 0.29, "grad_norm": 3.319453001022339, "learning_rate": 0.0002, "loss": 1.5651, "step": 72220 }, { "epoch": 0.29, "grad_norm": 4.2005767822265625, "learning_rate": 0.0002, "loss": 1.5111, "step": 72230 }, { "epoch": 0.29, "grad_norm": 4.5024309158325195, "learning_rate": 0.0002, "loss": 1.6389, "step": 72240 }, { "epoch": 0.29, "grad_norm": 3.022761106491089, "learning_rate": 0.0002, "loss": 1.4472, "step": 72250 }, { "epoch": 0.29, "grad_norm": 2.915191411972046, "learning_rate": 0.0002, "loss": 1.7793, "step": 72260 }, { "epoch": 0.29, "grad_norm": 2.9458892345428467, "learning_rate": 0.0002, "loss": 1.4466, "step": 72270 }, { "epoch": 0.29, "grad_norm": 3.192685127258301, "learning_rate": 0.0002, "loss": 1.504, "step": 72280 }, { "epoch": 0.29, "grad_norm": 2.7191321849823, "learning_rate": 0.0002, "loss": 1.626, "step": 72290 }, { "epoch": 0.29, "grad_norm": 2.544221878051758, "learning_rate": 0.0002, "loss": 1.5445, "step": 72300 }, { "epoch": 0.29, "grad_norm": 2.6907637119293213, "learning_rate": 0.0002, "loss": 1.698, "step": 72310 }, { "epoch": 0.29, "grad_norm": 4.800714492797852, "learning_rate": 0.0002, "loss": 1.6967, "step": 72320 }, { "epoch": 0.29, "grad_norm": 2.48358154296875, "learning_rate": 0.0002, "loss": 1.5109, "step": 72330 }, { "epoch": 0.29, "grad_norm": 2.4480154514312744, "learning_rate": 0.0002, "loss": 1.6137, "step": 72340 }, { "epoch": 0.29, "grad_norm": 2.944852828979492, "learning_rate": 0.0002, "loss": 1.634, "step": 72350 }, { "epoch": 0.29, "grad_norm": 1.9203342199325562, "learning_rate": 0.0002, "loss": 1.5415, "step": 72360 }, { "epoch": 0.29, "grad_norm": 2.609208583831787, "learning_rate": 0.0002, "loss": 1.5944, "step": 72370 }, { "epoch": 0.29, "grad_norm": 2.695880889892578, "learning_rate": 0.0002, "loss": 1.4624, "step": 72380 }, { "epoch": 0.29, "grad_norm": 2.485690116882324, "learning_rate": 0.0002, "loss": 1.4475, "step": 72390 }, { "epoch": 0.29, "grad_norm": 3.1561667919158936, "learning_rate": 0.0002, "loss": 1.6925, "step": 72400 }, { "epoch": 0.29, "grad_norm": 2.1260077953338623, "learning_rate": 0.0002, "loss": 1.7047, "step": 72410 }, { "epoch": 0.29, "grad_norm": 3.128263473510742, "learning_rate": 0.0002, "loss": 1.715, "step": 72420 }, { "epoch": 0.29, "grad_norm": 3.4999520778656006, "learning_rate": 0.0002, "loss": 1.4703, "step": 72430 }, { "epoch": 0.29, "grad_norm": 2.823911666870117, "learning_rate": 0.0002, "loss": 1.6372, "step": 72440 }, { "epoch": 0.29, "grad_norm": 2.9357359409332275, "learning_rate": 0.0002, "loss": 1.4038, "step": 72450 }, { "epoch": 0.29, "grad_norm": 5.280838489532471, "learning_rate": 0.0002, "loss": 1.5698, "step": 72460 }, { "epoch": 0.3, "grad_norm": 3.3047916889190674, "learning_rate": 0.0002, "loss": 1.7328, "step": 72470 }, { "epoch": 0.3, "grad_norm": 4.507194995880127, "learning_rate": 0.0002, "loss": 1.6654, "step": 72480 }, { "epoch": 0.3, "grad_norm": 4.842397212982178, "learning_rate": 0.0002, "loss": 1.6514, "step": 72490 }, { "epoch": 0.3, "grad_norm": 2.1987709999084473, "learning_rate": 0.0002, "loss": 1.6566, "step": 72500 }, { "epoch": 0.3, "grad_norm": 5.634579181671143, "learning_rate": 0.0002, "loss": 1.5844, "step": 72510 }, { "epoch": 0.3, "grad_norm": 3.4099924564361572, "learning_rate": 0.0002, "loss": 1.4056, "step": 72520 }, { "epoch": 0.3, "grad_norm": 4.312732696533203, "learning_rate": 0.0002, "loss": 1.4879, "step": 72530 }, { "epoch": 0.3, "grad_norm": 4.953462600708008, "learning_rate": 0.0002, "loss": 1.6077, "step": 72540 }, { "epoch": 0.3, "grad_norm": 5.076373100280762, "learning_rate": 0.0002, "loss": 1.5203, "step": 72550 }, { "epoch": 0.3, "grad_norm": 2.5582144260406494, "learning_rate": 0.0002, "loss": 1.4899, "step": 72560 }, { "epoch": 0.3, "grad_norm": 1.6688952445983887, "learning_rate": 0.0002, "loss": 1.4661, "step": 72570 }, { "epoch": 0.3, "grad_norm": 3.7975239753723145, "learning_rate": 0.0002, "loss": 1.4284, "step": 72580 }, { "epoch": 0.3, "grad_norm": 2.9551331996917725, "learning_rate": 0.0002, "loss": 1.4861, "step": 72590 }, { "epoch": 0.3, "grad_norm": 2.838463544845581, "learning_rate": 0.0002, "loss": 1.3913, "step": 72600 }, { "epoch": 0.3, "grad_norm": 2.815019130706787, "learning_rate": 0.0002, "loss": 1.6865, "step": 72610 }, { "epoch": 0.3, "grad_norm": 2.7002909183502197, "learning_rate": 0.0002, "loss": 1.3398, "step": 72620 }, { "epoch": 0.3, "grad_norm": 2.526909589767456, "learning_rate": 0.0002, "loss": 1.5817, "step": 72630 }, { "epoch": 0.3, "grad_norm": 3.5534508228302, "learning_rate": 0.0002, "loss": 1.6688, "step": 72640 }, { "epoch": 0.3, "grad_norm": 2.3121559619903564, "learning_rate": 0.0002, "loss": 1.7589, "step": 72650 }, { "epoch": 0.3, "grad_norm": 3.3296265602111816, "learning_rate": 0.0002, "loss": 1.6525, "step": 72660 }, { "epoch": 0.3, "grad_norm": 2.2776072025299072, "learning_rate": 0.0002, "loss": 1.6971, "step": 72670 }, { "epoch": 0.3, "grad_norm": 3.709907054901123, "learning_rate": 0.0002, "loss": 1.5939, "step": 72680 }, { "epoch": 0.3, "grad_norm": 1.9202589988708496, "learning_rate": 0.0002, "loss": 1.7186, "step": 72690 }, { "epoch": 0.3, "grad_norm": 4.914927959442139, "learning_rate": 0.0002, "loss": 1.4434, "step": 72700 }, { "epoch": 0.3, "grad_norm": 3.2542574405670166, "learning_rate": 0.0002, "loss": 1.438, "step": 72710 }, { "epoch": 0.3, "grad_norm": 3.53938889503479, "learning_rate": 0.0002, "loss": 1.8229, "step": 72720 }, { "epoch": 0.3, "grad_norm": 5.196858882904053, "learning_rate": 0.0002, "loss": 1.5049, "step": 72730 }, { "epoch": 0.3, "grad_norm": 3.047555685043335, "learning_rate": 0.0002, "loss": 1.857, "step": 72740 }, { "epoch": 0.3, "grad_norm": 2.7818078994750977, "learning_rate": 0.0002, "loss": 1.6618, "step": 72750 }, { "epoch": 0.3, "grad_norm": 2.919348955154419, "learning_rate": 0.0002, "loss": 1.6428, "step": 72760 }, { "epoch": 0.3, "grad_norm": 5.553791046142578, "learning_rate": 0.0002, "loss": 1.7102, "step": 72770 }, { "epoch": 0.3, "grad_norm": 3.7634119987487793, "learning_rate": 0.0002, "loss": 1.5638, "step": 72780 }, { "epoch": 0.3, "grad_norm": 2.574930191040039, "learning_rate": 0.0002, "loss": 1.769, "step": 72790 }, { "epoch": 0.3, "grad_norm": 1.8038958311080933, "learning_rate": 0.0002, "loss": 1.4442, "step": 72800 }, { "epoch": 0.3, "grad_norm": 3.2924132347106934, "learning_rate": 0.0002, "loss": 1.5286, "step": 72810 }, { "epoch": 0.3, "grad_norm": 2.2235147953033447, "learning_rate": 0.0002, "loss": 1.5632, "step": 72820 }, { "epoch": 0.3, "grad_norm": 2.104949474334717, "learning_rate": 0.0002, "loss": 1.6133, "step": 72830 }, { "epoch": 0.3, "grad_norm": 2.999375581741333, "learning_rate": 0.0002, "loss": 1.4404, "step": 72840 }, { "epoch": 0.3, "grad_norm": 4.285690784454346, "learning_rate": 0.0002, "loss": 1.6376, "step": 72850 }, { "epoch": 0.3, "grad_norm": 3.8266310691833496, "learning_rate": 0.0002, "loss": 1.4409, "step": 72860 }, { "epoch": 0.3, "grad_norm": 2.1942965984344482, "learning_rate": 0.0002, "loss": 1.5748, "step": 72870 }, { "epoch": 0.3, "grad_norm": 6.933484077453613, "learning_rate": 0.0002, "loss": 1.5434, "step": 72880 }, { "epoch": 0.3, "grad_norm": 3.2609052658081055, "learning_rate": 0.0002, "loss": 1.7889, "step": 72890 }, { "epoch": 0.3, "grad_norm": 3.6103174686431885, "learning_rate": 0.0002, "loss": 1.6255, "step": 72900 }, { "epoch": 0.3, "grad_norm": 4.290640354156494, "learning_rate": 0.0002, "loss": 1.5718, "step": 72910 }, { "epoch": 0.3, "grad_norm": 1.5621665716171265, "learning_rate": 0.0002, "loss": 1.395, "step": 72920 }, { "epoch": 0.3, "grad_norm": 2.119062662124634, "learning_rate": 0.0002, "loss": 1.4712, "step": 72930 }, { "epoch": 0.3, "grad_norm": 2.921745777130127, "learning_rate": 0.0002, "loss": 1.7829, "step": 72940 }, { "epoch": 0.3, "grad_norm": 4.001779079437256, "learning_rate": 0.0002, "loss": 1.2607, "step": 72950 }, { "epoch": 0.3, "grad_norm": 3.2499032020568848, "learning_rate": 0.0002, "loss": 1.8909, "step": 72960 }, { "epoch": 0.3, "grad_norm": 3.293131113052368, "learning_rate": 0.0002, "loss": 1.6431, "step": 72970 }, { "epoch": 0.3, "grad_norm": 2.9678845405578613, "learning_rate": 0.0002, "loss": 1.5893, "step": 72980 }, { "epoch": 0.3, "grad_norm": 2.200204610824585, "learning_rate": 0.0002, "loss": 1.3559, "step": 72990 }, { "epoch": 0.3, "grad_norm": 3.938209056854248, "learning_rate": 0.0002, "loss": 1.4452, "step": 73000 }, { "epoch": 0.3, "grad_norm": 2.465193510055542, "learning_rate": 0.0002, "loss": 1.3487, "step": 73010 }, { "epoch": 0.3, "grad_norm": 2.4740207195281982, "learning_rate": 0.0002, "loss": 1.5683, "step": 73020 }, { "epoch": 0.3, "grad_norm": 2.760658025741577, "learning_rate": 0.0002, "loss": 1.4587, "step": 73030 }, { "epoch": 0.3, "grad_norm": 3.947916269302368, "learning_rate": 0.0002, "loss": 1.5867, "step": 73040 }, { "epoch": 0.3, "grad_norm": 2.1722395420074463, "learning_rate": 0.0002, "loss": 1.2169, "step": 73050 }, { "epoch": 0.3, "grad_norm": 2.3453643321990967, "learning_rate": 0.0002, "loss": 1.6335, "step": 73060 }, { "epoch": 0.3, "grad_norm": 2.553473711013794, "learning_rate": 0.0002, "loss": 1.5452, "step": 73070 }, { "epoch": 0.3, "grad_norm": 2.490288257598877, "learning_rate": 0.0002, "loss": 1.4846, "step": 73080 }, { "epoch": 0.3, "grad_norm": 3.996070146560669, "learning_rate": 0.0002, "loss": 1.4446, "step": 73090 }, { "epoch": 0.3, "grad_norm": 3.195584297180176, "learning_rate": 0.0002, "loss": 1.5896, "step": 73100 }, { "epoch": 0.3, "grad_norm": 2.5812785625457764, "learning_rate": 0.0002, "loss": 1.6639, "step": 73110 }, { "epoch": 0.3, "grad_norm": 2.5958776473999023, "learning_rate": 0.0002, "loss": 1.5205, "step": 73120 }, { "epoch": 0.3, "grad_norm": 1.7321240901947021, "learning_rate": 0.0002, "loss": 1.3385, "step": 73130 }, { "epoch": 0.3, "grad_norm": 2.8745875358581543, "learning_rate": 0.0002, "loss": 1.8175, "step": 73140 }, { "epoch": 0.3, "grad_norm": 5.61305046081543, "learning_rate": 0.0002, "loss": 1.7733, "step": 73150 }, { "epoch": 0.3, "grad_norm": 4.390225887298584, "learning_rate": 0.0002, "loss": 1.4771, "step": 73160 }, { "epoch": 0.3, "grad_norm": 4.419487953186035, "learning_rate": 0.0002, "loss": 1.3813, "step": 73170 }, { "epoch": 0.3, "grad_norm": 1.8012325763702393, "learning_rate": 0.0002, "loss": 1.3842, "step": 73180 }, { "epoch": 0.3, "grad_norm": 2.134326219558716, "learning_rate": 0.0002, "loss": 1.3754, "step": 73190 }, { "epoch": 0.3, "grad_norm": 2.6402065753936768, "learning_rate": 0.0002, "loss": 1.5355, "step": 73200 }, { "epoch": 0.3, "grad_norm": 2.2461373805999756, "learning_rate": 0.0002, "loss": 1.484, "step": 73210 }, { "epoch": 0.3, "grad_norm": 4.214131832122803, "learning_rate": 0.0002, "loss": 1.8193, "step": 73220 }, { "epoch": 0.3, "grad_norm": 2.775294542312622, "learning_rate": 0.0002, "loss": 1.5789, "step": 73230 }, { "epoch": 0.3, "grad_norm": 3.1480355262756348, "learning_rate": 0.0002, "loss": 1.4195, "step": 73240 }, { "epoch": 0.3, "grad_norm": 3.276992082595825, "learning_rate": 0.0002, "loss": 1.8178, "step": 73250 }, { "epoch": 0.3, "grad_norm": 3.3577260971069336, "learning_rate": 0.0002, "loss": 1.4385, "step": 73260 }, { "epoch": 0.3, "grad_norm": 3.4261929988861084, "learning_rate": 0.0002, "loss": 1.909, "step": 73270 }, { "epoch": 0.3, "grad_norm": 2.400987148284912, "learning_rate": 0.0002, "loss": 1.6272, "step": 73280 }, { "epoch": 0.3, "grad_norm": 2.641448974609375, "learning_rate": 0.0002, "loss": 1.5096, "step": 73290 }, { "epoch": 0.3, "grad_norm": 2.450038194656372, "learning_rate": 0.0002, "loss": 1.5818, "step": 73300 }, { "epoch": 0.3, "grad_norm": 2.7099876403808594, "learning_rate": 0.0002, "loss": 1.5811, "step": 73310 }, { "epoch": 0.3, "grad_norm": 2.8291501998901367, "learning_rate": 0.0002, "loss": 1.7684, "step": 73320 }, { "epoch": 0.3, "grad_norm": 3.982102632522583, "learning_rate": 0.0002, "loss": 1.5083, "step": 73330 }, { "epoch": 0.3, "grad_norm": 3.996250867843628, "learning_rate": 0.0002, "loss": 1.7432, "step": 73340 }, { "epoch": 0.3, "grad_norm": 4.455594062805176, "learning_rate": 0.0002, "loss": 1.4795, "step": 73350 }, { "epoch": 0.3, "grad_norm": 4.386459827423096, "learning_rate": 0.0002, "loss": 1.716, "step": 73360 }, { "epoch": 0.3, "grad_norm": 2.8647499084472656, "learning_rate": 0.0002, "loss": 1.8691, "step": 73370 }, { "epoch": 0.3, "grad_norm": 2.9401307106018066, "learning_rate": 0.0002, "loss": 1.2023, "step": 73380 }, { "epoch": 0.3, "grad_norm": 2.5997087955474854, "learning_rate": 0.0002, "loss": 1.5692, "step": 73390 }, { "epoch": 0.3, "grad_norm": 2.8206543922424316, "learning_rate": 0.0002, "loss": 1.6619, "step": 73400 }, { "epoch": 0.3, "grad_norm": 2.6936991214752197, "learning_rate": 0.0002, "loss": 1.6059, "step": 73410 }, { "epoch": 0.3, "grad_norm": 2.455772638320923, "learning_rate": 0.0002, "loss": 1.6061, "step": 73420 }, { "epoch": 0.3, "grad_norm": 2.864607810974121, "learning_rate": 0.0002, "loss": 1.385, "step": 73430 }, { "epoch": 0.3, "grad_norm": 4.263782978057861, "learning_rate": 0.0002, "loss": 1.6646, "step": 73440 }, { "epoch": 0.3, "grad_norm": 1.8894600868225098, "learning_rate": 0.0002, "loss": 1.6265, "step": 73450 }, { "epoch": 0.3, "grad_norm": 3.483173370361328, "learning_rate": 0.0002, "loss": 1.424, "step": 73460 }, { "epoch": 0.3, "grad_norm": 2.3409085273742676, "learning_rate": 0.0002, "loss": 1.7015, "step": 73470 }, { "epoch": 0.3, "grad_norm": 2.113959550857544, "learning_rate": 0.0002, "loss": 1.7806, "step": 73480 }, { "epoch": 0.3, "grad_norm": 3.1145007610321045, "learning_rate": 0.0002, "loss": 1.6529, "step": 73490 }, { "epoch": 0.3, "grad_norm": 3.827226400375366, "learning_rate": 0.0002, "loss": 1.8847, "step": 73500 }, { "epoch": 0.3, "grad_norm": 2.7269091606140137, "learning_rate": 0.0002, "loss": 1.7264, "step": 73510 }, { "epoch": 0.3, "grad_norm": 2.1724648475646973, "learning_rate": 0.0002, "loss": 1.7416, "step": 73520 }, { "epoch": 0.3, "grad_norm": 3.456515073776245, "learning_rate": 0.0002, "loss": 1.5562, "step": 73530 }, { "epoch": 0.3, "grad_norm": 3.179224729537964, "learning_rate": 0.0002, "loss": 1.549, "step": 73540 }, { "epoch": 0.3, "grad_norm": 2.1097517013549805, "learning_rate": 0.0002, "loss": 1.6599, "step": 73550 }, { "epoch": 0.3, "grad_norm": 2.8307688236236572, "learning_rate": 0.0002, "loss": 1.4354, "step": 73560 }, { "epoch": 0.3, "grad_norm": 2.093736410140991, "learning_rate": 0.0002, "loss": 1.6262, "step": 73570 }, { "epoch": 0.3, "grad_norm": 2.00044846534729, "learning_rate": 0.0002, "loss": 1.3116, "step": 73580 }, { "epoch": 0.3, "grad_norm": 4.135803699493408, "learning_rate": 0.0002, "loss": 1.5648, "step": 73590 }, { "epoch": 0.3, "grad_norm": 4.356758117675781, "learning_rate": 0.0002, "loss": 1.5591, "step": 73600 }, { "epoch": 0.3, "grad_norm": 2.78149676322937, "learning_rate": 0.0002, "loss": 1.5294, "step": 73610 }, { "epoch": 0.3, "grad_norm": 2.022176742553711, "learning_rate": 0.0002, "loss": 1.4825, "step": 73620 }, { "epoch": 0.3, "grad_norm": 2.566364288330078, "learning_rate": 0.0002, "loss": 1.6034, "step": 73630 }, { "epoch": 0.3, "grad_norm": 2.972036600112915, "learning_rate": 0.0002, "loss": 1.5718, "step": 73640 }, { "epoch": 0.3, "grad_norm": 4.098713397979736, "learning_rate": 0.0002, "loss": 1.7967, "step": 73650 }, { "epoch": 0.3, "grad_norm": 3.2687487602233887, "learning_rate": 0.0002, "loss": 1.5282, "step": 73660 }, { "epoch": 0.3, "grad_norm": 2.712085247039795, "learning_rate": 0.0002, "loss": 1.5129, "step": 73670 }, { "epoch": 0.3, "grad_norm": 3.5277490615844727, "learning_rate": 0.0002, "loss": 1.4929, "step": 73680 }, { "epoch": 0.3, "grad_norm": 4.25246000289917, "learning_rate": 0.0002, "loss": 1.4703, "step": 73690 }, { "epoch": 0.3, "grad_norm": 2.5573458671569824, "learning_rate": 0.0002, "loss": 1.8562, "step": 73700 }, { "epoch": 0.3, "grad_norm": 3.325678586959839, "learning_rate": 0.0002, "loss": 1.5927, "step": 73710 }, { "epoch": 0.3, "grad_norm": 2.684896469116211, "learning_rate": 0.0002, "loss": 1.6813, "step": 73720 }, { "epoch": 0.3, "grad_norm": 3.268117666244507, "learning_rate": 0.0002, "loss": 1.6005, "step": 73730 }, { "epoch": 0.3, "grad_norm": 2.8902292251586914, "learning_rate": 0.0002, "loss": 1.5788, "step": 73740 }, { "epoch": 0.3, "grad_norm": 3.660875082015991, "learning_rate": 0.0002, "loss": 1.7529, "step": 73750 }, { "epoch": 0.3, "grad_norm": 2.2513163089752197, "learning_rate": 0.0002, "loss": 1.7162, "step": 73760 }, { "epoch": 0.3, "grad_norm": 2.686687707901001, "learning_rate": 0.0002, "loss": 1.6552, "step": 73770 }, { "epoch": 0.3, "grad_norm": 2.379610776901245, "learning_rate": 0.0002, "loss": 1.6236, "step": 73780 }, { "epoch": 0.3, "grad_norm": 2.9795775413513184, "learning_rate": 0.0002, "loss": 1.6723, "step": 73790 }, { "epoch": 0.3, "grad_norm": 2.681918144226074, "learning_rate": 0.0002, "loss": 1.666, "step": 73800 }, { "epoch": 0.3, "grad_norm": 3.7688307762145996, "learning_rate": 0.0002, "loss": 1.6497, "step": 73810 }, { "epoch": 0.3, "grad_norm": 3.6889045238494873, "learning_rate": 0.0002, "loss": 1.4933, "step": 73820 }, { "epoch": 0.3, "grad_norm": 1.716384768486023, "learning_rate": 0.0002, "loss": 1.4846, "step": 73830 }, { "epoch": 0.3, "grad_norm": 2.4314916133880615, "learning_rate": 0.0002, "loss": 1.4125, "step": 73840 }, { "epoch": 0.3, "grad_norm": 2.1819052696228027, "learning_rate": 0.0002, "loss": 1.4056, "step": 73850 }, { "epoch": 0.3, "grad_norm": 3.5909268856048584, "learning_rate": 0.0002, "loss": 1.6979, "step": 73860 }, { "epoch": 0.3, "grad_norm": 1.9224363565444946, "learning_rate": 0.0002, "loss": 1.6927, "step": 73870 }, { "epoch": 0.3, "grad_norm": 2.0666189193725586, "learning_rate": 0.0002, "loss": 1.3339, "step": 73880 }, { "epoch": 0.3, "grad_norm": 3.619079351425171, "learning_rate": 0.0002, "loss": 1.7634, "step": 73890 }, { "epoch": 0.3, "grad_norm": 2.280637741088867, "learning_rate": 0.0002, "loss": 1.4413, "step": 73900 }, { "epoch": 0.3, "grad_norm": 3.31440806388855, "learning_rate": 0.0002, "loss": 1.5968, "step": 73910 }, { "epoch": 0.3, "grad_norm": 2.699471950531006, "learning_rate": 0.0002, "loss": 1.5894, "step": 73920 }, { "epoch": 0.3, "grad_norm": 2.3775367736816406, "learning_rate": 0.0002, "loss": 1.5432, "step": 73930 }, { "epoch": 0.3, "grad_norm": 1.524243712425232, "learning_rate": 0.0002, "loss": 1.7019, "step": 73940 }, { "epoch": 0.3, "grad_norm": 2.535783290863037, "learning_rate": 0.0002, "loss": 1.4629, "step": 73950 }, { "epoch": 0.3, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.6454, "step": 73960 }, { "epoch": 0.3, "grad_norm": 3.149456739425659, "learning_rate": 0.0002, "loss": 1.5268, "step": 73970 }, { "epoch": 0.3, "grad_norm": 2.4481558799743652, "learning_rate": 0.0002, "loss": 1.6544, "step": 73980 }, { "epoch": 0.3, "grad_norm": 3.7335472106933594, "learning_rate": 0.0002, "loss": 1.6942, "step": 73990 }, { "epoch": 0.3, "grad_norm": 2.561753749847412, "learning_rate": 0.0002, "loss": 1.6397, "step": 74000 }, { "epoch": 0.3, "grad_norm": 3.3761777877807617, "learning_rate": 0.0002, "loss": 1.6376, "step": 74010 }, { "epoch": 0.3, "grad_norm": 3.472356081008911, "learning_rate": 0.0002, "loss": 1.6543, "step": 74020 }, { "epoch": 0.3, "grad_norm": 3.673867702484131, "learning_rate": 0.0002, "loss": 1.4156, "step": 74030 }, { "epoch": 0.3, "grad_norm": 2.3989102840423584, "learning_rate": 0.0002, "loss": 1.7306, "step": 74040 }, { "epoch": 0.3, "grad_norm": 4.0328450202941895, "learning_rate": 0.0002, "loss": 1.6561, "step": 74050 }, { "epoch": 0.3, "grad_norm": 3.363147020339966, "learning_rate": 0.0002, "loss": 1.4634, "step": 74060 }, { "epoch": 0.3, "grad_norm": 2.4352707862854004, "learning_rate": 0.0002, "loss": 1.5068, "step": 74070 }, { "epoch": 0.3, "grad_norm": 2.068053722381592, "learning_rate": 0.0002, "loss": 1.4508, "step": 74080 }, { "epoch": 0.3, "grad_norm": 2.6750636100769043, "learning_rate": 0.0002, "loss": 1.6096, "step": 74090 }, { "epoch": 0.3, "grad_norm": 2.18681263923645, "learning_rate": 0.0002, "loss": 1.5331, "step": 74100 }, { "epoch": 0.3, "grad_norm": 5.063231468200684, "learning_rate": 0.0002, "loss": 1.6329, "step": 74110 }, { "epoch": 0.3, "grad_norm": 2.959117889404297, "learning_rate": 0.0002, "loss": 1.3795, "step": 74120 }, { "epoch": 0.3, "grad_norm": 3.972902536392212, "learning_rate": 0.0002, "loss": 1.7649, "step": 74130 }, { "epoch": 0.3, "grad_norm": 2.3003768920898438, "learning_rate": 0.0002, "loss": 1.4644, "step": 74140 }, { "epoch": 0.3, "grad_norm": 3.491872787475586, "learning_rate": 0.0002, "loss": 1.4879, "step": 74150 }, { "epoch": 0.3, "grad_norm": 2.027487277984619, "learning_rate": 0.0002, "loss": 1.6462, "step": 74160 }, { "epoch": 0.3, "grad_norm": 3.9878087043762207, "learning_rate": 0.0002, "loss": 1.8108, "step": 74170 }, { "epoch": 0.3, "grad_norm": 2.7482051849365234, "learning_rate": 0.0002, "loss": 1.3015, "step": 74180 }, { "epoch": 0.3, "grad_norm": 2.7288432121276855, "learning_rate": 0.0002, "loss": 1.3041, "step": 74190 }, { "epoch": 0.3, "grad_norm": 2.119380474090576, "learning_rate": 0.0002, "loss": 1.5396, "step": 74200 }, { "epoch": 0.3, "grad_norm": 3.4094293117523193, "learning_rate": 0.0002, "loss": 1.8343, "step": 74210 }, { "epoch": 0.3, "grad_norm": 2.1322715282440186, "learning_rate": 0.0002, "loss": 1.7744, "step": 74220 }, { "epoch": 0.3, "grad_norm": 4.1491475105285645, "learning_rate": 0.0002, "loss": 1.5503, "step": 74230 }, { "epoch": 0.3, "grad_norm": 3.14717960357666, "learning_rate": 0.0002, "loss": 1.4539, "step": 74240 }, { "epoch": 0.3, "grad_norm": 2.1791980266571045, "learning_rate": 0.0002, "loss": 1.6937, "step": 74250 }, { "epoch": 0.3, "grad_norm": 3.517878293991089, "learning_rate": 0.0002, "loss": 1.7803, "step": 74260 }, { "epoch": 0.3, "grad_norm": 1.7901889085769653, "learning_rate": 0.0002, "loss": 1.5071, "step": 74270 }, { "epoch": 0.3, "grad_norm": 3.7661261558532715, "learning_rate": 0.0002, "loss": 1.6038, "step": 74280 }, { "epoch": 0.3, "grad_norm": 2.960883140563965, "learning_rate": 0.0002, "loss": 1.5279, "step": 74290 }, { "epoch": 0.3, "grad_norm": 2.3341474533081055, "learning_rate": 0.0002, "loss": 1.4384, "step": 74300 }, { "epoch": 0.3, "grad_norm": 2.5463411808013916, "learning_rate": 0.0002, "loss": 1.4081, "step": 74310 }, { "epoch": 0.3, "grad_norm": 2.841456174850464, "learning_rate": 0.0002, "loss": 1.4524, "step": 74320 }, { "epoch": 0.3, "grad_norm": 2.5464375019073486, "learning_rate": 0.0002, "loss": 1.526, "step": 74330 }, { "epoch": 0.3, "grad_norm": 2.2037580013275146, "learning_rate": 0.0002, "loss": 1.4477, "step": 74340 }, { "epoch": 0.3, "grad_norm": 1.6969037055969238, "learning_rate": 0.0002, "loss": 1.569, "step": 74350 }, { "epoch": 0.3, "grad_norm": 4.868793964385986, "learning_rate": 0.0002, "loss": 1.24, "step": 74360 }, { "epoch": 0.3, "grad_norm": 4.410988807678223, "learning_rate": 0.0002, "loss": 1.8163, "step": 74370 }, { "epoch": 0.3, "grad_norm": 3.734109878540039, "learning_rate": 0.0002, "loss": 1.4386, "step": 74380 }, { "epoch": 0.3, "grad_norm": 2.44230055809021, "learning_rate": 0.0002, "loss": 1.6361, "step": 74390 }, { "epoch": 0.3, "grad_norm": 2.2365105152130127, "learning_rate": 0.0002, "loss": 1.3784, "step": 74400 }, { "epoch": 0.3, "grad_norm": 2.829589366912842, "learning_rate": 0.0002, "loss": 1.5787, "step": 74410 }, { "epoch": 0.3, "grad_norm": 3.326666831970215, "learning_rate": 0.0002, "loss": 1.524, "step": 74420 }, { "epoch": 0.3, "grad_norm": 2.7260875701904297, "learning_rate": 0.0002, "loss": 1.4199, "step": 74430 }, { "epoch": 0.3, "grad_norm": 2.280330181121826, "learning_rate": 0.0002, "loss": 1.4889, "step": 74440 }, { "epoch": 0.3, "grad_norm": 3.2372241020202637, "learning_rate": 0.0002, "loss": 1.8013, "step": 74450 }, { "epoch": 0.3, "grad_norm": 4.576924800872803, "learning_rate": 0.0002, "loss": 1.4931, "step": 74460 }, { "epoch": 0.3, "grad_norm": 3.7255661487579346, "learning_rate": 0.0002, "loss": 1.5506, "step": 74470 }, { "epoch": 0.3, "grad_norm": 2.625276565551758, "learning_rate": 0.0002, "loss": 1.558, "step": 74480 }, { "epoch": 0.3, "grad_norm": 3.252333402633667, "learning_rate": 0.0002, "loss": 1.5022, "step": 74490 }, { "epoch": 0.3, "grad_norm": 3.4402048587799072, "learning_rate": 0.0002, "loss": 1.7111, "step": 74500 }, { "epoch": 0.3, "grad_norm": 1.863511562347412, "learning_rate": 0.0002, "loss": 1.4871, "step": 74510 }, { "epoch": 0.3, "grad_norm": 3.0921053886413574, "learning_rate": 0.0002, "loss": 1.4587, "step": 74520 }, { "epoch": 0.3, "grad_norm": 3.122739553451538, "learning_rate": 0.0002, "loss": 1.8387, "step": 74530 }, { "epoch": 0.3, "grad_norm": 3.1743648052215576, "learning_rate": 0.0002, "loss": 1.5385, "step": 74540 }, { "epoch": 0.3, "grad_norm": 3.7098898887634277, "learning_rate": 0.0002, "loss": 1.7386, "step": 74550 }, { "epoch": 0.3, "grad_norm": 4.3902268409729, "learning_rate": 0.0002, "loss": 1.3961, "step": 74560 }, { "epoch": 0.3, "grad_norm": 4.209216594696045, "learning_rate": 0.0002, "loss": 1.3935, "step": 74570 }, { "epoch": 0.3, "grad_norm": 1.558815360069275, "learning_rate": 0.0002, "loss": 1.5585, "step": 74580 }, { "epoch": 0.3, "grad_norm": 2.273836612701416, "learning_rate": 0.0002, "loss": 1.3468, "step": 74590 }, { "epoch": 0.3, "grad_norm": 1.9963078498840332, "learning_rate": 0.0002, "loss": 1.6104, "step": 74600 }, { "epoch": 0.3, "grad_norm": 5.109038352966309, "learning_rate": 0.0002, "loss": 1.565, "step": 74610 }, { "epoch": 0.3, "grad_norm": 4.896045207977295, "learning_rate": 0.0002, "loss": 1.4746, "step": 74620 }, { "epoch": 0.3, "grad_norm": 2.8299834728240967, "learning_rate": 0.0002, "loss": 1.4615, "step": 74630 }, { "epoch": 0.3, "grad_norm": 3.8184168338775635, "learning_rate": 0.0002, "loss": 1.6017, "step": 74640 }, { "epoch": 0.3, "grad_norm": 11.854121208190918, "learning_rate": 0.0002, "loss": 1.5155, "step": 74650 }, { "epoch": 0.3, "grad_norm": 2.98516845703125, "learning_rate": 0.0002, "loss": 1.4976, "step": 74660 }, { "epoch": 0.3, "grad_norm": 1.596513032913208, "learning_rate": 0.0002, "loss": 1.7578, "step": 74670 }, { "epoch": 0.3, "grad_norm": 2.538806915283203, "learning_rate": 0.0002, "loss": 1.494, "step": 74680 }, { "epoch": 0.3, "grad_norm": 4.157443523406982, "learning_rate": 0.0002, "loss": 1.5119, "step": 74690 }, { "epoch": 0.3, "grad_norm": 2.4830551147460938, "learning_rate": 0.0002, "loss": 1.4093, "step": 74700 }, { "epoch": 0.3, "grad_norm": 2.1385183334350586, "learning_rate": 0.0002, "loss": 1.6565, "step": 74710 }, { "epoch": 0.3, "grad_norm": 2.467471122741699, "learning_rate": 0.0002, "loss": 1.4344, "step": 74720 }, { "epoch": 0.3, "grad_norm": 4.814074993133545, "learning_rate": 0.0002, "loss": 1.3897, "step": 74730 }, { "epoch": 0.3, "grad_norm": 2.906886100769043, "learning_rate": 0.0002, "loss": 1.4697, "step": 74740 }, { "epoch": 0.3, "grad_norm": 2.1343281269073486, "learning_rate": 0.0002, "loss": 1.551, "step": 74750 }, { "epoch": 0.3, "grad_norm": 1.2551870346069336, "learning_rate": 0.0002, "loss": 1.4664, "step": 74760 }, { "epoch": 0.3, "grad_norm": 2.287269115447998, "learning_rate": 0.0002, "loss": 1.4218, "step": 74770 }, { "epoch": 0.3, "grad_norm": 4.292294025421143, "learning_rate": 0.0002, "loss": 1.6482, "step": 74780 }, { "epoch": 0.3, "grad_norm": 1.3661967515945435, "learning_rate": 0.0002, "loss": 1.4136, "step": 74790 }, { "epoch": 0.3, "grad_norm": 3.063103199005127, "learning_rate": 0.0002, "loss": 1.5601, "step": 74800 }, { "epoch": 0.3, "grad_norm": 2.448777675628662, "learning_rate": 0.0002, "loss": 1.6181, "step": 74810 }, { "epoch": 0.3, "grad_norm": 2.0697221755981445, "learning_rate": 0.0002, "loss": 1.432, "step": 74820 }, { "epoch": 0.3, "grad_norm": 3.024667263031006, "learning_rate": 0.0002, "loss": 1.5491, "step": 74830 }, { "epoch": 0.3, "grad_norm": 1.979468822479248, "learning_rate": 0.0002, "loss": 1.4112, "step": 74840 }, { "epoch": 0.3, "grad_norm": 5.654714107513428, "learning_rate": 0.0002, "loss": 1.8716, "step": 74850 }, { "epoch": 0.3, "grad_norm": 2.5868730545043945, "learning_rate": 0.0002, "loss": 1.6149, "step": 74860 }, { "epoch": 0.3, "grad_norm": 1.7857869863510132, "learning_rate": 0.0002, "loss": 1.4809, "step": 74870 }, { "epoch": 0.3, "grad_norm": 5.852680683135986, "learning_rate": 0.0002, "loss": 1.4607, "step": 74880 }, { "epoch": 0.3, "grad_norm": 2.5633959770202637, "learning_rate": 0.0002, "loss": 1.8149, "step": 74890 }, { "epoch": 0.3, "grad_norm": 2.1338624954223633, "learning_rate": 0.0002, "loss": 1.5937, "step": 74900 }, { "epoch": 0.3, "grad_norm": 3.0511584281921387, "learning_rate": 0.0002, "loss": 1.4026, "step": 74910 }, { "epoch": 0.3, "grad_norm": 1.9522242546081543, "learning_rate": 0.0002, "loss": 1.591, "step": 74920 }, { "epoch": 0.31, "grad_norm": 3.0326576232910156, "learning_rate": 0.0002, "loss": 1.582, "step": 74930 }, { "epoch": 0.31, "grad_norm": 4.379084587097168, "learning_rate": 0.0002, "loss": 1.7242, "step": 74940 }, { "epoch": 0.31, "grad_norm": 1.7204285860061646, "learning_rate": 0.0002, "loss": 1.6969, "step": 74950 }, { "epoch": 0.31, "grad_norm": 3.5626676082611084, "learning_rate": 0.0002, "loss": 1.3832, "step": 74960 }, { "epoch": 0.31, "grad_norm": 5.116307735443115, "learning_rate": 0.0002, "loss": 1.5706, "step": 74970 }, { "epoch": 0.31, "grad_norm": 1.7771165370941162, "learning_rate": 0.0002, "loss": 1.3566, "step": 74980 }, { "epoch": 0.31, "grad_norm": 3.1715638637542725, "learning_rate": 0.0002, "loss": 1.667, "step": 74990 }, { "epoch": 0.31, "grad_norm": 3.175452947616577, "learning_rate": 0.0002, "loss": 1.4486, "step": 75000 }, { "epoch": 0.31, "grad_norm": 2.9424691200256348, "learning_rate": 0.0002, "loss": 1.6967, "step": 75010 }, { "epoch": 0.31, "grad_norm": 2.2699317932128906, "learning_rate": 0.0002, "loss": 1.5805, "step": 75020 }, { "epoch": 0.31, "grad_norm": 2.0940539836883545, "learning_rate": 0.0002, "loss": 1.5912, "step": 75030 }, { "epoch": 0.31, "grad_norm": 4.911186695098877, "learning_rate": 0.0002, "loss": 1.7454, "step": 75040 }, { "epoch": 0.31, "grad_norm": 2.0999224185943604, "learning_rate": 0.0002, "loss": 1.473, "step": 75050 }, { "epoch": 0.31, "grad_norm": 4.443829536437988, "learning_rate": 0.0002, "loss": 1.608, "step": 75060 }, { "epoch": 0.31, "grad_norm": 2.6343932151794434, "learning_rate": 0.0002, "loss": 1.6305, "step": 75070 }, { "epoch": 0.31, "grad_norm": 5.913074016571045, "learning_rate": 0.0002, "loss": 1.7188, "step": 75080 }, { "epoch": 0.31, "grad_norm": 2.036215305328369, "learning_rate": 0.0002, "loss": 1.2319, "step": 75090 }, { "epoch": 0.31, "grad_norm": 1.9735373258590698, "learning_rate": 0.0002, "loss": 1.5571, "step": 75100 }, { "epoch": 0.31, "grad_norm": 2.7626349925994873, "learning_rate": 0.0002, "loss": 1.6646, "step": 75110 }, { "epoch": 0.31, "grad_norm": 2.3051342964172363, "learning_rate": 0.0002, "loss": 1.5059, "step": 75120 }, { "epoch": 0.31, "grad_norm": 3.896195411682129, "learning_rate": 0.0002, "loss": 1.7204, "step": 75130 }, { "epoch": 0.31, "grad_norm": 3.0200910568237305, "learning_rate": 0.0002, "loss": 1.7313, "step": 75140 }, { "epoch": 0.31, "grad_norm": 2.858872890472412, "learning_rate": 0.0002, "loss": 1.451, "step": 75150 }, { "epoch": 0.31, "grad_norm": 2.5440402030944824, "learning_rate": 0.0002, "loss": 1.8451, "step": 75160 }, { "epoch": 0.31, "grad_norm": 2.9148054122924805, "learning_rate": 0.0002, "loss": 1.4791, "step": 75170 }, { "epoch": 0.31, "grad_norm": 2.4930319786071777, "learning_rate": 0.0002, "loss": 1.7102, "step": 75180 }, { "epoch": 0.31, "grad_norm": 1.686896562576294, "learning_rate": 0.0002, "loss": 1.6128, "step": 75190 }, { "epoch": 0.31, "grad_norm": 3.0907108783721924, "learning_rate": 0.0002, "loss": 1.4942, "step": 75200 }, { "epoch": 0.31, "grad_norm": 2.150974750518799, "learning_rate": 0.0002, "loss": 1.5738, "step": 75210 }, { "epoch": 0.31, "grad_norm": 3.9453210830688477, "learning_rate": 0.0002, "loss": 1.7465, "step": 75220 }, { "epoch": 0.31, "grad_norm": 3.5494019985198975, "learning_rate": 0.0002, "loss": 1.4647, "step": 75230 }, { "epoch": 0.31, "grad_norm": 2.3306655883789062, "learning_rate": 0.0002, "loss": 1.6527, "step": 75240 }, { "epoch": 0.31, "grad_norm": 4.005533695220947, "learning_rate": 0.0002, "loss": 1.6864, "step": 75250 }, { "epoch": 0.31, "grad_norm": 3.1060214042663574, "learning_rate": 0.0002, "loss": 1.2914, "step": 75260 }, { "epoch": 0.31, "grad_norm": 2.201817035675049, "learning_rate": 0.0002, "loss": 1.4869, "step": 75270 }, { "epoch": 0.31, "grad_norm": 3.4219117164611816, "learning_rate": 0.0002, "loss": 1.4943, "step": 75280 }, { "epoch": 0.31, "grad_norm": 2.534076452255249, "learning_rate": 0.0002, "loss": 1.6726, "step": 75290 }, { "epoch": 0.31, "grad_norm": 2.7729361057281494, "learning_rate": 0.0002, "loss": 1.5972, "step": 75300 }, { "epoch": 0.31, "grad_norm": 2.6023542881011963, "learning_rate": 0.0002, "loss": 1.5186, "step": 75310 }, { "epoch": 0.31, "grad_norm": 3.297780752182007, "learning_rate": 0.0002, "loss": 1.5467, "step": 75320 }, { "epoch": 0.31, "grad_norm": 1.7219666242599487, "learning_rate": 0.0002, "loss": 1.4794, "step": 75330 }, { "epoch": 0.31, "grad_norm": 2.3883931636810303, "learning_rate": 0.0002, "loss": 1.4723, "step": 75340 }, { "epoch": 0.31, "grad_norm": 4.282846927642822, "learning_rate": 0.0002, "loss": 1.6437, "step": 75350 }, { "epoch": 0.31, "grad_norm": 2.508991241455078, "learning_rate": 0.0002, "loss": 1.625, "step": 75360 }, { "epoch": 0.31, "grad_norm": 2.6195249557495117, "learning_rate": 0.0002, "loss": 1.4075, "step": 75370 }, { "epoch": 0.31, "grad_norm": 4.3008904457092285, "learning_rate": 0.0002, "loss": 1.6396, "step": 75380 }, { "epoch": 0.31, "grad_norm": 3.0478270053863525, "learning_rate": 0.0002, "loss": 1.6382, "step": 75390 }, { "epoch": 0.31, "grad_norm": 2.483978509902954, "learning_rate": 0.0002, "loss": 1.1757, "step": 75400 }, { "epoch": 0.31, "grad_norm": 3.124798536300659, "learning_rate": 0.0002, "loss": 1.3577, "step": 75410 }, { "epoch": 0.31, "grad_norm": 2.150444269180298, "learning_rate": 0.0002, "loss": 1.6001, "step": 75420 }, { "epoch": 0.31, "grad_norm": 3.0432257652282715, "learning_rate": 0.0002, "loss": 1.7398, "step": 75430 }, { "epoch": 0.31, "grad_norm": 3.25785493850708, "learning_rate": 0.0002, "loss": 1.3585, "step": 75440 }, { "epoch": 0.31, "grad_norm": 3.0392305850982666, "learning_rate": 0.0002, "loss": 1.4354, "step": 75450 }, { "epoch": 0.31, "grad_norm": 3.947983741760254, "learning_rate": 0.0002, "loss": 1.4343, "step": 75460 }, { "epoch": 0.31, "grad_norm": 3.304276466369629, "learning_rate": 0.0002, "loss": 1.5809, "step": 75470 }, { "epoch": 0.31, "grad_norm": 2.4974446296691895, "learning_rate": 0.0002, "loss": 1.6843, "step": 75480 }, { "epoch": 0.31, "grad_norm": 3.0064151287078857, "learning_rate": 0.0002, "loss": 1.5943, "step": 75490 }, { "epoch": 0.31, "grad_norm": 10.431425094604492, "learning_rate": 0.0002, "loss": 1.4461, "step": 75500 }, { "epoch": 0.31, "grad_norm": 2.7437832355499268, "learning_rate": 0.0002, "loss": 1.6655, "step": 75510 }, { "epoch": 0.31, "grad_norm": 3.697169780731201, "learning_rate": 0.0002, "loss": 1.7571, "step": 75520 }, { "epoch": 0.31, "grad_norm": 3.5045158863067627, "learning_rate": 0.0002, "loss": 1.6059, "step": 75530 }, { "epoch": 0.31, "grad_norm": 2.9901363849639893, "learning_rate": 0.0002, "loss": 1.6739, "step": 75540 }, { "epoch": 0.31, "grad_norm": 4.449699401855469, "learning_rate": 0.0002, "loss": 1.5505, "step": 75550 }, { "epoch": 0.31, "grad_norm": 4.254663467407227, "learning_rate": 0.0002, "loss": 1.6331, "step": 75560 }, { "epoch": 0.31, "grad_norm": 4.506298065185547, "learning_rate": 0.0002, "loss": 1.5091, "step": 75570 }, { "epoch": 0.31, "grad_norm": 2.138885498046875, "learning_rate": 0.0002, "loss": 1.4444, "step": 75580 }, { "epoch": 0.31, "grad_norm": 4.742640018463135, "learning_rate": 0.0002, "loss": 1.6478, "step": 75590 }, { "epoch": 0.31, "grad_norm": 1.997516393661499, "learning_rate": 0.0002, "loss": 1.7703, "step": 75600 }, { "epoch": 0.31, "grad_norm": 2.7190451622009277, "learning_rate": 0.0002, "loss": 1.5154, "step": 75610 }, { "epoch": 0.31, "grad_norm": 1.9220073223114014, "learning_rate": 0.0002, "loss": 1.3874, "step": 75620 }, { "epoch": 0.31, "grad_norm": 2.1782360076904297, "learning_rate": 0.0002, "loss": 1.5929, "step": 75630 }, { "epoch": 0.31, "grad_norm": 5.649733543395996, "learning_rate": 0.0002, "loss": 1.8012, "step": 75640 }, { "epoch": 0.31, "grad_norm": 2.423125743865967, "learning_rate": 0.0002, "loss": 1.6305, "step": 75650 }, { "epoch": 0.31, "grad_norm": 2.7670514583587646, "learning_rate": 0.0002, "loss": 1.5646, "step": 75660 }, { "epoch": 0.31, "grad_norm": 2.345656156539917, "learning_rate": 0.0002, "loss": 1.6185, "step": 75670 }, { "epoch": 0.31, "grad_norm": 2.800175428390503, "learning_rate": 0.0002, "loss": 1.6186, "step": 75680 }, { "epoch": 0.31, "grad_norm": 2.1027212142944336, "learning_rate": 0.0002, "loss": 1.6042, "step": 75690 }, { "epoch": 0.31, "grad_norm": 2.3465256690979004, "learning_rate": 0.0002, "loss": 1.6034, "step": 75700 }, { "epoch": 0.31, "grad_norm": 2.754624366760254, "learning_rate": 0.0002, "loss": 1.5963, "step": 75710 }, { "epoch": 0.31, "grad_norm": 2.92706036567688, "learning_rate": 0.0002, "loss": 1.741, "step": 75720 }, { "epoch": 0.31, "grad_norm": 2.8611841201782227, "learning_rate": 0.0002, "loss": 1.6659, "step": 75730 }, { "epoch": 0.31, "grad_norm": 3.0269429683685303, "learning_rate": 0.0002, "loss": 1.7429, "step": 75740 }, { "epoch": 0.31, "grad_norm": 1.6856390237808228, "learning_rate": 0.0002, "loss": 1.6165, "step": 75750 }, { "epoch": 0.31, "grad_norm": 4.019306182861328, "learning_rate": 0.0002, "loss": 1.5739, "step": 75760 }, { "epoch": 0.31, "grad_norm": 4.1665358543396, "learning_rate": 0.0002, "loss": 1.592, "step": 75770 }, { "epoch": 0.31, "grad_norm": 2.070045232772827, "learning_rate": 0.0002, "loss": 1.7293, "step": 75780 }, { "epoch": 0.31, "grad_norm": 2.1124446392059326, "learning_rate": 0.0002, "loss": 1.5978, "step": 75790 }, { "epoch": 0.31, "grad_norm": 2.1750481128692627, "learning_rate": 0.0002, "loss": 1.4436, "step": 75800 }, { "epoch": 0.31, "grad_norm": 3.994765281677246, "learning_rate": 0.0002, "loss": 1.6828, "step": 75810 }, { "epoch": 0.31, "grad_norm": 4.376077651977539, "learning_rate": 0.0002, "loss": 1.3459, "step": 75820 }, { "epoch": 0.31, "grad_norm": 3.1657376289367676, "learning_rate": 0.0002, "loss": 1.6512, "step": 75830 }, { "epoch": 0.31, "grad_norm": 3.6089112758636475, "learning_rate": 0.0002, "loss": 1.5489, "step": 75840 }, { "epoch": 0.31, "grad_norm": 3.101624011993408, "learning_rate": 0.0002, "loss": 1.4316, "step": 75850 }, { "epoch": 0.31, "grad_norm": 3.6516966819763184, "learning_rate": 0.0002, "loss": 1.5921, "step": 75860 }, { "epoch": 0.31, "grad_norm": 1.702275037765503, "learning_rate": 0.0002, "loss": 1.6945, "step": 75870 }, { "epoch": 0.31, "grad_norm": 1.0800145864486694, "learning_rate": 0.0002, "loss": 1.5462, "step": 75880 }, { "epoch": 0.31, "grad_norm": 2.828780174255371, "learning_rate": 0.0002, "loss": 1.7872, "step": 75890 }, { "epoch": 0.31, "grad_norm": 3.1238512992858887, "learning_rate": 0.0002, "loss": 1.7642, "step": 75900 }, { "epoch": 0.31, "grad_norm": 2.9865612983703613, "learning_rate": 0.0002, "loss": 1.6505, "step": 75910 }, { "epoch": 0.31, "grad_norm": 3.4407520294189453, "learning_rate": 0.0002, "loss": 1.8865, "step": 75920 }, { "epoch": 0.31, "grad_norm": 4.3199310302734375, "learning_rate": 0.0002, "loss": 1.5761, "step": 75930 }, { "epoch": 0.31, "grad_norm": 1.712629795074463, "learning_rate": 0.0002, "loss": 1.2548, "step": 75940 }, { "epoch": 0.31, "grad_norm": 5.029647350311279, "learning_rate": 0.0002, "loss": 1.6874, "step": 75950 }, { "epoch": 0.31, "grad_norm": 5.373976707458496, "learning_rate": 0.0002, "loss": 1.4512, "step": 75960 }, { "epoch": 0.31, "grad_norm": 2.385350227355957, "learning_rate": 0.0002, "loss": 1.7181, "step": 75970 }, { "epoch": 0.31, "grad_norm": 2.6653387546539307, "learning_rate": 0.0002, "loss": 1.5053, "step": 75980 }, { "epoch": 0.31, "grad_norm": 2.3599135875701904, "learning_rate": 0.0002, "loss": 1.485, "step": 75990 }, { "epoch": 0.31, "grad_norm": 2.7630600929260254, "learning_rate": 0.0002, "loss": 1.3733, "step": 76000 }, { "epoch": 0.31, "grad_norm": 4.241135597229004, "learning_rate": 0.0002, "loss": 1.5161, "step": 76010 }, { "epoch": 0.31, "grad_norm": 3.9981467723846436, "learning_rate": 0.0002, "loss": 1.629, "step": 76020 }, { "epoch": 0.31, "grad_norm": 3.512486457824707, "learning_rate": 0.0002, "loss": 1.7012, "step": 76030 }, { "epoch": 0.31, "grad_norm": 3.925297260284424, "learning_rate": 0.0002, "loss": 1.4729, "step": 76040 }, { "epoch": 0.31, "grad_norm": 2.9732558727264404, "learning_rate": 0.0002, "loss": 1.6008, "step": 76050 }, { "epoch": 0.31, "grad_norm": 3.6120693683624268, "learning_rate": 0.0002, "loss": 1.6227, "step": 76060 }, { "epoch": 0.31, "grad_norm": 2.6896109580993652, "learning_rate": 0.0002, "loss": 1.6422, "step": 76070 }, { "epoch": 0.31, "grad_norm": 3.563908100128174, "learning_rate": 0.0002, "loss": 1.5749, "step": 76080 }, { "epoch": 0.31, "grad_norm": 2.8789145946502686, "learning_rate": 0.0002, "loss": 1.4999, "step": 76090 }, { "epoch": 0.31, "grad_norm": 2.6749930381774902, "learning_rate": 0.0002, "loss": 1.5097, "step": 76100 }, { "epoch": 0.31, "grad_norm": 3.526893377304077, "learning_rate": 0.0002, "loss": 1.5522, "step": 76110 }, { "epoch": 0.31, "grad_norm": 6.232969760894775, "learning_rate": 0.0002, "loss": 1.6558, "step": 76120 }, { "epoch": 0.31, "grad_norm": 3.015108108520508, "learning_rate": 0.0002, "loss": 1.5084, "step": 76130 }, { "epoch": 0.31, "grad_norm": 3.9580962657928467, "learning_rate": 0.0002, "loss": 1.5639, "step": 76140 }, { "epoch": 0.31, "grad_norm": 1.9774435758590698, "learning_rate": 0.0002, "loss": 1.6147, "step": 76150 }, { "epoch": 0.31, "grad_norm": 3.7272238731384277, "learning_rate": 0.0002, "loss": 1.6742, "step": 76160 }, { "epoch": 0.31, "grad_norm": 1.7499580383300781, "learning_rate": 0.0002, "loss": 1.459, "step": 76170 }, { "epoch": 0.31, "grad_norm": 1.828046202659607, "learning_rate": 0.0002, "loss": 1.5948, "step": 76180 }, { "epoch": 0.31, "grad_norm": 3.5466647148132324, "learning_rate": 0.0002, "loss": 1.4637, "step": 76190 }, { "epoch": 0.31, "grad_norm": 3.01641845703125, "learning_rate": 0.0002, "loss": 1.4565, "step": 76200 }, { "epoch": 0.31, "grad_norm": 3.3370375633239746, "learning_rate": 0.0002, "loss": 1.5229, "step": 76210 }, { "epoch": 0.31, "grad_norm": 3.8218228816986084, "learning_rate": 0.0002, "loss": 1.4029, "step": 76220 }, { "epoch": 0.31, "grad_norm": 3.0366714000701904, "learning_rate": 0.0002, "loss": 1.424, "step": 76230 }, { "epoch": 0.31, "grad_norm": 2.267165184020996, "learning_rate": 0.0002, "loss": 1.7704, "step": 76240 }, { "epoch": 0.31, "grad_norm": 3.5139474868774414, "learning_rate": 0.0002, "loss": 1.6258, "step": 76250 }, { "epoch": 0.31, "grad_norm": 3.856501579284668, "learning_rate": 0.0002, "loss": 1.3003, "step": 76260 }, { "epoch": 0.31, "grad_norm": 1.7389822006225586, "learning_rate": 0.0002, "loss": 1.4698, "step": 76270 }, { "epoch": 0.31, "grad_norm": 2.376851797103882, "learning_rate": 0.0002, "loss": 1.5213, "step": 76280 }, { "epoch": 0.31, "grad_norm": 4.0646185874938965, "learning_rate": 0.0002, "loss": 1.4439, "step": 76290 }, { "epoch": 0.31, "grad_norm": 2.5339579582214355, "learning_rate": 0.0002, "loss": 1.6568, "step": 76300 }, { "epoch": 0.31, "grad_norm": 2.9306068420410156, "learning_rate": 0.0002, "loss": 1.5363, "step": 76310 }, { "epoch": 0.31, "grad_norm": 1.6694049835205078, "learning_rate": 0.0002, "loss": 1.3865, "step": 76320 }, { "epoch": 0.31, "grad_norm": 5.108231067657471, "learning_rate": 0.0002, "loss": 1.5982, "step": 76330 }, { "epoch": 0.31, "grad_norm": 4.016899108886719, "learning_rate": 0.0002, "loss": 1.5642, "step": 76340 }, { "epoch": 0.31, "grad_norm": 2.2479710578918457, "learning_rate": 0.0002, "loss": 1.6176, "step": 76350 }, { "epoch": 0.31, "grad_norm": 1.746384859085083, "learning_rate": 0.0002, "loss": 1.5743, "step": 76360 }, { "epoch": 0.31, "grad_norm": 3.5214576721191406, "learning_rate": 0.0002, "loss": 1.4122, "step": 76370 }, { "epoch": 0.31, "grad_norm": 3.5813815593719482, "learning_rate": 0.0002, "loss": 1.7235, "step": 76380 }, { "epoch": 0.31, "grad_norm": 4.261086463928223, "learning_rate": 0.0002, "loss": 1.7951, "step": 76390 }, { "epoch": 0.31, "grad_norm": 2.6350269317626953, "learning_rate": 0.0002, "loss": 1.6614, "step": 76400 }, { "epoch": 0.31, "grad_norm": 5.303382396697998, "learning_rate": 0.0002, "loss": 1.6799, "step": 76410 }, { "epoch": 0.31, "grad_norm": 3.118873357772827, "learning_rate": 0.0002, "loss": 1.4362, "step": 76420 }, { "epoch": 0.31, "grad_norm": 3.0697364807128906, "learning_rate": 0.0002, "loss": 1.4496, "step": 76430 }, { "epoch": 0.31, "grad_norm": 2.3838117122650146, "learning_rate": 0.0002, "loss": 1.5481, "step": 76440 }, { "epoch": 0.31, "grad_norm": 2.3664677143096924, "learning_rate": 0.0002, "loss": 1.2724, "step": 76450 }, { "epoch": 0.31, "grad_norm": 4.648527145385742, "learning_rate": 0.0002, "loss": 1.196, "step": 76460 }, { "epoch": 0.31, "grad_norm": 3.5805015563964844, "learning_rate": 0.0002, "loss": 1.6478, "step": 76470 }, { "epoch": 0.31, "grad_norm": 3.283259391784668, "learning_rate": 0.0002, "loss": 1.3233, "step": 76480 }, { "epoch": 0.31, "grad_norm": 3.8571605682373047, "learning_rate": 0.0002, "loss": 1.6711, "step": 76490 }, { "epoch": 0.31, "grad_norm": 1.0492981672286987, "learning_rate": 0.0002, "loss": 1.3868, "step": 76500 }, { "epoch": 0.31, "grad_norm": 3.109753370285034, "learning_rate": 0.0002, "loss": 1.6233, "step": 76510 }, { "epoch": 0.31, "grad_norm": 1.9397767782211304, "learning_rate": 0.0002, "loss": 1.4146, "step": 76520 }, { "epoch": 0.31, "grad_norm": 1.971883773803711, "learning_rate": 0.0002, "loss": 1.4444, "step": 76530 }, { "epoch": 0.31, "grad_norm": 2.0887625217437744, "learning_rate": 0.0002, "loss": 1.3865, "step": 76540 }, { "epoch": 0.31, "grad_norm": 3.0981245040893555, "learning_rate": 0.0002, "loss": 1.5576, "step": 76550 }, { "epoch": 0.31, "grad_norm": 3.572573184967041, "learning_rate": 0.0002, "loss": 1.7834, "step": 76560 }, { "epoch": 0.31, "grad_norm": 2.100189208984375, "learning_rate": 0.0002, "loss": 1.5102, "step": 76570 }, { "epoch": 0.31, "grad_norm": 2.1216092109680176, "learning_rate": 0.0002, "loss": 1.6107, "step": 76580 }, { "epoch": 0.31, "grad_norm": 3.4463939666748047, "learning_rate": 0.0002, "loss": 1.6732, "step": 76590 }, { "epoch": 0.31, "grad_norm": 3.8814187049865723, "learning_rate": 0.0002, "loss": 1.5791, "step": 76600 }, { "epoch": 0.31, "grad_norm": 2.5058071613311768, "learning_rate": 0.0002, "loss": 1.4675, "step": 76610 }, { "epoch": 0.31, "grad_norm": 3.9312245845794678, "learning_rate": 0.0002, "loss": 1.4177, "step": 76620 }, { "epoch": 0.31, "grad_norm": 3.1302125453948975, "learning_rate": 0.0002, "loss": 1.6102, "step": 76630 }, { "epoch": 0.31, "grad_norm": 2.9235098361968994, "learning_rate": 0.0002, "loss": 1.6319, "step": 76640 }, { "epoch": 0.31, "grad_norm": 3.4965157508850098, "learning_rate": 0.0002, "loss": 1.6832, "step": 76650 }, { "epoch": 0.31, "grad_norm": 3.524379014968872, "learning_rate": 0.0002, "loss": 1.4206, "step": 76660 }, { "epoch": 0.31, "grad_norm": 1.470584750175476, "learning_rate": 0.0002, "loss": 1.6316, "step": 76670 }, { "epoch": 0.31, "grad_norm": 2.455359935760498, "learning_rate": 0.0002, "loss": 1.7044, "step": 76680 }, { "epoch": 0.31, "grad_norm": 3.193175792694092, "learning_rate": 0.0002, "loss": 1.5188, "step": 76690 }, { "epoch": 0.31, "grad_norm": 3.6927638053894043, "learning_rate": 0.0002, "loss": 1.564, "step": 76700 }, { "epoch": 0.31, "grad_norm": 2.2532870769500732, "learning_rate": 0.0002, "loss": 1.2723, "step": 76710 }, { "epoch": 0.31, "grad_norm": 3.048469066619873, "learning_rate": 0.0002, "loss": 1.511, "step": 76720 }, { "epoch": 0.31, "grad_norm": 5.515524387359619, "learning_rate": 0.0002, "loss": 1.3469, "step": 76730 }, { "epoch": 0.31, "grad_norm": 3.3039448261260986, "learning_rate": 0.0002, "loss": 1.5854, "step": 76740 }, { "epoch": 0.31, "grad_norm": 2.440842628479004, "learning_rate": 0.0002, "loss": 1.6108, "step": 76750 }, { "epoch": 0.31, "grad_norm": 3.36895489692688, "learning_rate": 0.0002, "loss": 1.5338, "step": 76760 }, { "epoch": 0.31, "grad_norm": 2.8289012908935547, "learning_rate": 0.0002, "loss": 1.7332, "step": 76770 }, { "epoch": 0.31, "grad_norm": 4.538959980010986, "learning_rate": 0.0002, "loss": 1.5034, "step": 76780 }, { "epoch": 0.31, "grad_norm": 2.754350185394287, "learning_rate": 0.0002, "loss": 1.5536, "step": 76790 }, { "epoch": 0.31, "grad_norm": 3.015096426010132, "learning_rate": 0.0002, "loss": 1.5501, "step": 76800 }, { "epoch": 0.31, "grad_norm": 3.9253551959991455, "learning_rate": 0.0002, "loss": 1.3362, "step": 76810 }, { "epoch": 0.31, "grad_norm": 2.808772325515747, "learning_rate": 0.0002, "loss": 1.6102, "step": 76820 }, { "epoch": 0.31, "grad_norm": 4.383108615875244, "learning_rate": 0.0002, "loss": 1.5129, "step": 76830 }, { "epoch": 0.31, "grad_norm": 1.9241434335708618, "learning_rate": 0.0002, "loss": 1.5553, "step": 76840 }, { "epoch": 0.31, "grad_norm": 1.6756012439727783, "learning_rate": 0.0002, "loss": 1.648, "step": 76850 }, { "epoch": 0.31, "grad_norm": 1.9411736726760864, "learning_rate": 0.0002, "loss": 1.456, "step": 76860 }, { "epoch": 0.31, "grad_norm": 2.3199656009674072, "learning_rate": 0.0002, "loss": 1.5554, "step": 76870 }, { "epoch": 0.31, "grad_norm": 3.2018203735351562, "learning_rate": 0.0002, "loss": 1.5598, "step": 76880 }, { "epoch": 0.31, "grad_norm": 5.627322196960449, "learning_rate": 0.0002, "loss": 1.5409, "step": 76890 }, { "epoch": 0.31, "grad_norm": 3.422650098800659, "learning_rate": 0.0002, "loss": 1.7565, "step": 76900 }, { "epoch": 0.31, "grad_norm": 2.595475196838379, "learning_rate": 0.0002, "loss": 1.5133, "step": 76910 }, { "epoch": 0.31, "grad_norm": 1.880841851234436, "learning_rate": 0.0002, "loss": 1.8131, "step": 76920 }, { "epoch": 0.31, "grad_norm": 3.649839401245117, "learning_rate": 0.0002, "loss": 1.5426, "step": 76930 }, { "epoch": 0.31, "grad_norm": 3.0385055541992188, "learning_rate": 0.0002, "loss": 1.593, "step": 76940 }, { "epoch": 0.31, "grad_norm": 2.3669116497039795, "learning_rate": 0.0002, "loss": 1.582, "step": 76950 }, { "epoch": 0.31, "grad_norm": 2.9378511905670166, "learning_rate": 0.0002, "loss": 1.6518, "step": 76960 }, { "epoch": 0.31, "grad_norm": 3.0534541606903076, "learning_rate": 0.0002, "loss": 1.6766, "step": 76970 }, { "epoch": 0.31, "grad_norm": 1.7925292253494263, "learning_rate": 0.0002, "loss": 1.3904, "step": 76980 }, { "epoch": 0.31, "grad_norm": 6.528576374053955, "learning_rate": 0.0002, "loss": 1.3968, "step": 76990 }, { "epoch": 0.31, "grad_norm": 4.910038471221924, "learning_rate": 0.0002, "loss": 1.6052, "step": 77000 }, { "epoch": 0.31, "grad_norm": 3.9623541831970215, "learning_rate": 0.0002, "loss": 1.6086, "step": 77010 }, { "epoch": 0.31, "grad_norm": 2.4653193950653076, "learning_rate": 0.0002, "loss": 1.3323, "step": 77020 }, { "epoch": 0.31, "grad_norm": 4.170115947723389, "learning_rate": 0.0002, "loss": 1.7633, "step": 77030 }, { "epoch": 0.31, "grad_norm": 4.257908344268799, "learning_rate": 0.0002, "loss": 1.5501, "step": 77040 }, { "epoch": 0.31, "grad_norm": 2.457225799560547, "learning_rate": 0.0002, "loss": 1.6102, "step": 77050 }, { "epoch": 0.31, "grad_norm": 2.4934628009796143, "learning_rate": 0.0002, "loss": 1.4806, "step": 77060 }, { "epoch": 0.31, "grad_norm": 1.8769373893737793, "learning_rate": 0.0002, "loss": 1.4512, "step": 77070 }, { "epoch": 0.31, "grad_norm": 3.4350247383117676, "learning_rate": 0.0002, "loss": 1.2791, "step": 77080 }, { "epoch": 0.31, "grad_norm": 2.1879653930664062, "learning_rate": 0.0002, "loss": 1.5885, "step": 77090 }, { "epoch": 0.31, "grad_norm": 2.5482234954833984, "learning_rate": 0.0002, "loss": 1.4656, "step": 77100 }, { "epoch": 0.31, "grad_norm": 1.939687728881836, "learning_rate": 0.0002, "loss": 1.629, "step": 77110 }, { "epoch": 0.31, "grad_norm": 3.5901150703430176, "learning_rate": 0.0002, "loss": 1.5683, "step": 77120 }, { "epoch": 0.31, "grad_norm": 2.038999557495117, "learning_rate": 0.0002, "loss": 1.529, "step": 77130 }, { "epoch": 0.31, "grad_norm": 3.0486643314361572, "learning_rate": 0.0002, "loss": 1.7203, "step": 77140 }, { "epoch": 0.31, "grad_norm": 2.8343310356140137, "learning_rate": 0.0002, "loss": 1.6245, "step": 77150 }, { "epoch": 0.31, "grad_norm": 3.2204878330230713, "learning_rate": 0.0002, "loss": 1.5419, "step": 77160 }, { "epoch": 0.31, "grad_norm": 2.4248979091644287, "learning_rate": 0.0002, "loss": 1.3133, "step": 77170 }, { "epoch": 0.31, "grad_norm": 3.5351462364196777, "learning_rate": 0.0002, "loss": 1.6997, "step": 77180 }, { "epoch": 0.31, "grad_norm": 1.666654348373413, "learning_rate": 0.0002, "loss": 1.4716, "step": 77190 }, { "epoch": 0.31, "grad_norm": 2.6112804412841797, "learning_rate": 0.0002, "loss": 1.6847, "step": 77200 }, { "epoch": 0.31, "grad_norm": 3.2385358810424805, "learning_rate": 0.0002, "loss": 1.5419, "step": 77210 }, { "epoch": 0.31, "grad_norm": 6.239445209503174, "learning_rate": 0.0002, "loss": 1.6523, "step": 77220 }, { "epoch": 0.31, "grad_norm": 1.7130299806594849, "learning_rate": 0.0002, "loss": 1.4355, "step": 77230 }, { "epoch": 0.31, "grad_norm": 7.726775169372559, "learning_rate": 0.0002, "loss": 1.852, "step": 77240 }, { "epoch": 0.31, "grad_norm": 2.9554426670074463, "learning_rate": 0.0002, "loss": 1.3591, "step": 77250 }, { "epoch": 0.31, "grad_norm": 2.4777445793151855, "learning_rate": 0.0002, "loss": 1.5032, "step": 77260 }, { "epoch": 0.31, "grad_norm": 3.2923619747161865, "learning_rate": 0.0002, "loss": 1.5299, "step": 77270 }, { "epoch": 0.31, "grad_norm": 4.179256439208984, "learning_rate": 0.0002, "loss": 1.5836, "step": 77280 }, { "epoch": 0.31, "grad_norm": 3.288787603378296, "learning_rate": 0.0002, "loss": 1.3799, "step": 77290 }, { "epoch": 0.31, "grad_norm": 2.741107940673828, "learning_rate": 0.0002, "loss": 1.6828, "step": 77300 }, { "epoch": 0.31, "grad_norm": 6.894762992858887, "learning_rate": 0.0002, "loss": 1.3668, "step": 77310 }, { "epoch": 0.31, "grad_norm": 2.9521853923797607, "learning_rate": 0.0002, "loss": 1.6375, "step": 77320 }, { "epoch": 0.31, "grad_norm": 2.80692458152771, "learning_rate": 0.0002, "loss": 1.4661, "step": 77330 }, { "epoch": 0.31, "grad_norm": 3.4429609775543213, "learning_rate": 0.0002, "loss": 1.4111, "step": 77340 }, { "epoch": 0.31, "grad_norm": 3.050342321395874, "learning_rate": 0.0002, "loss": 1.3768, "step": 77350 }, { "epoch": 0.31, "grad_norm": 5.663331508636475, "learning_rate": 0.0002, "loss": 1.3822, "step": 77360 }, { "epoch": 0.31, "grad_norm": 4.6294403076171875, "learning_rate": 0.0002, "loss": 1.5954, "step": 77370 }, { "epoch": 0.32, "grad_norm": 4.226558208465576, "learning_rate": 0.0002, "loss": 1.5277, "step": 77380 }, { "epoch": 0.32, "grad_norm": 4.583700180053711, "learning_rate": 0.0002, "loss": 1.6499, "step": 77390 }, { "epoch": 0.32, "grad_norm": 3.358964204788208, "learning_rate": 0.0002, "loss": 1.4872, "step": 77400 }, { "epoch": 0.32, "grad_norm": 1.478040099143982, "learning_rate": 0.0002, "loss": 1.674, "step": 77410 }, { "epoch": 0.32, "grad_norm": 3.0103020668029785, "learning_rate": 0.0002, "loss": 1.7835, "step": 77420 }, { "epoch": 0.32, "grad_norm": 3.274939775466919, "learning_rate": 0.0002, "loss": 1.7448, "step": 77430 }, { "epoch": 0.32, "grad_norm": 2.3654046058654785, "learning_rate": 0.0002, "loss": 1.5922, "step": 77440 }, { "epoch": 0.32, "grad_norm": 2.544268846511841, "learning_rate": 0.0002, "loss": 1.741, "step": 77450 }, { "epoch": 0.32, "grad_norm": 2.8102378845214844, "learning_rate": 0.0002, "loss": 1.4634, "step": 77460 }, { "epoch": 0.32, "grad_norm": 3.888651132583618, "learning_rate": 0.0002, "loss": 1.6792, "step": 77470 }, { "epoch": 0.32, "grad_norm": 3.407654047012329, "learning_rate": 0.0002, "loss": 1.6239, "step": 77480 }, { "epoch": 0.32, "grad_norm": 3.189662218093872, "learning_rate": 0.0002, "loss": 1.6563, "step": 77490 }, { "epoch": 0.32, "grad_norm": 3.8224518299102783, "learning_rate": 0.0002, "loss": 1.7072, "step": 77500 }, { "epoch": 0.32, "grad_norm": 1.8491164445877075, "learning_rate": 0.0002, "loss": 1.6705, "step": 77510 }, { "epoch": 0.32, "grad_norm": 1.1275078058242798, "learning_rate": 0.0002, "loss": 1.5141, "step": 77520 }, { "epoch": 0.32, "grad_norm": 2.578420877456665, "learning_rate": 0.0002, "loss": 1.508, "step": 77530 }, { "epoch": 0.32, "grad_norm": 2.7206664085388184, "learning_rate": 0.0002, "loss": 1.5975, "step": 77540 }, { "epoch": 0.32, "grad_norm": 3.3985400199890137, "learning_rate": 0.0002, "loss": 1.6241, "step": 77550 }, { "epoch": 0.32, "grad_norm": 4.61665678024292, "learning_rate": 0.0002, "loss": 1.5786, "step": 77560 }, { "epoch": 0.32, "grad_norm": 4.5268354415893555, "learning_rate": 0.0002, "loss": 1.845, "step": 77570 }, { "epoch": 0.32, "grad_norm": 4.24569034576416, "learning_rate": 0.0002, "loss": 1.6827, "step": 77580 }, { "epoch": 0.32, "grad_norm": 2.880967378616333, "learning_rate": 0.0002, "loss": 1.6616, "step": 77590 }, { "epoch": 0.32, "grad_norm": 1.8592296838760376, "learning_rate": 0.0002, "loss": 1.5208, "step": 77600 }, { "epoch": 0.32, "grad_norm": 2.319833755493164, "learning_rate": 0.0002, "loss": 1.4832, "step": 77610 }, { "epoch": 0.32, "grad_norm": 4.188943862915039, "learning_rate": 0.0002, "loss": 1.8902, "step": 77620 }, { "epoch": 0.32, "grad_norm": 2.4523887634277344, "learning_rate": 0.0002, "loss": 1.6724, "step": 77630 }, { "epoch": 0.32, "grad_norm": 2.4094271659851074, "learning_rate": 0.0002, "loss": 1.5472, "step": 77640 }, { "epoch": 0.32, "grad_norm": 3.2481002807617188, "learning_rate": 0.0002, "loss": 1.6458, "step": 77650 }, { "epoch": 0.32, "grad_norm": 3.8030667304992676, "learning_rate": 0.0002, "loss": 1.4954, "step": 77660 }, { "epoch": 0.32, "grad_norm": 3.5810351371765137, "learning_rate": 0.0002, "loss": 1.5163, "step": 77670 }, { "epoch": 0.32, "grad_norm": 4.070285797119141, "learning_rate": 0.0002, "loss": 1.3759, "step": 77680 }, { "epoch": 0.32, "grad_norm": 3.6140904426574707, "learning_rate": 0.0002, "loss": 1.6741, "step": 77690 }, { "epoch": 0.32, "grad_norm": 2.660539388656616, "learning_rate": 0.0002, "loss": 1.4691, "step": 77700 }, { "epoch": 0.32, "grad_norm": 2.975393772125244, "learning_rate": 0.0002, "loss": 1.5259, "step": 77710 }, { "epoch": 0.32, "grad_norm": 2.004376173019409, "learning_rate": 0.0002, "loss": 1.5453, "step": 77720 }, { "epoch": 0.32, "grad_norm": 1.9911761283874512, "learning_rate": 0.0002, "loss": 1.5759, "step": 77730 }, { "epoch": 0.32, "grad_norm": 2.5323193073272705, "learning_rate": 0.0002, "loss": 1.6366, "step": 77740 }, { "epoch": 0.32, "grad_norm": 2.484564781188965, "learning_rate": 0.0002, "loss": 1.409, "step": 77750 }, { "epoch": 0.32, "grad_norm": 2.283109664916992, "learning_rate": 0.0002, "loss": 1.8302, "step": 77760 }, { "epoch": 0.32, "grad_norm": 5.392778396606445, "learning_rate": 0.0002, "loss": 1.7372, "step": 77770 }, { "epoch": 0.32, "grad_norm": 2.516787528991699, "learning_rate": 0.0002, "loss": 1.5821, "step": 77780 }, { "epoch": 0.32, "grad_norm": 3.360732078552246, "learning_rate": 0.0002, "loss": 1.4394, "step": 77790 }, { "epoch": 0.32, "grad_norm": 2.2082481384277344, "learning_rate": 0.0002, "loss": 1.5432, "step": 77800 }, { "epoch": 0.32, "grad_norm": 2.5378665924072266, "learning_rate": 0.0002, "loss": 1.8599, "step": 77810 }, { "epoch": 0.32, "grad_norm": 3.077399492263794, "learning_rate": 0.0002, "loss": 1.646, "step": 77820 }, { "epoch": 0.32, "grad_norm": 2.8533973693847656, "learning_rate": 0.0002, "loss": 1.7093, "step": 77830 }, { "epoch": 0.32, "grad_norm": 2.434457302093506, "learning_rate": 0.0002, "loss": 1.662, "step": 77840 }, { "epoch": 0.32, "grad_norm": 1.818461537361145, "learning_rate": 0.0002, "loss": 1.6564, "step": 77850 }, { "epoch": 0.32, "grad_norm": 4.674780368804932, "learning_rate": 0.0002, "loss": 1.6745, "step": 77860 }, { "epoch": 0.32, "grad_norm": 2.247868776321411, "learning_rate": 0.0002, "loss": 1.5616, "step": 77870 }, { "epoch": 0.32, "grad_norm": 3.034316301345825, "learning_rate": 0.0002, "loss": 1.5468, "step": 77880 }, { "epoch": 0.32, "grad_norm": 3.1501989364624023, "learning_rate": 0.0002, "loss": 1.7571, "step": 77890 }, { "epoch": 0.32, "grad_norm": 2.482598304748535, "learning_rate": 0.0002, "loss": 1.4844, "step": 77900 }, { "epoch": 0.32, "grad_norm": 2.7751927375793457, "learning_rate": 0.0002, "loss": 1.7933, "step": 77910 }, { "epoch": 0.32, "grad_norm": 4.974668979644775, "learning_rate": 0.0002, "loss": 1.5247, "step": 77920 }, { "epoch": 0.32, "grad_norm": 3.466921329498291, "learning_rate": 0.0002, "loss": 1.4582, "step": 77930 }, { "epoch": 0.32, "grad_norm": 3.4688572883605957, "learning_rate": 0.0002, "loss": 1.6076, "step": 77940 }, { "epoch": 0.32, "grad_norm": 3.706450939178467, "learning_rate": 0.0002, "loss": 1.563, "step": 77950 }, { "epoch": 0.32, "grad_norm": 2.251298427581787, "learning_rate": 0.0002, "loss": 1.8279, "step": 77960 }, { "epoch": 0.32, "grad_norm": 2.140742778778076, "learning_rate": 0.0002, "loss": 1.601, "step": 77970 }, { "epoch": 0.32, "grad_norm": 4.967226982116699, "learning_rate": 0.0002, "loss": 1.5835, "step": 77980 }, { "epoch": 0.32, "grad_norm": 6.145202159881592, "learning_rate": 0.0002, "loss": 1.4125, "step": 77990 }, { "epoch": 0.32, "grad_norm": 1.7923738956451416, "learning_rate": 0.0002, "loss": 1.592, "step": 78000 }, { "epoch": 0.32, "grad_norm": 1.7168290615081787, "learning_rate": 0.0002, "loss": 1.2651, "step": 78010 }, { "epoch": 0.32, "grad_norm": 3.3795547485351562, "learning_rate": 0.0002, "loss": 1.5309, "step": 78020 }, { "epoch": 0.32, "grad_norm": 1.859135627746582, "learning_rate": 0.0002, "loss": 1.3247, "step": 78030 }, { "epoch": 0.32, "grad_norm": 2.795840263366699, "learning_rate": 0.0002, "loss": 1.5376, "step": 78040 }, { "epoch": 0.32, "grad_norm": 3.80043888092041, "learning_rate": 0.0002, "loss": 1.5014, "step": 78050 }, { "epoch": 0.32, "grad_norm": 2.2586305141448975, "learning_rate": 0.0002, "loss": 1.3524, "step": 78060 }, { "epoch": 0.32, "grad_norm": 3.2905654907226562, "learning_rate": 0.0002, "loss": 1.6176, "step": 78070 }, { "epoch": 0.32, "grad_norm": 2.6083755493164062, "learning_rate": 0.0002, "loss": 1.8415, "step": 78080 }, { "epoch": 0.32, "grad_norm": 2.5987932682037354, "learning_rate": 0.0002, "loss": 1.5465, "step": 78090 }, { "epoch": 0.32, "grad_norm": 2.840958595275879, "learning_rate": 0.0002, "loss": 1.5525, "step": 78100 }, { "epoch": 0.32, "grad_norm": 2.391855239868164, "learning_rate": 0.0002, "loss": 1.4944, "step": 78110 }, { "epoch": 0.32, "grad_norm": 2.7849578857421875, "learning_rate": 0.0002, "loss": 1.7175, "step": 78120 }, { "epoch": 0.32, "grad_norm": 2.1663408279418945, "learning_rate": 0.0002, "loss": 1.3964, "step": 78130 }, { "epoch": 0.32, "grad_norm": 2.318711996078491, "learning_rate": 0.0002, "loss": 1.4691, "step": 78140 }, { "epoch": 0.32, "grad_norm": 2.695051670074463, "learning_rate": 0.0002, "loss": 1.543, "step": 78150 }, { "epoch": 0.32, "grad_norm": 2.7276666164398193, "learning_rate": 0.0002, "loss": 1.47, "step": 78160 }, { "epoch": 0.32, "grad_norm": 3.6014389991760254, "learning_rate": 0.0002, "loss": 1.4827, "step": 78170 }, { "epoch": 0.32, "grad_norm": 2.5073845386505127, "learning_rate": 0.0002, "loss": 1.5905, "step": 78180 }, { "epoch": 0.32, "grad_norm": 2.466020107269287, "learning_rate": 0.0002, "loss": 1.4819, "step": 78190 }, { "epoch": 0.32, "grad_norm": 3.6820387840270996, "learning_rate": 0.0002, "loss": 1.6382, "step": 78200 }, { "epoch": 0.32, "grad_norm": 2.983733892440796, "learning_rate": 0.0002, "loss": 1.5692, "step": 78210 }, { "epoch": 0.32, "grad_norm": 2.6827752590179443, "learning_rate": 0.0002, "loss": 1.5321, "step": 78220 }, { "epoch": 0.32, "grad_norm": 3.5546791553497314, "learning_rate": 0.0002, "loss": 1.5867, "step": 78230 }, { "epoch": 0.32, "grad_norm": 2.2156214714050293, "learning_rate": 0.0002, "loss": 1.6968, "step": 78240 }, { "epoch": 0.32, "grad_norm": 1.9301785230636597, "learning_rate": 0.0002, "loss": 1.8267, "step": 78250 }, { "epoch": 0.32, "grad_norm": 2.806049346923828, "learning_rate": 0.0002, "loss": 1.6067, "step": 78260 }, { "epoch": 0.32, "grad_norm": 2.292954206466675, "learning_rate": 0.0002, "loss": 1.798, "step": 78270 }, { "epoch": 0.32, "grad_norm": 3.3185043334960938, "learning_rate": 0.0002, "loss": 1.4088, "step": 78280 }, { "epoch": 0.32, "grad_norm": 3.9364876747131348, "learning_rate": 0.0002, "loss": 1.5149, "step": 78290 }, { "epoch": 0.32, "grad_norm": 2.518765449523926, "learning_rate": 0.0002, "loss": 1.3968, "step": 78300 }, { "epoch": 0.32, "grad_norm": 3.111104726791382, "learning_rate": 0.0002, "loss": 1.5483, "step": 78310 }, { "epoch": 0.32, "grad_norm": 4.1366400718688965, "learning_rate": 0.0002, "loss": 1.4453, "step": 78320 }, { "epoch": 0.32, "grad_norm": 2.0335171222686768, "learning_rate": 0.0002, "loss": 1.5147, "step": 78330 }, { "epoch": 0.32, "grad_norm": 4.0149006843566895, "learning_rate": 0.0002, "loss": 1.3998, "step": 78340 }, { "epoch": 0.32, "grad_norm": 3.437486410140991, "learning_rate": 0.0002, "loss": 1.7096, "step": 78350 }, { "epoch": 0.32, "grad_norm": 3.5292558670043945, "learning_rate": 0.0002, "loss": 1.4374, "step": 78360 }, { "epoch": 0.32, "grad_norm": 1.798442006111145, "learning_rate": 0.0002, "loss": 1.583, "step": 78370 }, { "epoch": 0.32, "grad_norm": 3.002375841140747, "learning_rate": 0.0002, "loss": 1.3621, "step": 78380 }, { "epoch": 0.32, "grad_norm": 3.1543688774108887, "learning_rate": 0.0002, "loss": 1.5634, "step": 78390 }, { "epoch": 0.32, "grad_norm": 2.0317630767822266, "learning_rate": 0.0002, "loss": 1.5707, "step": 78400 }, { "epoch": 0.32, "grad_norm": 4.375999450683594, "learning_rate": 0.0002, "loss": 1.6754, "step": 78410 }, { "epoch": 0.32, "grad_norm": 3.1209654808044434, "learning_rate": 0.0002, "loss": 1.6065, "step": 78420 }, { "epoch": 0.32, "grad_norm": 4.970181465148926, "learning_rate": 0.0002, "loss": 1.4507, "step": 78430 }, { "epoch": 0.32, "grad_norm": 3.691617250442505, "learning_rate": 0.0002, "loss": 1.4984, "step": 78440 }, { "epoch": 0.32, "grad_norm": 2.9552359580993652, "learning_rate": 0.0002, "loss": 1.959, "step": 78450 }, { "epoch": 0.32, "grad_norm": 3.499677896499634, "learning_rate": 0.0002, "loss": 1.728, "step": 78460 }, { "epoch": 0.32, "grad_norm": 1.6350871324539185, "learning_rate": 0.0002, "loss": 1.4563, "step": 78470 }, { "epoch": 0.32, "grad_norm": 8.167790412902832, "learning_rate": 0.0002, "loss": 1.7193, "step": 78480 }, { "epoch": 0.32, "grad_norm": 2.0092055797576904, "learning_rate": 0.0002, "loss": 1.7123, "step": 78490 }, { "epoch": 0.32, "grad_norm": 3.195072889328003, "learning_rate": 0.0002, "loss": 1.4598, "step": 78500 }, { "epoch": 0.32, "grad_norm": 3.4449336528778076, "learning_rate": 0.0002, "loss": 1.4971, "step": 78510 }, { "epoch": 0.32, "grad_norm": 3.168339252471924, "learning_rate": 0.0002, "loss": 1.5932, "step": 78520 }, { "epoch": 0.32, "grad_norm": 2.417429208755493, "learning_rate": 0.0002, "loss": 1.556, "step": 78530 }, { "epoch": 0.32, "grad_norm": 3.350097894668579, "learning_rate": 0.0002, "loss": 1.4182, "step": 78540 }, { "epoch": 0.32, "grad_norm": 5.654017925262451, "learning_rate": 0.0002, "loss": 1.828, "step": 78550 }, { "epoch": 0.32, "grad_norm": 2.488396167755127, "learning_rate": 0.0002, "loss": 1.6584, "step": 78560 }, { "epoch": 0.32, "grad_norm": 2.1007344722747803, "learning_rate": 0.0002, "loss": 1.5188, "step": 78570 }, { "epoch": 0.32, "grad_norm": 2.090568780899048, "learning_rate": 0.0002, "loss": 1.6548, "step": 78580 }, { "epoch": 0.32, "grad_norm": 2.1441328525543213, "learning_rate": 0.0002, "loss": 1.4464, "step": 78590 }, { "epoch": 0.32, "grad_norm": 2.342472791671753, "learning_rate": 0.0002, "loss": 1.6631, "step": 78600 }, { "epoch": 0.32, "grad_norm": 1.9692113399505615, "learning_rate": 0.0002, "loss": 1.5519, "step": 78610 }, { "epoch": 0.32, "grad_norm": 2.2013888359069824, "learning_rate": 0.0002, "loss": 1.6948, "step": 78620 }, { "epoch": 0.32, "grad_norm": 2.5215325355529785, "learning_rate": 0.0002, "loss": 1.6906, "step": 78630 }, { "epoch": 0.32, "grad_norm": 3.6822001934051514, "learning_rate": 0.0002, "loss": 1.4424, "step": 78640 }, { "epoch": 0.32, "grad_norm": 2.7263267040252686, "learning_rate": 0.0002, "loss": 1.4899, "step": 78650 }, { "epoch": 0.32, "grad_norm": 1.7123116254806519, "learning_rate": 0.0002, "loss": 1.3127, "step": 78660 }, { "epoch": 0.32, "grad_norm": 4.203833103179932, "learning_rate": 0.0002, "loss": 1.629, "step": 78670 }, { "epoch": 0.32, "grad_norm": 3.414795160293579, "learning_rate": 0.0002, "loss": 1.6841, "step": 78680 }, { "epoch": 0.32, "grad_norm": 3.895385265350342, "learning_rate": 0.0002, "loss": 1.5523, "step": 78690 }, { "epoch": 0.32, "grad_norm": 2.499865770339966, "learning_rate": 0.0002, "loss": 1.4671, "step": 78700 }, { "epoch": 0.32, "grad_norm": 2.5134358406066895, "learning_rate": 0.0002, "loss": 1.555, "step": 78710 }, { "epoch": 0.32, "grad_norm": 2.079347610473633, "learning_rate": 0.0002, "loss": 1.67, "step": 78720 }, { "epoch": 0.32, "grad_norm": 3.045549154281616, "learning_rate": 0.0002, "loss": 1.6471, "step": 78730 }, { "epoch": 0.32, "grad_norm": 2.857867956161499, "learning_rate": 0.0002, "loss": 1.5589, "step": 78740 }, { "epoch": 0.32, "grad_norm": 2.15838885307312, "learning_rate": 0.0002, "loss": 1.5072, "step": 78750 }, { "epoch": 0.32, "grad_norm": 2.358454942703247, "learning_rate": 0.0002, "loss": 1.7932, "step": 78760 }, { "epoch": 0.32, "grad_norm": 4.31422233581543, "learning_rate": 0.0002, "loss": 1.4853, "step": 78770 }, { "epoch": 0.32, "grad_norm": 2.2983644008636475, "learning_rate": 0.0002, "loss": 1.8058, "step": 78780 }, { "epoch": 0.32, "grad_norm": 3.631422281265259, "learning_rate": 0.0002, "loss": 1.7561, "step": 78790 }, { "epoch": 0.32, "grad_norm": 3.0402793884277344, "learning_rate": 0.0002, "loss": 1.6341, "step": 78800 }, { "epoch": 0.32, "grad_norm": 2.6020941734313965, "learning_rate": 0.0002, "loss": 1.7861, "step": 78810 }, { "epoch": 0.32, "grad_norm": 2.40641713142395, "learning_rate": 0.0002, "loss": 1.6327, "step": 78820 }, { "epoch": 0.32, "grad_norm": 2.257559061050415, "learning_rate": 0.0002, "loss": 1.461, "step": 78830 }, { "epoch": 0.32, "grad_norm": 4.44782829284668, "learning_rate": 0.0002, "loss": 1.6571, "step": 78840 }, { "epoch": 0.32, "grad_norm": 3.548335313796997, "learning_rate": 0.0002, "loss": 1.7125, "step": 78850 }, { "epoch": 0.32, "grad_norm": 2.696990966796875, "learning_rate": 0.0002, "loss": 1.8111, "step": 78860 }, { "epoch": 0.32, "grad_norm": 4.130670547485352, "learning_rate": 0.0002, "loss": 1.4943, "step": 78870 }, { "epoch": 0.32, "grad_norm": 2.9128639698028564, "learning_rate": 0.0002, "loss": 1.4981, "step": 78880 }, { "epoch": 0.32, "grad_norm": 1.5609256029129028, "learning_rate": 0.0002, "loss": 1.6431, "step": 78890 }, { "epoch": 0.32, "grad_norm": 2.595855236053467, "learning_rate": 0.0002, "loss": 1.7055, "step": 78900 }, { "epoch": 0.32, "grad_norm": 3.0283243656158447, "learning_rate": 0.0002, "loss": 1.4498, "step": 78910 }, { "epoch": 0.32, "grad_norm": 3.0109519958496094, "learning_rate": 0.0002, "loss": 1.5194, "step": 78920 }, { "epoch": 0.32, "grad_norm": 2.844846725463867, "learning_rate": 0.0002, "loss": 1.7042, "step": 78930 }, { "epoch": 0.32, "grad_norm": 2.05212140083313, "learning_rate": 0.0002, "loss": 1.6322, "step": 78940 }, { "epoch": 0.32, "grad_norm": 2.707669258117676, "learning_rate": 0.0002, "loss": 1.6535, "step": 78950 }, { "epoch": 0.32, "grad_norm": 3.641571283340454, "learning_rate": 0.0002, "loss": 1.6484, "step": 78960 }, { "epoch": 0.32, "grad_norm": 3.1014623641967773, "learning_rate": 0.0002, "loss": 1.6382, "step": 78970 }, { "epoch": 0.32, "grad_norm": 4.773438930511475, "learning_rate": 0.0002, "loss": 1.605, "step": 78980 }, { "epoch": 0.32, "grad_norm": 2.844083547592163, "learning_rate": 0.0002, "loss": 1.576, "step": 78990 }, { "epoch": 0.32, "grad_norm": 4.3371262550354, "learning_rate": 0.0002, "loss": 1.5053, "step": 79000 }, { "epoch": 0.32, "grad_norm": 3.118537425994873, "learning_rate": 0.0002, "loss": 1.6371, "step": 79010 }, { "epoch": 0.32, "grad_norm": 4.094257354736328, "learning_rate": 0.0002, "loss": 1.612, "step": 79020 }, { "epoch": 0.32, "grad_norm": 3.531883478164673, "learning_rate": 0.0002, "loss": 1.6079, "step": 79030 }, { "epoch": 0.32, "grad_norm": 3.1177690029144287, "learning_rate": 0.0002, "loss": 1.5356, "step": 79040 }, { "epoch": 0.32, "grad_norm": 2.3060712814331055, "learning_rate": 0.0002, "loss": 1.905, "step": 79050 }, { "epoch": 0.32, "grad_norm": 3.2660131454467773, "learning_rate": 0.0002, "loss": 1.4458, "step": 79060 }, { "epoch": 0.32, "grad_norm": 8.59930419921875, "learning_rate": 0.0002, "loss": 1.5502, "step": 79070 }, { "epoch": 0.32, "grad_norm": 3.7064261436462402, "learning_rate": 0.0002, "loss": 1.2642, "step": 79080 }, { "epoch": 0.32, "grad_norm": 3.6816327571868896, "learning_rate": 0.0002, "loss": 1.4196, "step": 79090 }, { "epoch": 0.32, "grad_norm": 3.434319496154785, "learning_rate": 0.0002, "loss": 1.6709, "step": 79100 }, { "epoch": 0.32, "grad_norm": 2.2298085689544678, "learning_rate": 0.0002, "loss": 1.3415, "step": 79110 }, { "epoch": 0.32, "grad_norm": 1.4986382722854614, "learning_rate": 0.0002, "loss": 1.6429, "step": 79120 }, { "epoch": 0.32, "grad_norm": 3.366760730743408, "learning_rate": 0.0002, "loss": 1.5591, "step": 79130 }, { "epoch": 0.32, "grad_norm": 3.2027969360351562, "learning_rate": 0.0002, "loss": 1.8828, "step": 79140 }, { "epoch": 0.32, "grad_norm": 3.427863121032715, "learning_rate": 0.0002, "loss": 1.5537, "step": 79150 }, { "epoch": 0.32, "grad_norm": 3.227452278137207, "learning_rate": 0.0002, "loss": 1.5463, "step": 79160 }, { "epoch": 0.32, "grad_norm": 1.8654613494873047, "learning_rate": 0.0002, "loss": 1.5752, "step": 79170 }, { "epoch": 0.32, "grad_norm": 1.7686625719070435, "learning_rate": 0.0002, "loss": 1.37, "step": 79180 }, { "epoch": 0.32, "grad_norm": 1.7145562171936035, "learning_rate": 0.0002, "loss": 1.5914, "step": 79190 }, { "epoch": 0.32, "grad_norm": 3.3556454181671143, "learning_rate": 0.0002, "loss": 2.0568, "step": 79200 }, { "epoch": 0.32, "grad_norm": 3.0353646278381348, "learning_rate": 0.0002, "loss": 1.6306, "step": 79210 }, { "epoch": 0.32, "grad_norm": 2.4014856815338135, "learning_rate": 0.0002, "loss": 1.672, "step": 79220 }, { "epoch": 0.32, "grad_norm": 3.007282018661499, "learning_rate": 0.0002, "loss": 1.867, "step": 79230 }, { "epoch": 0.32, "grad_norm": 3.1287543773651123, "learning_rate": 0.0002, "loss": 1.5909, "step": 79240 }, { "epoch": 0.32, "grad_norm": 3.138430118560791, "learning_rate": 0.0002, "loss": 1.5353, "step": 79250 }, { "epoch": 0.32, "grad_norm": 2.9685280323028564, "learning_rate": 0.0002, "loss": 1.6462, "step": 79260 }, { "epoch": 0.32, "grad_norm": 3.5605082511901855, "learning_rate": 0.0002, "loss": 1.5735, "step": 79270 }, { "epoch": 0.32, "grad_norm": 1.6248902082443237, "learning_rate": 0.0002, "loss": 1.3932, "step": 79280 }, { "epoch": 0.32, "grad_norm": 2.4146621227264404, "learning_rate": 0.0002, "loss": 1.6659, "step": 79290 }, { "epoch": 0.32, "grad_norm": 3.455857038497925, "learning_rate": 0.0002, "loss": 1.5532, "step": 79300 }, { "epoch": 0.32, "grad_norm": 2.416719913482666, "learning_rate": 0.0002, "loss": 1.6234, "step": 79310 }, { "epoch": 0.32, "grad_norm": 2.8532111644744873, "learning_rate": 0.0002, "loss": 1.5484, "step": 79320 }, { "epoch": 0.32, "grad_norm": 3.606661319732666, "learning_rate": 0.0002, "loss": 1.6176, "step": 79330 }, { "epoch": 0.32, "grad_norm": 2.808206558227539, "learning_rate": 0.0002, "loss": 1.7977, "step": 79340 }, { "epoch": 0.32, "grad_norm": 3.3175411224365234, "learning_rate": 0.0002, "loss": 1.6429, "step": 79350 }, { "epoch": 0.32, "grad_norm": 2.5510544776916504, "learning_rate": 0.0002, "loss": 1.3196, "step": 79360 }, { "epoch": 0.32, "grad_norm": 2.3877222537994385, "learning_rate": 0.0002, "loss": 1.4242, "step": 79370 }, { "epoch": 0.32, "grad_norm": 3.938779354095459, "learning_rate": 0.0002, "loss": 1.6297, "step": 79380 }, { "epoch": 0.32, "grad_norm": 2.831338882446289, "learning_rate": 0.0002, "loss": 1.7335, "step": 79390 }, { "epoch": 0.32, "grad_norm": 2.8989474773406982, "learning_rate": 0.0002, "loss": 1.4963, "step": 79400 }, { "epoch": 0.32, "grad_norm": 3.5289077758789062, "learning_rate": 0.0002, "loss": 1.4394, "step": 79410 }, { "epoch": 0.32, "grad_norm": 3.3980259895324707, "learning_rate": 0.0002, "loss": 1.4942, "step": 79420 }, { "epoch": 0.32, "grad_norm": 4.3802170753479, "learning_rate": 0.0002, "loss": 1.5037, "step": 79430 }, { "epoch": 0.32, "grad_norm": 2.017080783843994, "learning_rate": 0.0002, "loss": 1.4501, "step": 79440 }, { "epoch": 0.32, "grad_norm": 4.274224758148193, "learning_rate": 0.0002, "loss": 1.3165, "step": 79450 }, { "epoch": 0.32, "grad_norm": 2.796058416366577, "learning_rate": 0.0002, "loss": 1.6811, "step": 79460 }, { "epoch": 0.32, "grad_norm": 3.0644426345825195, "learning_rate": 0.0002, "loss": 1.6168, "step": 79470 }, { "epoch": 0.32, "grad_norm": 2.776820421218872, "learning_rate": 0.0002, "loss": 1.5586, "step": 79480 }, { "epoch": 0.32, "grad_norm": 4.221393585205078, "learning_rate": 0.0002, "loss": 1.7653, "step": 79490 }, { "epoch": 0.32, "grad_norm": 2.2608277797698975, "learning_rate": 0.0002, "loss": 1.5556, "step": 79500 }, { "epoch": 0.32, "grad_norm": 3.7234959602355957, "learning_rate": 0.0002, "loss": 1.7754, "step": 79510 }, { "epoch": 0.32, "grad_norm": 3.706007480621338, "learning_rate": 0.0002, "loss": 1.5088, "step": 79520 }, { "epoch": 0.32, "grad_norm": 3.6264455318450928, "learning_rate": 0.0002, "loss": 1.7273, "step": 79530 }, { "epoch": 0.32, "grad_norm": 3.522613525390625, "learning_rate": 0.0002, "loss": 1.7664, "step": 79540 }, { "epoch": 0.32, "grad_norm": 3.081535577774048, "learning_rate": 0.0002, "loss": 1.7476, "step": 79550 }, { "epoch": 0.32, "grad_norm": 2.0344841480255127, "learning_rate": 0.0002, "loss": 1.3772, "step": 79560 }, { "epoch": 0.32, "grad_norm": 3.326280117034912, "learning_rate": 0.0002, "loss": 1.6212, "step": 79570 }, { "epoch": 0.32, "grad_norm": 4.171829700469971, "learning_rate": 0.0002, "loss": 1.6205, "step": 79580 }, { "epoch": 0.32, "grad_norm": 4.019883632659912, "learning_rate": 0.0002, "loss": 1.6108, "step": 79590 }, { "epoch": 0.32, "grad_norm": 1.9038670063018799, "learning_rate": 0.0002, "loss": 1.6444, "step": 79600 }, { "epoch": 0.32, "grad_norm": 5.1304216384887695, "learning_rate": 0.0002, "loss": 1.5657, "step": 79610 }, { "epoch": 0.32, "grad_norm": 1.895887017250061, "learning_rate": 0.0002, "loss": 1.0782, "step": 79620 }, { "epoch": 0.32, "grad_norm": 2.4891905784606934, "learning_rate": 0.0002, "loss": 1.5285, "step": 79630 }, { "epoch": 0.32, "grad_norm": 3.1425113677978516, "learning_rate": 0.0002, "loss": 1.5892, "step": 79640 }, { "epoch": 0.32, "grad_norm": 3.150838851928711, "learning_rate": 0.0002, "loss": 1.4981, "step": 79650 }, { "epoch": 0.32, "grad_norm": 1.7230088710784912, "learning_rate": 0.0002, "loss": 1.5649, "step": 79660 }, { "epoch": 0.32, "grad_norm": 2.2093539237976074, "learning_rate": 0.0002, "loss": 1.5069, "step": 79670 }, { "epoch": 0.32, "grad_norm": 2.2575600147247314, "learning_rate": 0.0002, "loss": 1.7845, "step": 79680 }, { "epoch": 0.32, "grad_norm": 4.349958896636963, "learning_rate": 0.0002, "loss": 1.5664, "step": 79690 }, { "epoch": 0.32, "grad_norm": 3.1524529457092285, "learning_rate": 0.0002, "loss": 1.5612, "step": 79700 }, { "epoch": 0.32, "grad_norm": 4.257206916809082, "learning_rate": 0.0002, "loss": 1.7631, "step": 79710 }, { "epoch": 0.32, "grad_norm": 2.3233273029327393, "learning_rate": 0.0002, "loss": 1.4692, "step": 79720 }, { "epoch": 0.32, "grad_norm": 1.6669470071792603, "learning_rate": 0.0002, "loss": 1.6416, "step": 79730 }, { "epoch": 0.32, "grad_norm": 3.0708377361297607, "learning_rate": 0.0002, "loss": 1.6497, "step": 79740 }, { "epoch": 0.32, "grad_norm": 3.7758467197418213, "learning_rate": 0.0002, "loss": 1.7619, "step": 79750 }, { "epoch": 0.32, "grad_norm": 2.440908670425415, "learning_rate": 0.0002, "loss": 1.5821, "step": 79760 }, { "epoch": 0.32, "grad_norm": 2.426614761352539, "learning_rate": 0.0002, "loss": 1.7246, "step": 79770 }, { "epoch": 0.32, "grad_norm": 4.249275207519531, "learning_rate": 0.0002, "loss": 1.5562, "step": 79780 }, { "epoch": 0.32, "grad_norm": 2.781069040298462, "learning_rate": 0.0002, "loss": 1.795, "step": 79790 }, { "epoch": 0.32, "grad_norm": 3.5512712001800537, "learning_rate": 0.0002, "loss": 1.6955, "step": 79800 }, { "epoch": 0.32, "grad_norm": 2.8798766136169434, "learning_rate": 0.0002, "loss": 1.6963, "step": 79810 }, { "epoch": 0.32, "grad_norm": 2.2444851398468018, "learning_rate": 0.0002, "loss": 1.4076, "step": 79820 }, { "epoch": 0.32, "grad_norm": 3.201918840408325, "learning_rate": 0.0002, "loss": 1.4452, "step": 79830 }, { "epoch": 0.33, "grad_norm": 1.97382652759552, "learning_rate": 0.0002, "loss": 1.5551, "step": 79840 }, { "epoch": 0.33, "grad_norm": 2.181135892868042, "learning_rate": 0.0002, "loss": 1.5023, "step": 79850 }, { "epoch": 0.33, "grad_norm": 4.975688934326172, "learning_rate": 0.0002, "loss": 1.5121, "step": 79860 }, { "epoch": 0.33, "grad_norm": 2.762540578842163, "learning_rate": 0.0002, "loss": 1.6143, "step": 79870 }, { "epoch": 0.33, "grad_norm": 2.5916826725006104, "learning_rate": 0.0002, "loss": 1.4776, "step": 79880 }, { "epoch": 0.33, "grad_norm": 2.412118911743164, "learning_rate": 0.0002, "loss": 1.7356, "step": 79890 }, { "epoch": 0.33, "grad_norm": 5.938919544219971, "learning_rate": 0.0002, "loss": 1.8425, "step": 79900 }, { "epoch": 0.33, "grad_norm": 6.746525764465332, "learning_rate": 0.0002, "loss": 1.6459, "step": 79910 }, { "epoch": 0.33, "grad_norm": 4.29860782623291, "learning_rate": 0.0002, "loss": 1.5074, "step": 79920 }, { "epoch": 0.33, "grad_norm": 2.640010118484497, "learning_rate": 0.0002, "loss": 1.7563, "step": 79930 }, { "epoch": 0.33, "grad_norm": 3.167226791381836, "learning_rate": 0.0002, "loss": 1.6646, "step": 79940 }, { "epoch": 0.33, "grad_norm": 4.2506279945373535, "learning_rate": 0.0002, "loss": 1.8193, "step": 79950 }, { "epoch": 0.33, "grad_norm": 2.672779083251953, "learning_rate": 0.0002, "loss": 1.6557, "step": 79960 }, { "epoch": 0.33, "grad_norm": 2.755908489227295, "learning_rate": 0.0002, "loss": 1.5626, "step": 79970 }, { "epoch": 0.33, "grad_norm": 3.2762255668640137, "learning_rate": 0.0002, "loss": 1.4979, "step": 79980 }, { "epoch": 0.33, "grad_norm": 2.3165030479431152, "learning_rate": 0.0002, "loss": 1.8476, "step": 79990 }, { "epoch": 0.33, "grad_norm": 2.4018728733062744, "learning_rate": 0.0002, "loss": 1.5631, "step": 80000 }, { "epoch": 0.33, "grad_norm": 2.6103179454803467, "learning_rate": 0.0002, "loss": 1.4296, "step": 80010 }, { "epoch": 0.33, "grad_norm": 1.800265908241272, "learning_rate": 0.0002, "loss": 1.5489, "step": 80020 }, { "epoch": 0.33, "grad_norm": 8.213186264038086, "learning_rate": 0.0002, "loss": 1.671, "step": 80030 }, { "epoch": 0.33, "grad_norm": 4.576501846313477, "learning_rate": 0.0002, "loss": 1.8344, "step": 80040 }, { "epoch": 0.33, "grad_norm": 2.1877171993255615, "learning_rate": 0.0002, "loss": 1.2556, "step": 80050 }, { "epoch": 0.33, "grad_norm": 2.8724002838134766, "learning_rate": 0.0002, "loss": 1.5551, "step": 80060 }, { "epoch": 0.33, "grad_norm": 2.7193450927734375, "learning_rate": 0.0002, "loss": 1.4466, "step": 80070 }, { "epoch": 0.33, "grad_norm": 3.975935220718384, "learning_rate": 0.0002, "loss": 1.7139, "step": 80080 }, { "epoch": 0.33, "grad_norm": 3.856511116027832, "learning_rate": 0.0002, "loss": 1.6628, "step": 80090 }, { "epoch": 0.33, "grad_norm": 4.643406867980957, "learning_rate": 0.0002, "loss": 1.883, "step": 80100 }, { "epoch": 0.33, "grad_norm": 4.338275909423828, "learning_rate": 0.0002, "loss": 1.5839, "step": 80110 }, { "epoch": 0.33, "grad_norm": 2.5880606174468994, "learning_rate": 0.0002, "loss": 1.5907, "step": 80120 }, { "epoch": 0.33, "grad_norm": 4.883690357208252, "learning_rate": 0.0002, "loss": 1.5018, "step": 80130 }, { "epoch": 0.33, "grad_norm": 3.2486002445220947, "learning_rate": 0.0002, "loss": 1.6582, "step": 80140 }, { "epoch": 0.33, "grad_norm": 1.613191843032837, "learning_rate": 0.0002, "loss": 1.4469, "step": 80150 }, { "epoch": 0.33, "grad_norm": 3.066718816757202, "learning_rate": 0.0002, "loss": 1.396, "step": 80160 }, { "epoch": 0.33, "grad_norm": 3.3400580883026123, "learning_rate": 0.0002, "loss": 1.508, "step": 80170 }, { "epoch": 0.33, "grad_norm": 3.5032787322998047, "learning_rate": 0.0002, "loss": 1.4949, "step": 80180 }, { "epoch": 0.33, "grad_norm": 4.505490779876709, "learning_rate": 0.0002, "loss": 1.6052, "step": 80190 }, { "epoch": 0.33, "grad_norm": 2.947216033935547, "learning_rate": 0.0002, "loss": 1.4385, "step": 80200 }, { "epoch": 0.33, "grad_norm": 5.4392595291137695, "learning_rate": 0.0002, "loss": 1.6109, "step": 80210 }, { "epoch": 0.33, "grad_norm": 3.25162410736084, "learning_rate": 0.0002, "loss": 1.5814, "step": 80220 }, { "epoch": 0.33, "grad_norm": 2.8282318115234375, "learning_rate": 0.0002, "loss": 1.1976, "step": 80230 }, { "epoch": 0.33, "grad_norm": 3.1872851848602295, "learning_rate": 0.0002, "loss": 1.4053, "step": 80240 }, { "epoch": 0.33, "grad_norm": 3.3682281970977783, "learning_rate": 0.0002, "loss": 1.6649, "step": 80250 }, { "epoch": 0.33, "grad_norm": 3.7932322025299072, "learning_rate": 0.0002, "loss": 1.7745, "step": 80260 }, { "epoch": 0.33, "grad_norm": 2.9771275520324707, "learning_rate": 0.0002, "loss": 1.59, "step": 80270 }, { "epoch": 0.33, "grad_norm": 2.9194936752319336, "learning_rate": 0.0002, "loss": 1.4189, "step": 80280 }, { "epoch": 0.33, "grad_norm": 2.749645471572876, "learning_rate": 0.0002, "loss": 1.6467, "step": 80290 }, { "epoch": 0.33, "grad_norm": 3.183030605316162, "learning_rate": 0.0002, "loss": 1.5113, "step": 80300 }, { "epoch": 0.33, "grad_norm": 5.9102678298950195, "learning_rate": 0.0002, "loss": 1.6403, "step": 80310 }, { "epoch": 0.33, "grad_norm": 3.3229384422302246, "learning_rate": 0.0002, "loss": 1.4954, "step": 80320 }, { "epoch": 0.33, "grad_norm": 1.872981071472168, "learning_rate": 0.0002, "loss": 1.7717, "step": 80330 }, { "epoch": 0.33, "grad_norm": 3.6601645946502686, "learning_rate": 0.0002, "loss": 1.4378, "step": 80340 }, { "epoch": 0.33, "grad_norm": 3.272590160369873, "learning_rate": 0.0002, "loss": 1.6305, "step": 80350 }, { "epoch": 0.33, "grad_norm": 5.295382499694824, "learning_rate": 0.0002, "loss": 1.5826, "step": 80360 }, { "epoch": 0.33, "grad_norm": 2.2394227981567383, "learning_rate": 0.0002, "loss": 1.4855, "step": 80370 }, { "epoch": 0.33, "grad_norm": 3.48944354057312, "learning_rate": 0.0002, "loss": 1.5042, "step": 80380 }, { "epoch": 0.33, "grad_norm": 3.721872091293335, "learning_rate": 0.0002, "loss": 1.7242, "step": 80390 }, { "epoch": 0.33, "grad_norm": 2.261662721633911, "learning_rate": 0.0002, "loss": 1.6481, "step": 80400 }, { "epoch": 0.33, "grad_norm": 4.686532497406006, "learning_rate": 0.0002, "loss": 1.3343, "step": 80410 }, { "epoch": 0.33, "grad_norm": 2.5002269744873047, "learning_rate": 0.0002, "loss": 1.5546, "step": 80420 }, { "epoch": 0.33, "grad_norm": 3.2986819744110107, "learning_rate": 0.0002, "loss": 1.6026, "step": 80430 }, { "epoch": 0.33, "grad_norm": 2.0909969806671143, "learning_rate": 0.0002, "loss": 1.5979, "step": 80440 }, { "epoch": 0.33, "grad_norm": 5.626099586486816, "learning_rate": 0.0002, "loss": 1.3993, "step": 80450 }, { "epoch": 0.33, "grad_norm": 9.080403327941895, "learning_rate": 0.0002, "loss": 1.4018, "step": 80460 }, { "epoch": 0.33, "grad_norm": 12.079238891601562, "learning_rate": 0.0002, "loss": 1.4506, "step": 80470 }, { "epoch": 0.33, "grad_norm": 2.601128339767456, "learning_rate": 0.0002, "loss": 1.6005, "step": 80480 }, { "epoch": 0.33, "grad_norm": 2.4473354816436768, "learning_rate": 0.0002, "loss": 1.5138, "step": 80490 }, { "epoch": 0.33, "grad_norm": 2.9289305210113525, "learning_rate": 0.0002, "loss": 1.7607, "step": 80500 }, { "epoch": 0.33, "grad_norm": 5.452178478240967, "learning_rate": 0.0002, "loss": 1.6219, "step": 80510 }, { "epoch": 0.33, "grad_norm": 2.9448559284210205, "learning_rate": 0.0002, "loss": 1.4176, "step": 80520 }, { "epoch": 0.33, "grad_norm": 4.272102355957031, "learning_rate": 0.0002, "loss": 1.4405, "step": 80530 }, { "epoch": 0.33, "grad_norm": 3.3459291458129883, "learning_rate": 0.0002, "loss": 1.7211, "step": 80540 }, { "epoch": 0.33, "grad_norm": 3.435328722000122, "learning_rate": 0.0002, "loss": 1.5997, "step": 80550 }, { "epoch": 0.33, "grad_norm": 3.3386788368225098, "learning_rate": 0.0002, "loss": 1.7561, "step": 80560 }, { "epoch": 0.33, "grad_norm": 2.6201868057250977, "learning_rate": 0.0002, "loss": 1.5747, "step": 80570 }, { "epoch": 0.33, "grad_norm": 1.833735466003418, "learning_rate": 0.0002, "loss": 1.5962, "step": 80580 }, { "epoch": 0.33, "grad_norm": 2.6562302112579346, "learning_rate": 0.0002, "loss": 1.5385, "step": 80590 }, { "epoch": 0.33, "grad_norm": 2.1140127182006836, "learning_rate": 0.0002, "loss": 1.8576, "step": 80600 }, { "epoch": 0.33, "grad_norm": 3.3363595008850098, "learning_rate": 0.0002, "loss": 1.5198, "step": 80610 }, { "epoch": 0.33, "grad_norm": 3.648111343383789, "learning_rate": 0.0002, "loss": 1.6682, "step": 80620 }, { "epoch": 0.33, "grad_norm": 6.249873638153076, "learning_rate": 0.0002, "loss": 1.8211, "step": 80630 }, { "epoch": 0.33, "grad_norm": 2.6183536052703857, "learning_rate": 0.0002, "loss": 1.2827, "step": 80640 }, { "epoch": 0.33, "grad_norm": 2.3337602615356445, "learning_rate": 0.0002, "loss": 1.5393, "step": 80650 }, { "epoch": 0.33, "grad_norm": 2.661848545074463, "learning_rate": 0.0002, "loss": 1.737, "step": 80660 }, { "epoch": 0.33, "grad_norm": 3.3914124965667725, "learning_rate": 0.0002, "loss": 1.4659, "step": 80670 }, { "epoch": 0.33, "grad_norm": 3.1920032501220703, "learning_rate": 0.0002, "loss": 1.4956, "step": 80680 }, { "epoch": 0.33, "grad_norm": 1.7372410297393799, "learning_rate": 0.0002, "loss": 1.3386, "step": 80690 }, { "epoch": 0.33, "grad_norm": 4.127894878387451, "learning_rate": 0.0002, "loss": 1.6735, "step": 80700 }, { "epoch": 0.33, "grad_norm": 5.067364692687988, "learning_rate": 0.0002, "loss": 1.4537, "step": 80710 }, { "epoch": 0.33, "grad_norm": 2.869544506072998, "learning_rate": 0.0002, "loss": 1.6391, "step": 80720 }, { "epoch": 0.33, "grad_norm": 2.463224411010742, "learning_rate": 0.0002, "loss": 1.6983, "step": 80730 }, { "epoch": 0.33, "grad_norm": 3.1103336811065674, "learning_rate": 0.0002, "loss": 1.8058, "step": 80740 }, { "epoch": 0.33, "grad_norm": 2.6106631755828857, "learning_rate": 0.0002, "loss": 1.6797, "step": 80750 }, { "epoch": 0.33, "grad_norm": 2.6717782020568848, "learning_rate": 0.0002, "loss": 1.5953, "step": 80760 }, { "epoch": 0.33, "grad_norm": 3.0131635665893555, "learning_rate": 0.0002, "loss": 1.7422, "step": 80770 }, { "epoch": 0.33, "grad_norm": 1.472108244895935, "learning_rate": 0.0002, "loss": 1.8979, "step": 80780 }, { "epoch": 0.33, "grad_norm": 1.8941360712051392, "learning_rate": 0.0002, "loss": 1.5169, "step": 80790 }, { "epoch": 0.33, "grad_norm": 4.498475074768066, "learning_rate": 0.0002, "loss": 1.5558, "step": 80800 }, { "epoch": 0.33, "grad_norm": 3.1166718006134033, "learning_rate": 0.0002, "loss": 1.603, "step": 80810 }, { "epoch": 0.33, "grad_norm": 2.8020763397216797, "learning_rate": 0.0002, "loss": 1.6573, "step": 80820 }, { "epoch": 0.33, "grad_norm": 2.5438883304595947, "learning_rate": 0.0002, "loss": 1.6035, "step": 80830 }, { "epoch": 0.33, "grad_norm": 3.9013805389404297, "learning_rate": 0.0002, "loss": 1.4596, "step": 80840 }, { "epoch": 0.33, "grad_norm": 4.724158763885498, "learning_rate": 0.0002, "loss": 1.7126, "step": 80850 }, { "epoch": 0.33, "grad_norm": 3.207192897796631, "learning_rate": 0.0002, "loss": 1.6168, "step": 80860 }, { "epoch": 0.33, "grad_norm": 2.876093864440918, "learning_rate": 0.0002, "loss": 1.5893, "step": 80870 }, { "epoch": 0.33, "grad_norm": 3.504297971725464, "learning_rate": 0.0002, "loss": 1.3722, "step": 80880 }, { "epoch": 0.33, "grad_norm": 2.989903450012207, "learning_rate": 0.0002, "loss": 1.4517, "step": 80890 }, { "epoch": 0.33, "grad_norm": 4.143836975097656, "learning_rate": 0.0002, "loss": 1.4957, "step": 80900 }, { "epoch": 0.33, "grad_norm": 3.8233859539031982, "learning_rate": 0.0002, "loss": 1.6311, "step": 80910 }, { "epoch": 0.33, "grad_norm": 2.8287837505340576, "learning_rate": 0.0002, "loss": 1.4436, "step": 80920 }, { "epoch": 0.33, "grad_norm": 4.582725524902344, "learning_rate": 0.0002, "loss": 1.6323, "step": 80930 }, { "epoch": 0.33, "grad_norm": 1.9626600742340088, "learning_rate": 0.0002, "loss": 1.5444, "step": 80940 }, { "epoch": 0.33, "grad_norm": 4.213545322418213, "learning_rate": 0.0002, "loss": 1.6504, "step": 80950 }, { "epoch": 0.33, "grad_norm": 3.395534038543701, "learning_rate": 0.0002, "loss": 1.4265, "step": 80960 }, { "epoch": 0.33, "grad_norm": 2.8653366565704346, "learning_rate": 0.0002, "loss": 1.3436, "step": 80970 }, { "epoch": 0.33, "grad_norm": 3.4791712760925293, "learning_rate": 0.0002, "loss": 1.6805, "step": 80980 }, { "epoch": 0.33, "grad_norm": 4.803690433502197, "learning_rate": 0.0002, "loss": 1.5127, "step": 80990 }, { "epoch": 0.33, "grad_norm": 2.425872325897217, "learning_rate": 0.0002, "loss": 1.6997, "step": 81000 }, { "epoch": 0.33, "grad_norm": 1.6988693475723267, "learning_rate": 0.0002, "loss": 1.8036, "step": 81010 }, { "epoch": 0.33, "grad_norm": 2.7499301433563232, "learning_rate": 0.0002, "loss": 1.4561, "step": 81020 }, { "epoch": 0.33, "grad_norm": 1.2662901878356934, "learning_rate": 0.0002, "loss": 1.6039, "step": 81030 }, { "epoch": 0.33, "grad_norm": 2.1313793659210205, "learning_rate": 0.0002, "loss": 1.3307, "step": 81040 }, { "epoch": 0.33, "grad_norm": 2.2840325832366943, "learning_rate": 0.0002, "loss": 1.3679, "step": 81050 }, { "epoch": 0.33, "grad_norm": 3.1758205890655518, "learning_rate": 0.0002, "loss": 1.4325, "step": 81060 }, { "epoch": 0.33, "grad_norm": 12.024871826171875, "learning_rate": 0.0002, "loss": 1.4343, "step": 81070 }, { "epoch": 0.33, "grad_norm": 3.633028984069824, "learning_rate": 0.0002, "loss": 1.6291, "step": 81080 }, { "epoch": 0.33, "grad_norm": 3.8431801795959473, "learning_rate": 0.0002, "loss": 1.5735, "step": 81090 }, { "epoch": 0.33, "grad_norm": 3.1819632053375244, "learning_rate": 0.0002, "loss": 1.5098, "step": 81100 }, { "epoch": 0.33, "grad_norm": 3.9736294746398926, "learning_rate": 0.0002, "loss": 1.6178, "step": 81110 }, { "epoch": 0.33, "grad_norm": 3.5064449310302734, "learning_rate": 0.0002, "loss": 1.5711, "step": 81120 }, { "epoch": 0.33, "grad_norm": 2.3198461532592773, "learning_rate": 0.0002, "loss": 1.4213, "step": 81130 }, { "epoch": 0.33, "grad_norm": 4.00584077835083, "learning_rate": 0.0002, "loss": 1.4668, "step": 81140 }, { "epoch": 0.33, "grad_norm": 2.197190999984741, "learning_rate": 0.0002, "loss": 1.7771, "step": 81150 }, { "epoch": 0.33, "grad_norm": 3.512697696685791, "learning_rate": 0.0002, "loss": 1.5072, "step": 81160 }, { "epoch": 0.33, "grad_norm": 3.567190647125244, "learning_rate": 0.0002, "loss": 1.6685, "step": 81170 }, { "epoch": 0.33, "grad_norm": 3.1938350200653076, "learning_rate": 0.0002, "loss": 1.5232, "step": 81180 }, { "epoch": 0.33, "grad_norm": 3.2019460201263428, "learning_rate": 0.0002, "loss": 1.8246, "step": 81190 }, { "epoch": 0.33, "grad_norm": 2.675717353820801, "learning_rate": 0.0002, "loss": 1.6311, "step": 81200 }, { "epoch": 0.33, "grad_norm": 2.4779162406921387, "learning_rate": 0.0002, "loss": 1.465, "step": 81210 }, { "epoch": 0.33, "grad_norm": 4.149249076843262, "learning_rate": 0.0002, "loss": 1.5584, "step": 81220 }, { "epoch": 0.33, "grad_norm": 2.488823413848877, "learning_rate": 0.0002, "loss": 1.6338, "step": 81230 }, { "epoch": 0.33, "grad_norm": 3.0101327896118164, "learning_rate": 0.0002, "loss": 1.4743, "step": 81240 }, { "epoch": 0.33, "grad_norm": 4.086623191833496, "learning_rate": 0.0002, "loss": 1.4821, "step": 81250 }, { "epoch": 0.33, "grad_norm": 3.351426839828491, "learning_rate": 0.0002, "loss": 1.6038, "step": 81260 }, { "epoch": 0.33, "grad_norm": 2.116434097290039, "learning_rate": 0.0002, "loss": 1.917, "step": 81270 }, { "epoch": 0.33, "grad_norm": 4.508080959320068, "learning_rate": 0.0002, "loss": 1.614, "step": 81280 }, { "epoch": 0.33, "grad_norm": 2.1804075241088867, "learning_rate": 0.0002, "loss": 1.6129, "step": 81290 }, { "epoch": 0.33, "grad_norm": 3.407680034637451, "learning_rate": 0.0002, "loss": 1.722, "step": 81300 }, { "epoch": 0.33, "grad_norm": 2.528658628463745, "learning_rate": 0.0002, "loss": 1.5976, "step": 81310 }, { "epoch": 0.33, "grad_norm": 2.353142261505127, "learning_rate": 0.0002, "loss": 1.3828, "step": 81320 }, { "epoch": 0.33, "grad_norm": 3.6127607822418213, "learning_rate": 0.0002, "loss": 1.4293, "step": 81330 }, { "epoch": 0.33, "grad_norm": 3.3672821521759033, "learning_rate": 0.0002, "loss": 1.4767, "step": 81340 }, { "epoch": 0.33, "grad_norm": 2.2018587589263916, "learning_rate": 0.0002, "loss": 1.5643, "step": 81350 }, { "epoch": 0.33, "grad_norm": 2.5507822036743164, "learning_rate": 0.0002, "loss": 1.5555, "step": 81360 }, { "epoch": 0.33, "grad_norm": 4.907535076141357, "learning_rate": 0.0002, "loss": 1.7099, "step": 81370 }, { "epoch": 0.33, "grad_norm": 2.875042676925659, "learning_rate": 0.0002, "loss": 1.6682, "step": 81380 }, { "epoch": 0.33, "grad_norm": 2.980029821395874, "learning_rate": 0.0002, "loss": 1.6528, "step": 81390 }, { "epoch": 0.33, "grad_norm": 4.179745674133301, "learning_rate": 0.0002, "loss": 1.4395, "step": 81400 }, { "epoch": 0.33, "grad_norm": 3.794940710067749, "learning_rate": 0.0002, "loss": 1.7964, "step": 81410 }, { "epoch": 0.33, "grad_norm": 3.276500701904297, "learning_rate": 0.0002, "loss": 1.3334, "step": 81420 }, { "epoch": 0.33, "grad_norm": 3.0418241024017334, "learning_rate": 0.0002, "loss": 1.5602, "step": 81430 }, { "epoch": 0.33, "grad_norm": 3.343761920928955, "learning_rate": 0.0002, "loss": 1.563, "step": 81440 }, { "epoch": 0.33, "grad_norm": 2.493736982345581, "learning_rate": 0.0002, "loss": 1.4348, "step": 81450 }, { "epoch": 0.33, "grad_norm": 3.3370628356933594, "learning_rate": 0.0002, "loss": 1.4787, "step": 81460 }, { "epoch": 0.33, "grad_norm": 3.2973923683166504, "learning_rate": 0.0002, "loss": 1.597, "step": 81470 }, { "epoch": 0.33, "grad_norm": 3.0231974124908447, "learning_rate": 0.0002, "loss": 1.5769, "step": 81480 }, { "epoch": 0.33, "grad_norm": 2.5598840713500977, "learning_rate": 0.0002, "loss": 1.409, "step": 81490 }, { "epoch": 0.33, "grad_norm": 3.4548299312591553, "learning_rate": 0.0002, "loss": 1.7228, "step": 81500 }, { "epoch": 0.33, "grad_norm": 3.837127923965454, "learning_rate": 0.0002, "loss": 1.4388, "step": 81510 }, { "epoch": 0.33, "grad_norm": 3.1523680686950684, "learning_rate": 0.0002, "loss": 1.5501, "step": 81520 }, { "epoch": 0.33, "grad_norm": 4.192455291748047, "learning_rate": 0.0002, "loss": 1.5652, "step": 81530 }, { "epoch": 0.33, "grad_norm": 2.1898062229156494, "learning_rate": 0.0002, "loss": 1.5067, "step": 81540 }, { "epoch": 0.33, "grad_norm": 3.7286839485168457, "learning_rate": 0.0002, "loss": 1.4248, "step": 81550 }, { "epoch": 0.33, "grad_norm": 2.875922918319702, "learning_rate": 0.0002, "loss": 1.619, "step": 81560 }, { "epoch": 0.33, "grad_norm": 2.787451982498169, "learning_rate": 0.0002, "loss": 1.7354, "step": 81570 }, { "epoch": 0.33, "grad_norm": 3.317335605621338, "learning_rate": 0.0002, "loss": 1.6492, "step": 81580 }, { "epoch": 0.33, "grad_norm": 3.3593037128448486, "learning_rate": 0.0002, "loss": 1.725, "step": 81590 }, { "epoch": 0.33, "grad_norm": 2.765833854675293, "learning_rate": 0.0002, "loss": 1.4117, "step": 81600 }, { "epoch": 0.33, "grad_norm": 3.339472770690918, "learning_rate": 0.0002, "loss": 1.3134, "step": 81610 }, { "epoch": 0.33, "grad_norm": 3.480180263519287, "learning_rate": 0.0002, "loss": 1.6941, "step": 81620 }, { "epoch": 0.33, "grad_norm": 1.9760626554489136, "learning_rate": 0.0002, "loss": 1.6453, "step": 81630 }, { "epoch": 0.33, "grad_norm": 2.9526402950286865, "learning_rate": 0.0002, "loss": 1.6873, "step": 81640 }, { "epoch": 0.33, "grad_norm": 2.817040205001831, "learning_rate": 0.0002, "loss": 1.6293, "step": 81650 }, { "epoch": 0.33, "grad_norm": 3.486607789993286, "learning_rate": 0.0002, "loss": 1.3754, "step": 81660 }, { "epoch": 0.33, "grad_norm": 1.8913602828979492, "learning_rate": 0.0002, "loss": 1.3562, "step": 81670 }, { "epoch": 0.33, "grad_norm": 3.2625672817230225, "learning_rate": 0.0002, "loss": 1.5778, "step": 81680 }, { "epoch": 0.33, "grad_norm": 2.4880528450012207, "learning_rate": 0.0002, "loss": 1.7317, "step": 81690 }, { "epoch": 0.33, "grad_norm": 3.8608083724975586, "learning_rate": 0.0002, "loss": 1.5747, "step": 81700 }, { "epoch": 0.33, "grad_norm": 3.582197666168213, "learning_rate": 0.0002, "loss": 1.6655, "step": 81710 }, { "epoch": 0.33, "grad_norm": 3.0363667011260986, "learning_rate": 0.0002, "loss": 1.5935, "step": 81720 }, { "epoch": 0.33, "grad_norm": 3.1153953075408936, "learning_rate": 0.0002, "loss": 1.3354, "step": 81730 }, { "epoch": 0.33, "grad_norm": 3.152649402618408, "learning_rate": 0.0002, "loss": 1.5965, "step": 81740 }, { "epoch": 0.33, "grad_norm": 2.0048415660858154, "learning_rate": 0.0002, "loss": 1.5829, "step": 81750 }, { "epoch": 0.33, "grad_norm": 2.9188661575317383, "learning_rate": 0.0002, "loss": 1.5894, "step": 81760 }, { "epoch": 0.33, "grad_norm": 1.9986119270324707, "learning_rate": 0.0002, "loss": 1.5434, "step": 81770 }, { "epoch": 0.33, "grad_norm": 2.2073748111724854, "learning_rate": 0.0002, "loss": 1.5701, "step": 81780 }, { "epoch": 0.33, "grad_norm": 3.5644991397857666, "learning_rate": 0.0002, "loss": 1.3291, "step": 81790 }, { "epoch": 0.33, "grad_norm": 2.5934221744537354, "learning_rate": 0.0002, "loss": 1.761, "step": 81800 }, { "epoch": 0.33, "grad_norm": 1.77375328540802, "learning_rate": 0.0002, "loss": 1.477, "step": 81810 }, { "epoch": 0.33, "grad_norm": 3.0139732360839844, "learning_rate": 0.0002, "loss": 1.7099, "step": 81820 }, { "epoch": 0.33, "grad_norm": 2.855886459350586, "learning_rate": 0.0002, "loss": 1.5398, "step": 81830 }, { "epoch": 0.33, "grad_norm": 3.2868621349334717, "learning_rate": 0.0002, "loss": 1.6299, "step": 81840 }, { "epoch": 0.33, "grad_norm": 4.400993824005127, "learning_rate": 0.0002, "loss": 1.6372, "step": 81850 }, { "epoch": 0.33, "grad_norm": 3.4861724376678467, "learning_rate": 0.0002, "loss": 1.5073, "step": 81860 }, { "epoch": 0.33, "grad_norm": 3.203984498977661, "learning_rate": 0.0002, "loss": 1.7921, "step": 81870 }, { "epoch": 0.33, "grad_norm": 1.9894484281539917, "learning_rate": 0.0002, "loss": 1.4585, "step": 81880 }, { "epoch": 0.33, "grad_norm": 2.071728467941284, "learning_rate": 0.0002, "loss": 1.8066, "step": 81890 }, { "epoch": 0.33, "grad_norm": 3.4765758514404297, "learning_rate": 0.0002, "loss": 1.8214, "step": 81900 }, { "epoch": 0.33, "grad_norm": 4.284848213195801, "learning_rate": 0.0002, "loss": 1.5804, "step": 81910 }, { "epoch": 0.33, "grad_norm": 2.1858115196228027, "learning_rate": 0.0002, "loss": 1.5128, "step": 81920 }, { "epoch": 0.33, "grad_norm": 3.044393539428711, "learning_rate": 0.0002, "loss": 1.4145, "step": 81930 }, { "epoch": 0.33, "grad_norm": 3.099069118499756, "learning_rate": 0.0002, "loss": 1.4876, "step": 81940 }, { "epoch": 0.33, "grad_norm": 2.0312998294830322, "learning_rate": 0.0002, "loss": 1.5132, "step": 81950 }, { "epoch": 0.33, "grad_norm": 2.3680083751678467, "learning_rate": 0.0002, "loss": 1.9428, "step": 81960 }, { "epoch": 0.33, "grad_norm": 2.2085249423980713, "learning_rate": 0.0002, "loss": 1.6548, "step": 81970 }, { "epoch": 0.33, "grad_norm": 4.1463422775268555, "learning_rate": 0.0002, "loss": 1.7753, "step": 81980 }, { "epoch": 0.33, "grad_norm": 3.253006935119629, "learning_rate": 0.0002, "loss": 1.3632, "step": 81990 }, { "epoch": 0.33, "grad_norm": 1.5929445028305054, "learning_rate": 0.0002, "loss": 1.7328, "step": 82000 }, { "epoch": 0.33, "grad_norm": 2.2971255779266357, "learning_rate": 0.0002, "loss": 1.4942, "step": 82010 }, { "epoch": 0.33, "grad_norm": 2.230602502822876, "learning_rate": 0.0002, "loss": 1.6307, "step": 82020 }, { "epoch": 0.33, "grad_norm": 2.1680169105529785, "learning_rate": 0.0002, "loss": 1.5144, "step": 82030 }, { "epoch": 0.33, "grad_norm": 1.2301076650619507, "learning_rate": 0.0002, "loss": 1.4021, "step": 82040 }, { "epoch": 0.33, "grad_norm": 4.486583232879639, "learning_rate": 0.0002, "loss": 1.6593, "step": 82050 }, { "epoch": 0.33, "grad_norm": 12.645435333251953, "learning_rate": 0.0002, "loss": 1.6549, "step": 82060 }, { "epoch": 0.33, "grad_norm": 3.88800048828125, "learning_rate": 0.0002, "loss": 1.5572, "step": 82070 }, { "epoch": 0.33, "grad_norm": 2.1665468215942383, "learning_rate": 0.0002, "loss": 1.6539, "step": 82080 }, { "epoch": 0.33, "grad_norm": 3.703946113586426, "learning_rate": 0.0002, "loss": 1.5847, "step": 82090 }, { "epoch": 0.33, "grad_norm": 2.832801342010498, "learning_rate": 0.0002, "loss": 1.6285, "step": 82100 }, { "epoch": 0.33, "grad_norm": 3.3705642223358154, "learning_rate": 0.0002, "loss": 1.6432, "step": 82110 }, { "epoch": 0.33, "grad_norm": 3.024442672729492, "learning_rate": 0.0002, "loss": 1.6402, "step": 82120 }, { "epoch": 0.33, "grad_norm": 4.085395336151123, "learning_rate": 0.0002, "loss": 1.4565, "step": 82130 }, { "epoch": 0.33, "grad_norm": 2.936805486679077, "learning_rate": 0.0002, "loss": 1.2665, "step": 82140 }, { "epoch": 0.33, "grad_norm": 3.83202862739563, "learning_rate": 0.0002, "loss": 1.6578, "step": 82150 }, { "epoch": 0.33, "grad_norm": 2.6751561164855957, "learning_rate": 0.0002, "loss": 1.7096, "step": 82160 }, { "epoch": 0.33, "grad_norm": 3.125934362411499, "learning_rate": 0.0002, "loss": 1.8413, "step": 82170 }, { "epoch": 0.33, "grad_norm": 2.0399723052978516, "learning_rate": 0.0002, "loss": 1.6309, "step": 82180 }, { "epoch": 0.33, "grad_norm": 4.816909313201904, "learning_rate": 0.0002, "loss": 1.3662, "step": 82190 }, { "epoch": 0.33, "grad_norm": 4.803248882293701, "learning_rate": 0.0002, "loss": 1.5947, "step": 82200 }, { "epoch": 0.33, "grad_norm": 3.8389792442321777, "learning_rate": 0.0002, "loss": 1.7667, "step": 82210 }, { "epoch": 0.33, "grad_norm": 3.2869935035705566, "learning_rate": 0.0002, "loss": 1.8358, "step": 82220 }, { "epoch": 0.33, "grad_norm": 3.7970991134643555, "learning_rate": 0.0002, "loss": 1.6006, "step": 82230 }, { "epoch": 0.33, "grad_norm": 4.400908946990967, "learning_rate": 0.0002, "loss": 1.6499, "step": 82240 }, { "epoch": 0.33, "grad_norm": 2.257742166519165, "learning_rate": 0.0002, "loss": 1.6767, "step": 82250 }, { "epoch": 0.33, "grad_norm": 3.5473501682281494, "learning_rate": 0.0002, "loss": 1.6725, "step": 82260 }, { "epoch": 0.33, "grad_norm": 3.304994821548462, "learning_rate": 0.0002, "loss": 1.6408, "step": 82270 }, { "epoch": 0.33, "grad_norm": 2.438802480697632, "learning_rate": 0.0002, "loss": 1.4605, "step": 82280 }, { "epoch": 0.33, "grad_norm": 3.091078996658325, "learning_rate": 0.0002, "loss": 1.3782, "step": 82290 }, { "epoch": 0.34, "grad_norm": 4.07246732711792, "learning_rate": 0.0002, "loss": 1.6165, "step": 82300 }, { "epoch": 0.34, "grad_norm": 2.986661911010742, "learning_rate": 0.0002, "loss": 1.4965, "step": 82310 }, { "epoch": 0.34, "grad_norm": 2.8754899501800537, "learning_rate": 0.0002, "loss": 1.4868, "step": 82320 }, { "epoch": 0.34, "grad_norm": 3.4252572059631348, "learning_rate": 0.0002, "loss": 1.5435, "step": 82330 }, { "epoch": 0.34, "grad_norm": 2.5760669708251953, "learning_rate": 0.0002, "loss": 1.6415, "step": 82340 }, { "epoch": 0.34, "grad_norm": 4.651770114898682, "learning_rate": 0.0002, "loss": 1.4775, "step": 82350 }, { "epoch": 0.34, "grad_norm": 3.2636358737945557, "learning_rate": 0.0002, "loss": 1.4927, "step": 82360 }, { "epoch": 0.34, "grad_norm": 2.9115030765533447, "learning_rate": 0.0002, "loss": 1.4822, "step": 82370 }, { "epoch": 0.34, "grad_norm": 4.083564758300781, "learning_rate": 0.0002, "loss": 1.4364, "step": 82380 }, { "epoch": 0.34, "grad_norm": 3.7801389694213867, "learning_rate": 0.0002, "loss": 1.4801, "step": 82390 }, { "epoch": 0.34, "grad_norm": 2.1837592124938965, "learning_rate": 0.0002, "loss": 1.7851, "step": 82400 }, { "epoch": 0.34, "grad_norm": 1.4688944816589355, "learning_rate": 0.0002, "loss": 1.7343, "step": 82410 }, { "epoch": 0.34, "grad_norm": 1.8334258794784546, "learning_rate": 0.0002, "loss": 1.8177, "step": 82420 }, { "epoch": 0.34, "grad_norm": 3.3068928718566895, "learning_rate": 0.0002, "loss": 1.5828, "step": 82430 }, { "epoch": 0.34, "grad_norm": 3.5500376224517822, "learning_rate": 0.0002, "loss": 1.5296, "step": 82440 }, { "epoch": 0.34, "grad_norm": 2.1655216217041016, "learning_rate": 0.0002, "loss": 1.805, "step": 82450 }, { "epoch": 0.34, "grad_norm": 5.555638790130615, "learning_rate": 0.0002, "loss": 1.573, "step": 82460 }, { "epoch": 0.34, "grad_norm": 3.153000831604004, "learning_rate": 0.0002, "loss": 1.5474, "step": 82470 }, { "epoch": 0.34, "grad_norm": 2.2142910957336426, "learning_rate": 0.0002, "loss": 1.4117, "step": 82480 }, { "epoch": 0.34, "grad_norm": 3.3151891231536865, "learning_rate": 0.0002, "loss": 1.7335, "step": 82490 }, { "epoch": 0.34, "grad_norm": 3.361534357070923, "learning_rate": 0.0002, "loss": 1.8213, "step": 82500 }, { "epoch": 0.34, "grad_norm": 3.9282374382019043, "learning_rate": 0.0002, "loss": 1.655, "step": 82510 }, { "epoch": 0.34, "grad_norm": 2.3419435024261475, "learning_rate": 0.0002, "loss": 1.6474, "step": 82520 }, { "epoch": 0.34, "grad_norm": 3.6180343627929688, "learning_rate": 0.0002, "loss": 1.8882, "step": 82530 }, { "epoch": 0.34, "grad_norm": 3.5667600631713867, "learning_rate": 0.0002, "loss": 1.6106, "step": 82540 }, { "epoch": 0.34, "grad_norm": 3.0838091373443604, "learning_rate": 0.0002, "loss": 1.7458, "step": 82550 }, { "epoch": 0.34, "grad_norm": 3.7832083702087402, "learning_rate": 0.0002, "loss": 1.8288, "step": 82560 }, { "epoch": 0.34, "grad_norm": 4.349891662597656, "learning_rate": 0.0002, "loss": 1.5892, "step": 82570 }, { "epoch": 0.34, "grad_norm": 3.232603073120117, "learning_rate": 0.0002, "loss": 1.7579, "step": 82580 }, { "epoch": 0.34, "grad_norm": 2.374171018600464, "learning_rate": 0.0002, "loss": 1.4791, "step": 82590 }, { "epoch": 0.34, "grad_norm": 3.6272411346435547, "learning_rate": 0.0002, "loss": 1.6875, "step": 82600 }, { "epoch": 0.34, "grad_norm": 3.5594496726989746, "learning_rate": 0.0002, "loss": 1.6138, "step": 82610 }, { "epoch": 0.34, "grad_norm": 3.090162515640259, "learning_rate": 0.0002, "loss": 1.5823, "step": 82620 }, { "epoch": 0.34, "grad_norm": 2.4331114292144775, "learning_rate": 0.0002, "loss": 1.3166, "step": 82630 }, { "epoch": 0.34, "grad_norm": 3.538285970687866, "learning_rate": 0.0002, "loss": 1.4872, "step": 82640 }, { "epoch": 0.34, "grad_norm": 4.994937419891357, "learning_rate": 0.0002, "loss": 1.6336, "step": 82650 }, { "epoch": 0.34, "grad_norm": 3.1099190711975098, "learning_rate": 0.0002, "loss": 1.3817, "step": 82660 }, { "epoch": 0.34, "grad_norm": 3.178476095199585, "learning_rate": 0.0002, "loss": 1.4641, "step": 82670 }, { "epoch": 0.34, "grad_norm": 1.6369330883026123, "learning_rate": 0.0002, "loss": 1.5003, "step": 82680 }, { "epoch": 0.34, "grad_norm": 3.798149824142456, "learning_rate": 0.0002, "loss": 1.7195, "step": 82690 }, { "epoch": 0.34, "grad_norm": 2.146768569946289, "learning_rate": 0.0002, "loss": 1.428, "step": 82700 }, { "epoch": 0.34, "grad_norm": 2.846832036972046, "learning_rate": 0.0002, "loss": 1.5536, "step": 82710 }, { "epoch": 0.34, "grad_norm": 4.4650774002075195, "learning_rate": 0.0002, "loss": 1.5972, "step": 82720 }, { "epoch": 0.34, "grad_norm": 3.666710615158081, "learning_rate": 0.0002, "loss": 1.7131, "step": 82730 }, { "epoch": 0.34, "grad_norm": 3.838850259780884, "learning_rate": 0.0002, "loss": 1.5574, "step": 82740 }, { "epoch": 0.34, "grad_norm": 2.0617873668670654, "learning_rate": 0.0002, "loss": 1.7625, "step": 82750 }, { "epoch": 0.34, "grad_norm": 2.520869731903076, "learning_rate": 0.0002, "loss": 1.7633, "step": 82760 }, { "epoch": 0.34, "grad_norm": 5.384804725646973, "learning_rate": 0.0002, "loss": 1.6112, "step": 82770 }, { "epoch": 0.34, "grad_norm": 2.842343807220459, "learning_rate": 0.0002, "loss": 1.6011, "step": 82780 }, { "epoch": 0.34, "grad_norm": 3.448467969894409, "learning_rate": 0.0002, "loss": 1.5608, "step": 82790 }, { "epoch": 0.34, "grad_norm": 2.8250246047973633, "learning_rate": 0.0002, "loss": 1.5316, "step": 82800 }, { "epoch": 0.34, "grad_norm": 2.4220452308654785, "learning_rate": 0.0002, "loss": 1.6064, "step": 82810 }, { "epoch": 0.34, "grad_norm": 3.93572998046875, "learning_rate": 0.0002, "loss": 1.6765, "step": 82820 }, { "epoch": 0.34, "grad_norm": 5.507757663726807, "learning_rate": 0.0002, "loss": 1.4046, "step": 82830 }, { "epoch": 0.34, "grad_norm": 4.650675296783447, "learning_rate": 0.0002, "loss": 1.4718, "step": 82840 }, { "epoch": 0.34, "grad_norm": 4.817191123962402, "learning_rate": 0.0002, "loss": 1.5806, "step": 82850 }, { "epoch": 0.34, "grad_norm": 3.1603689193725586, "learning_rate": 0.0002, "loss": 1.546, "step": 82860 }, { "epoch": 0.34, "grad_norm": 3.575423002243042, "learning_rate": 0.0002, "loss": 1.3304, "step": 82870 }, { "epoch": 0.34, "grad_norm": 2.0228285789489746, "learning_rate": 0.0002, "loss": 1.6712, "step": 82880 }, { "epoch": 0.34, "grad_norm": 2.691767930984497, "learning_rate": 0.0002, "loss": 1.6477, "step": 82890 }, { "epoch": 0.34, "grad_norm": 3.2990851402282715, "learning_rate": 0.0002, "loss": 1.7082, "step": 82900 }, { "epoch": 0.34, "grad_norm": 2.493852376937866, "learning_rate": 0.0002, "loss": 1.4981, "step": 82910 }, { "epoch": 0.34, "grad_norm": 4.331239700317383, "learning_rate": 0.0002, "loss": 1.5922, "step": 82920 }, { "epoch": 0.34, "grad_norm": 2.1934587955474854, "learning_rate": 0.0002, "loss": 1.6034, "step": 82930 }, { "epoch": 0.34, "grad_norm": 4.652095794677734, "learning_rate": 0.0002, "loss": 1.5329, "step": 82940 }, { "epoch": 0.34, "grad_norm": 4.065576076507568, "learning_rate": 0.0002, "loss": 1.6866, "step": 82950 }, { "epoch": 0.34, "grad_norm": 2.332240104675293, "learning_rate": 0.0002, "loss": 1.6205, "step": 82960 }, { "epoch": 0.34, "grad_norm": 4.017209529876709, "learning_rate": 0.0002, "loss": 1.6722, "step": 82970 }, { "epoch": 0.34, "grad_norm": 2.1385622024536133, "learning_rate": 0.0002, "loss": 1.498, "step": 82980 }, { "epoch": 0.34, "grad_norm": 2.619088649749756, "learning_rate": 0.0002, "loss": 1.5557, "step": 82990 }, { "epoch": 0.34, "grad_norm": 2.061680555343628, "learning_rate": 0.0002, "loss": 1.6141, "step": 83000 }, { "epoch": 0.34, "grad_norm": 2.0838165283203125, "learning_rate": 0.0002, "loss": 1.6852, "step": 83010 }, { "epoch": 0.34, "grad_norm": 6.57518196105957, "learning_rate": 0.0002, "loss": 1.4545, "step": 83020 }, { "epoch": 0.34, "grad_norm": 3.3327431678771973, "learning_rate": 0.0002, "loss": 1.7207, "step": 83030 }, { "epoch": 0.34, "grad_norm": 2.1686699390411377, "learning_rate": 0.0002, "loss": 1.3756, "step": 83040 }, { "epoch": 0.34, "grad_norm": 2.201975107192993, "learning_rate": 0.0002, "loss": 1.4499, "step": 83050 }, { "epoch": 0.34, "grad_norm": 3.2509992122650146, "learning_rate": 0.0002, "loss": 1.6727, "step": 83060 }, { "epoch": 0.34, "grad_norm": 2.8004558086395264, "learning_rate": 0.0002, "loss": 1.3394, "step": 83070 }, { "epoch": 0.34, "grad_norm": 2.967241048812866, "learning_rate": 0.0002, "loss": 1.6443, "step": 83080 }, { "epoch": 0.34, "grad_norm": 1.6006009578704834, "learning_rate": 0.0002, "loss": 1.5736, "step": 83090 }, { "epoch": 0.34, "grad_norm": 2.7469842433929443, "learning_rate": 0.0002, "loss": 1.4539, "step": 83100 }, { "epoch": 0.34, "grad_norm": 1.7989052534103394, "learning_rate": 0.0002, "loss": 1.4016, "step": 83110 }, { "epoch": 0.34, "grad_norm": 3.093064069747925, "learning_rate": 0.0002, "loss": 1.4958, "step": 83120 }, { "epoch": 0.34, "grad_norm": 2.228031873703003, "learning_rate": 0.0002, "loss": 1.4521, "step": 83130 }, { "epoch": 0.34, "grad_norm": 2.0688865184783936, "learning_rate": 0.0002, "loss": 1.3828, "step": 83140 }, { "epoch": 0.34, "grad_norm": 2.7574002742767334, "learning_rate": 0.0002, "loss": 1.5414, "step": 83150 }, { "epoch": 0.34, "grad_norm": 2.7775683403015137, "learning_rate": 0.0002, "loss": 1.4688, "step": 83160 }, { "epoch": 0.34, "grad_norm": 3.1898434162139893, "learning_rate": 0.0002, "loss": 1.7134, "step": 83170 }, { "epoch": 0.34, "grad_norm": 3.15159010887146, "learning_rate": 0.0002, "loss": 1.3225, "step": 83180 }, { "epoch": 0.34, "grad_norm": 1.837944507598877, "learning_rate": 0.0002, "loss": 1.6604, "step": 83190 }, { "epoch": 0.34, "grad_norm": 2.68666672706604, "learning_rate": 0.0002, "loss": 1.618, "step": 83200 }, { "epoch": 0.34, "grad_norm": 3.015157699584961, "learning_rate": 0.0002, "loss": 1.6084, "step": 83210 }, { "epoch": 0.34, "grad_norm": 4.841835975646973, "learning_rate": 0.0002, "loss": 1.6051, "step": 83220 }, { "epoch": 0.34, "grad_norm": 1.9303277730941772, "learning_rate": 0.0002, "loss": 1.4532, "step": 83230 }, { "epoch": 0.34, "grad_norm": 2.5831751823425293, "learning_rate": 0.0002, "loss": 1.3889, "step": 83240 }, { "epoch": 0.34, "grad_norm": 0.9904382824897766, "learning_rate": 0.0002, "loss": 1.5038, "step": 83250 }, { "epoch": 0.34, "grad_norm": 9.652763366699219, "learning_rate": 0.0002, "loss": 1.5571, "step": 83260 }, { "epoch": 0.34, "grad_norm": 3.35974383354187, "learning_rate": 0.0002, "loss": 1.6062, "step": 83270 }, { "epoch": 0.34, "grad_norm": 2.601842164993286, "learning_rate": 0.0002, "loss": 1.4246, "step": 83280 }, { "epoch": 0.34, "grad_norm": 1.9612022638320923, "learning_rate": 0.0002, "loss": 1.6409, "step": 83290 }, { "epoch": 0.34, "grad_norm": 2.3492228984832764, "learning_rate": 0.0002, "loss": 1.6602, "step": 83300 }, { "epoch": 0.34, "grad_norm": 4.590086460113525, "learning_rate": 0.0002, "loss": 1.5486, "step": 83310 }, { "epoch": 0.34, "grad_norm": 3.4906094074249268, "learning_rate": 0.0002, "loss": 1.5715, "step": 83320 }, { "epoch": 0.34, "grad_norm": 4.588594436645508, "learning_rate": 0.0002, "loss": 1.4452, "step": 83330 }, { "epoch": 0.34, "grad_norm": 3.5055413246154785, "learning_rate": 0.0002, "loss": 1.6645, "step": 83340 }, { "epoch": 0.34, "grad_norm": 4.3424224853515625, "learning_rate": 0.0002, "loss": 1.719, "step": 83350 }, { "epoch": 0.34, "grad_norm": 3.262354612350464, "learning_rate": 0.0002, "loss": 1.5651, "step": 83360 }, { "epoch": 0.34, "grad_norm": 3.995274543762207, "learning_rate": 0.0002, "loss": 1.5769, "step": 83370 }, { "epoch": 0.34, "grad_norm": 4.402369499206543, "learning_rate": 0.0002, "loss": 1.2625, "step": 83380 }, { "epoch": 0.34, "grad_norm": 2.308429002761841, "learning_rate": 0.0002, "loss": 1.5711, "step": 83390 }, { "epoch": 0.34, "grad_norm": 2.1035854816436768, "learning_rate": 0.0002, "loss": 1.5733, "step": 83400 }, { "epoch": 0.34, "grad_norm": 2.097153425216675, "learning_rate": 0.0002, "loss": 1.6396, "step": 83410 }, { "epoch": 0.34, "grad_norm": 5.397677898406982, "learning_rate": 0.0002, "loss": 1.7209, "step": 83420 }, { "epoch": 0.34, "grad_norm": 3.6718032360076904, "learning_rate": 0.0002, "loss": 1.5094, "step": 83430 }, { "epoch": 0.34, "grad_norm": 1.8695404529571533, "learning_rate": 0.0002, "loss": 1.4052, "step": 83440 }, { "epoch": 0.34, "grad_norm": 3.0026967525482178, "learning_rate": 0.0002, "loss": 1.4206, "step": 83450 }, { "epoch": 0.34, "grad_norm": 2.418970823287964, "learning_rate": 0.0002, "loss": 1.3845, "step": 83460 }, { "epoch": 0.34, "grad_norm": 3.038879156112671, "learning_rate": 0.0002, "loss": 1.6608, "step": 83470 }, { "epoch": 0.34, "grad_norm": 2.8589677810668945, "learning_rate": 0.0002, "loss": 1.6016, "step": 83480 }, { "epoch": 0.34, "grad_norm": 3.530087947845459, "learning_rate": 0.0002, "loss": 1.5091, "step": 83490 }, { "epoch": 0.34, "grad_norm": 6.785974025726318, "learning_rate": 0.0002, "loss": 1.5618, "step": 83500 }, { "epoch": 0.34, "grad_norm": 4.168082237243652, "learning_rate": 0.0002, "loss": 1.5828, "step": 83510 }, { "epoch": 0.34, "grad_norm": 3.6175129413604736, "learning_rate": 0.0002, "loss": 1.6919, "step": 83520 }, { "epoch": 0.34, "grad_norm": 3.3711345195770264, "learning_rate": 0.0002, "loss": 1.7662, "step": 83530 }, { "epoch": 0.34, "grad_norm": 3.620812177658081, "learning_rate": 0.0002, "loss": 1.6312, "step": 83540 }, { "epoch": 0.34, "grad_norm": 3.6504523754119873, "learning_rate": 0.0002, "loss": 1.4982, "step": 83550 }, { "epoch": 0.34, "grad_norm": 3.6638572216033936, "learning_rate": 0.0002, "loss": 1.618, "step": 83560 }, { "epoch": 0.34, "grad_norm": 3.7928032875061035, "learning_rate": 0.0002, "loss": 1.5846, "step": 83570 }, { "epoch": 0.34, "grad_norm": 2.2795729637145996, "learning_rate": 0.0002, "loss": 1.5755, "step": 83580 }, { "epoch": 0.34, "grad_norm": 3.541421890258789, "learning_rate": 0.0002, "loss": 1.7233, "step": 83590 }, { "epoch": 0.34, "grad_norm": 5.863702297210693, "learning_rate": 0.0002, "loss": 1.8362, "step": 83600 }, { "epoch": 0.34, "grad_norm": 3.311985731124878, "learning_rate": 0.0002, "loss": 1.5684, "step": 83610 }, { "epoch": 0.34, "grad_norm": 3.3652143478393555, "learning_rate": 0.0002, "loss": 1.5821, "step": 83620 }, { "epoch": 0.34, "grad_norm": 3.876281499862671, "learning_rate": 0.0002, "loss": 1.5576, "step": 83630 }, { "epoch": 0.34, "grad_norm": 2.709298610687256, "learning_rate": 0.0002, "loss": 1.8845, "step": 83640 }, { "epoch": 0.34, "grad_norm": 3.754645347595215, "learning_rate": 0.0002, "loss": 1.6619, "step": 83650 }, { "epoch": 0.34, "grad_norm": 2.3918676376342773, "learning_rate": 0.0002, "loss": 1.472, "step": 83660 }, { "epoch": 0.34, "grad_norm": 1.973572850227356, "learning_rate": 0.0002, "loss": 1.858, "step": 83670 }, { "epoch": 0.34, "grad_norm": 2.5079922676086426, "learning_rate": 0.0002, "loss": 1.5522, "step": 83680 }, { "epoch": 0.34, "grad_norm": 3.1127755641937256, "learning_rate": 0.0002, "loss": 1.5353, "step": 83690 }, { "epoch": 0.34, "grad_norm": 4.328488826751709, "learning_rate": 0.0002, "loss": 1.6013, "step": 83700 }, { "epoch": 0.34, "grad_norm": 2.746185064315796, "learning_rate": 0.0002, "loss": 1.2848, "step": 83710 }, { "epoch": 0.34, "grad_norm": 1.1752389669418335, "learning_rate": 0.0002, "loss": 1.5224, "step": 83720 }, { "epoch": 0.34, "grad_norm": 3.548518180847168, "learning_rate": 0.0002, "loss": 1.5443, "step": 83730 }, { "epoch": 0.34, "grad_norm": 3.722118854522705, "learning_rate": 0.0002, "loss": 1.4309, "step": 83740 }, { "epoch": 0.34, "grad_norm": 5.942780017852783, "learning_rate": 0.0002, "loss": 1.5548, "step": 83750 }, { "epoch": 0.34, "grad_norm": 2.0552361011505127, "learning_rate": 0.0002, "loss": 1.7036, "step": 83760 }, { "epoch": 0.34, "grad_norm": 1.9682809114456177, "learning_rate": 0.0002, "loss": 1.6252, "step": 83770 }, { "epoch": 0.34, "grad_norm": 3.8564422130584717, "learning_rate": 0.0002, "loss": 1.7684, "step": 83780 }, { "epoch": 0.34, "grad_norm": 2.969381093978882, "learning_rate": 0.0002, "loss": 1.4498, "step": 83790 }, { "epoch": 0.34, "grad_norm": 2.5693955421447754, "learning_rate": 0.0002, "loss": 1.6269, "step": 83800 }, { "epoch": 0.34, "grad_norm": 4.122805595397949, "learning_rate": 0.0002, "loss": 1.6723, "step": 83810 }, { "epoch": 0.34, "grad_norm": 2.2191321849823, "learning_rate": 0.0002, "loss": 1.5524, "step": 83820 }, { "epoch": 0.34, "grad_norm": 2.6665263175964355, "learning_rate": 0.0002, "loss": 1.48, "step": 83830 }, { "epoch": 0.34, "grad_norm": 2.9392075538635254, "learning_rate": 0.0002, "loss": 1.4403, "step": 83840 }, { "epoch": 0.34, "grad_norm": 3.1137290000915527, "learning_rate": 0.0002, "loss": 1.5491, "step": 83850 }, { "epoch": 0.34, "grad_norm": 2.6105294227600098, "learning_rate": 0.0002, "loss": 1.632, "step": 83860 }, { "epoch": 0.34, "grad_norm": 3.6515636444091797, "learning_rate": 0.0002, "loss": 1.4721, "step": 83870 }, { "epoch": 0.34, "grad_norm": 3.4250946044921875, "learning_rate": 0.0002, "loss": 1.6996, "step": 83880 }, { "epoch": 0.34, "grad_norm": 1.4899483919143677, "learning_rate": 0.0002, "loss": 1.6035, "step": 83890 }, { "epoch": 0.34, "grad_norm": 2.3679022789001465, "learning_rate": 0.0002, "loss": 1.6322, "step": 83900 }, { "epoch": 0.34, "grad_norm": 8.045554161071777, "learning_rate": 0.0002, "loss": 1.8098, "step": 83910 }, { "epoch": 0.34, "grad_norm": 2.1946861743927, "learning_rate": 0.0002, "loss": 1.4921, "step": 83920 }, { "epoch": 0.34, "grad_norm": 2.4988887310028076, "learning_rate": 0.0002, "loss": 1.706, "step": 83930 }, { "epoch": 0.34, "grad_norm": 2.8030080795288086, "learning_rate": 0.0002, "loss": 1.8229, "step": 83940 }, { "epoch": 0.34, "grad_norm": 3.6146953105926514, "learning_rate": 0.0002, "loss": 1.716, "step": 83950 }, { "epoch": 0.34, "grad_norm": 5.4950714111328125, "learning_rate": 0.0002, "loss": 1.636, "step": 83960 }, { "epoch": 0.34, "grad_norm": 3.5327882766723633, "learning_rate": 0.0002, "loss": 1.5188, "step": 83970 }, { "epoch": 0.34, "grad_norm": 3.04669189453125, "learning_rate": 0.0002, "loss": 1.6979, "step": 83980 }, { "epoch": 0.34, "grad_norm": 2.533679723739624, "learning_rate": 0.0002, "loss": 1.6211, "step": 83990 }, { "epoch": 0.34, "grad_norm": 2.9179015159606934, "learning_rate": 0.0002, "loss": 1.4742, "step": 84000 }, { "epoch": 0.34, "grad_norm": 3.7878096103668213, "learning_rate": 0.0002, "loss": 1.5488, "step": 84010 }, { "epoch": 0.34, "grad_norm": 3.0837669372558594, "learning_rate": 0.0002, "loss": 1.511, "step": 84020 }, { "epoch": 0.34, "grad_norm": 4.944210529327393, "learning_rate": 0.0002, "loss": 1.4143, "step": 84030 }, { "epoch": 0.34, "grad_norm": 3.5136098861694336, "learning_rate": 0.0002, "loss": 1.3124, "step": 84040 }, { "epoch": 0.34, "grad_norm": 4.332968235015869, "learning_rate": 0.0002, "loss": 1.7281, "step": 84050 }, { "epoch": 0.34, "grad_norm": 2.982394218444824, "learning_rate": 0.0002, "loss": 1.5675, "step": 84060 }, { "epoch": 0.34, "grad_norm": 2.9945459365844727, "learning_rate": 0.0002, "loss": 1.8083, "step": 84070 }, { "epoch": 0.34, "grad_norm": 2.2046337127685547, "learning_rate": 0.0002, "loss": 1.3819, "step": 84080 }, { "epoch": 0.34, "grad_norm": 3.097432851791382, "learning_rate": 0.0002, "loss": 1.5174, "step": 84090 }, { "epoch": 0.34, "grad_norm": 3.6172518730163574, "learning_rate": 0.0002, "loss": 1.3764, "step": 84100 }, { "epoch": 0.34, "grad_norm": 4.548862934112549, "learning_rate": 0.0002, "loss": 1.5822, "step": 84110 }, { "epoch": 0.34, "grad_norm": 2.5312728881835938, "learning_rate": 0.0002, "loss": 1.525, "step": 84120 }, { "epoch": 0.34, "grad_norm": 1.8826457262039185, "learning_rate": 0.0002, "loss": 1.4763, "step": 84130 }, { "epoch": 0.34, "grad_norm": 3.9551589488983154, "learning_rate": 0.0002, "loss": 1.4118, "step": 84140 }, { "epoch": 0.34, "grad_norm": 2.9103810787200928, "learning_rate": 0.0002, "loss": 1.5794, "step": 84150 }, { "epoch": 0.34, "grad_norm": 3.3632559776306152, "learning_rate": 0.0002, "loss": 1.6732, "step": 84160 }, { "epoch": 0.34, "grad_norm": 3.4834342002868652, "learning_rate": 0.0002, "loss": 1.5259, "step": 84170 }, { "epoch": 0.34, "grad_norm": 1.9427971839904785, "learning_rate": 0.0002, "loss": 1.609, "step": 84180 }, { "epoch": 0.34, "grad_norm": 1.6533284187316895, "learning_rate": 0.0002, "loss": 1.5745, "step": 84190 }, { "epoch": 0.34, "grad_norm": 3.382079601287842, "learning_rate": 0.0002, "loss": 1.4972, "step": 84200 }, { "epoch": 0.34, "grad_norm": 4.330073356628418, "learning_rate": 0.0002, "loss": 1.4578, "step": 84210 }, { "epoch": 0.34, "grad_norm": 2.714354991912842, "learning_rate": 0.0002, "loss": 1.4345, "step": 84220 }, { "epoch": 0.34, "grad_norm": 2.4938478469848633, "learning_rate": 0.0002, "loss": 1.888, "step": 84230 }, { "epoch": 0.34, "grad_norm": 3.543220043182373, "learning_rate": 0.0002, "loss": 1.7657, "step": 84240 }, { "epoch": 0.34, "grad_norm": 2.603405475616455, "learning_rate": 0.0002, "loss": 1.5314, "step": 84250 }, { "epoch": 0.34, "grad_norm": 2.299285411834717, "learning_rate": 0.0002, "loss": 1.3531, "step": 84260 }, { "epoch": 0.34, "grad_norm": 3.1916797161102295, "learning_rate": 0.0002, "loss": 1.5568, "step": 84270 }, { "epoch": 0.34, "grad_norm": 3.5622446537017822, "learning_rate": 0.0002, "loss": 1.6842, "step": 84280 }, { "epoch": 0.34, "grad_norm": 14.608893394470215, "learning_rate": 0.0002, "loss": 1.2532, "step": 84290 }, { "epoch": 0.34, "grad_norm": 3.0164380073547363, "learning_rate": 0.0002, "loss": 1.4904, "step": 84300 }, { "epoch": 0.34, "grad_norm": 4.286513805389404, "learning_rate": 0.0002, "loss": 1.5708, "step": 84310 }, { "epoch": 0.34, "grad_norm": 5.409970760345459, "learning_rate": 0.0002, "loss": 1.4181, "step": 84320 }, { "epoch": 0.34, "grad_norm": 2.257647752761841, "learning_rate": 0.0002, "loss": 1.4913, "step": 84330 }, { "epoch": 0.34, "grad_norm": 4.307560443878174, "learning_rate": 0.0002, "loss": 1.2674, "step": 84340 }, { "epoch": 0.34, "grad_norm": 2.198552370071411, "learning_rate": 0.0002, "loss": 1.7574, "step": 84350 }, { "epoch": 0.34, "grad_norm": 1.7669546604156494, "learning_rate": 0.0002, "loss": 1.7915, "step": 84360 }, { "epoch": 0.34, "grad_norm": 3.1289336681365967, "learning_rate": 0.0002, "loss": 1.6145, "step": 84370 }, { "epoch": 0.34, "grad_norm": 2.750427484512329, "learning_rate": 0.0002, "loss": 1.6375, "step": 84380 }, { "epoch": 0.34, "grad_norm": 2.3680033683776855, "learning_rate": 0.0002, "loss": 1.7235, "step": 84390 }, { "epoch": 0.34, "grad_norm": 3.407147169113159, "learning_rate": 0.0002, "loss": 1.3732, "step": 84400 }, { "epoch": 0.34, "grad_norm": 2.887585401535034, "learning_rate": 0.0002, "loss": 1.5286, "step": 84410 }, { "epoch": 0.34, "grad_norm": 2.7255218029022217, "learning_rate": 0.0002, "loss": 1.4901, "step": 84420 }, { "epoch": 0.34, "grad_norm": 4.543237686157227, "learning_rate": 0.0002, "loss": 1.436, "step": 84430 }, { "epoch": 0.34, "grad_norm": 3.4794211387634277, "learning_rate": 0.0002, "loss": 1.781, "step": 84440 }, { "epoch": 0.34, "grad_norm": 4.271875381469727, "learning_rate": 0.0002, "loss": 1.5796, "step": 84450 }, { "epoch": 0.34, "grad_norm": 4.1558613777160645, "learning_rate": 0.0002, "loss": 1.4755, "step": 84460 }, { "epoch": 0.34, "grad_norm": 3.5221059322357178, "learning_rate": 0.0002, "loss": 1.4069, "step": 84470 }, { "epoch": 0.34, "grad_norm": 7.399062156677246, "learning_rate": 0.0002, "loss": 1.507, "step": 84480 }, { "epoch": 0.34, "grad_norm": 3.2695536613464355, "learning_rate": 0.0002, "loss": 1.3061, "step": 84490 }, { "epoch": 0.34, "grad_norm": 1.6199575662612915, "learning_rate": 0.0002, "loss": 1.7619, "step": 84500 }, { "epoch": 0.34, "grad_norm": 3.398946762084961, "learning_rate": 0.0002, "loss": 1.4224, "step": 84510 }, { "epoch": 0.34, "grad_norm": 3.175123929977417, "learning_rate": 0.0002, "loss": 1.8136, "step": 84520 }, { "epoch": 0.34, "grad_norm": 2.976508617401123, "learning_rate": 0.0002, "loss": 1.3982, "step": 84530 }, { "epoch": 0.34, "grad_norm": 2.8930578231811523, "learning_rate": 0.0002, "loss": 1.4595, "step": 84540 }, { "epoch": 0.34, "grad_norm": 3.9547674655914307, "learning_rate": 0.0002, "loss": 1.4516, "step": 84550 }, { "epoch": 0.34, "grad_norm": 3.257533311843872, "learning_rate": 0.0002, "loss": 1.4583, "step": 84560 }, { "epoch": 0.34, "grad_norm": 2.344794273376465, "learning_rate": 0.0002, "loss": 1.491, "step": 84570 }, { "epoch": 0.34, "grad_norm": 2.1949665546417236, "learning_rate": 0.0002, "loss": 1.6856, "step": 84580 }, { "epoch": 0.34, "grad_norm": 3.544955253601074, "learning_rate": 0.0002, "loss": 1.5741, "step": 84590 }, { "epoch": 0.34, "grad_norm": 2.398740291595459, "learning_rate": 0.0002, "loss": 1.4053, "step": 84600 }, { "epoch": 0.34, "grad_norm": 2.213873863220215, "learning_rate": 0.0002, "loss": 1.6604, "step": 84610 }, { "epoch": 0.34, "grad_norm": 6.352673530578613, "learning_rate": 0.0002, "loss": 1.5521, "step": 84620 }, { "epoch": 0.34, "grad_norm": 3.806593418121338, "learning_rate": 0.0002, "loss": 1.4025, "step": 84630 }, { "epoch": 0.34, "grad_norm": 4.6866631507873535, "learning_rate": 0.0002, "loss": 1.6118, "step": 84640 }, { "epoch": 0.34, "grad_norm": 2.4818267822265625, "learning_rate": 0.0002, "loss": 1.4558, "step": 84650 }, { "epoch": 0.34, "grad_norm": 3.5173254013061523, "learning_rate": 0.0002, "loss": 1.6769, "step": 84660 }, { "epoch": 0.34, "grad_norm": 2.7881572246551514, "learning_rate": 0.0002, "loss": 1.3703, "step": 84670 }, { "epoch": 0.34, "grad_norm": 3.779263734817505, "learning_rate": 0.0002, "loss": 1.3661, "step": 84680 }, { "epoch": 0.34, "grad_norm": 2.0043978691101074, "learning_rate": 0.0002, "loss": 1.4764, "step": 84690 }, { "epoch": 0.34, "grad_norm": 2.505417585372925, "learning_rate": 0.0002, "loss": 1.689, "step": 84700 }, { "epoch": 0.34, "grad_norm": 2.458798885345459, "learning_rate": 0.0002, "loss": 1.6586, "step": 84710 }, { "epoch": 0.34, "grad_norm": 3.1343114376068115, "learning_rate": 0.0002, "loss": 1.5632, "step": 84720 }, { "epoch": 0.34, "grad_norm": 2.895805597305298, "learning_rate": 0.0002, "loss": 1.5019, "step": 84730 }, { "epoch": 0.34, "grad_norm": 2.442864179611206, "learning_rate": 0.0002, "loss": 1.7598, "step": 84740 }, { "epoch": 0.35, "grad_norm": 2.7997097969055176, "learning_rate": 0.0002, "loss": 1.4923, "step": 84750 }, { "epoch": 0.35, "grad_norm": 6.153561115264893, "learning_rate": 0.0002, "loss": 1.4893, "step": 84760 }, { "epoch": 0.35, "grad_norm": 2.5757007598876953, "learning_rate": 0.0002, "loss": 1.4369, "step": 84770 }, { "epoch": 0.35, "grad_norm": 2.798802137374878, "learning_rate": 0.0002, "loss": 1.3407, "step": 84780 }, { "epoch": 0.35, "grad_norm": 3.7768654823303223, "learning_rate": 0.0002, "loss": 1.4076, "step": 84790 }, { "epoch": 0.35, "grad_norm": 2.684235095977783, "learning_rate": 0.0002, "loss": 1.9549, "step": 84800 }, { "epoch": 0.35, "grad_norm": 3.4509668350219727, "learning_rate": 0.0002, "loss": 1.6422, "step": 84810 }, { "epoch": 0.35, "grad_norm": 4.918641567230225, "learning_rate": 0.0002, "loss": 1.4792, "step": 84820 }, { "epoch": 0.35, "grad_norm": 6.1877570152282715, "learning_rate": 0.0002, "loss": 1.7171, "step": 84830 }, { "epoch": 0.35, "grad_norm": 2.329800844192505, "learning_rate": 0.0002, "loss": 1.5653, "step": 84840 }, { "epoch": 0.35, "grad_norm": 2.59102201461792, "learning_rate": 0.0002, "loss": 1.9504, "step": 84850 }, { "epoch": 0.35, "grad_norm": 3.094316244125366, "learning_rate": 0.0002, "loss": 1.684, "step": 84860 }, { "epoch": 0.35, "grad_norm": 4.8089141845703125, "learning_rate": 0.0002, "loss": 1.6407, "step": 84870 }, { "epoch": 0.35, "grad_norm": 2.8050320148468018, "learning_rate": 0.0002, "loss": 1.4379, "step": 84880 }, { "epoch": 0.35, "grad_norm": 2.342654228210449, "learning_rate": 0.0002, "loss": 1.3858, "step": 84890 }, { "epoch": 0.35, "grad_norm": 4.183233737945557, "learning_rate": 0.0002, "loss": 1.6679, "step": 84900 }, { "epoch": 0.35, "grad_norm": 3.4459991455078125, "learning_rate": 0.0002, "loss": 1.6097, "step": 84910 }, { "epoch": 0.35, "grad_norm": 1.9769891500473022, "learning_rate": 0.0002, "loss": 1.7363, "step": 84920 }, { "epoch": 0.35, "grad_norm": 3.2040884494781494, "learning_rate": 0.0002, "loss": 1.5572, "step": 84930 }, { "epoch": 0.35, "grad_norm": 3.2140815258026123, "learning_rate": 0.0002, "loss": 1.5677, "step": 84940 }, { "epoch": 0.35, "grad_norm": 4.582052230834961, "learning_rate": 0.0002, "loss": 1.5574, "step": 84950 }, { "epoch": 0.35, "grad_norm": 3.479463577270508, "learning_rate": 0.0002, "loss": 1.5757, "step": 84960 }, { "epoch": 0.35, "grad_norm": 2.3005151748657227, "learning_rate": 0.0002, "loss": 1.6278, "step": 84970 }, { "epoch": 0.35, "grad_norm": 2.3697733879089355, "learning_rate": 0.0002, "loss": 1.6045, "step": 84980 }, { "epoch": 0.35, "grad_norm": 3.3438844680786133, "learning_rate": 0.0002, "loss": 1.5531, "step": 84990 }, { "epoch": 0.35, "grad_norm": 3.0818910598754883, "learning_rate": 0.0002, "loss": 1.5307, "step": 85000 }, { "epoch": 0.35, "grad_norm": 3.243053913116455, "learning_rate": 0.0002, "loss": 1.4006, "step": 85010 }, { "epoch": 0.35, "grad_norm": 1.9023668766021729, "learning_rate": 0.0002, "loss": 1.6378, "step": 85020 }, { "epoch": 0.35, "grad_norm": 3.921287775039673, "learning_rate": 0.0002, "loss": 1.5947, "step": 85030 }, { "epoch": 0.35, "grad_norm": 3.438314199447632, "learning_rate": 0.0002, "loss": 1.8112, "step": 85040 }, { "epoch": 0.35, "grad_norm": 5.149091720581055, "learning_rate": 0.0002, "loss": 1.4083, "step": 85050 }, { "epoch": 0.35, "grad_norm": 2.668605089187622, "learning_rate": 0.0002, "loss": 1.6283, "step": 85060 }, { "epoch": 0.35, "grad_norm": 2.75372314453125, "learning_rate": 0.0002, "loss": 1.3981, "step": 85070 }, { "epoch": 0.35, "grad_norm": 2.5667121410369873, "learning_rate": 0.0002, "loss": 1.4467, "step": 85080 }, { "epoch": 0.35, "grad_norm": 2.41877818107605, "learning_rate": 0.0002, "loss": 1.598, "step": 85090 }, { "epoch": 0.35, "grad_norm": 1.7571207284927368, "learning_rate": 0.0002, "loss": 1.5501, "step": 85100 }, { "epoch": 0.35, "grad_norm": 4.056834697723389, "learning_rate": 0.0002, "loss": 1.4847, "step": 85110 }, { "epoch": 0.35, "grad_norm": 2.849454641342163, "learning_rate": 0.0002, "loss": 1.743, "step": 85120 }, { "epoch": 0.35, "grad_norm": 1.890178918838501, "learning_rate": 0.0002, "loss": 1.508, "step": 85130 }, { "epoch": 0.35, "grad_norm": 3.7848753929138184, "learning_rate": 0.0002, "loss": 1.6007, "step": 85140 }, { "epoch": 0.35, "grad_norm": 2.3518402576446533, "learning_rate": 0.0002, "loss": 1.7697, "step": 85150 }, { "epoch": 0.35, "grad_norm": 3.249964714050293, "learning_rate": 0.0002, "loss": 1.6698, "step": 85160 }, { "epoch": 0.35, "grad_norm": 1.9190583229064941, "learning_rate": 0.0002, "loss": 1.309, "step": 85170 }, { "epoch": 0.35, "grad_norm": 2.33186674118042, "learning_rate": 0.0002, "loss": 1.5724, "step": 85180 }, { "epoch": 0.35, "grad_norm": 3.6779074668884277, "learning_rate": 0.0002, "loss": 1.6409, "step": 85190 }, { "epoch": 0.35, "grad_norm": 4.6274614334106445, "learning_rate": 0.0002, "loss": 1.7245, "step": 85200 }, { "epoch": 0.35, "grad_norm": 2.7656514644622803, "learning_rate": 0.0002, "loss": 1.4146, "step": 85210 }, { "epoch": 0.35, "grad_norm": 4.238759994506836, "learning_rate": 0.0002, "loss": 1.5345, "step": 85220 }, { "epoch": 0.35, "grad_norm": 2.541820526123047, "learning_rate": 0.0002, "loss": 1.6303, "step": 85230 }, { "epoch": 0.35, "grad_norm": 2.7886526584625244, "learning_rate": 0.0002, "loss": 1.3356, "step": 85240 }, { "epoch": 0.35, "grad_norm": 3.854278326034546, "learning_rate": 0.0002, "loss": 1.4006, "step": 85250 }, { "epoch": 0.35, "grad_norm": 3.2382373809814453, "learning_rate": 0.0002, "loss": 1.3794, "step": 85260 }, { "epoch": 0.35, "grad_norm": 3.721985340118408, "learning_rate": 0.0002, "loss": 1.6025, "step": 85270 }, { "epoch": 0.35, "grad_norm": 3.927868127822876, "learning_rate": 0.0002, "loss": 1.4875, "step": 85280 }, { "epoch": 0.35, "grad_norm": 2.9291746616363525, "learning_rate": 0.0002, "loss": 1.6324, "step": 85290 }, { "epoch": 0.35, "grad_norm": 3.4622714519500732, "learning_rate": 0.0002, "loss": 1.7616, "step": 85300 }, { "epoch": 0.35, "grad_norm": 2.4362471103668213, "learning_rate": 0.0002, "loss": 1.5504, "step": 85310 }, { "epoch": 0.35, "grad_norm": 2.6497631072998047, "learning_rate": 0.0002, "loss": 1.3659, "step": 85320 }, { "epoch": 0.35, "grad_norm": 2.08243989944458, "learning_rate": 0.0002, "loss": 1.4588, "step": 85330 }, { "epoch": 0.35, "grad_norm": 3.854118824005127, "learning_rate": 0.0002, "loss": 1.4356, "step": 85340 }, { "epoch": 0.35, "grad_norm": 2.1674435138702393, "learning_rate": 0.0002, "loss": 1.5275, "step": 85350 }, { "epoch": 0.35, "grad_norm": 2.3991382122039795, "learning_rate": 0.0002, "loss": 1.2602, "step": 85360 }, { "epoch": 0.35, "grad_norm": 3.286292314529419, "learning_rate": 0.0002, "loss": 1.7601, "step": 85370 }, { "epoch": 0.35, "grad_norm": 3.347170829772949, "learning_rate": 0.0002, "loss": 1.7326, "step": 85380 }, { "epoch": 0.35, "grad_norm": 3.2458176612854004, "learning_rate": 0.0002, "loss": 1.4361, "step": 85390 }, { "epoch": 0.35, "grad_norm": 2.8470144271850586, "learning_rate": 0.0002, "loss": 1.4692, "step": 85400 }, { "epoch": 0.35, "grad_norm": 3.155651569366455, "learning_rate": 0.0002, "loss": 1.789, "step": 85410 }, { "epoch": 0.35, "grad_norm": 0.9284573197364807, "learning_rate": 0.0002, "loss": 1.383, "step": 85420 }, { "epoch": 0.35, "grad_norm": 3.0593135356903076, "learning_rate": 0.0002, "loss": 1.3713, "step": 85430 }, { "epoch": 0.35, "grad_norm": 1.8830983638763428, "learning_rate": 0.0002, "loss": 1.6181, "step": 85440 }, { "epoch": 0.35, "grad_norm": 3.8016743659973145, "learning_rate": 0.0002, "loss": 1.6207, "step": 85450 }, { "epoch": 0.35, "grad_norm": 2.0849111080169678, "learning_rate": 0.0002, "loss": 1.8166, "step": 85460 }, { "epoch": 0.35, "grad_norm": 1.8148428201675415, "learning_rate": 0.0002, "loss": 1.4363, "step": 85470 }, { "epoch": 0.35, "grad_norm": 2.5612502098083496, "learning_rate": 0.0002, "loss": 1.6936, "step": 85480 }, { "epoch": 0.35, "grad_norm": 1.673599123954773, "learning_rate": 0.0002, "loss": 1.5729, "step": 85490 }, { "epoch": 0.35, "grad_norm": 1.7982099056243896, "learning_rate": 0.0002, "loss": 1.507, "step": 85500 }, { "epoch": 0.35, "grad_norm": 2.192356824874878, "learning_rate": 0.0002, "loss": 1.5806, "step": 85510 }, { "epoch": 0.35, "grad_norm": 2.4996025562286377, "learning_rate": 0.0002, "loss": 1.5054, "step": 85520 }, { "epoch": 0.35, "grad_norm": 2.808833360671997, "learning_rate": 0.0002, "loss": 1.6471, "step": 85530 }, { "epoch": 0.35, "grad_norm": 3.2881338596343994, "learning_rate": 0.0002, "loss": 1.5197, "step": 85540 }, { "epoch": 0.35, "grad_norm": 2.79292893409729, "learning_rate": 0.0002, "loss": 1.6535, "step": 85550 }, { "epoch": 0.35, "grad_norm": 3.2485601902008057, "learning_rate": 0.0002, "loss": 1.5123, "step": 85560 }, { "epoch": 0.35, "grad_norm": 3.5245866775512695, "learning_rate": 0.0002, "loss": 1.5983, "step": 85570 }, { "epoch": 0.35, "grad_norm": 4.510332107543945, "learning_rate": 0.0002, "loss": 1.718, "step": 85580 }, { "epoch": 0.35, "grad_norm": 2.407001256942749, "learning_rate": 0.0002, "loss": 1.7795, "step": 85590 }, { "epoch": 0.35, "grad_norm": 1.9543253183364868, "learning_rate": 0.0002, "loss": 1.5047, "step": 85600 }, { "epoch": 0.35, "grad_norm": 3.915879011154175, "learning_rate": 0.0002, "loss": 1.671, "step": 85610 }, { "epoch": 0.35, "grad_norm": 2.255577325820923, "learning_rate": 0.0002, "loss": 1.8174, "step": 85620 }, { "epoch": 0.35, "grad_norm": 3.290224552154541, "learning_rate": 0.0002, "loss": 1.4706, "step": 85630 }, { "epoch": 0.35, "grad_norm": 1.8387809991836548, "learning_rate": 0.0002, "loss": 1.5833, "step": 85640 }, { "epoch": 0.35, "grad_norm": 2.5615391731262207, "learning_rate": 0.0002, "loss": 1.5296, "step": 85650 }, { "epoch": 0.35, "grad_norm": 2.8552889823913574, "learning_rate": 0.0002, "loss": 1.6119, "step": 85660 }, { "epoch": 0.35, "grad_norm": 4.602022647857666, "learning_rate": 0.0002, "loss": 1.5734, "step": 85670 }, { "epoch": 0.35, "grad_norm": 3.8745040893554688, "learning_rate": 0.0002, "loss": 1.7422, "step": 85680 }, { "epoch": 0.35, "grad_norm": 2.37420392036438, "learning_rate": 0.0002, "loss": 1.5758, "step": 85690 }, { "epoch": 0.35, "grad_norm": 3.0593409538269043, "learning_rate": 0.0002, "loss": 1.6915, "step": 85700 }, { "epoch": 0.35, "grad_norm": 1.880055546760559, "learning_rate": 0.0002, "loss": 1.344, "step": 85710 }, { "epoch": 0.35, "grad_norm": 2.584571123123169, "learning_rate": 0.0002, "loss": 1.796, "step": 85720 }, { "epoch": 0.35, "grad_norm": 3.295269727706909, "learning_rate": 0.0002, "loss": 1.5496, "step": 85730 }, { "epoch": 0.35, "grad_norm": 3.4546701908111572, "learning_rate": 0.0002, "loss": 1.6367, "step": 85740 }, { "epoch": 0.35, "grad_norm": 2.7850003242492676, "learning_rate": 0.0002, "loss": 1.4986, "step": 85750 }, { "epoch": 0.35, "grad_norm": 2.5221900939941406, "learning_rate": 0.0002, "loss": 1.9177, "step": 85760 }, { "epoch": 0.35, "grad_norm": 2.0698487758636475, "learning_rate": 0.0002, "loss": 1.5462, "step": 85770 }, { "epoch": 0.35, "grad_norm": 2.371452808380127, "learning_rate": 0.0002, "loss": 1.7352, "step": 85780 }, { "epoch": 0.35, "grad_norm": 2.23705792427063, "learning_rate": 0.0002, "loss": 1.6389, "step": 85790 }, { "epoch": 0.35, "grad_norm": 4.066937446594238, "learning_rate": 0.0002, "loss": 1.4904, "step": 85800 }, { "epoch": 0.35, "grad_norm": 2.3059372901916504, "learning_rate": 0.0002, "loss": 1.3532, "step": 85810 }, { "epoch": 0.35, "grad_norm": 3.6387579441070557, "learning_rate": 0.0002, "loss": 1.7998, "step": 85820 }, { "epoch": 0.35, "grad_norm": 3.092134952545166, "learning_rate": 0.0002, "loss": 1.7519, "step": 85830 }, { "epoch": 0.35, "grad_norm": 2.765559434890747, "learning_rate": 0.0002, "loss": 1.5168, "step": 85840 }, { "epoch": 0.35, "grad_norm": 3.9888994693756104, "learning_rate": 0.0002, "loss": 1.7443, "step": 85850 }, { "epoch": 0.35, "grad_norm": 3.920438289642334, "learning_rate": 0.0002, "loss": 1.54, "step": 85860 }, { "epoch": 0.35, "grad_norm": 1.9207547903060913, "learning_rate": 0.0002, "loss": 1.4715, "step": 85870 }, { "epoch": 0.35, "grad_norm": 2.0071704387664795, "learning_rate": 0.0002, "loss": 1.6429, "step": 85880 }, { "epoch": 0.35, "grad_norm": 2.434494733810425, "learning_rate": 0.0002, "loss": 1.5749, "step": 85890 }, { "epoch": 0.35, "grad_norm": 2.6850674152374268, "learning_rate": 0.0002, "loss": 1.6089, "step": 85900 }, { "epoch": 0.35, "grad_norm": 1.7782636880874634, "learning_rate": 0.0002, "loss": 1.6663, "step": 85910 }, { "epoch": 0.35, "grad_norm": 2.2694811820983887, "learning_rate": 0.0002, "loss": 1.4416, "step": 85920 }, { "epoch": 0.35, "grad_norm": 3.0812833309173584, "learning_rate": 0.0002, "loss": 1.5892, "step": 85930 }, { "epoch": 0.35, "grad_norm": 1.9282238483428955, "learning_rate": 0.0002, "loss": 1.6167, "step": 85940 }, { "epoch": 0.35, "grad_norm": 4.318924903869629, "learning_rate": 0.0002, "loss": 1.6937, "step": 85950 }, { "epoch": 0.35, "grad_norm": 3.466829299926758, "learning_rate": 0.0002, "loss": 1.3631, "step": 85960 }, { "epoch": 0.35, "grad_norm": 2.4406492710113525, "learning_rate": 0.0002, "loss": 1.3789, "step": 85970 }, { "epoch": 0.35, "grad_norm": 4.114589691162109, "learning_rate": 0.0002, "loss": 1.48, "step": 85980 }, { "epoch": 0.35, "grad_norm": 3.464778423309326, "learning_rate": 0.0002, "loss": 1.7043, "step": 85990 }, { "epoch": 0.35, "grad_norm": 3.153893232345581, "learning_rate": 0.0002, "loss": 1.5559, "step": 86000 }, { "epoch": 0.35, "grad_norm": 1.4556370973587036, "learning_rate": 0.0002, "loss": 1.404, "step": 86010 }, { "epoch": 0.35, "grad_norm": 3.1383609771728516, "learning_rate": 0.0002, "loss": 1.562, "step": 86020 }, { "epoch": 0.35, "grad_norm": 3.4574759006500244, "learning_rate": 0.0002, "loss": 1.5744, "step": 86030 }, { "epoch": 0.35, "grad_norm": 3.3289735317230225, "learning_rate": 0.0002, "loss": 1.7386, "step": 86040 }, { "epoch": 0.35, "grad_norm": 3.575960874557495, "learning_rate": 0.0002, "loss": 1.4291, "step": 86050 }, { "epoch": 0.35, "grad_norm": 2.0273754596710205, "learning_rate": 0.0002, "loss": 1.8338, "step": 86060 }, { "epoch": 0.35, "grad_norm": 3.246664524078369, "learning_rate": 0.0002, "loss": 1.6072, "step": 86070 }, { "epoch": 0.35, "grad_norm": 5.994160175323486, "learning_rate": 0.0002, "loss": 1.4039, "step": 86080 }, { "epoch": 0.35, "grad_norm": 4.2870378494262695, "learning_rate": 0.0002, "loss": 1.5755, "step": 86090 }, { "epoch": 0.35, "grad_norm": 4.944954872131348, "learning_rate": 0.0002, "loss": 1.6055, "step": 86100 }, { "epoch": 0.35, "grad_norm": 3.0709285736083984, "learning_rate": 0.0002, "loss": 1.5993, "step": 86110 }, { "epoch": 0.35, "grad_norm": 2.773249864578247, "learning_rate": 0.0002, "loss": 1.4992, "step": 86120 }, { "epoch": 0.35, "grad_norm": 2.8007214069366455, "learning_rate": 0.0002, "loss": 1.4512, "step": 86130 }, { "epoch": 0.35, "grad_norm": 3.0189921855926514, "learning_rate": 0.0002, "loss": 1.613, "step": 86140 }, { "epoch": 0.35, "grad_norm": 2.7427709102630615, "learning_rate": 0.0002, "loss": 1.7752, "step": 86150 }, { "epoch": 0.35, "grad_norm": 2.857828140258789, "learning_rate": 0.0002, "loss": 1.5845, "step": 86160 }, { "epoch": 0.35, "grad_norm": 3.1839866638183594, "learning_rate": 0.0002, "loss": 1.5468, "step": 86170 }, { "epoch": 0.35, "grad_norm": 2.5476179122924805, "learning_rate": 0.0002, "loss": 1.5388, "step": 86180 }, { "epoch": 0.35, "grad_norm": 2.8963871002197266, "learning_rate": 0.0002, "loss": 1.7132, "step": 86190 }, { "epoch": 0.35, "grad_norm": 3.0824761390686035, "learning_rate": 0.0002, "loss": 1.6029, "step": 86200 }, { "epoch": 0.35, "grad_norm": 2.0279133319854736, "learning_rate": 0.0002, "loss": 1.5259, "step": 86210 }, { "epoch": 0.35, "grad_norm": 3.4121344089508057, "learning_rate": 0.0002, "loss": 1.2687, "step": 86220 }, { "epoch": 0.35, "grad_norm": 2.6955058574676514, "learning_rate": 0.0002, "loss": 1.5811, "step": 86230 }, { "epoch": 0.35, "grad_norm": 5.101370334625244, "learning_rate": 0.0002, "loss": 1.5428, "step": 86240 }, { "epoch": 0.35, "grad_norm": 4.610106945037842, "learning_rate": 0.0002, "loss": 1.7141, "step": 86250 }, { "epoch": 0.35, "grad_norm": 4.299625873565674, "learning_rate": 0.0002, "loss": 1.6244, "step": 86260 }, { "epoch": 0.35, "grad_norm": 2.515451669692993, "learning_rate": 0.0002, "loss": 1.4376, "step": 86270 }, { "epoch": 0.35, "grad_norm": 2.106088638305664, "learning_rate": 0.0002, "loss": 1.6652, "step": 86280 }, { "epoch": 0.35, "grad_norm": 3.873718738555908, "learning_rate": 0.0002, "loss": 1.3103, "step": 86290 }, { "epoch": 0.35, "grad_norm": 3.0248372554779053, "learning_rate": 0.0002, "loss": 1.4981, "step": 86300 }, { "epoch": 0.35, "grad_norm": 4.934451580047607, "learning_rate": 0.0002, "loss": 1.7074, "step": 86310 }, { "epoch": 0.35, "grad_norm": 2.2458744049072266, "learning_rate": 0.0002, "loss": 1.5982, "step": 86320 }, { "epoch": 0.35, "grad_norm": 2.998389959335327, "learning_rate": 0.0002, "loss": 1.4247, "step": 86330 }, { "epoch": 0.35, "grad_norm": 3.1288106441497803, "learning_rate": 0.0002, "loss": 1.5603, "step": 86340 }, { "epoch": 0.35, "grad_norm": 5.144457817077637, "learning_rate": 0.0002, "loss": 1.5197, "step": 86350 }, { "epoch": 0.35, "grad_norm": 2.1532931327819824, "learning_rate": 0.0002, "loss": 1.3628, "step": 86360 }, { "epoch": 0.35, "grad_norm": 2.82810378074646, "learning_rate": 0.0002, "loss": 1.6164, "step": 86370 }, { "epoch": 0.35, "grad_norm": 2.4714479446411133, "learning_rate": 0.0002, "loss": 1.4715, "step": 86380 }, { "epoch": 0.35, "grad_norm": 3.3257274627685547, "learning_rate": 0.0002, "loss": 1.5032, "step": 86390 }, { "epoch": 0.35, "grad_norm": 4.391778945922852, "learning_rate": 0.0002, "loss": 1.4206, "step": 86400 }, { "epoch": 0.35, "grad_norm": 4.062916278839111, "learning_rate": 0.0002, "loss": 1.7152, "step": 86410 }, { "epoch": 0.35, "grad_norm": 1.8950761556625366, "learning_rate": 0.0002, "loss": 1.6222, "step": 86420 }, { "epoch": 0.35, "grad_norm": 5.3842926025390625, "learning_rate": 0.0002, "loss": 1.7114, "step": 86430 }, { "epoch": 0.35, "grad_norm": 4.966969013214111, "learning_rate": 0.0002, "loss": 1.5885, "step": 86440 }, { "epoch": 0.35, "grad_norm": 3.8773770332336426, "learning_rate": 0.0002, "loss": 1.7014, "step": 86450 }, { "epoch": 0.35, "grad_norm": 4.501932621002197, "learning_rate": 0.0002, "loss": 1.7087, "step": 86460 }, { "epoch": 0.35, "grad_norm": 1.7782567739486694, "learning_rate": 0.0002, "loss": 1.6421, "step": 86470 }, { "epoch": 0.35, "grad_norm": 5.2443528175354, "learning_rate": 0.0002, "loss": 1.6688, "step": 86480 }, { "epoch": 0.35, "grad_norm": 2.572600841522217, "learning_rate": 0.0002, "loss": 1.3909, "step": 86490 }, { "epoch": 0.35, "grad_norm": 1.8389832973480225, "learning_rate": 0.0002, "loss": 1.4977, "step": 86500 }, { "epoch": 0.35, "grad_norm": 4.789827346801758, "learning_rate": 0.0002, "loss": 1.3648, "step": 86510 }, { "epoch": 0.35, "grad_norm": 2.2269208431243896, "learning_rate": 0.0002, "loss": 1.5699, "step": 86520 }, { "epoch": 0.35, "grad_norm": 2.701326608657837, "learning_rate": 0.0002, "loss": 1.5163, "step": 86530 }, { "epoch": 0.35, "grad_norm": 3.266521453857422, "learning_rate": 0.0002, "loss": 1.1207, "step": 86540 }, { "epoch": 0.35, "grad_norm": 3.593992233276367, "learning_rate": 0.0002, "loss": 1.8583, "step": 86550 }, { "epoch": 0.35, "grad_norm": 2.2711706161499023, "learning_rate": 0.0002, "loss": 1.6237, "step": 86560 }, { "epoch": 0.35, "grad_norm": 5.838717460632324, "learning_rate": 0.0002, "loss": 1.6015, "step": 86570 }, { "epoch": 0.35, "grad_norm": 2.5727415084838867, "learning_rate": 0.0002, "loss": 1.6233, "step": 86580 }, { "epoch": 0.35, "grad_norm": 2.1539692878723145, "learning_rate": 0.0002, "loss": 1.4363, "step": 86590 }, { "epoch": 0.35, "grad_norm": 3.5564823150634766, "learning_rate": 0.0002, "loss": 1.5932, "step": 86600 }, { "epoch": 0.35, "grad_norm": 2.232938289642334, "learning_rate": 0.0002, "loss": 1.522, "step": 86610 }, { "epoch": 0.35, "grad_norm": 2.530855178833008, "learning_rate": 0.0002, "loss": 1.4228, "step": 86620 }, { "epoch": 0.35, "grad_norm": 4.0182671546936035, "learning_rate": 0.0002, "loss": 1.7366, "step": 86630 }, { "epoch": 0.35, "grad_norm": 4.076262950897217, "learning_rate": 0.0002, "loss": 1.5816, "step": 86640 }, { "epoch": 0.35, "grad_norm": 2.2422797679901123, "learning_rate": 0.0002, "loss": 1.7453, "step": 86650 }, { "epoch": 0.35, "grad_norm": 2.3309335708618164, "learning_rate": 0.0002, "loss": 1.4749, "step": 86660 }, { "epoch": 0.35, "grad_norm": 2.9920544624328613, "learning_rate": 0.0002, "loss": 1.2918, "step": 86670 }, { "epoch": 0.35, "grad_norm": 3.0310401916503906, "learning_rate": 0.0002, "loss": 1.8125, "step": 86680 }, { "epoch": 0.35, "grad_norm": 3.2562520503997803, "learning_rate": 0.0002, "loss": 1.3699, "step": 86690 }, { "epoch": 0.35, "grad_norm": 2.5820744037628174, "learning_rate": 0.0002, "loss": 1.5107, "step": 86700 }, { "epoch": 0.35, "grad_norm": 4.797235012054443, "learning_rate": 0.0002, "loss": 1.8243, "step": 86710 }, { "epoch": 0.35, "grad_norm": 2.8453304767608643, "learning_rate": 0.0002, "loss": 1.9364, "step": 86720 }, { "epoch": 0.35, "grad_norm": 2.631922483444214, "learning_rate": 0.0002, "loss": 1.5328, "step": 86730 }, { "epoch": 0.35, "grad_norm": 3.588514566421509, "learning_rate": 0.0002, "loss": 1.8844, "step": 86740 }, { "epoch": 0.35, "grad_norm": 2.447659730911255, "learning_rate": 0.0002, "loss": 1.6375, "step": 86750 }, { "epoch": 0.35, "grad_norm": 3.241302013397217, "learning_rate": 0.0002, "loss": 1.4432, "step": 86760 }, { "epoch": 0.35, "grad_norm": 3.6614716053009033, "learning_rate": 0.0002, "loss": 1.5226, "step": 86770 }, { "epoch": 0.35, "grad_norm": 1.1043919324874878, "learning_rate": 0.0002, "loss": 1.6869, "step": 86780 }, { "epoch": 0.35, "grad_norm": 2.8913419246673584, "learning_rate": 0.0002, "loss": 1.5197, "step": 86790 }, { "epoch": 0.35, "grad_norm": 3.009425163269043, "learning_rate": 0.0002, "loss": 1.5618, "step": 86800 }, { "epoch": 0.35, "grad_norm": 4.898156642913818, "learning_rate": 0.0002, "loss": 1.6686, "step": 86810 }, { "epoch": 0.35, "grad_norm": 4.848369598388672, "learning_rate": 0.0002, "loss": 1.6344, "step": 86820 }, { "epoch": 0.35, "grad_norm": 2.4394302368164062, "learning_rate": 0.0002, "loss": 1.6975, "step": 86830 }, { "epoch": 0.35, "grad_norm": 3.013324737548828, "learning_rate": 0.0002, "loss": 1.7423, "step": 86840 }, { "epoch": 0.35, "grad_norm": 3.2792012691497803, "learning_rate": 0.0002, "loss": 1.6051, "step": 86850 }, { "epoch": 0.35, "grad_norm": 3.64064621925354, "learning_rate": 0.0002, "loss": 1.7939, "step": 86860 }, { "epoch": 0.35, "grad_norm": 2.4147353172302246, "learning_rate": 0.0002, "loss": 1.4958, "step": 86870 }, { "epoch": 0.35, "grad_norm": 2.7790608406066895, "learning_rate": 0.0002, "loss": 1.2793, "step": 86880 }, { "epoch": 0.35, "grad_norm": 3.453199863433838, "learning_rate": 0.0002, "loss": 1.7811, "step": 86890 }, { "epoch": 0.35, "grad_norm": 2.1212000846862793, "learning_rate": 0.0002, "loss": 1.7971, "step": 86900 }, { "epoch": 0.35, "grad_norm": 2.1618263721466064, "learning_rate": 0.0002, "loss": 1.3966, "step": 86910 }, { "epoch": 0.35, "grad_norm": 2.8609061241149902, "learning_rate": 0.0002, "loss": 1.4614, "step": 86920 }, { "epoch": 0.35, "grad_norm": 2.910754680633545, "learning_rate": 0.0002, "loss": 1.4404, "step": 86930 }, { "epoch": 0.35, "grad_norm": 2.6782422065734863, "learning_rate": 0.0002, "loss": 1.4661, "step": 86940 }, { "epoch": 0.35, "grad_norm": 4.64600133895874, "learning_rate": 0.0002, "loss": 1.38, "step": 86950 }, { "epoch": 0.35, "grad_norm": 2.6277945041656494, "learning_rate": 0.0002, "loss": 1.6235, "step": 86960 }, { "epoch": 0.35, "grad_norm": 2.087322950363159, "learning_rate": 0.0002, "loss": 1.4244, "step": 86970 }, { "epoch": 0.35, "grad_norm": 1.5099685192108154, "learning_rate": 0.0002, "loss": 1.5615, "step": 86980 }, { "epoch": 0.35, "grad_norm": 3.561368703842163, "learning_rate": 0.0002, "loss": 1.6929, "step": 86990 }, { "epoch": 0.35, "grad_norm": 2.8537375926971436, "learning_rate": 0.0002, "loss": 1.5489, "step": 87000 }, { "epoch": 0.35, "grad_norm": 2.3129756450653076, "learning_rate": 0.0002, "loss": 1.4153, "step": 87010 }, { "epoch": 0.35, "grad_norm": 3.634761333465576, "learning_rate": 0.0002, "loss": 1.5087, "step": 87020 }, { "epoch": 0.35, "grad_norm": 3.3699584007263184, "learning_rate": 0.0002, "loss": 1.4021, "step": 87030 }, { "epoch": 0.35, "grad_norm": 2.2692630290985107, "learning_rate": 0.0002, "loss": 1.4092, "step": 87040 }, { "epoch": 0.35, "grad_norm": 3.76894211769104, "learning_rate": 0.0002, "loss": 1.4033, "step": 87050 }, { "epoch": 0.35, "grad_norm": 3.7875683307647705, "learning_rate": 0.0002, "loss": 1.8192, "step": 87060 }, { "epoch": 0.35, "grad_norm": 2.384002923965454, "learning_rate": 0.0002, "loss": 1.4539, "step": 87070 }, { "epoch": 0.35, "grad_norm": 2.8944292068481445, "learning_rate": 0.0002, "loss": 1.5888, "step": 87080 }, { "epoch": 0.35, "grad_norm": 3.7115163803100586, "learning_rate": 0.0002, "loss": 1.5822, "step": 87090 }, { "epoch": 0.35, "grad_norm": 2.5754120349884033, "learning_rate": 0.0002, "loss": 1.5244, "step": 87100 }, { "epoch": 0.35, "grad_norm": 3.1991963386535645, "learning_rate": 0.0002, "loss": 1.4801, "step": 87110 }, { "epoch": 0.35, "grad_norm": 3.1384661197662354, "learning_rate": 0.0002, "loss": 1.848, "step": 87120 }, { "epoch": 0.35, "grad_norm": 5.27853536605835, "learning_rate": 0.0002, "loss": 1.5706, "step": 87130 }, { "epoch": 0.35, "grad_norm": 2.5541205406188965, "learning_rate": 0.0002, "loss": 1.5603, "step": 87140 }, { "epoch": 0.35, "grad_norm": 3.925344944000244, "learning_rate": 0.0002, "loss": 1.59, "step": 87150 }, { "epoch": 0.35, "grad_norm": 2.3545618057250977, "learning_rate": 0.0002, "loss": 1.3605, "step": 87160 }, { "epoch": 0.35, "grad_norm": 2.5918636322021484, "learning_rate": 0.0002, "loss": 1.5371, "step": 87170 }, { "epoch": 0.35, "grad_norm": 2.69123911857605, "learning_rate": 0.0002, "loss": 1.1579, "step": 87180 }, { "epoch": 0.35, "grad_norm": 1.6643922328948975, "learning_rate": 0.0002, "loss": 1.3827, "step": 87190 }, { "epoch": 0.35, "grad_norm": 3.77681040763855, "learning_rate": 0.0002, "loss": 1.7085, "step": 87200 }, { "epoch": 0.36, "grad_norm": 2.1289076805114746, "learning_rate": 0.0002, "loss": 1.6079, "step": 87210 }, { "epoch": 0.36, "grad_norm": 2.7848031520843506, "learning_rate": 0.0002, "loss": 1.412, "step": 87220 }, { "epoch": 0.36, "grad_norm": 7.162590980529785, "learning_rate": 0.0002, "loss": 1.7524, "step": 87230 }, { "epoch": 0.36, "grad_norm": 3.2200818061828613, "learning_rate": 0.0002, "loss": 1.7061, "step": 87240 }, { "epoch": 0.36, "grad_norm": 3.500241994857788, "learning_rate": 0.0002, "loss": 1.8132, "step": 87250 }, { "epoch": 0.36, "grad_norm": 3.1805922985076904, "learning_rate": 0.0002, "loss": 1.7578, "step": 87260 }, { "epoch": 0.36, "grad_norm": 2.821436643600464, "learning_rate": 0.0002, "loss": 1.3923, "step": 87270 }, { "epoch": 0.36, "grad_norm": 3.40339732170105, "learning_rate": 0.0002, "loss": 1.6043, "step": 87280 }, { "epoch": 0.36, "grad_norm": 2.670199155807495, "learning_rate": 0.0002, "loss": 1.5863, "step": 87290 }, { "epoch": 0.36, "grad_norm": 2.064115524291992, "learning_rate": 0.0002, "loss": 1.6872, "step": 87300 }, { "epoch": 0.36, "grad_norm": 2.9610936641693115, "learning_rate": 0.0002, "loss": 1.2875, "step": 87310 }, { "epoch": 0.36, "grad_norm": 1.8759591579437256, "learning_rate": 0.0002, "loss": 1.7504, "step": 87320 }, { "epoch": 0.36, "grad_norm": 2.172694683074951, "learning_rate": 0.0002, "loss": 1.8047, "step": 87330 }, { "epoch": 0.36, "grad_norm": 2.197603702545166, "learning_rate": 0.0002, "loss": 1.8323, "step": 87340 }, { "epoch": 0.36, "grad_norm": 2.674344778060913, "learning_rate": 0.0002, "loss": 1.402, "step": 87350 }, { "epoch": 0.36, "grad_norm": 1.7209292650222778, "learning_rate": 0.0002, "loss": 1.6321, "step": 87360 }, { "epoch": 0.36, "grad_norm": 3.367391586303711, "learning_rate": 0.0002, "loss": 1.5592, "step": 87370 }, { "epoch": 0.36, "grad_norm": 3.110302448272705, "learning_rate": 0.0002, "loss": 1.3952, "step": 87380 }, { "epoch": 0.36, "grad_norm": 2.371244430541992, "learning_rate": 0.0002, "loss": 1.451, "step": 87390 }, { "epoch": 0.36, "grad_norm": 3.843357563018799, "learning_rate": 0.0002, "loss": 1.7116, "step": 87400 }, { "epoch": 0.36, "grad_norm": 2.878998041152954, "learning_rate": 0.0002, "loss": 1.577, "step": 87410 }, { "epoch": 0.36, "grad_norm": 3.1300556659698486, "learning_rate": 0.0002, "loss": 1.6857, "step": 87420 }, { "epoch": 0.36, "grad_norm": 3.0750138759613037, "learning_rate": 0.0002, "loss": 1.4855, "step": 87430 }, { "epoch": 0.36, "grad_norm": 2.000420093536377, "learning_rate": 0.0002, "loss": 1.7033, "step": 87440 }, { "epoch": 0.36, "grad_norm": 4.163124084472656, "learning_rate": 0.0002, "loss": 1.7957, "step": 87450 }, { "epoch": 0.36, "grad_norm": 2.0720763206481934, "learning_rate": 0.0002, "loss": 1.6783, "step": 87460 }, { "epoch": 0.36, "grad_norm": 2.5978410243988037, "learning_rate": 0.0002, "loss": 1.5702, "step": 87470 }, { "epoch": 0.36, "grad_norm": 3.8319251537323, "learning_rate": 0.0002, "loss": 1.6258, "step": 87480 }, { "epoch": 0.36, "grad_norm": 3.006544828414917, "learning_rate": 0.0002, "loss": 1.6057, "step": 87490 }, { "epoch": 0.36, "grad_norm": 3.492919921875, "learning_rate": 0.0002, "loss": 1.6319, "step": 87500 }, { "epoch": 0.36, "grad_norm": 3.069439649581909, "learning_rate": 0.0002, "loss": 1.2547, "step": 87510 }, { "epoch": 0.36, "grad_norm": 4.022366523742676, "learning_rate": 0.0002, "loss": 1.6453, "step": 87520 }, { "epoch": 0.36, "grad_norm": 3.724808931350708, "learning_rate": 0.0002, "loss": 1.6977, "step": 87530 }, { "epoch": 0.36, "grad_norm": 2.8838489055633545, "learning_rate": 0.0002, "loss": 1.5747, "step": 87540 }, { "epoch": 0.36, "grad_norm": 3.08955454826355, "learning_rate": 0.0002, "loss": 1.8187, "step": 87550 }, { "epoch": 0.36, "grad_norm": 3.6286232471466064, "learning_rate": 0.0002, "loss": 1.6624, "step": 87560 }, { "epoch": 0.36, "grad_norm": 2.1957242488861084, "learning_rate": 0.0002, "loss": 1.3535, "step": 87570 }, { "epoch": 0.36, "grad_norm": 3.2339136600494385, "learning_rate": 0.0002, "loss": 1.3931, "step": 87580 }, { "epoch": 0.36, "grad_norm": 1.6639621257781982, "learning_rate": 0.0002, "loss": 1.4115, "step": 87590 }, { "epoch": 0.36, "grad_norm": 3.9470746517181396, "learning_rate": 0.0002, "loss": 1.5184, "step": 87600 }, { "epoch": 0.36, "grad_norm": 2.0055699348449707, "learning_rate": 0.0002, "loss": 1.7333, "step": 87610 }, { "epoch": 0.36, "grad_norm": 3.066822052001953, "learning_rate": 0.0002, "loss": 1.5089, "step": 87620 }, { "epoch": 0.36, "grad_norm": 3.1735568046569824, "learning_rate": 0.0002, "loss": 1.6026, "step": 87630 }, { "epoch": 0.36, "grad_norm": 3.5268654823303223, "learning_rate": 0.0002, "loss": 1.7582, "step": 87640 }, { "epoch": 0.36, "grad_norm": 2.080037832260132, "learning_rate": 0.0002, "loss": 1.4504, "step": 87650 }, { "epoch": 0.36, "grad_norm": 3.03485107421875, "learning_rate": 0.0002, "loss": 1.4535, "step": 87660 }, { "epoch": 0.36, "grad_norm": 2.6011722087860107, "learning_rate": 0.0002, "loss": 1.5575, "step": 87670 }, { "epoch": 0.36, "grad_norm": 2.7736752033233643, "learning_rate": 0.0002, "loss": 1.6718, "step": 87680 }, { "epoch": 0.36, "grad_norm": 2.683450698852539, "learning_rate": 0.0002, "loss": 1.5416, "step": 87690 }, { "epoch": 0.36, "grad_norm": 1.8083949089050293, "learning_rate": 0.0002, "loss": 1.3964, "step": 87700 }, { "epoch": 0.36, "grad_norm": 2.396021604537964, "learning_rate": 0.0002, "loss": 1.5133, "step": 87710 }, { "epoch": 0.36, "grad_norm": 1.8273248672485352, "learning_rate": 0.0002, "loss": 1.8081, "step": 87720 }, { "epoch": 0.36, "grad_norm": 2.222400188446045, "learning_rate": 0.0002, "loss": 1.4268, "step": 87730 }, { "epoch": 0.36, "grad_norm": 3.998040199279785, "learning_rate": 0.0002, "loss": 1.7673, "step": 87740 }, { "epoch": 0.36, "grad_norm": 3.5246779918670654, "learning_rate": 0.0002, "loss": 1.4788, "step": 87750 }, { "epoch": 0.36, "grad_norm": 2.315420389175415, "learning_rate": 0.0002, "loss": 1.675, "step": 87760 }, { "epoch": 0.36, "grad_norm": 3.237110137939453, "learning_rate": 0.0002, "loss": 1.441, "step": 87770 }, { "epoch": 0.36, "grad_norm": 2.5763278007507324, "learning_rate": 0.0002, "loss": 1.455, "step": 87780 }, { "epoch": 0.36, "grad_norm": 2.6769118309020996, "learning_rate": 0.0002, "loss": 1.4238, "step": 87790 }, { "epoch": 0.36, "grad_norm": 9.21563720703125, "learning_rate": 0.0002, "loss": 1.3982, "step": 87800 }, { "epoch": 0.36, "grad_norm": 2.855602741241455, "learning_rate": 0.0002, "loss": 1.5987, "step": 87810 }, { "epoch": 0.36, "grad_norm": 2.345062255859375, "learning_rate": 0.0002, "loss": 1.5775, "step": 87820 }, { "epoch": 0.36, "grad_norm": 3.194509744644165, "learning_rate": 0.0002, "loss": 1.6793, "step": 87830 }, { "epoch": 0.36, "grad_norm": 3.2861804962158203, "learning_rate": 0.0002, "loss": 1.7543, "step": 87840 }, { "epoch": 0.36, "grad_norm": 1.9172738790512085, "learning_rate": 0.0002, "loss": 1.6381, "step": 87850 }, { "epoch": 0.36, "grad_norm": 4.9464111328125, "learning_rate": 0.0002, "loss": 1.5525, "step": 87860 }, { "epoch": 0.36, "grad_norm": 1.8579412698745728, "learning_rate": 0.0002, "loss": 1.4028, "step": 87870 }, { "epoch": 0.36, "grad_norm": 4.450687408447266, "learning_rate": 0.0002, "loss": 1.5415, "step": 87880 }, { "epoch": 0.36, "grad_norm": 2.312005043029785, "learning_rate": 0.0002, "loss": 1.6328, "step": 87890 }, { "epoch": 0.36, "grad_norm": 1.9817687273025513, "learning_rate": 0.0002, "loss": 1.6629, "step": 87900 }, { "epoch": 0.36, "grad_norm": 1.4298946857452393, "learning_rate": 0.0002, "loss": 1.953, "step": 87910 }, { "epoch": 0.36, "grad_norm": 2.755380392074585, "learning_rate": 0.0002, "loss": 1.3335, "step": 87920 }, { "epoch": 0.36, "grad_norm": 2.0431692600250244, "learning_rate": 0.0002, "loss": 1.8281, "step": 87930 }, { "epoch": 0.36, "grad_norm": 4.141883850097656, "learning_rate": 0.0002, "loss": 1.7284, "step": 87940 }, { "epoch": 0.36, "grad_norm": 4.1299262046813965, "learning_rate": 0.0002, "loss": 1.5207, "step": 87950 }, { "epoch": 0.36, "grad_norm": 3.9764046669006348, "learning_rate": 0.0002, "loss": 1.6336, "step": 87960 }, { "epoch": 0.36, "grad_norm": 3.212085247039795, "learning_rate": 0.0002, "loss": 1.7529, "step": 87970 }, { "epoch": 0.36, "grad_norm": 3.1062698364257812, "learning_rate": 0.0002, "loss": 1.5519, "step": 87980 }, { "epoch": 0.36, "grad_norm": 5.417789936065674, "learning_rate": 0.0002, "loss": 1.6346, "step": 87990 }, { "epoch": 0.36, "grad_norm": 2.2385342121124268, "learning_rate": 0.0002, "loss": 1.7714, "step": 88000 }, { "epoch": 0.36, "grad_norm": 4.886645793914795, "learning_rate": 0.0002, "loss": 1.6805, "step": 88010 }, { "epoch": 0.36, "grad_norm": 2.7821390628814697, "learning_rate": 0.0002, "loss": 1.4744, "step": 88020 }, { "epoch": 0.36, "grad_norm": 2.919201135635376, "learning_rate": 0.0002, "loss": 1.4003, "step": 88030 }, { "epoch": 0.36, "grad_norm": 2.89050555229187, "learning_rate": 0.0002, "loss": 1.8152, "step": 88040 }, { "epoch": 0.36, "grad_norm": 2.082113027572632, "learning_rate": 0.0002, "loss": 1.6266, "step": 88050 }, { "epoch": 0.36, "grad_norm": 3.2250120639801025, "learning_rate": 0.0002, "loss": 1.594, "step": 88060 }, { "epoch": 0.36, "grad_norm": 2.2352540493011475, "learning_rate": 0.0002, "loss": 1.3263, "step": 88070 }, { "epoch": 0.36, "grad_norm": 3.043255090713501, "learning_rate": 0.0002, "loss": 1.6524, "step": 88080 }, { "epoch": 0.36, "grad_norm": 2.1668214797973633, "learning_rate": 0.0002, "loss": 1.5058, "step": 88090 }, { "epoch": 0.36, "grad_norm": 3.5555360317230225, "learning_rate": 0.0002, "loss": 1.8036, "step": 88100 }, { "epoch": 0.36, "grad_norm": 2.325634717941284, "learning_rate": 0.0002, "loss": 1.6268, "step": 88110 }, { "epoch": 0.36, "grad_norm": 2.2401106357574463, "learning_rate": 0.0002, "loss": 1.6336, "step": 88120 }, { "epoch": 0.36, "grad_norm": 2.572479009628296, "learning_rate": 0.0002, "loss": 1.4101, "step": 88130 }, { "epoch": 0.36, "grad_norm": 3.386042594909668, "learning_rate": 0.0002, "loss": 1.6664, "step": 88140 }, { "epoch": 0.36, "grad_norm": 3.214022159576416, "learning_rate": 0.0002, "loss": 1.5852, "step": 88150 }, { "epoch": 0.36, "grad_norm": 1.917722463607788, "learning_rate": 0.0002, "loss": 1.6597, "step": 88160 }, { "epoch": 0.36, "grad_norm": 2.080760955810547, "learning_rate": 0.0002, "loss": 1.7385, "step": 88170 }, { "epoch": 0.36, "grad_norm": 3.0560107231140137, "learning_rate": 0.0002, "loss": 1.6369, "step": 88180 }, { "epoch": 0.36, "grad_norm": 2.5660336017608643, "learning_rate": 0.0002, "loss": 1.6438, "step": 88190 }, { "epoch": 0.36, "grad_norm": 2.667515516281128, "learning_rate": 0.0002, "loss": 1.6848, "step": 88200 }, { "epoch": 0.36, "grad_norm": 3.4890682697296143, "learning_rate": 0.0002, "loss": 1.5944, "step": 88210 }, { "epoch": 0.36, "grad_norm": 2.7813303470611572, "learning_rate": 0.0002, "loss": 1.6596, "step": 88220 }, { "epoch": 0.36, "grad_norm": 1.8994818925857544, "learning_rate": 0.0002, "loss": 1.4525, "step": 88230 }, { "epoch": 0.36, "grad_norm": 2.4706337451934814, "learning_rate": 0.0002, "loss": 1.4409, "step": 88240 }, { "epoch": 0.36, "grad_norm": 4.753865718841553, "learning_rate": 0.0002, "loss": 1.4334, "step": 88250 }, { "epoch": 0.36, "grad_norm": 2.9933626651763916, "learning_rate": 0.0002, "loss": 1.5946, "step": 88260 }, { "epoch": 0.36, "grad_norm": 4.696375846862793, "learning_rate": 0.0002, "loss": 1.4555, "step": 88270 }, { "epoch": 0.36, "grad_norm": 3.2609522342681885, "learning_rate": 0.0002, "loss": 1.8003, "step": 88280 }, { "epoch": 0.36, "grad_norm": 20.78449249267578, "learning_rate": 0.0002, "loss": 1.6609, "step": 88290 }, { "epoch": 0.36, "grad_norm": 2.5105135440826416, "learning_rate": 0.0002, "loss": 1.1642, "step": 88300 }, { "epoch": 0.36, "grad_norm": 2.800144672393799, "learning_rate": 0.0002, "loss": 1.5997, "step": 88310 }, { "epoch": 0.36, "grad_norm": 3.752983570098877, "learning_rate": 0.0002, "loss": 1.5592, "step": 88320 }, { "epoch": 0.36, "grad_norm": 4.364368915557861, "learning_rate": 0.0002, "loss": 1.395, "step": 88330 }, { "epoch": 0.36, "grad_norm": 3.489532709121704, "learning_rate": 0.0002, "loss": 1.7202, "step": 88340 }, { "epoch": 0.36, "grad_norm": 3.5498952865600586, "learning_rate": 0.0002, "loss": 1.4986, "step": 88350 }, { "epoch": 0.36, "grad_norm": 2.2857165336608887, "learning_rate": 0.0002, "loss": 1.609, "step": 88360 }, { "epoch": 0.36, "grad_norm": 2.9353156089782715, "learning_rate": 0.0002, "loss": 1.3617, "step": 88370 }, { "epoch": 0.36, "grad_norm": 2.417832374572754, "learning_rate": 0.0002, "loss": 1.3105, "step": 88380 }, { "epoch": 0.36, "grad_norm": 3.0751140117645264, "learning_rate": 0.0002, "loss": 1.4758, "step": 88390 }, { "epoch": 0.36, "grad_norm": 3.3872828483581543, "learning_rate": 0.0002, "loss": 1.5952, "step": 88400 }, { "epoch": 0.36, "grad_norm": 2.5079092979431152, "learning_rate": 0.0002, "loss": 1.686, "step": 88410 }, { "epoch": 0.36, "grad_norm": 3.295212745666504, "learning_rate": 0.0002, "loss": 1.6183, "step": 88420 }, { "epoch": 0.36, "grad_norm": 3.8881688117980957, "learning_rate": 0.0002, "loss": 1.3477, "step": 88430 }, { "epoch": 0.36, "grad_norm": 2.664307117462158, "learning_rate": 0.0002, "loss": 1.4757, "step": 88440 }, { "epoch": 0.36, "grad_norm": 2.941154718399048, "learning_rate": 0.0002, "loss": 1.6209, "step": 88450 }, { "epoch": 0.36, "grad_norm": 2.4125897884368896, "learning_rate": 0.0002, "loss": 1.5136, "step": 88460 }, { "epoch": 0.36, "grad_norm": 2.9163596630096436, "learning_rate": 0.0002, "loss": 1.7177, "step": 88470 }, { "epoch": 0.36, "grad_norm": 4.441260814666748, "learning_rate": 0.0002, "loss": 1.4183, "step": 88480 }, { "epoch": 0.36, "grad_norm": 4.265293598175049, "learning_rate": 0.0002, "loss": 1.3803, "step": 88490 }, { "epoch": 0.36, "grad_norm": 3.4508728981018066, "learning_rate": 0.0002, "loss": 1.5437, "step": 88500 }, { "epoch": 0.36, "grad_norm": 2.195974111557007, "learning_rate": 0.0002, "loss": 1.7489, "step": 88510 }, { "epoch": 0.36, "grad_norm": 2.108616828918457, "learning_rate": 0.0002, "loss": 1.4943, "step": 88520 }, { "epoch": 0.36, "grad_norm": 2.744335412979126, "learning_rate": 0.0002, "loss": 1.6239, "step": 88530 }, { "epoch": 0.36, "grad_norm": 3.07783842086792, "learning_rate": 0.0002, "loss": 1.4979, "step": 88540 }, { "epoch": 0.36, "grad_norm": 5.140195846557617, "learning_rate": 0.0002, "loss": 1.5553, "step": 88550 }, { "epoch": 0.36, "grad_norm": 4.032151699066162, "learning_rate": 0.0002, "loss": 1.3973, "step": 88560 }, { "epoch": 0.36, "grad_norm": 2.629652261734009, "learning_rate": 0.0002, "loss": 1.4341, "step": 88570 }, { "epoch": 0.36, "grad_norm": 3.262036085128784, "learning_rate": 0.0002, "loss": 1.8937, "step": 88580 }, { "epoch": 0.36, "grad_norm": 1.7599612474441528, "learning_rate": 0.0002, "loss": 1.6421, "step": 88590 }, { "epoch": 0.36, "grad_norm": 1.8882412910461426, "learning_rate": 0.0002, "loss": 1.4948, "step": 88600 }, { "epoch": 0.36, "grad_norm": 3.1792216300964355, "learning_rate": 0.0002, "loss": 1.4683, "step": 88610 }, { "epoch": 0.36, "grad_norm": 2.103562116622925, "learning_rate": 0.0002, "loss": 1.4848, "step": 88620 }, { "epoch": 0.36, "grad_norm": 3.7624337673187256, "learning_rate": 0.0002, "loss": 1.5355, "step": 88630 }, { "epoch": 0.36, "grad_norm": 3.318960428237915, "learning_rate": 0.0002, "loss": 1.832, "step": 88640 }, { "epoch": 0.36, "grad_norm": 3.137399435043335, "learning_rate": 0.0002, "loss": 1.6665, "step": 88650 }, { "epoch": 0.36, "grad_norm": 3.605426073074341, "learning_rate": 0.0002, "loss": 1.3535, "step": 88660 }, { "epoch": 0.36, "grad_norm": 4.060098171234131, "learning_rate": 0.0002, "loss": 1.323, "step": 88670 }, { "epoch": 0.36, "grad_norm": 3.4940688610076904, "learning_rate": 0.0002, "loss": 1.5996, "step": 88680 }, { "epoch": 0.36, "grad_norm": 2.083998680114746, "learning_rate": 0.0002, "loss": 1.4817, "step": 88690 }, { "epoch": 0.36, "grad_norm": 2.506624698638916, "learning_rate": 0.0002, "loss": 1.7143, "step": 88700 }, { "epoch": 0.36, "grad_norm": 2.202528953552246, "learning_rate": 0.0002, "loss": 1.8009, "step": 88710 }, { "epoch": 0.36, "grad_norm": 3.679048776626587, "learning_rate": 0.0002, "loss": 1.6495, "step": 88720 }, { "epoch": 0.36, "grad_norm": 12.485306739807129, "learning_rate": 0.0002, "loss": 1.4971, "step": 88730 }, { "epoch": 0.36, "grad_norm": 3.688076972961426, "learning_rate": 0.0002, "loss": 1.5364, "step": 88740 }, { "epoch": 0.36, "grad_norm": 2.8485772609710693, "learning_rate": 0.0002, "loss": 1.4054, "step": 88750 }, { "epoch": 0.36, "grad_norm": 3.597280979156494, "learning_rate": 0.0002, "loss": 1.4993, "step": 88760 }, { "epoch": 0.36, "grad_norm": 2.6296730041503906, "learning_rate": 0.0002, "loss": 1.4412, "step": 88770 }, { "epoch": 0.36, "grad_norm": 4.483122825622559, "learning_rate": 0.0002, "loss": 1.5015, "step": 88780 }, { "epoch": 0.36, "grad_norm": 4.340429306030273, "learning_rate": 0.0002, "loss": 1.7005, "step": 88790 }, { "epoch": 0.36, "grad_norm": 8.996395111083984, "learning_rate": 0.0002, "loss": 1.504, "step": 88800 }, { "epoch": 0.36, "grad_norm": 5.392481803894043, "learning_rate": 0.0002, "loss": 1.5286, "step": 88810 }, { "epoch": 0.36, "grad_norm": 3.7900280952453613, "learning_rate": 0.0002, "loss": 1.5308, "step": 88820 }, { "epoch": 0.36, "grad_norm": 2.217945098876953, "learning_rate": 0.0002, "loss": 1.4482, "step": 88830 }, { "epoch": 0.36, "grad_norm": 2.437832832336426, "learning_rate": 0.0002, "loss": 1.6417, "step": 88840 }, { "epoch": 0.36, "grad_norm": 3.0730299949645996, "learning_rate": 0.0002, "loss": 1.5323, "step": 88850 }, { "epoch": 0.36, "grad_norm": 4.836709976196289, "learning_rate": 0.0002, "loss": 1.6193, "step": 88860 }, { "epoch": 0.36, "grad_norm": 2.5757973194122314, "learning_rate": 0.0002, "loss": 1.6035, "step": 88870 }, { "epoch": 0.36, "grad_norm": 4.458305358886719, "learning_rate": 0.0002, "loss": 1.7464, "step": 88880 }, { "epoch": 0.36, "grad_norm": 4.1862311363220215, "learning_rate": 0.0002, "loss": 1.5442, "step": 88890 }, { "epoch": 0.36, "grad_norm": 3.1277129650115967, "learning_rate": 0.0002, "loss": 1.6027, "step": 88900 }, { "epoch": 0.36, "grad_norm": 2.733452081680298, "learning_rate": 0.0002, "loss": 1.627, "step": 88910 }, { "epoch": 0.36, "grad_norm": 3.852191925048828, "learning_rate": 0.0002, "loss": 1.7553, "step": 88920 }, { "epoch": 0.36, "grad_norm": 3.267545223236084, "learning_rate": 0.0002, "loss": 1.4444, "step": 88930 }, { "epoch": 0.36, "grad_norm": 2.1981148719787598, "learning_rate": 0.0002, "loss": 1.5499, "step": 88940 }, { "epoch": 0.36, "grad_norm": 2.543287992477417, "learning_rate": 0.0002, "loss": 1.5133, "step": 88950 }, { "epoch": 0.36, "grad_norm": 1.9240511655807495, "learning_rate": 0.0002, "loss": 1.5695, "step": 88960 }, { "epoch": 0.36, "grad_norm": 3.1226584911346436, "learning_rate": 0.0002, "loss": 1.5493, "step": 88970 }, { "epoch": 0.36, "grad_norm": 2.9770519733428955, "learning_rate": 0.0002, "loss": 1.603, "step": 88980 }, { "epoch": 0.36, "grad_norm": 2.283334732055664, "learning_rate": 0.0002, "loss": 1.6296, "step": 88990 }, { "epoch": 0.36, "grad_norm": 3.178828477859497, "learning_rate": 0.0002, "loss": 1.8263, "step": 89000 }, { "epoch": 0.36, "grad_norm": 2.5015041828155518, "learning_rate": 0.0002, "loss": 1.6471, "step": 89010 }, { "epoch": 0.36, "grad_norm": 3.592200517654419, "learning_rate": 0.0002, "loss": 1.5131, "step": 89020 }, { "epoch": 0.36, "grad_norm": 2.1761724948883057, "learning_rate": 0.0002, "loss": 1.5731, "step": 89030 }, { "epoch": 0.36, "grad_norm": 2.2353270053863525, "learning_rate": 0.0002, "loss": 1.6228, "step": 89040 }, { "epoch": 0.36, "grad_norm": 1.968515396118164, "learning_rate": 0.0002, "loss": 1.4262, "step": 89050 }, { "epoch": 0.36, "grad_norm": 2.442758321762085, "learning_rate": 0.0002, "loss": 1.8386, "step": 89060 }, { "epoch": 0.36, "grad_norm": 3.8331472873687744, "learning_rate": 0.0002, "loss": 1.8167, "step": 89070 }, { "epoch": 0.36, "grad_norm": 3.377753257751465, "learning_rate": 0.0002, "loss": 1.5872, "step": 89080 }, { "epoch": 0.36, "grad_norm": 4.321113109588623, "learning_rate": 0.0002, "loss": 1.4374, "step": 89090 }, { "epoch": 0.36, "grad_norm": 2.9693377017974854, "learning_rate": 0.0002, "loss": 1.7417, "step": 89100 }, { "epoch": 0.36, "grad_norm": 3.405533790588379, "learning_rate": 0.0002, "loss": 1.5797, "step": 89110 }, { "epoch": 0.36, "grad_norm": 2.3411808013916016, "learning_rate": 0.0002, "loss": 1.6688, "step": 89120 }, { "epoch": 0.36, "grad_norm": 2.968654155731201, "learning_rate": 0.0002, "loss": 1.9182, "step": 89130 }, { "epoch": 0.36, "grad_norm": 2.5810647010803223, "learning_rate": 0.0002, "loss": 1.7733, "step": 89140 }, { "epoch": 0.36, "grad_norm": 2.4871976375579834, "learning_rate": 0.0002, "loss": 1.3814, "step": 89150 }, { "epoch": 0.36, "grad_norm": 2.2074151039123535, "learning_rate": 0.0002, "loss": 1.455, "step": 89160 }, { "epoch": 0.36, "grad_norm": 3.3660831451416016, "learning_rate": 0.0002, "loss": 1.5809, "step": 89170 }, { "epoch": 0.36, "grad_norm": 3.20753812789917, "learning_rate": 0.0002, "loss": 1.6348, "step": 89180 }, { "epoch": 0.36, "grad_norm": 3.8768250942230225, "learning_rate": 0.0002, "loss": 1.955, "step": 89190 }, { "epoch": 0.36, "grad_norm": 2.5640182495117188, "learning_rate": 0.0002, "loss": 1.4275, "step": 89200 }, { "epoch": 0.36, "grad_norm": 2.502239942550659, "learning_rate": 0.0002, "loss": 1.4164, "step": 89210 }, { "epoch": 0.36, "grad_norm": 2.5364108085632324, "learning_rate": 0.0002, "loss": 1.675, "step": 89220 }, { "epoch": 0.36, "grad_norm": 2.3262369632720947, "learning_rate": 0.0002, "loss": 1.6323, "step": 89230 }, { "epoch": 0.36, "grad_norm": 2.261023759841919, "learning_rate": 0.0002, "loss": 1.2581, "step": 89240 }, { "epoch": 0.36, "grad_norm": 3.3171517848968506, "learning_rate": 0.0002, "loss": 1.6758, "step": 89250 }, { "epoch": 0.36, "grad_norm": 3.9923603534698486, "learning_rate": 0.0002, "loss": 1.4915, "step": 89260 }, { "epoch": 0.36, "grad_norm": 1.6650680303573608, "learning_rate": 0.0002, "loss": 1.5541, "step": 89270 }, { "epoch": 0.36, "grad_norm": 2.064049005508423, "learning_rate": 0.0002, "loss": 1.5686, "step": 89280 }, { "epoch": 0.36, "grad_norm": 2.216966390609741, "learning_rate": 0.0002, "loss": 1.8044, "step": 89290 }, { "epoch": 0.36, "grad_norm": 2.985750913619995, "learning_rate": 0.0002, "loss": 1.4224, "step": 89300 }, { "epoch": 0.36, "grad_norm": 3.4617128372192383, "learning_rate": 0.0002, "loss": 1.3722, "step": 89310 }, { "epoch": 0.36, "grad_norm": 3.0071401596069336, "learning_rate": 0.0002, "loss": 1.6531, "step": 89320 }, { "epoch": 0.36, "grad_norm": 3.3698678016662598, "learning_rate": 0.0002, "loss": 1.5508, "step": 89330 }, { "epoch": 0.36, "grad_norm": 3.025070905685425, "learning_rate": 0.0002, "loss": 1.4216, "step": 89340 }, { "epoch": 0.36, "grad_norm": 5.523303985595703, "learning_rate": 0.0002, "loss": 1.6474, "step": 89350 }, { "epoch": 0.36, "grad_norm": 2.8446455001831055, "learning_rate": 0.0002, "loss": 1.4386, "step": 89360 }, { "epoch": 0.36, "grad_norm": 3.130631923675537, "learning_rate": 0.0002, "loss": 1.7269, "step": 89370 }, { "epoch": 0.36, "grad_norm": 3.870877981185913, "learning_rate": 0.0002, "loss": 1.58, "step": 89380 }, { "epoch": 0.36, "grad_norm": 2.992267370223999, "learning_rate": 0.0002, "loss": 1.6584, "step": 89390 }, { "epoch": 0.36, "grad_norm": 3.1958463191986084, "learning_rate": 0.0002, "loss": 1.7557, "step": 89400 }, { "epoch": 0.36, "grad_norm": 3.264227867126465, "learning_rate": 0.0002, "loss": 1.4297, "step": 89410 }, { "epoch": 0.36, "grad_norm": 4.170897960662842, "learning_rate": 0.0002, "loss": 1.7703, "step": 89420 }, { "epoch": 0.36, "grad_norm": 2.2007811069488525, "learning_rate": 0.0002, "loss": 1.6174, "step": 89430 }, { "epoch": 0.36, "grad_norm": 2.1024856567382812, "learning_rate": 0.0002, "loss": 1.6553, "step": 89440 }, { "epoch": 0.36, "grad_norm": 4.567515850067139, "learning_rate": 0.0002, "loss": 1.4743, "step": 89450 }, { "epoch": 0.36, "grad_norm": 3.787959337234497, "learning_rate": 0.0002, "loss": 1.7219, "step": 89460 }, { "epoch": 0.36, "grad_norm": 2.4809203147888184, "learning_rate": 0.0002, "loss": 1.682, "step": 89470 }, { "epoch": 0.36, "grad_norm": 3.096964120864868, "learning_rate": 0.0002, "loss": 1.4787, "step": 89480 }, { "epoch": 0.36, "grad_norm": 1.403389573097229, "learning_rate": 0.0002, "loss": 1.5112, "step": 89490 }, { "epoch": 0.36, "grad_norm": 11.962873458862305, "learning_rate": 0.0002, "loss": 1.7214, "step": 89500 }, { "epoch": 0.36, "grad_norm": 2.51790189743042, "learning_rate": 0.0002, "loss": 1.3984, "step": 89510 }, { "epoch": 0.36, "grad_norm": 2.888932466506958, "learning_rate": 0.0002, "loss": 1.6277, "step": 89520 }, { "epoch": 0.36, "grad_norm": 3.576218366622925, "learning_rate": 0.0002, "loss": 1.2278, "step": 89530 }, { "epoch": 0.36, "grad_norm": 6.22292423248291, "learning_rate": 0.0002, "loss": 1.4437, "step": 89540 }, { "epoch": 0.36, "grad_norm": 4.603719234466553, "learning_rate": 0.0002, "loss": 1.4143, "step": 89550 }, { "epoch": 0.36, "grad_norm": 2.4410648345947266, "learning_rate": 0.0002, "loss": 1.486, "step": 89560 }, { "epoch": 0.36, "grad_norm": 3.113386392593384, "learning_rate": 0.0002, "loss": 1.5508, "step": 89570 }, { "epoch": 0.36, "grad_norm": 3.9412221908569336, "learning_rate": 0.0002, "loss": 1.8722, "step": 89580 }, { "epoch": 0.36, "grad_norm": 3.8600170612335205, "learning_rate": 0.0002, "loss": 1.6122, "step": 89590 }, { "epoch": 0.36, "grad_norm": 2.084646224975586, "learning_rate": 0.0002, "loss": 1.6137, "step": 89600 }, { "epoch": 0.36, "grad_norm": 1.8828632831573486, "learning_rate": 0.0002, "loss": 1.3911, "step": 89610 }, { "epoch": 0.36, "grad_norm": 3.7179017066955566, "learning_rate": 0.0002, "loss": 1.4041, "step": 89620 }, { "epoch": 0.36, "grad_norm": 3.202902317047119, "learning_rate": 0.0002, "loss": 1.7497, "step": 89630 }, { "epoch": 0.36, "grad_norm": 3.7549350261688232, "learning_rate": 0.0002, "loss": 1.5129, "step": 89640 }, { "epoch": 0.36, "grad_norm": 2.653210401535034, "learning_rate": 0.0002, "loss": 1.4685, "step": 89650 }, { "epoch": 0.36, "grad_norm": 5.279246807098389, "learning_rate": 0.0002, "loss": 1.722, "step": 89660 }, { "epoch": 0.37, "grad_norm": 2.6895151138305664, "learning_rate": 0.0002, "loss": 1.4202, "step": 89670 }, { "epoch": 0.37, "grad_norm": 2.9163403511047363, "learning_rate": 0.0002, "loss": 1.7056, "step": 89680 }, { "epoch": 0.37, "grad_norm": 2.3473057746887207, "learning_rate": 0.0002, "loss": 1.6308, "step": 89690 }, { "epoch": 0.37, "grad_norm": 2.9397857189178467, "learning_rate": 0.0002, "loss": 1.8287, "step": 89700 }, { "epoch": 0.37, "grad_norm": 3.038841962814331, "learning_rate": 0.0002, "loss": 1.5748, "step": 89710 }, { "epoch": 0.37, "grad_norm": 1.8309656381607056, "learning_rate": 0.0002, "loss": 1.6404, "step": 89720 }, { "epoch": 0.37, "grad_norm": 3.3968591690063477, "learning_rate": 0.0002, "loss": 1.5574, "step": 89730 }, { "epoch": 0.37, "grad_norm": 3.93525767326355, "learning_rate": 0.0002, "loss": 1.5952, "step": 89740 }, { "epoch": 0.37, "grad_norm": 3.7058045864105225, "learning_rate": 0.0002, "loss": 1.7138, "step": 89750 }, { "epoch": 0.37, "grad_norm": 2.791304588317871, "learning_rate": 0.0002, "loss": 1.5391, "step": 89760 }, { "epoch": 0.37, "grad_norm": 3.376316785812378, "learning_rate": 0.0002, "loss": 1.6261, "step": 89770 }, { "epoch": 0.37, "grad_norm": 2.702331066131592, "learning_rate": 0.0002, "loss": 1.4844, "step": 89780 }, { "epoch": 0.37, "grad_norm": 3.4247024059295654, "learning_rate": 0.0002, "loss": 1.7282, "step": 89790 }, { "epoch": 0.37, "grad_norm": 4.626095771789551, "learning_rate": 0.0002, "loss": 1.5064, "step": 89800 }, { "epoch": 0.37, "grad_norm": 1.9507607221603394, "learning_rate": 0.0002, "loss": 1.5653, "step": 89810 }, { "epoch": 0.37, "grad_norm": 4.491638660430908, "learning_rate": 0.0002, "loss": 1.8746, "step": 89820 }, { "epoch": 0.37, "grad_norm": 3.805087089538574, "learning_rate": 0.0002, "loss": 1.2825, "step": 89830 }, { "epoch": 0.37, "grad_norm": 3.0277507305145264, "learning_rate": 0.0002, "loss": 1.3809, "step": 89840 }, { "epoch": 0.37, "grad_norm": 1.885000228881836, "learning_rate": 0.0002, "loss": 1.4628, "step": 89850 }, { "epoch": 0.37, "grad_norm": 2.05599045753479, "learning_rate": 0.0002, "loss": 1.6253, "step": 89860 }, { "epoch": 0.37, "grad_norm": 2.7267472743988037, "learning_rate": 0.0002, "loss": 1.7029, "step": 89870 }, { "epoch": 0.37, "grad_norm": 2.2081334590911865, "learning_rate": 0.0002, "loss": 1.622, "step": 89880 }, { "epoch": 0.37, "grad_norm": 3.19287109375, "learning_rate": 0.0002, "loss": 1.4717, "step": 89890 }, { "epoch": 0.37, "grad_norm": 2.2724180221557617, "learning_rate": 0.0002, "loss": 1.7262, "step": 89900 }, { "epoch": 0.37, "grad_norm": 3.349055767059326, "learning_rate": 0.0002, "loss": 1.6642, "step": 89910 }, { "epoch": 0.37, "grad_norm": 4.1992597579956055, "learning_rate": 0.0002, "loss": 1.5889, "step": 89920 }, { "epoch": 0.37, "grad_norm": 3.1613211631774902, "learning_rate": 0.0002, "loss": 1.5015, "step": 89930 }, { "epoch": 0.37, "grad_norm": 3.3916938304901123, "learning_rate": 0.0002, "loss": 1.6298, "step": 89940 }, { "epoch": 0.37, "grad_norm": 1.1391388177871704, "learning_rate": 0.0002, "loss": 1.362, "step": 89950 }, { "epoch": 0.37, "grad_norm": 1.8561148643493652, "learning_rate": 0.0002, "loss": 1.771, "step": 89960 }, { "epoch": 0.37, "grad_norm": 2.355755090713501, "learning_rate": 0.0002, "loss": 1.5807, "step": 89970 }, { "epoch": 0.37, "grad_norm": 1.8614927530288696, "learning_rate": 0.0002, "loss": 1.6307, "step": 89980 }, { "epoch": 0.37, "grad_norm": 2.11140775680542, "learning_rate": 0.0002, "loss": 1.3454, "step": 89990 }, { "epoch": 0.37, "grad_norm": 2.807671308517456, "learning_rate": 0.0002, "loss": 1.5399, "step": 90000 }, { "epoch": 0.37, "grad_norm": 4.998877048492432, "learning_rate": 0.0002, "loss": 1.4408, "step": 90010 }, { "epoch": 0.37, "grad_norm": 1.9544713497161865, "learning_rate": 0.0002, "loss": 1.5692, "step": 90020 }, { "epoch": 0.37, "grad_norm": 4.08217716217041, "learning_rate": 0.0002, "loss": 1.6716, "step": 90030 }, { "epoch": 0.37, "grad_norm": 4.901784896850586, "learning_rate": 0.0002, "loss": 1.6011, "step": 90040 }, { "epoch": 0.37, "grad_norm": 2.079483985900879, "learning_rate": 0.0002, "loss": 1.6444, "step": 90050 }, { "epoch": 0.37, "grad_norm": 2.2836647033691406, "learning_rate": 0.0002, "loss": 1.6592, "step": 90060 }, { "epoch": 0.37, "grad_norm": 2.813150405883789, "learning_rate": 0.0002, "loss": 1.3548, "step": 90070 }, { "epoch": 0.37, "grad_norm": 3.549358367919922, "learning_rate": 0.0002, "loss": 1.2229, "step": 90080 }, { "epoch": 0.37, "grad_norm": 2.888643741607666, "learning_rate": 0.0002, "loss": 1.511, "step": 90090 }, { "epoch": 0.37, "grad_norm": 4.215397834777832, "learning_rate": 0.0002, "loss": 1.6796, "step": 90100 }, { "epoch": 0.37, "grad_norm": 2.169581413269043, "learning_rate": 0.0002, "loss": 1.599, "step": 90110 }, { "epoch": 0.37, "grad_norm": 3.190598726272583, "learning_rate": 0.0002, "loss": 1.5577, "step": 90120 }, { "epoch": 0.37, "grad_norm": 2.861874580383301, "learning_rate": 0.0002, "loss": 1.4912, "step": 90130 }, { "epoch": 0.37, "grad_norm": 1.689785122871399, "learning_rate": 0.0002, "loss": 1.8052, "step": 90140 }, { "epoch": 0.37, "grad_norm": 2.5066797733306885, "learning_rate": 0.0002, "loss": 1.8119, "step": 90150 }, { "epoch": 0.37, "grad_norm": 2.6238815784454346, "learning_rate": 0.0002, "loss": 1.6497, "step": 90160 }, { "epoch": 0.37, "grad_norm": 1.2626129388809204, "learning_rate": 0.0002, "loss": 1.5229, "step": 90170 }, { "epoch": 0.37, "grad_norm": 4.568016529083252, "learning_rate": 0.0002, "loss": 1.6829, "step": 90180 }, { "epoch": 0.37, "grad_norm": 2.5111072063446045, "learning_rate": 0.0002, "loss": 1.8067, "step": 90190 }, { "epoch": 0.37, "grad_norm": 2.2860071659088135, "learning_rate": 0.0002, "loss": 1.3975, "step": 90200 }, { "epoch": 0.37, "grad_norm": 1.5319774150848389, "learning_rate": 0.0002, "loss": 1.6787, "step": 90210 }, { "epoch": 0.37, "grad_norm": 3.8010823726654053, "learning_rate": 0.0002, "loss": 1.6827, "step": 90220 }, { "epoch": 0.37, "grad_norm": 9.452104568481445, "learning_rate": 0.0002, "loss": 1.7791, "step": 90230 }, { "epoch": 0.37, "grad_norm": 3.4068942070007324, "learning_rate": 0.0002, "loss": 1.5053, "step": 90240 }, { "epoch": 0.37, "grad_norm": 4.5361785888671875, "learning_rate": 0.0002, "loss": 1.8558, "step": 90250 }, { "epoch": 0.37, "grad_norm": 2.3009674549102783, "learning_rate": 0.0002, "loss": 1.3827, "step": 90260 }, { "epoch": 0.37, "grad_norm": 3.6575849056243896, "learning_rate": 0.0002, "loss": 1.6238, "step": 90270 }, { "epoch": 0.37, "grad_norm": 2.9151012897491455, "learning_rate": 0.0002, "loss": 1.6324, "step": 90280 }, { "epoch": 0.37, "grad_norm": 2.368347644805908, "learning_rate": 0.0002, "loss": 1.5918, "step": 90290 }, { "epoch": 0.37, "grad_norm": 5.665872573852539, "learning_rate": 0.0002, "loss": 1.6185, "step": 90300 }, { "epoch": 0.37, "grad_norm": 3.4950687885284424, "learning_rate": 0.0002, "loss": 1.3817, "step": 90310 }, { "epoch": 0.37, "grad_norm": 3.6375298500061035, "learning_rate": 0.0002, "loss": 1.6248, "step": 90320 }, { "epoch": 0.37, "grad_norm": 1.7643556594848633, "learning_rate": 0.0002, "loss": 1.6252, "step": 90330 }, { "epoch": 0.37, "grad_norm": 3.0833609104156494, "learning_rate": 0.0002, "loss": 1.8205, "step": 90340 }, { "epoch": 0.37, "grad_norm": 3.2511706352233887, "learning_rate": 0.0002, "loss": 1.827, "step": 90350 }, { "epoch": 0.37, "grad_norm": 2.845027208328247, "learning_rate": 0.0002, "loss": 1.5622, "step": 90360 }, { "epoch": 0.37, "grad_norm": 3.6533005237579346, "learning_rate": 0.0002, "loss": 1.5405, "step": 90370 }, { "epoch": 0.37, "grad_norm": 2.3908379077911377, "learning_rate": 0.0002, "loss": 1.47, "step": 90380 }, { "epoch": 0.37, "grad_norm": 3.0493524074554443, "learning_rate": 0.0002, "loss": 1.532, "step": 90390 }, { "epoch": 0.37, "grad_norm": 3.2406437397003174, "learning_rate": 0.0002, "loss": 1.515, "step": 90400 }, { "epoch": 0.37, "grad_norm": 3.381930351257324, "learning_rate": 0.0002, "loss": 1.7077, "step": 90410 }, { "epoch": 0.37, "grad_norm": 2.3932607173919678, "learning_rate": 0.0002, "loss": 1.6267, "step": 90420 }, { "epoch": 0.37, "grad_norm": 2.0195393562316895, "learning_rate": 0.0002, "loss": 1.6438, "step": 90430 }, { "epoch": 0.37, "grad_norm": 3.3015296459198, "learning_rate": 0.0002, "loss": 1.731, "step": 90440 }, { "epoch": 0.37, "grad_norm": 4.090868949890137, "learning_rate": 0.0002, "loss": 1.8142, "step": 90450 }, { "epoch": 0.37, "grad_norm": 3.001235008239746, "learning_rate": 0.0002, "loss": 1.3798, "step": 90460 }, { "epoch": 0.37, "grad_norm": 2.5414209365844727, "learning_rate": 0.0002, "loss": 1.6238, "step": 90470 }, { "epoch": 0.37, "grad_norm": 4.915120601654053, "learning_rate": 0.0002, "loss": 1.6186, "step": 90480 }, { "epoch": 0.37, "grad_norm": 2.959425687789917, "learning_rate": 0.0002, "loss": 1.5111, "step": 90490 }, { "epoch": 0.37, "grad_norm": 3.35256290435791, "learning_rate": 0.0002, "loss": 1.5291, "step": 90500 }, { "epoch": 0.37, "grad_norm": 3.606503963470459, "learning_rate": 0.0002, "loss": 1.6762, "step": 90510 }, { "epoch": 0.37, "grad_norm": 3.9127438068389893, "learning_rate": 0.0002, "loss": 1.3963, "step": 90520 }, { "epoch": 0.37, "grad_norm": 1.8534791469573975, "learning_rate": 0.0002, "loss": 1.4728, "step": 90530 }, { "epoch": 0.37, "grad_norm": 3.1321961879730225, "learning_rate": 0.0002, "loss": 1.614, "step": 90540 }, { "epoch": 0.37, "grad_norm": 2.4516026973724365, "learning_rate": 0.0002, "loss": 1.5888, "step": 90550 }, { "epoch": 0.37, "grad_norm": 3.441889524459839, "learning_rate": 0.0002, "loss": 1.5878, "step": 90560 }, { "epoch": 0.37, "grad_norm": 2.462279796600342, "learning_rate": 0.0002, "loss": 1.77, "step": 90570 }, { "epoch": 0.37, "grad_norm": 2.2709028720855713, "learning_rate": 0.0002, "loss": 1.5128, "step": 90580 }, { "epoch": 0.37, "grad_norm": 2.1962027549743652, "learning_rate": 0.0002, "loss": 1.63, "step": 90590 }, { "epoch": 0.37, "grad_norm": 2.9896161556243896, "learning_rate": 0.0002, "loss": 1.462, "step": 90600 }, { "epoch": 0.37, "grad_norm": 4.559443950653076, "learning_rate": 0.0002, "loss": 1.4636, "step": 90610 }, { "epoch": 0.37, "grad_norm": 2.224510669708252, "learning_rate": 0.0002, "loss": 1.5706, "step": 90620 }, { "epoch": 0.37, "grad_norm": 3.3281610012054443, "learning_rate": 0.0002, "loss": 1.4014, "step": 90630 }, { "epoch": 0.37, "grad_norm": 4.395589351654053, "learning_rate": 0.0002, "loss": 1.5285, "step": 90640 }, { "epoch": 0.37, "grad_norm": 2.9315006732940674, "learning_rate": 0.0002, "loss": 1.5649, "step": 90650 }, { "epoch": 0.37, "grad_norm": 1.557867169380188, "learning_rate": 0.0002, "loss": 1.5987, "step": 90660 }, { "epoch": 0.37, "grad_norm": 1.7652668952941895, "learning_rate": 0.0002, "loss": 1.4793, "step": 90670 }, { "epoch": 0.37, "grad_norm": 2.049283981323242, "learning_rate": 0.0002, "loss": 1.4719, "step": 90680 }, { "epoch": 0.37, "grad_norm": 3.416149377822876, "learning_rate": 0.0002, "loss": 1.4574, "step": 90690 }, { "epoch": 0.37, "grad_norm": 2.5635159015655518, "learning_rate": 0.0002, "loss": 1.4697, "step": 90700 }, { "epoch": 0.37, "grad_norm": 6.012709140777588, "learning_rate": 0.0002, "loss": 1.6506, "step": 90710 }, { "epoch": 0.37, "grad_norm": 3.5304720401763916, "learning_rate": 0.0002, "loss": 1.6052, "step": 90720 }, { "epoch": 0.37, "grad_norm": 3.7002694606781006, "learning_rate": 0.0002, "loss": 1.6201, "step": 90730 }, { "epoch": 0.37, "grad_norm": 3.9336118698120117, "learning_rate": 0.0002, "loss": 1.5615, "step": 90740 }, { "epoch": 0.37, "grad_norm": 3.270350456237793, "learning_rate": 0.0002, "loss": 1.5898, "step": 90750 }, { "epoch": 0.37, "grad_norm": 2.837933301925659, "learning_rate": 0.0002, "loss": 1.6455, "step": 90760 }, { "epoch": 0.37, "grad_norm": 2.2774243354797363, "learning_rate": 0.0002, "loss": 1.3314, "step": 90770 }, { "epoch": 0.37, "grad_norm": 2.493346691131592, "learning_rate": 0.0002, "loss": 1.6475, "step": 90780 }, { "epoch": 0.37, "grad_norm": 2.7020769119262695, "learning_rate": 0.0002, "loss": 1.5645, "step": 90790 }, { "epoch": 0.37, "grad_norm": 3.256969690322876, "learning_rate": 0.0002, "loss": 1.5589, "step": 90800 }, { "epoch": 0.37, "grad_norm": 3.634347915649414, "learning_rate": 0.0002, "loss": 1.4385, "step": 90810 }, { "epoch": 0.37, "grad_norm": 2.5306811332702637, "learning_rate": 0.0002, "loss": 1.54, "step": 90820 }, { "epoch": 0.37, "grad_norm": 3.119689702987671, "learning_rate": 0.0002, "loss": 1.5434, "step": 90830 }, { "epoch": 0.37, "grad_norm": 3.8723559379577637, "learning_rate": 0.0002, "loss": 1.627, "step": 90840 }, { "epoch": 0.37, "grad_norm": 3.8093953132629395, "learning_rate": 0.0002, "loss": 1.5583, "step": 90850 }, { "epoch": 0.37, "grad_norm": 4.518841743469238, "learning_rate": 0.0002, "loss": 1.607, "step": 90860 }, { "epoch": 0.37, "grad_norm": 3.856441020965576, "learning_rate": 0.0002, "loss": 1.7208, "step": 90870 }, { "epoch": 0.37, "grad_norm": 3.658543825149536, "learning_rate": 0.0002, "loss": 1.8071, "step": 90880 }, { "epoch": 0.37, "grad_norm": 2.193709135055542, "learning_rate": 0.0002, "loss": 1.732, "step": 90890 }, { "epoch": 0.37, "grad_norm": 2.1940436363220215, "learning_rate": 0.0002, "loss": 1.2782, "step": 90900 }, { "epoch": 0.37, "grad_norm": 2.551067590713501, "learning_rate": 0.0002, "loss": 1.4854, "step": 90910 }, { "epoch": 0.37, "grad_norm": 3.1526925563812256, "learning_rate": 0.0002, "loss": 1.6923, "step": 90920 }, { "epoch": 0.37, "grad_norm": 3.559804916381836, "learning_rate": 0.0002, "loss": 1.6701, "step": 90930 }, { "epoch": 0.37, "grad_norm": 3.365574598312378, "learning_rate": 0.0002, "loss": 1.5344, "step": 90940 }, { "epoch": 0.37, "grad_norm": 2.7702224254608154, "learning_rate": 0.0002, "loss": 1.3254, "step": 90950 }, { "epoch": 0.37, "grad_norm": 2.4697976112365723, "learning_rate": 0.0002, "loss": 1.6793, "step": 90960 }, { "epoch": 0.37, "grad_norm": 3.7256991863250732, "learning_rate": 0.0002, "loss": 1.4762, "step": 90970 }, { "epoch": 0.37, "grad_norm": 5.288888454437256, "learning_rate": 0.0002, "loss": 1.4417, "step": 90980 }, { "epoch": 0.37, "grad_norm": 2.6207101345062256, "learning_rate": 0.0002, "loss": 1.8957, "step": 90990 }, { "epoch": 0.37, "grad_norm": 4.38506555557251, "learning_rate": 0.0002, "loss": 1.6563, "step": 91000 }, { "epoch": 0.37, "grad_norm": 4.30532169342041, "learning_rate": 0.0002, "loss": 1.9936, "step": 91010 }, { "epoch": 0.37, "grad_norm": 2.616239547729492, "learning_rate": 0.0002, "loss": 1.6438, "step": 91020 }, { "epoch": 0.37, "grad_norm": 2.199392557144165, "learning_rate": 0.0002, "loss": 1.6721, "step": 91030 }, { "epoch": 0.37, "grad_norm": 2.3916335105895996, "learning_rate": 0.0002, "loss": 1.7114, "step": 91040 }, { "epoch": 0.37, "grad_norm": 3.596215009689331, "learning_rate": 0.0002, "loss": 1.4452, "step": 91050 }, { "epoch": 0.37, "grad_norm": 1.8989837169647217, "learning_rate": 0.0002, "loss": 1.7955, "step": 91060 }, { "epoch": 0.37, "grad_norm": 2.1294922828674316, "learning_rate": 0.0002, "loss": 1.2882, "step": 91070 }, { "epoch": 0.37, "grad_norm": 2.624051809310913, "learning_rate": 0.0002, "loss": 1.6654, "step": 91080 }, { "epoch": 0.37, "grad_norm": 3.146075487136841, "learning_rate": 0.0002, "loss": 1.6825, "step": 91090 }, { "epoch": 0.37, "grad_norm": 3.0153558254241943, "learning_rate": 0.0002, "loss": 1.624, "step": 91100 }, { "epoch": 0.37, "grad_norm": 4.388662815093994, "learning_rate": 0.0002, "loss": 1.425, "step": 91110 }, { "epoch": 0.37, "grad_norm": 2.1511762142181396, "learning_rate": 0.0002, "loss": 1.7496, "step": 91120 }, { "epoch": 0.37, "grad_norm": 2.3670222759246826, "learning_rate": 0.0002, "loss": 1.4171, "step": 91130 }, { "epoch": 0.37, "grad_norm": 3.1862800121307373, "learning_rate": 0.0002, "loss": 1.5021, "step": 91140 }, { "epoch": 0.37, "grad_norm": 2.361027956008911, "learning_rate": 0.0002, "loss": 1.6093, "step": 91150 }, { "epoch": 0.37, "grad_norm": 4.083091735839844, "learning_rate": 0.0002, "loss": 1.7994, "step": 91160 }, { "epoch": 0.37, "grad_norm": 2.8932645320892334, "learning_rate": 0.0002, "loss": 1.5304, "step": 91170 }, { "epoch": 0.37, "grad_norm": 2.050529956817627, "learning_rate": 0.0002, "loss": 1.7456, "step": 91180 }, { "epoch": 0.37, "grad_norm": 2.482036828994751, "learning_rate": 0.0002, "loss": 1.6801, "step": 91190 }, { "epoch": 0.37, "grad_norm": 2.522435188293457, "learning_rate": 0.0002, "loss": 1.3287, "step": 91200 }, { "epoch": 0.37, "grad_norm": 1.7471625804901123, "learning_rate": 0.0002, "loss": 1.7834, "step": 91210 }, { "epoch": 0.37, "grad_norm": 3.0012502670288086, "learning_rate": 0.0002, "loss": 1.4569, "step": 91220 }, { "epoch": 0.37, "grad_norm": 2.4237701892852783, "learning_rate": 0.0002, "loss": 1.5525, "step": 91230 }, { "epoch": 0.37, "grad_norm": 2.9162838459014893, "learning_rate": 0.0002, "loss": 1.4909, "step": 91240 }, { "epoch": 0.37, "grad_norm": 3.401029348373413, "learning_rate": 0.0002, "loss": 1.6979, "step": 91250 }, { "epoch": 0.37, "grad_norm": 5.520567417144775, "learning_rate": 0.0002, "loss": 1.6916, "step": 91260 }, { "epoch": 0.37, "grad_norm": 2.666754722595215, "learning_rate": 0.0002, "loss": 1.9475, "step": 91270 }, { "epoch": 0.37, "grad_norm": 3.3249387741088867, "learning_rate": 0.0002, "loss": 1.4946, "step": 91280 }, { "epoch": 0.37, "grad_norm": 4.301730632781982, "learning_rate": 0.0002, "loss": 1.4673, "step": 91290 }, { "epoch": 0.37, "grad_norm": 3.6630914211273193, "learning_rate": 0.0002, "loss": 1.4753, "step": 91300 }, { "epoch": 0.37, "grad_norm": 4.014461040496826, "learning_rate": 0.0002, "loss": 1.5586, "step": 91310 }, { "epoch": 0.37, "grad_norm": 2.430898666381836, "learning_rate": 0.0002, "loss": 1.4332, "step": 91320 }, { "epoch": 0.37, "grad_norm": 2.852935791015625, "learning_rate": 0.0002, "loss": 1.7573, "step": 91330 }, { "epoch": 0.37, "grad_norm": 5.397654056549072, "learning_rate": 0.0002, "loss": 1.4784, "step": 91340 }, { "epoch": 0.37, "grad_norm": 2.57861590385437, "learning_rate": 0.0002, "loss": 1.4861, "step": 91350 }, { "epoch": 0.37, "grad_norm": 3.162660598754883, "learning_rate": 0.0002, "loss": 1.603, "step": 91360 }, { "epoch": 0.37, "grad_norm": 3.4903488159179688, "learning_rate": 0.0002, "loss": 1.737, "step": 91370 }, { "epoch": 0.37, "grad_norm": 3.6158154010772705, "learning_rate": 0.0002, "loss": 1.4072, "step": 91380 }, { "epoch": 0.37, "grad_norm": 4.093202114105225, "learning_rate": 0.0002, "loss": 1.5999, "step": 91390 }, { "epoch": 0.37, "grad_norm": 1.7448047399520874, "learning_rate": 0.0002, "loss": 1.5826, "step": 91400 }, { "epoch": 0.37, "grad_norm": 3.317545175552368, "learning_rate": 0.0002, "loss": 1.6767, "step": 91410 }, { "epoch": 0.37, "grad_norm": 3.645127773284912, "learning_rate": 0.0002, "loss": 1.6642, "step": 91420 }, { "epoch": 0.37, "grad_norm": 2.6732072830200195, "learning_rate": 0.0002, "loss": 1.5677, "step": 91430 }, { "epoch": 0.37, "grad_norm": 4.631689548492432, "learning_rate": 0.0002, "loss": 1.7025, "step": 91440 }, { "epoch": 0.37, "grad_norm": 4.1671528816223145, "learning_rate": 0.0002, "loss": 1.5676, "step": 91450 }, { "epoch": 0.37, "grad_norm": 3.7021491527557373, "learning_rate": 0.0002, "loss": 1.5353, "step": 91460 }, { "epoch": 0.37, "grad_norm": 3.285529613494873, "learning_rate": 0.0002, "loss": 1.5261, "step": 91470 }, { "epoch": 0.37, "grad_norm": 3.1173131465911865, "learning_rate": 0.0002, "loss": 1.4657, "step": 91480 }, { "epoch": 0.37, "grad_norm": 1.8796796798706055, "learning_rate": 0.0002, "loss": 1.3813, "step": 91490 }, { "epoch": 0.37, "grad_norm": 2.1593239307403564, "learning_rate": 0.0002, "loss": 1.8242, "step": 91500 }, { "epoch": 0.37, "grad_norm": 2.657867670059204, "learning_rate": 0.0002, "loss": 1.6466, "step": 91510 }, { "epoch": 0.37, "grad_norm": 3.3864269256591797, "learning_rate": 0.0002, "loss": 1.651, "step": 91520 }, { "epoch": 0.37, "grad_norm": 3.66485595703125, "learning_rate": 0.0002, "loss": 1.3836, "step": 91530 }, { "epoch": 0.37, "grad_norm": 6.482394695281982, "learning_rate": 0.0002, "loss": 1.542, "step": 91540 }, { "epoch": 0.37, "grad_norm": 4.173646926879883, "learning_rate": 0.0002, "loss": 1.7453, "step": 91550 }, { "epoch": 0.37, "grad_norm": 3.0975735187530518, "learning_rate": 0.0002, "loss": 1.4027, "step": 91560 }, { "epoch": 0.37, "grad_norm": 2.95412015914917, "learning_rate": 0.0002, "loss": 1.6081, "step": 91570 }, { "epoch": 0.37, "grad_norm": 2.5468192100524902, "learning_rate": 0.0002, "loss": 1.7428, "step": 91580 }, { "epoch": 0.37, "grad_norm": 2.9923579692840576, "learning_rate": 0.0002, "loss": 1.7861, "step": 91590 }, { "epoch": 0.37, "grad_norm": 2.5238916873931885, "learning_rate": 0.0002, "loss": 1.4297, "step": 91600 }, { "epoch": 0.37, "grad_norm": 2.0102787017822266, "learning_rate": 0.0002, "loss": 1.6736, "step": 91610 }, { "epoch": 0.37, "grad_norm": 3.7050981521606445, "learning_rate": 0.0002, "loss": 1.4071, "step": 91620 }, { "epoch": 0.37, "grad_norm": 3.352949380874634, "learning_rate": 0.0002, "loss": 1.3316, "step": 91630 }, { "epoch": 0.37, "grad_norm": 3.6303815841674805, "learning_rate": 0.0002, "loss": 1.5996, "step": 91640 }, { "epoch": 0.37, "grad_norm": 2.1179933547973633, "learning_rate": 0.0002, "loss": 1.6952, "step": 91650 }, { "epoch": 0.37, "grad_norm": 2.8303349018096924, "learning_rate": 0.0002, "loss": 1.5716, "step": 91660 }, { "epoch": 0.37, "grad_norm": 3.0904769897460938, "learning_rate": 0.0002, "loss": 1.4183, "step": 91670 }, { "epoch": 0.37, "grad_norm": 2.607936143875122, "learning_rate": 0.0002, "loss": 1.4471, "step": 91680 }, { "epoch": 0.37, "grad_norm": 2.0526437759399414, "learning_rate": 0.0002, "loss": 1.6197, "step": 91690 }, { "epoch": 0.37, "grad_norm": 1.8670471906661987, "learning_rate": 0.0002, "loss": 1.525, "step": 91700 }, { "epoch": 0.37, "grad_norm": 2.558776378631592, "learning_rate": 0.0002, "loss": 1.5502, "step": 91710 }, { "epoch": 0.37, "grad_norm": 5.148453235626221, "learning_rate": 0.0002, "loss": 1.5587, "step": 91720 }, { "epoch": 0.37, "grad_norm": 4.233919620513916, "learning_rate": 0.0002, "loss": 1.5882, "step": 91730 }, { "epoch": 0.37, "grad_norm": 2.4935450553894043, "learning_rate": 0.0002, "loss": 1.2933, "step": 91740 }, { "epoch": 0.37, "grad_norm": 2.399939775466919, "learning_rate": 0.0002, "loss": 1.5895, "step": 91750 }, { "epoch": 0.37, "grad_norm": 2.320389747619629, "learning_rate": 0.0002, "loss": 1.7446, "step": 91760 }, { "epoch": 0.37, "grad_norm": 2.3397977352142334, "learning_rate": 0.0002, "loss": 1.5145, "step": 91770 }, { "epoch": 0.37, "grad_norm": 10.108697891235352, "learning_rate": 0.0002, "loss": 1.566, "step": 91780 }, { "epoch": 0.37, "grad_norm": 3.0265114307403564, "learning_rate": 0.0002, "loss": 1.6384, "step": 91790 }, { "epoch": 0.37, "grad_norm": 2.889418840408325, "learning_rate": 0.0002, "loss": 1.7563, "step": 91800 }, { "epoch": 0.37, "grad_norm": 2.7936999797821045, "learning_rate": 0.0002, "loss": 1.7559, "step": 91810 }, { "epoch": 0.37, "grad_norm": 2.795213460922241, "learning_rate": 0.0002, "loss": 1.4853, "step": 91820 }, { "epoch": 0.37, "grad_norm": 2.8662309646606445, "learning_rate": 0.0002, "loss": 1.6341, "step": 91830 }, { "epoch": 0.37, "grad_norm": 2.392476797103882, "learning_rate": 0.0002, "loss": 1.751, "step": 91840 }, { "epoch": 0.37, "grad_norm": 5.082403182983398, "learning_rate": 0.0002, "loss": 1.5833, "step": 91850 }, { "epoch": 0.37, "grad_norm": 3.7238047122955322, "learning_rate": 0.0002, "loss": 1.4693, "step": 91860 }, { "epoch": 0.37, "grad_norm": 2.796251058578491, "learning_rate": 0.0002, "loss": 1.804, "step": 91870 }, { "epoch": 0.37, "grad_norm": 1.7953085899353027, "learning_rate": 0.0002, "loss": 1.4454, "step": 91880 }, { "epoch": 0.37, "grad_norm": 3.8357675075531006, "learning_rate": 0.0002, "loss": 1.6114, "step": 91890 }, { "epoch": 0.37, "grad_norm": 3.6879754066467285, "learning_rate": 0.0002, "loss": 1.7099, "step": 91900 }, { "epoch": 0.37, "grad_norm": 3.2823679447174072, "learning_rate": 0.0002, "loss": 1.5056, "step": 91910 }, { "epoch": 0.37, "grad_norm": 1.5971487760543823, "learning_rate": 0.0002, "loss": 1.6702, "step": 91920 }, { "epoch": 0.37, "grad_norm": 3.022343635559082, "learning_rate": 0.0002, "loss": 1.4761, "step": 91930 }, { "epoch": 0.37, "grad_norm": 2.089021921157837, "learning_rate": 0.0002, "loss": 1.4613, "step": 91940 }, { "epoch": 0.37, "grad_norm": 3.3251793384552, "learning_rate": 0.0002, "loss": 1.696, "step": 91950 }, { "epoch": 0.37, "grad_norm": 1.7338342666625977, "learning_rate": 0.0002, "loss": 1.5948, "step": 91960 }, { "epoch": 0.37, "grad_norm": 2.894462823867798, "learning_rate": 0.0002, "loss": 1.7067, "step": 91970 }, { "epoch": 0.37, "grad_norm": 4.586516380310059, "learning_rate": 0.0002, "loss": 1.6253, "step": 91980 }, { "epoch": 0.37, "grad_norm": 2.147015333175659, "learning_rate": 0.0002, "loss": 1.7091, "step": 91990 }, { "epoch": 0.37, "grad_norm": 2.4445600509643555, "learning_rate": 0.0002, "loss": 1.6902, "step": 92000 }, { "epoch": 0.37, "grad_norm": 2.240415573120117, "learning_rate": 0.0002, "loss": 1.5768, "step": 92010 }, { "epoch": 0.37, "grad_norm": 2.6664302349090576, "learning_rate": 0.0002, "loss": 1.6881, "step": 92020 }, { "epoch": 0.37, "grad_norm": 3.607250928878784, "learning_rate": 0.0002, "loss": 1.4701, "step": 92030 }, { "epoch": 0.37, "grad_norm": 2.0466153621673584, "learning_rate": 0.0002, "loss": 1.6233, "step": 92040 }, { "epoch": 0.37, "grad_norm": 2.708143949508667, "learning_rate": 0.0002, "loss": 1.5585, "step": 92050 }, { "epoch": 0.37, "grad_norm": 2.8807168006896973, "learning_rate": 0.0002, "loss": 1.6128, "step": 92060 }, { "epoch": 0.37, "grad_norm": 2.2400245666503906, "learning_rate": 0.0002, "loss": 1.4731, "step": 92070 }, { "epoch": 0.37, "grad_norm": 2.1091556549072266, "learning_rate": 0.0002, "loss": 1.6404, "step": 92080 }, { "epoch": 0.37, "grad_norm": 3.777082920074463, "learning_rate": 0.0002, "loss": 1.6852, "step": 92090 }, { "epoch": 0.37, "grad_norm": 1.7471731901168823, "learning_rate": 0.0002, "loss": 1.2763, "step": 92100 }, { "epoch": 0.37, "grad_norm": 3.206057548522949, "learning_rate": 0.0002, "loss": 1.4193, "step": 92110 }, { "epoch": 0.38, "grad_norm": 1.9658715724945068, "learning_rate": 0.0002, "loss": 1.8719, "step": 92120 }, { "epoch": 0.38, "grad_norm": 2.2677996158599854, "learning_rate": 0.0002, "loss": 1.3222, "step": 92130 }, { "epoch": 0.38, "grad_norm": 4.57909631729126, "learning_rate": 0.0002, "loss": 1.4215, "step": 92140 }, { "epoch": 0.38, "grad_norm": 2.58213472366333, "learning_rate": 0.0002, "loss": 1.4739, "step": 92150 }, { "epoch": 0.38, "grad_norm": 2.1012091636657715, "learning_rate": 0.0002, "loss": 1.7641, "step": 92160 }, { "epoch": 0.38, "grad_norm": 2.8927721977233887, "learning_rate": 0.0002, "loss": 1.4841, "step": 92170 }, { "epoch": 0.38, "grad_norm": 2.8413586616516113, "learning_rate": 0.0002, "loss": 1.7607, "step": 92180 }, { "epoch": 0.38, "grad_norm": 2.0102198123931885, "learning_rate": 0.0002, "loss": 1.5914, "step": 92190 }, { "epoch": 0.38, "grad_norm": 2.3092644214630127, "learning_rate": 0.0002, "loss": 1.7856, "step": 92200 }, { "epoch": 0.38, "grad_norm": 2.978419780731201, "learning_rate": 0.0002, "loss": 1.7989, "step": 92210 }, { "epoch": 0.38, "grad_norm": 3.5650227069854736, "learning_rate": 0.0002, "loss": 1.5333, "step": 92220 }, { "epoch": 0.38, "grad_norm": 3.4307045936584473, "learning_rate": 0.0002, "loss": 1.6388, "step": 92230 }, { "epoch": 0.38, "grad_norm": 4.010962009429932, "learning_rate": 0.0002, "loss": 1.5594, "step": 92240 }, { "epoch": 0.38, "grad_norm": 7.384329795837402, "learning_rate": 0.0002, "loss": 1.4058, "step": 92250 }, { "epoch": 0.38, "grad_norm": 2.063555955886841, "learning_rate": 0.0002, "loss": 1.5393, "step": 92260 }, { "epoch": 0.38, "grad_norm": 4.369036674499512, "learning_rate": 0.0002, "loss": 1.545, "step": 92270 }, { "epoch": 0.38, "grad_norm": 3.683865785598755, "learning_rate": 0.0002, "loss": 1.6218, "step": 92280 }, { "epoch": 0.38, "grad_norm": 4.342751502990723, "learning_rate": 0.0002, "loss": 1.49, "step": 92290 }, { "epoch": 0.38, "grad_norm": 2.909573793411255, "learning_rate": 0.0002, "loss": 1.4046, "step": 92300 }, { "epoch": 0.38, "grad_norm": 7.083775520324707, "learning_rate": 0.0002, "loss": 1.8553, "step": 92310 }, { "epoch": 0.38, "grad_norm": 4.134945869445801, "learning_rate": 0.0002, "loss": 1.5164, "step": 92320 }, { "epoch": 0.38, "grad_norm": 3.989534378051758, "learning_rate": 0.0002, "loss": 1.5688, "step": 92330 }, { "epoch": 0.38, "grad_norm": 2.3214492797851562, "learning_rate": 0.0002, "loss": 1.4698, "step": 92340 }, { "epoch": 0.38, "grad_norm": 2.376600742340088, "learning_rate": 0.0002, "loss": 1.5085, "step": 92350 }, { "epoch": 0.38, "grad_norm": 3.0434322357177734, "learning_rate": 0.0002, "loss": 1.3441, "step": 92360 }, { "epoch": 0.38, "grad_norm": 4.508491516113281, "learning_rate": 0.0002, "loss": 1.4797, "step": 92370 }, { "epoch": 0.38, "grad_norm": 4.8458757400512695, "learning_rate": 0.0002, "loss": 1.7279, "step": 92380 }, { "epoch": 0.38, "grad_norm": 3.9001989364624023, "learning_rate": 0.0002, "loss": 1.6039, "step": 92390 }, { "epoch": 0.38, "grad_norm": 2.717986822128296, "learning_rate": 0.0002, "loss": 1.4117, "step": 92400 }, { "epoch": 0.38, "grad_norm": 4.308769702911377, "learning_rate": 0.0002, "loss": 1.6389, "step": 92410 }, { "epoch": 0.38, "grad_norm": 2.6714894771575928, "learning_rate": 0.0002, "loss": 1.6388, "step": 92420 }, { "epoch": 0.38, "grad_norm": 2.1340112686157227, "learning_rate": 0.0002, "loss": 1.1465, "step": 92430 }, { "epoch": 0.38, "grad_norm": 3.3770806789398193, "learning_rate": 0.0002, "loss": 1.4932, "step": 92440 }, { "epoch": 0.38, "grad_norm": 2.1893322467803955, "learning_rate": 0.0002, "loss": 1.6279, "step": 92450 }, { "epoch": 0.38, "grad_norm": 2.2629501819610596, "learning_rate": 0.0002, "loss": 1.4721, "step": 92460 }, { "epoch": 0.38, "grad_norm": 2.538767099380493, "learning_rate": 0.0002, "loss": 1.5244, "step": 92470 }, { "epoch": 0.38, "grad_norm": 5.457204341888428, "learning_rate": 0.0002, "loss": 1.7107, "step": 92480 }, { "epoch": 0.38, "grad_norm": 2.9098317623138428, "learning_rate": 0.0002, "loss": 1.5116, "step": 92490 }, { "epoch": 0.38, "grad_norm": 1.8084458112716675, "learning_rate": 0.0002, "loss": 1.8106, "step": 92500 }, { "epoch": 0.38, "grad_norm": 2.9777872562408447, "learning_rate": 0.0002, "loss": 1.3626, "step": 92510 }, { "epoch": 0.38, "grad_norm": 3.802595376968384, "learning_rate": 0.0002, "loss": 1.5683, "step": 92520 }, { "epoch": 0.38, "grad_norm": 1.3367993831634521, "learning_rate": 0.0002, "loss": 1.5583, "step": 92530 }, { "epoch": 0.38, "grad_norm": 2.385934829711914, "learning_rate": 0.0002, "loss": 1.4044, "step": 92540 }, { "epoch": 0.38, "grad_norm": 1.6240540742874146, "learning_rate": 0.0002, "loss": 1.6913, "step": 92550 }, { "epoch": 0.38, "grad_norm": 2.9757697582244873, "learning_rate": 0.0002, "loss": 1.5189, "step": 92560 }, { "epoch": 0.38, "grad_norm": 2.004526376724243, "learning_rate": 0.0002, "loss": 1.7043, "step": 92570 }, { "epoch": 0.38, "grad_norm": 3.4375977516174316, "learning_rate": 0.0002, "loss": 1.4975, "step": 92580 }, { "epoch": 0.38, "grad_norm": 1.7451430559158325, "learning_rate": 0.0002, "loss": 1.7346, "step": 92590 }, { "epoch": 0.38, "grad_norm": 2.3310158252716064, "learning_rate": 0.0002, "loss": 1.3818, "step": 92600 }, { "epoch": 0.38, "grad_norm": 5.171304702758789, "learning_rate": 0.0002, "loss": 1.4553, "step": 92610 }, { "epoch": 0.38, "grad_norm": 3.3191475868225098, "learning_rate": 0.0002, "loss": 1.6645, "step": 92620 }, { "epoch": 0.38, "grad_norm": 2.0087616443634033, "learning_rate": 0.0002, "loss": 1.6112, "step": 92630 }, { "epoch": 0.38, "grad_norm": 3.137230634689331, "learning_rate": 0.0002, "loss": 1.6933, "step": 92640 }, { "epoch": 0.38, "grad_norm": 2.4731485843658447, "learning_rate": 0.0002, "loss": 1.3968, "step": 92650 }, { "epoch": 0.38, "grad_norm": 2.193457841873169, "learning_rate": 0.0002, "loss": 1.6548, "step": 92660 }, { "epoch": 0.38, "grad_norm": 6.168549060821533, "learning_rate": 0.0002, "loss": 1.6707, "step": 92670 }, { "epoch": 0.38, "grad_norm": 3.397869110107422, "learning_rate": 0.0002, "loss": 1.3704, "step": 92680 }, { "epoch": 0.38, "grad_norm": 1.7247198820114136, "learning_rate": 0.0002, "loss": 1.7091, "step": 92690 }, { "epoch": 0.38, "grad_norm": 2.1877501010894775, "learning_rate": 0.0002, "loss": 1.5761, "step": 92700 }, { "epoch": 0.38, "grad_norm": 2.120107412338257, "learning_rate": 0.0002, "loss": 1.3801, "step": 92710 }, { "epoch": 0.38, "grad_norm": 2.3500633239746094, "learning_rate": 0.0002, "loss": 1.8245, "step": 92720 }, { "epoch": 0.38, "grad_norm": 5.253742694854736, "learning_rate": 0.0002, "loss": 1.4741, "step": 92730 }, { "epoch": 0.38, "grad_norm": 2.4091861248016357, "learning_rate": 0.0002, "loss": 1.5764, "step": 92740 }, { "epoch": 0.38, "grad_norm": 2.7892098426818848, "learning_rate": 0.0002, "loss": 1.6276, "step": 92750 }, { "epoch": 0.38, "grad_norm": 3.374016523361206, "learning_rate": 0.0002, "loss": 1.5833, "step": 92760 }, { "epoch": 0.38, "grad_norm": 2.115344524383545, "learning_rate": 0.0002, "loss": 1.2268, "step": 92770 }, { "epoch": 0.38, "grad_norm": 4.523200035095215, "learning_rate": 0.0002, "loss": 1.462, "step": 92780 }, { "epoch": 0.38, "grad_norm": 2.997391939163208, "learning_rate": 0.0002, "loss": 1.528, "step": 92790 }, { "epoch": 0.38, "grad_norm": 2.0721352100372314, "learning_rate": 0.0002, "loss": 1.668, "step": 92800 }, { "epoch": 0.38, "grad_norm": 3.268324851989746, "learning_rate": 0.0002, "loss": 1.5418, "step": 92810 }, { "epoch": 0.38, "grad_norm": 2.029670476913452, "learning_rate": 0.0002, "loss": 1.562, "step": 92820 }, { "epoch": 0.38, "grad_norm": 4.202690124511719, "learning_rate": 0.0002, "loss": 1.5907, "step": 92830 }, { "epoch": 0.38, "grad_norm": 2.417001485824585, "learning_rate": 0.0002, "loss": 1.7514, "step": 92840 }, { "epoch": 0.38, "grad_norm": 2.2358551025390625, "learning_rate": 0.0002, "loss": 1.3371, "step": 92850 }, { "epoch": 0.38, "grad_norm": 4.656244277954102, "learning_rate": 0.0002, "loss": 1.3793, "step": 92860 }, { "epoch": 0.38, "grad_norm": 2.785183906555176, "learning_rate": 0.0002, "loss": 1.4804, "step": 92870 }, { "epoch": 0.38, "grad_norm": 2.9733505249023438, "learning_rate": 0.0002, "loss": 1.4235, "step": 92880 }, { "epoch": 0.38, "grad_norm": 3.4342384338378906, "learning_rate": 0.0002, "loss": 1.3656, "step": 92890 }, { "epoch": 0.38, "grad_norm": 10.507137298583984, "learning_rate": 0.0002, "loss": 1.5082, "step": 92900 }, { "epoch": 0.38, "grad_norm": 1.9200787544250488, "learning_rate": 0.0002, "loss": 1.6654, "step": 92910 }, { "epoch": 0.38, "grad_norm": 2.80330228805542, "learning_rate": 0.0002, "loss": 1.452, "step": 92920 }, { "epoch": 0.38, "grad_norm": 2.551626205444336, "learning_rate": 0.0002, "loss": 1.5579, "step": 92930 }, { "epoch": 0.38, "grad_norm": 2.814958333969116, "learning_rate": 0.0002, "loss": 1.5008, "step": 92940 }, { "epoch": 0.38, "grad_norm": 3.217745065689087, "learning_rate": 0.0002, "loss": 1.7785, "step": 92950 }, { "epoch": 0.38, "grad_norm": 3.5013010501861572, "learning_rate": 0.0002, "loss": 1.724, "step": 92960 }, { "epoch": 0.38, "grad_norm": 3.464205026626587, "learning_rate": 0.0002, "loss": 1.4001, "step": 92970 }, { "epoch": 0.38, "grad_norm": 2.4503650665283203, "learning_rate": 0.0002, "loss": 1.6189, "step": 92980 }, { "epoch": 0.38, "grad_norm": 4.326249122619629, "learning_rate": 0.0002, "loss": 1.7525, "step": 92990 }, { "epoch": 0.38, "grad_norm": 4.36706018447876, "learning_rate": 0.0002, "loss": 1.5554, "step": 93000 }, { "epoch": 0.38, "grad_norm": 2.4704513549804688, "learning_rate": 0.0002, "loss": 1.4792, "step": 93010 }, { "epoch": 0.38, "grad_norm": 3.276240348815918, "learning_rate": 0.0002, "loss": 1.8091, "step": 93020 }, { "epoch": 0.38, "grad_norm": 8.865833282470703, "learning_rate": 0.0002, "loss": 1.5442, "step": 93030 }, { "epoch": 0.38, "grad_norm": 5.172389030456543, "learning_rate": 0.0002, "loss": 1.509, "step": 93040 }, { "epoch": 0.38, "grad_norm": 5.662922382354736, "learning_rate": 0.0002, "loss": 1.4545, "step": 93050 }, { "epoch": 0.38, "grad_norm": 3.7487049102783203, "learning_rate": 0.0002, "loss": 1.6542, "step": 93060 }, { "epoch": 0.38, "grad_norm": 5.59116268157959, "learning_rate": 0.0002, "loss": 1.3659, "step": 93070 }, { "epoch": 0.38, "grad_norm": 1.8435219526290894, "learning_rate": 0.0002, "loss": 1.2824, "step": 93080 }, { "epoch": 0.38, "grad_norm": 4.145457744598389, "learning_rate": 0.0002, "loss": 1.5917, "step": 93090 }, { "epoch": 0.38, "grad_norm": 3.1255218982696533, "learning_rate": 0.0002, "loss": 1.7669, "step": 93100 }, { "epoch": 0.38, "grad_norm": 2.511263132095337, "learning_rate": 0.0002, "loss": 1.7585, "step": 93110 }, { "epoch": 0.38, "grad_norm": 2.5934133529663086, "learning_rate": 0.0002, "loss": 1.6819, "step": 93120 }, { "epoch": 0.38, "grad_norm": 2.193420171737671, "learning_rate": 0.0002, "loss": 1.5159, "step": 93130 }, { "epoch": 0.38, "grad_norm": 7.9715423583984375, "learning_rate": 0.0002, "loss": 1.5854, "step": 93140 }, { "epoch": 0.38, "grad_norm": 2.17503023147583, "learning_rate": 0.0002, "loss": 1.5557, "step": 93150 }, { "epoch": 0.38, "grad_norm": 3.3613290786743164, "learning_rate": 0.0002, "loss": 1.5529, "step": 93160 }, { "epoch": 0.38, "grad_norm": 5.643915176391602, "learning_rate": 0.0002, "loss": 1.4678, "step": 93170 }, { "epoch": 0.38, "grad_norm": 2.4198288917541504, "learning_rate": 0.0002, "loss": 1.317, "step": 93180 }, { "epoch": 0.38, "grad_norm": 3.1718974113464355, "learning_rate": 0.0002, "loss": 1.8529, "step": 93190 }, { "epoch": 0.38, "grad_norm": 3.267271041870117, "learning_rate": 0.0002, "loss": 1.741, "step": 93200 }, { "epoch": 0.38, "grad_norm": 2.473750352859497, "learning_rate": 0.0002, "loss": 1.5368, "step": 93210 }, { "epoch": 0.38, "grad_norm": 2.916501998901367, "learning_rate": 0.0002, "loss": 1.6997, "step": 93220 }, { "epoch": 0.38, "grad_norm": 2.854111433029175, "learning_rate": 0.0002, "loss": 1.7188, "step": 93230 }, { "epoch": 0.38, "grad_norm": 3.0910708904266357, "learning_rate": 0.0002, "loss": 1.4023, "step": 93240 }, { "epoch": 0.38, "grad_norm": 3.5270004272460938, "learning_rate": 0.0002, "loss": 1.5238, "step": 93250 }, { "epoch": 0.38, "grad_norm": 3.397696018218994, "learning_rate": 0.0002, "loss": 1.6956, "step": 93260 }, { "epoch": 0.38, "grad_norm": 2.31540584564209, "learning_rate": 0.0002, "loss": 1.4021, "step": 93270 }, { "epoch": 0.38, "grad_norm": 4.590020179748535, "learning_rate": 0.0002, "loss": 1.6213, "step": 93280 }, { "epoch": 0.38, "grad_norm": 2.9990627765655518, "learning_rate": 0.0002, "loss": 1.5447, "step": 93290 }, { "epoch": 0.38, "grad_norm": 3.9897403717041016, "learning_rate": 0.0002, "loss": 1.7227, "step": 93300 }, { "epoch": 0.38, "grad_norm": 4.537994861602783, "learning_rate": 0.0002, "loss": 1.5772, "step": 93310 }, { "epoch": 0.38, "grad_norm": 3.0137829780578613, "learning_rate": 0.0002, "loss": 1.3957, "step": 93320 }, { "epoch": 0.38, "grad_norm": 3.4507548809051514, "learning_rate": 0.0002, "loss": 1.4039, "step": 93330 }, { "epoch": 0.38, "grad_norm": 3.159907102584839, "learning_rate": 0.0002, "loss": 1.5701, "step": 93340 }, { "epoch": 0.38, "grad_norm": 4.746364116668701, "learning_rate": 0.0002, "loss": 1.566, "step": 93350 }, { "epoch": 0.38, "grad_norm": 3.260737419128418, "learning_rate": 0.0002, "loss": 1.5922, "step": 93360 }, { "epoch": 0.38, "grad_norm": 4.214773178100586, "learning_rate": 0.0002, "loss": 1.7197, "step": 93370 }, { "epoch": 0.38, "grad_norm": 2.7634060382843018, "learning_rate": 0.0002, "loss": 1.6462, "step": 93380 }, { "epoch": 0.38, "grad_norm": 3.0427746772766113, "learning_rate": 0.0002, "loss": 1.4137, "step": 93390 }, { "epoch": 0.38, "grad_norm": 3.6678683757781982, "learning_rate": 0.0002, "loss": 1.5153, "step": 93400 }, { "epoch": 0.38, "grad_norm": 3.1588757038116455, "learning_rate": 0.0002, "loss": 1.8257, "step": 93410 }, { "epoch": 0.38, "grad_norm": 3.2271933555603027, "learning_rate": 0.0002, "loss": 1.636, "step": 93420 }, { "epoch": 0.38, "grad_norm": 1.5453953742980957, "learning_rate": 0.0002, "loss": 1.4792, "step": 93430 }, { "epoch": 0.38, "grad_norm": 4.112008571624756, "learning_rate": 0.0002, "loss": 1.5195, "step": 93440 }, { "epoch": 0.38, "grad_norm": 3.2688543796539307, "learning_rate": 0.0002, "loss": 1.6416, "step": 93450 }, { "epoch": 0.38, "grad_norm": 2.713930368423462, "learning_rate": 0.0002, "loss": 1.5304, "step": 93460 }, { "epoch": 0.38, "grad_norm": 3.3590404987335205, "learning_rate": 0.0002, "loss": 1.6266, "step": 93470 }, { "epoch": 0.38, "grad_norm": 3.1052746772766113, "learning_rate": 0.0002, "loss": 1.5064, "step": 93480 }, { "epoch": 0.38, "grad_norm": 3.4822213649749756, "learning_rate": 0.0002, "loss": 1.6112, "step": 93490 }, { "epoch": 0.38, "grad_norm": 2.750162124633789, "learning_rate": 0.0002, "loss": 1.5624, "step": 93500 }, { "epoch": 0.38, "grad_norm": 2.909787893295288, "learning_rate": 0.0002, "loss": 1.5956, "step": 93510 }, { "epoch": 0.38, "grad_norm": 2.497843027114868, "learning_rate": 0.0002, "loss": 1.3704, "step": 93520 }, { "epoch": 0.38, "grad_norm": 3.592200994491577, "learning_rate": 0.0002, "loss": 1.5377, "step": 93530 }, { "epoch": 0.38, "grad_norm": 3.3922808170318604, "learning_rate": 0.0002, "loss": 1.5367, "step": 93540 }, { "epoch": 0.38, "grad_norm": 4.2741546630859375, "learning_rate": 0.0002, "loss": 1.3271, "step": 93550 }, { "epoch": 0.38, "grad_norm": 2.912785291671753, "learning_rate": 0.0002, "loss": 1.5999, "step": 93560 }, { "epoch": 0.38, "grad_norm": 2.8739521503448486, "learning_rate": 0.0002, "loss": 1.4313, "step": 93570 }, { "epoch": 0.38, "grad_norm": 2.391376256942749, "learning_rate": 0.0002, "loss": 1.6561, "step": 93580 }, { "epoch": 0.38, "grad_norm": 3.7811176776885986, "learning_rate": 0.0002, "loss": 1.6407, "step": 93590 }, { "epoch": 0.38, "grad_norm": 5.669251441955566, "learning_rate": 0.0002, "loss": 1.6226, "step": 93600 }, { "epoch": 0.38, "grad_norm": 1.9949263334274292, "learning_rate": 0.0002, "loss": 1.4983, "step": 93610 }, { "epoch": 0.38, "grad_norm": 3.2834231853485107, "learning_rate": 0.0002, "loss": 1.6077, "step": 93620 }, { "epoch": 0.38, "grad_norm": 4.885267734527588, "learning_rate": 0.0002, "loss": 1.3453, "step": 93630 }, { "epoch": 0.38, "grad_norm": 2.015270709991455, "learning_rate": 0.0002, "loss": 1.6099, "step": 93640 }, { "epoch": 0.38, "grad_norm": 2.0949597358703613, "learning_rate": 0.0002, "loss": 1.5864, "step": 93650 }, { "epoch": 0.38, "grad_norm": 1.8622701168060303, "learning_rate": 0.0002, "loss": 1.6813, "step": 93660 }, { "epoch": 0.38, "grad_norm": 3.562380790710449, "learning_rate": 0.0002, "loss": 1.5069, "step": 93670 }, { "epoch": 0.38, "grad_norm": 3.2276418209075928, "learning_rate": 0.0002, "loss": 1.6562, "step": 93680 }, { "epoch": 0.38, "grad_norm": 3.1377711296081543, "learning_rate": 0.0002, "loss": 1.7593, "step": 93690 }, { "epoch": 0.38, "grad_norm": 3.6296067237854004, "learning_rate": 0.0002, "loss": 1.4182, "step": 93700 }, { "epoch": 0.38, "grad_norm": 3.156121015548706, "learning_rate": 0.0002, "loss": 1.5054, "step": 93710 }, { "epoch": 0.38, "grad_norm": 2.5145623683929443, "learning_rate": 0.0002, "loss": 1.6292, "step": 93720 }, { "epoch": 0.38, "grad_norm": 2.7699472904205322, "learning_rate": 0.0002, "loss": 1.6612, "step": 93730 }, { "epoch": 0.38, "grad_norm": 3.6923437118530273, "learning_rate": 0.0002, "loss": 1.6273, "step": 93740 }, { "epoch": 0.38, "grad_norm": 2.7734780311584473, "learning_rate": 0.0002, "loss": 1.5253, "step": 93750 }, { "epoch": 0.38, "grad_norm": 2.7709765434265137, "learning_rate": 0.0002, "loss": 1.8971, "step": 93760 }, { "epoch": 0.38, "grad_norm": 3.487142562866211, "learning_rate": 0.0002, "loss": 1.7128, "step": 93770 }, { "epoch": 0.38, "grad_norm": 3.5159494876861572, "learning_rate": 0.0002, "loss": 1.4791, "step": 93780 }, { "epoch": 0.38, "grad_norm": 3.614253282546997, "learning_rate": 0.0002, "loss": 1.6222, "step": 93790 }, { "epoch": 0.38, "grad_norm": 1.9388375282287598, "learning_rate": 0.0002, "loss": 1.7529, "step": 93800 }, { "epoch": 0.38, "grad_norm": 2.5886101722717285, "learning_rate": 0.0002, "loss": 1.5344, "step": 93810 }, { "epoch": 0.38, "grad_norm": 2.9093751907348633, "learning_rate": 0.0002, "loss": 1.3377, "step": 93820 }, { "epoch": 0.38, "grad_norm": 3.9006707668304443, "learning_rate": 0.0002, "loss": 1.6117, "step": 93830 }, { "epoch": 0.38, "grad_norm": 5.307832717895508, "learning_rate": 0.0002, "loss": 1.6915, "step": 93840 }, { "epoch": 0.38, "grad_norm": 2.8788416385650635, "learning_rate": 0.0002, "loss": 1.5762, "step": 93850 }, { "epoch": 0.38, "grad_norm": 3.318697214126587, "learning_rate": 0.0002, "loss": 1.5217, "step": 93860 }, { "epoch": 0.38, "grad_norm": 2.261542797088623, "learning_rate": 0.0002, "loss": 1.4391, "step": 93870 }, { "epoch": 0.38, "grad_norm": 2.3616719245910645, "learning_rate": 0.0002, "loss": 1.5558, "step": 93880 }, { "epoch": 0.38, "grad_norm": 2.6394989490509033, "learning_rate": 0.0002, "loss": 1.635, "step": 93890 }, { "epoch": 0.38, "grad_norm": 1.9855260848999023, "learning_rate": 0.0002, "loss": 1.8254, "step": 93900 }, { "epoch": 0.38, "grad_norm": 2.291994571685791, "learning_rate": 0.0002, "loss": 1.6851, "step": 93910 }, { "epoch": 0.38, "grad_norm": 2.327173948287964, "learning_rate": 0.0002, "loss": 1.5363, "step": 93920 }, { "epoch": 0.38, "grad_norm": 4.9784626960754395, "learning_rate": 0.0002, "loss": 1.5897, "step": 93930 }, { "epoch": 0.38, "grad_norm": 3.7826452255249023, "learning_rate": 0.0002, "loss": 1.54, "step": 93940 }, { "epoch": 0.38, "grad_norm": 2.033515453338623, "learning_rate": 0.0002, "loss": 1.8283, "step": 93950 }, { "epoch": 0.38, "grad_norm": 4.466263294219971, "learning_rate": 0.0002, "loss": 1.4327, "step": 93960 }, { "epoch": 0.38, "grad_norm": 2.145256280899048, "learning_rate": 0.0002, "loss": 1.4724, "step": 93970 }, { "epoch": 0.38, "grad_norm": 1.3401085138320923, "learning_rate": 0.0002, "loss": 1.4766, "step": 93980 }, { "epoch": 0.38, "grad_norm": 2.8480334281921387, "learning_rate": 0.0002, "loss": 1.6498, "step": 93990 }, { "epoch": 0.38, "grad_norm": 3.454878807067871, "learning_rate": 0.0002, "loss": 1.7279, "step": 94000 }, { "epoch": 0.38, "grad_norm": 1.8855773210525513, "learning_rate": 0.0002, "loss": 1.6273, "step": 94010 }, { "epoch": 0.38, "grad_norm": 4.718909740447998, "learning_rate": 0.0002, "loss": 1.4404, "step": 94020 }, { "epoch": 0.38, "grad_norm": 3.243694305419922, "learning_rate": 0.0002, "loss": 1.5726, "step": 94030 }, { "epoch": 0.38, "grad_norm": 3.2615599632263184, "learning_rate": 0.0002, "loss": 1.4997, "step": 94040 }, { "epoch": 0.38, "grad_norm": 2.561458110809326, "learning_rate": 0.0002, "loss": 1.6032, "step": 94050 }, { "epoch": 0.38, "grad_norm": 2.877173662185669, "learning_rate": 0.0002, "loss": 1.6908, "step": 94060 }, { "epoch": 0.38, "grad_norm": 3.2814483642578125, "learning_rate": 0.0002, "loss": 1.4901, "step": 94070 }, { "epoch": 0.38, "grad_norm": 3.0429062843322754, "learning_rate": 0.0002, "loss": 1.5053, "step": 94080 }, { "epoch": 0.38, "grad_norm": 4.313347339630127, "learning_rate": 0.0002, "loss": 1.6174, "step": 94090 }, { "epoch": 0.38, "grad_norm": 3.034146547317505, "learning_rate": 0.0002, "loss": 1.5537, "step": 94100 }, { "epoch": 0.38, "grad_norm": 3.380936622619629, "learning_rate": 0.0002, "loss": 1.655, "step": 94110 }, { "epoch": 0.38, "grad_norm": 5.6280198097229, "learning_rate": 0.0002, "loss": 1.6964, "step": 94120 }, { "epoch": 0.38, "grad_norm": 2.861330032348633, "learning_rate": 0.0002, "loss": 1.6104, "step": 94130 }, { "epoch": 0.38, "grad_norm": 3.169126272201538, "learning_rate": 0.0002, "loss": 1.698, "step": 94140 }, { "epoch": 0.38, "grad_norm": 3.6258111000061035, "learning_rate": 0.0002, "loss": 1.5763, "step": 94150 }, { "epoch": 0.38, "grad_norm": 4.258781433105469, "learning_rate": 0.0002, "loss": 1.5756, "step": 94160 }, { "epoch": 0.38, "grad_norm": 4.576888084411621, "learning_rate": 0.0002, "loss": 1.5232, "step": 94170 }, { "epoch": 0.38, "grad_norm": 3.2580974102020264, "learning_rate": 0.0002, "loss": 1.5357, "step": 94180 }, { "epoch": 0.38, "grad_norm": 2.601865768432617, "learning_rate": 0.0002, "loss": 1.6342, "step": 94190 }, { "epoch": 0.38, "grad_norm": 2.746490955352783, "learning_rate": 0.0002, "loss": 1.5602, "step": 94200 }, { "epoch": 0.38, "grad_norm": 2.3253939151763916, "learning_rate": 0.0002, "loss": 1.6063, "step": 94210 }, { "epoch": 0.38, "grad_norm": 2.4581475257873535, "learning_rate": 0.0002, "loss": 1.5885, "step": 94220 }, { "epoch": 0.38, "grad_norm": 11.80836009979248, "learning_rate": 0.0002, "loss": 1.6343, "step": 94230 }, { "epoch": 0.38, "grad_norm": 2.56669020652771, "learning_rate": 0.0002, "loss": 1.7456, "step": 94240 }, { "epoch": 0.38, "grad_norm": 5.320274353027344, "learning_rate": 0.0002, "loss": 1.8519, "step": 94250 }, { "epoch": 0.38, "grad_norm": 3.94254732131958, "learning_rate": 0.0002, "loss": 1.6269, "step": 94260 }, { "epoch": 0.38, "grad_norm": 3.08638596534729, "learning_rate": 0.0002, "loss": 1.4876, "step": 94270 }, { "epoch": 0.38, "grad_norm": 2.4731786251068115, "learning_rate": 0.0002, "loss": 1.5098, "step": 94280 }, { "epoch": 0.38, "grad_norm": 4.436784267425537, "learning_rate": 0.0002, "loss": 1.4077, "step": 94290 }, { "epoch": 0.38, "grad_norm": 2.622558832168579, "learning_rate": 0.0002, "loss": 1.5534, "step": 94300 }, { "epoch": 0.38, "grad_norm": 3.8682138919830322, "learning_rate": 0.0002, "loss": 1.4501, "step": 94310 }, { "epoch": 0.38, "grad_norm": 3.2929527759552, "learning_rate": 0.0002, "loss": 1.4212, "step": 94320 }, { "epoch": 0.38, "grad_norm": 2.4580774307250977, "learning_rate": 0.0002, "loss": 1.6328, "step": 94330 }, { "epoch": 0.38, "grad_norm": 2.686830997467041, "learning_rate": 0.0002, "loss": 1.6309, "step": 94340 }, { "epoch": 0.38, "grad_norm": 2.6080758571624756, "learning_rate": 0.0002, "loss": 1.5924, "step": 94350 }, { "epoch": 0.38, "grad_norm": 3.2329795360565186, "learning_rate": 0.0002, "loss": 1.7262, "step": 94360 }, { "epoch": 0.38, "grad_norm": 2.1811118125915527, "learning_rate": 0.0002, "loss": 1.6041, "step": 94370 }, { "epoch": 0.38, "grad_norm": 3.847198963165283, "learning_rate": 0.0002, "loss": 1.565, "step": 94380 }, { "epoch": 0.38, "grad_norm": 3.5947864055633545, "learning_rate": 0.0002, "loss": 1.2981, "step": 94390 }, { "epoch": 0.38, "grad_norm": 2.1510837078094482, "learning_rate": 0.0002, "loss": 1.6296, "step": 94400 }, { "epoch": 0.38, "grad_norm": 2.9967784881591797, "learning_rate": 0.0002, "loss": 1.543, "step": 94410 }, { "epoch": 0.38, "grad_norm": 1.9220491647720337, "learning_rate": 0.0002, "loss": 1.6576, "step": 94420 }, { "epoch": 0.38, "grad_norm": 3.198665142059326, "learning_rate": 0.0002, "loss": 1.7014, "step": 94430 }, { "epoch": 0.38, "grad_norm": 3.6563215255737305, "learning_rate": 0.0002, "loss": 1.4655, "step": 94440 }, { "epoch": 0.38, "grad_norm": 2.582515001296997, "learning_rate": 0.0002, "loss": 1.425, "step": 94450 }, { "epoch": 0.38, "grad_norm": 2.483624219894409, "learning_rate": 0.0002, "loss": 1.4154, "step": 94460 }, { "epoch": 0.38, "grad_norm": 3.2983322143554688, "learning_rate": 0.0002, "loss": 1.7014, "step": 94470 }, { "epoch": 0.38, "grad_norm": 1.9538943767547607, "learning_rate": 0.0002, "loss": 1.5454, "step": 94480 }, { "epoch": 0.38, "grad_norm": 2.9680685997009277, "learning_rate": 0.0002, "loss": 1.5019, "step": 94490 }, { "epoch": 0.38, "grad_norm": 2.838296890258789, "learning_rate": 0.0002, "loss": 1.3394, "step": 94500 }, { "epoch": 0.38, "grad_norm": 3.059502601623535, "learning_rate": 0.0002, "loss": 1.8375, "step": 94510 }, { "epoch": 0.38, "grad_norm": 3.0806524753570557, "learning_rate": 0.0002, "loss": 1.7081, "step": 94520 }, { "epoch": 0.38, "grad_norm": 3.4673306941986084, "learning_rate": 0.0002, "loss": 1.5357, "step": 94530 }, { "epoch": 0.38, "grad_norm": 2.0817809104919434, "learning_rate": 0.0002, "loss": 1.5376, "step": 94540 }, { "epoch": 0.38, "grad_norm": 3.780463695526123, "learning_rate": 0.0002, "loss": 1.5135, "step": 94550 }, { "epoch": 0.38, "grad_norm": 3.6695048809051514, "learning_rate": 0.0002, "loss": 1.5242, "step": 94560 }, { "epoch": 0.38, "grad_norm": 3.262580633163452, "learning_rate": 0.0002, "loss": 1.6691, "step": 94570 }, { "epoch": 0.39, "grad_norm": 3.1225314140319824, "learning_rate": 0.0002, "loss": 1.496, "step": 94580 }, { "epoch": 0.39, "grad_norm": 5.469995021820068, "learning_rate": 0.0002, "loss": 1.6942, "step": 94590 }, { "epoch": 0.39, "grad_norm": 2.647592544555664, "learning_rate": 0.0002, "loss": 1.5202, "step": 94600 }, { "epoch": 0.39, "grad_norm": 4.085741996765137, "learning_rate": 0.0002, "loss": 1.7835, "step": 94610 }, { "epoch": 0.39, "grad_norm": 2.9045722484588623, "learning_rate": 0.0002, "loss": 1.4679, "step": 94620 }, { "epoch": 0.39, "grad_norm": 4.179440975189209, "learning_rate": 0.0002, "loss": 1.6145, "step": 94630 }, { "epoch": 0.39, "grad_norm": 2.122509479522705, "learning_rate": 0.0002, "loss": 1.5344, "step": 94640 }, { "epoch": 0.39, "grad_norm": 1.775134801864624, "learning_rate": 0.0002, "loss": 1.7051, "step": 94650 }, { "epoch": 0.39, "grad_norm": 2.7335357666015625, "learning_rate": 0.0002, "loss": 1.6276, "step": 94660 }, { "epoch": 0.39, "grad_norm": 2.888908863067627, "learning_rate": 0.0002, "loss": 1.7072, "step": 94670 }, { "epoch": 0.39, "grad_norm": 3.7495319843292236, "learning_rate": 0.0002, "loss": 1.6589, "step": 94680 }, { "epoch": 0.39, "grad_norm": 3.7023816108703613, "learning_rate": 0.0002, "loss": 1.6496, "step": 94690 }, { "epoch": 0.39, "grad_norm": 2.776214838027954, "learning_rate": 0.0002, "loss": 1.5055, "step": 94700 }, { "epoch": 0.39, "grad_norm": 2.6290252208709717, "learning_rate": 0.0002, "loss": 1.5468, "step": 94710 }, { "epoch": 0.39, "grad_norm": 3.4858171939849854, "learning_rate": 0.0002, "loss": 1.6275, "step": 94720 }, { "epoch": 0.39, "grad_norm": 2.7986080646514893, "learning_rate": 0.0002, "loss": 1.5814, "step": 94730 }, { "epoch": 0.39, "grad_norm": 3.3074066638946533, "learning_rate": 0.0002, "loss": 1.6616, "step": 94740 }, { "epoch": 0.39, "grad_norm": 2.1759071350097656, "learning_rate": 0.0002, "loss": 1.7551, "step": 94750 }, { "epoch": 0.39, "grad_norm": 4.721797466278076, "learning_rate": 0.0002, "loss": 1.5617, "step": 94760 }, { "epoch": 0.39, "grad_norm": 2.3121042251586914, "learning_rate": 0.0002, "loss": 1.7123, "step": 94770 }, { "epoch": 0.39, "grad_norm": 3.3268704414367676, "learning_rate": 0.0002, "loss": 1.6188, "step": 94780 }, { "epoch": 0.39, "grad_norm": 2.5392420291900635, "learning_rate": 0.0002, "loss": 1.5328, "step": 94790 }, { "epoch": 0.39, "grad_norm": 3.4882917404174805, "learning_rate": 0.0002, "loss": 1.8086, "step": 94800 }, { "epoch": 0.39, "grad_norm": 2.539781332015991, "learning_rate": 0.0002, "loss": 1.5446, "step": 94810 }, { "epoch": 0.39, "grad_norm": 3.448016405105591, "learning_rate": 0.0002, "loss": 1.4798, "step": 94820 }, { "epoch": 0.39, "grad_norm": 2.8263907432556152, "learning_rate": 0.0002, "loss": 1.6274, "step": 94830 }, { "epoch": 0.39, "grad_norm": 3.126647710800171, "learning_rate": 0.0002, "loss": 1.5408, "step": 94840 }, { "epoch": 0.39, "grad_norm": 3.763368606567383, "learning_rate": 0.0002, "loss": 1.4126, "step": 94850 }, { "epoch": 0.39, "grad_norm": 5.458247661590576, "learning_rate": 0.0002, "loss": 1.6358, "step": 94860 }, { "epoch": 0.39, "grad_norm": 4.395270824432373, "learning_rate": 0.0002, "loss": 1.6606, "step": 94870 }, { "epoch": 0.39, "grad_norm": 3.0907094478607178, "learning_rate": 0.0002, "loss": 1.5856, "step": 94880 }, { "epoch": 0.39, "grad_norm": 3.0886409282684326, "learning_rate": 0.0002, "loss": 1.7036, "step": 94890 }, { "epoch": 0.39, "grad_norm": 3.417346477508545, "learning_rate": 0.0002, "loss": 1.4529, "step": 94900 }, { "epoch": 0.39, "grad_norm": 3.175861120223999, "learning_rate": 0.0002, "loss": 1.3799, "step": 94910 }, { "epoch": 0.39, "grad_norm": 2.559750556945801, "learning_rate": 0.0002, "loss": 1.4391, "step": 94920 }, { "epoch": 0.39, "grad_norm": 1.6493853330612183, "learning_rate": 0.0002, "loss": 1.6894, "step": 94930 }, { "epoch": 0.39, "grad_norm": 1.7818821668624878, "learning_rate": 0.0002, "loss": 1.5337, "step": 94940 }, { "epoch": 0.39, "grad_norm": 2.2339279651641846, "learning_rate": 0.0002, "loss": 1.5312, "step": 94950 }, { "epoch": 0.39, "grad_norm": 3.1379990577697754, "learning_rate": 0.0002, "loss": 1.7821, "step": 94960 }, { "epoch": 0.39, "grad_norm": 1.4004981517791748, "learning_rate": 0.0002, "loss": 1.3508, "step": 94970 }, { "epoch": 0.39, "grad_norm": 4.497682094573975, "learning_rate": 0.0002, "loss": 1.6405, "step": 94980 }, { "epoch": 0.39, "grad_norm": 4.453796863555908, "learning_rate": 0.0002, "loss": 1.5377, "step": 94990 }, { "epoch": 0.39, "grad_norm": 2.799330711364746, "learning_rate": 0.0002, "loss": 1.6598, "step": 95000 }, { "epoch": 0.39, "grad_norm": 2.6436684131622314, "learning_rate": 0.0002, "loss": 1.4202, "step": 95010 }, { "epoch": 0.39, "grad_norm": 3.7071428298950195, "learning_rate": 0.0002, "loss": 1.624, "step": 95020 }, { "epoch": 0.39, "grad_norm": 3.543724298477173, "learning_rate": 0.0002, "loss": 1.774, "step": 95030 }, { "epoch": 0.39, "grad_norm": 2.3064534664154053, "learning_rate": 0.0002, "loss": 1.5437, "step": 95040 }, { "epoch": 0.39, "grad_norm": 4.083102703094482, "learning_rate": 0.0002, "loss": 1.665, "step": 95050 }, { "epoch": 0.39, "grad_norm": 3.824601888656616, "learning_rate": 0.0002, "loss": 1.6217, "step": 95060 }, { "epoch": 0.39, "grad_norm": 2.379368305206299, "learning_rate": 0.0002, "loss": 1.5808, "step": 95070 }, { "epoch": 0.39, "grad_norm": 2.457139492034912, "learning_rate": 0.0002, "loss": 1.6004, "step": 95080 }, { "epoch": 0.39, "grad_norm": 3.318892478942871, "learning_rate": 0.0002, "loss": 1.8168, "step": 95090 }, { "epoch": 0.39, "grad_norm": 2.279994010925293, "learning_rate": 0.0002, "loss": 1.169, "step": 95100 }, { "epoch": 0.39, "grad_norm": 3.125396728515625, "learning_rate": 0.0002, "loss": 1.7968, "step": 95110 }, { "epoch": 0.39, "grad_norm": 2.3162894248962402, "learning_rate": 0.0002, "loss": 1.6058, "step": 95120 }, { "epoch": 0.39, "grad_norm": 3.533651351928711, "learning_rate": 0.0002, "loss": 1.7056, "step": 95130 }, { "epoch": 0.39, "grad_norm": 2.654902458190918, "learning_rate": 0.0002, "loss": 1.6438, "step": 95140 }, { "epoch": 0.39, "grad_norm": 2.959054946899414, "learning_rate": 0.0002, "loss": 1.6659, "step": 95150 }, { "epoch": 0.39, "grad_norm": 2.47928786277771, "learning_rate": 0.0002, "loss": 1.6139, "step": 95160 }, { "epoch": 0.39, "grad_norm": 3.2546606063842773, "learning_rate": 0.0002, "loss": 1.5845, "step": 95170 }, { "epoch": 0.39, "grad_norm": 2.5034313201904297, "learning_rate": 0.0002, "loss": 1.516, "step": 95180 }, { "epoch": 0.39, "grad_norm": 3.9459304809570312, "learning_rate": 0.0002, "loss": 1.4706, "step": 95190 }, { "epoch": 0.39, "grad_norm": 2.7290914058685303, "learning_rate": 0.0002, "loss": 1.4393, "step": 95200 }, { "epoch": 0.39, "grad_norm": 2.2531116008758545, "learning_rate": 0.0002, "loss": 1.5879, "step": 95210 }, { "epoch": 0.39, "grad_norm": 2.59458065032959, "learning_rate": 0.0002, "loss": 1.762, "step": 95220 }, { "epoch": 0.39, "grad_norm": 2.6867387294769287, "learning_rate": 0.0002, "loss": 1.3719, "step": 95230 }, { "epoch": 0.39, "grad_norm": 3.606289863586426, "learning_rate": 0.0002, "loss": 1.5442, "step": 95240 }, { "epoch": 0.39, "grad_norm": 3.2132112979888916, "learning_rate": 0.0002, "loss": 1.6087, "step": 95250 }, { "epoch": 0.39, "grad_norm": 1.5348840951919556, "learning_rate": 0.0002, "loss": 1.5379, "step": 95260 }, { "epoch": 0.39, "grad_norm": 3.3067145347595215, "learning_rate": 0.0002, "loss": 1.4715, "step": 95270 }, { "epoch": 0.39, "grad_norm": 2.595916509628296, "learning_rate": 0.0002, "loss": 1.689, "step": 95280 }, { "epoch": 0.39, "grad_norm": 2.7790005207061768, "learning_rate": 0.0002, "loss": 1.5239, "step": 95290 }, { "epoch": 0.39, "grad_norm": 2.5026350021362305, "learning_rate": 0.0002, "loss": 1.6785, "step": 95300 }, { "epoch": 0.39, "grad_norm": 1.629520058631897, "learning_rate": 0.0002, "loss": 1.8216, "step": 95310 }, { "epoch": 0.39, "grad_norm": 3.4755496978759766, "learning_rate": 0.0002, "loss": 1.6167, "step": 95320 }, { "epoch": 0.39, "grad_norm": 3.3548684120178223, "learning_rate": 0.0002, "loss": 1.4079, "step": 95330 }, { "epoch": 0.39, "grad_norm": 5.657381057739258, "learning_rate": 0.0002, "loss": 1.5921, "step": 95340 }, { "epoch": 0.39, "grad_norm": 3.204266309738159, "learning_rate": 0.0002, "loss": 1.4002, "step": 95350 }, { "epoch": 0.39, "grad_norm": 2.649509906768799, "learning_rate": 0.0002, "loss": 1.69, "step": 95360 }, { "epoch": 0.39, "grad_norm": 2.748197078704834, "learning_rate": 0.0002, "loss": 1.806, "step": 95370 }, { "epoch": 0.39, "grad_norm": 1.7683998346328735, "learning_rate": 0.0002, "loss": 1.5001, "step": 95380 }, { "epoch": 0.39, "grad_norm": 1.7876396179199219, "learning_rate": 0.0002, "loss": 1.5323, "step": 95390 }, { "epoch": 0.39, "grad_norm": 3.0073723793029785, "learning_rate": 0.0002, "loss": 1.603, "step": 95400 }, { "epoch": 0.39, "grad_norm": 5.0005083084106445, "learning_rate": 0.0002, "loss": 1.6901, "step": 95410 }, { "epoch": 0.39, "grad_norm": 2.2159969806671143, "learning_rate": 0.0002, "loss": 1.76, "step": 95420 }, { "epoch": 0.39, "grad_norm": 2.065196990966797, "learning_rate": 0.0002, "loss": 1.501, "step": 95430 }, { "epoch": 0.39, "grad_norm": 2.5726568698883057, "learning_rate": 0.0002, "loss": 1.6159, "step": 95440 }, { "epoch": 0.39, "grad_norm": 3.6655189990997314, "learning_rate": 0.0002, "loss": 1.5036, "step": 95450 }, { "epoch": 0.39, "grad_norm": 3.9671382904052734, "learning_rate": 0.0002, "loss": 1.6725, "step": 95460 }, { "epoch": 0.39, "grad_norm": 2.323866128921509, "learning_rate": 0.0002, "loss": 1.3576, "step": 95470 }, { "epoch": 0.39, "grad_norm": 2.475013494491577, "learning_rate": 0.0002, "loss": 1.5067, "step": 95480 }, { "epoch": 0.39, "grad_norm": 3.262789011001587, "learning_rate": 0.0002, "loss": 1.6277, "step": 95490 }, { "epoch": 0.39, "grad_norm": 1.6613585948944092, "learning_rate": 0.0002, "loss": 1.4887, "step": 95500 }, { "epoch": 0.39, "grad_norm": 2.425168514251709, "learning_rate": 0.0002, "loss": 1.5072, "step": 95510 }, { "epoch": 0.39, "grad_norm": 2.3815746307373047, "learning_rate": 0.0002, "loss": 1.5765, "step": 95520 }, { "epoch": 0.39, "grad_norm": 4.7026448249816895, "learning_rate": 0.0002, "loss": 1.5728, "step": 95530 }, { "epoch": 0.39, "grad_norm": 2.6578736305236816, "learning_rate": 0.0002, "loss": 1.3712, "step": 95540 }, { "epoch": 0.39, "grad_norm": 2.9581851959228516, "learning_rate": 0.0002, "loss": 1.843, "step": 95550 }, { "epoch": 0.39, "grad_norm": 1.5144122838974, "learning_rate": 0.0002, "loss": 1.8402, "step": 95560 }, { "epoch": 0.39, "grad_norm": 3.41060209274292, "learning_rate": 0.0002, "loss": 1.3891, "step": 95570 }, { "epoch": 0.39, "grad_norm": 4.167252063751221, "learning_rate": 0.0002, "loss": 1.538, "step": 95580 }, { "epoch": 0.39, "grad_norm": 5.400693416595459, "learning_rate": 0.0002, "loss": 1.536, "step": 95590 }, { "epoch": 0.39, "grad_norm": 2.0216305255889893, "learning_rate": 0.0002, "loss": 1.4586, "step": 95600 }, { "epoch": 0.39, "grad_norm": 4.151354789733887, "learning_rate": 0.0002, "loss": 1.5919, "step": 95610 }, { "epoch": 0.39, "grad_norm": 4.167516231536865, "learning_rate": 0.0002, "loss": 1.346, "step": 95620 }, { "epoch": 0.39, "grad_norm": 2.689279556274414, "learning_rate": 0.0002, "loss": 1.7023, "step": 95630 }, { "epoch": 0.39, "grad_norm": 5.341846942901611, "learning_rate": 0.0002, "loss": 1.5315, "step": 95640 }, { "epoch": 0.39, "grad_norm": 2.327437162399292, "learning_rate": 0.0002, "loss": 1.7775, "step": 95650 }, { "epoch": 0.39, "grad_norm": 3.5521392822265625, "learning_rate": 0.0002, "loss": 1.5653, "step": 95660 }, { "epoch": 0.39, "grad_norm": 2.8311047554016113, "learning_rate": 0.0002, "loss": 1.8579, "step": 95670 }, { "epoch": 0.39, "grad_norm": 3.0321216583251953, "learning_rate": 0.0002, "loss": 1.7103, "step": 95680 }, { "epoch": 0.39, "grad_norm": 1.6608364582061768, "learning_rate": 0.0002, "loss": 1.4928, "step": 95690 }, { "epoch": 0.39, "grad_norm": 3.125359296798706, "learning_rate": 0.0002, "loss": 1.7939, "step": 95700 }, { "epoch": 0.39, "grad_norm": 2.4610495567321777, "learning_rate": 0.0002, "loss": 1.8893, "step": 95710 }, { "epoch": 0.39, "grad_norm": 2.284123420715332, "learning_rate": 0.0002, "loss": 1.3416, "step": 95720 }, { "epoch": 0.39, "grad_norm": 2.481208086013794, "learning_rate": 0.0002, "loss": 1.7926, "step": 95730 }, { "epoch": 0.39, "grad_norm": 3.010194778442383, "learning_rate": 0.0002, "loss": 1.4856, "step": 95740 }, { "epoch": 0.39, "grad_norm": 2.551753520965576, "learning_rate": 0.0002, "loss": 1.6143, "step": 95750 }, { "epoch": 0.39, "grad_norm": 2.891970634460449, "learning_rate": 0.0002, "loss": 1.478, "step": 95760 }, { "epoch": 0.39, "grad_norm": 1.2130370140075684, "learning_rate": 0.0002, "loss": 1.3206, "step": 95770 }, { "epoch": 0.39, "grad_norm": 3.287562131881714, "learning_rate": 0.0002, "loss": 1.6626, "step": 95780 }, { "epoch": 0.39, "grad_norm": 2.163784980773926, "learning_rate": 0.0002, "loss": 1.8321, "step": 95790 }, { "epoch": 0.39, "grad_norm": 2.8090059757232666, "learning_rate": 0.0002, "loss": 1.7479, "step": 95800 }, { "epoch": 0.39, "grad_norm": 2.7826757431030273, "learning_rate": 0.0002, "loss": 1.5384, "step": 95810 }, { "epoch": 0.39, "grad_norm": 3.8903441429138184, "learning_rate": 0.0002, "loss": 1.6188, "step": 95820 }, { "epoch": 0.39, "grad_norm": 2.9827301502227783, "learning_rate": 0.0002, "loss": 1.5719, "step": 95830 }, { "epoch": 0.39, "grad_norm": 2.7458581924438477, "learning_rate": 0.0002, "loss": 1.7058, "step": 95840 }, { "epoch": 0.39, "grad_norm": 4.091090202331543, "learning_rate": 0.0002, "loss": 1.7779, "step": 95850 }, { "epoch": 0.39, "grad_norm": 5.65206241607666, "learning_rate": 0.0002, "loss": 1.7234, "step": 95860 }, { "epoch": 0.39, "grad_norm": 3.5410678386688232, "learning_rate": 0.0002, "loss": 1.5977, "step": 95870 }, { "epoch": 0.39, "grad_norm": 2.5398333072662354, "learning_rate": 0.0002, "loss": 1.5978, "step": 95880 }, { "epoch": 0.39, "grad_norm": 2.890918493270874, "learning_rate": 0.0002, "loss": 1.6444, "step": 95890 }, { "epoch": 0.39, "grad_norm": 1.7454864978790283, "learning_rate": 0.0002, "loss": 1.6114, "step": 95900 }, { "epoch": 0.39, "grad_norm": 2.3741261959075928, "learning_rate": 0.0002, "loss": 1.507, "step": 95910 }, { "epoch": 0.39, "grad_norm": 3.093878746032715, "learning_rate": 0.0002, "loss": 1.5811, "step": 95920 }, { "epoch": 0.39, "grad_norm": 2.120941638946533, "learning_rate": 0.0002, "loss": 1.6374, "step": 95930 }, { "epoch": 0.39, "grad_norm": 2.903733968734741, "learning_rate": 0.0002, "loss": 1.9016, "step": 95940 }, { "epoch": 0.39, "grad_norm": 6.022045612335205, "learning_rate": 0.0002, "loss": 1.4923, "step": 95950 }, { "epoch": 0.39, "grad_norm": 6.643293857574463, "learning_rate": 0.0002, "loss": 1.7188, "step": 95960 }, { "epoch": 0.39, "grad_norm": 5.972464084625244, "learning_rate": 0.0002, "loss": 1.6255, "step": 95970 }, { "epoch": 0.39, "grad_norm": 3.3555891513824463, "learning_rate": 0.0002, "loss": 1.5649, "step": 95980 }, { "epoch": 0.39, "grad_norm": 3.0246636867523193, "learning_rate": 0.0002, "loss": 1.7258, "step": 95990 }, { "epoch": 0.39, "grad_norm": 3.2165305614471436, "learning_rate": 0.0002, "loss": 1.3757, "step": 96000 }, { "epoch": 0.39, "grad_norm": 1.627716064453125, "learning_rate": 0.0002, "loss": 1.6695, "step": 96010 }, { "epoch": 0.39, "grad_norm": 2.6277027130126953, "learning_rate": 0.0002, "loss": 1.6472, "step": 96020 }, { "epoch": 0.39, "grad_norm": 3.21682071685791, "learning_rate": 0.0002, "loss": 1.4304, "step": 96030 }, { "epoch": 0.39, "grad_norm": 12.909703254699707, "learning_rate": 0.0002, "loss": 1.7013, "step": 96040 }, { "epoch": 0.39, "grad_norm": 2.171139717102051, "learning_rate": 0.0002, "loss": 1.8417, "step": 96050 }, { "epoch": 0.39, "grad_norm": 2.78420090675354, "learning_rate": 0.0002, "loss": 1.7206, "step": 96060 }, { "epoch": 0.39, "grad_norm": 3.5445966720581055, "learning_rate": 0.0002, "loss": 1.6743, "step": 96070 }, { "epoch": 0.39, "grad_norm": 3.2241642475128174, "learning_rate": 0.0002, "loss": 1.5237, "step": 96080 }, { "epoch": 0.39, "grad_norm": 2.9894232749938965, "learning_rate": 0.0002, "loss": 1.4579, "step": 96090 }, { "epoch": 0.39, "grad_norm": 2.388413190841675, "learning_rate": 0.0002, "loss": 1.3368, "step": 96100 }, { "epoch": 0.39, "grad_norm": 2.4916298389434814, "learning_rate": 0.0002, "loss": 1.4643, "step": 96110 }, { "epoch": 0.39, "grad_norm": 2.7569143772125244, "learning_rate": 0.0002, "loss": 1.7066, "step": 96120 }, { "epoch": 0.39, "grad_norm": 2.864894390106201, "learning_rate": 0.0002, "loss": 1.4578, "step": 96130 }, { "epoch": 0.39, "grad_norm": 2.620434284210205, "learning_rate": 0.0002, "loss": 1.6684, "step": 96140 }, { "epoch": 0.39, "grad_norm": 2.5543360710144043, "learning_rate": 0.0002, "loss": 1.4203, "step": 96150 }, { "epoch": 0.39, "grad_norm": 4.987446308135986, "learning_rate": 0.0002, "loss": 1.8166, "step": 96160 }, { "epoch": 0.39, "grad_norm": 3.2677502632141113, "learning_rate": 0.0002, "loss": 1.6786, "step": 96170 }, { "epoch": 0.39, "grad_norm": 3.8283679485321045, "learning_rate": 0.0002, "loss": 1.6533, "step": 96180 }, { "epoch": 0.39, "grad_norm": 4.154026985168457, "learning_rate": 0.0002, "loss": 1.4373, "step": 96190 }, { "epoch": 0.39, "grad_norm": 2.1128990650177, "learning_rate": 0.0002, "loss": 1.3683, "step": 96200 }, { "epoch": 0.39, "grad_norm": 4.252206802368164, "learning_rate": 0.0002, "loss": 1.6416, "step": 96210 }, { "epoch": 0.39, "grad_norm": 4.598198890686035, "learning_rate": 0.0002, "loss": 1.5307, "step": 96220 }, { "epoch": 0.39, "grad_norm": 2.130366563796997, "learning_rate": 0.0002, "loss": 1.7808, "step": 96230 }, { "epoch": 0.39, "grad_norm": 3.778918743133545, "learning_rate": 0.0002, "loss": 1.5708, "step": 96240 }, { "epoch": 0.39, "grad_norm": 3.614600419998169, "learning_rate": 0.0002, "loss": 1.6163, "step": 96250 }, { "epoch": 0.39, "grad_norm": 3.4724137783050537, "learning_rate": 0.0002, "loss": 1.733, "step": 96260 }, { "epoch": 0.39, "grad_norm": 3.8584511280059814, "learning_rate": 0.0002, "loss": 1.3253, "step": 96270 }, { "epoch": 0.39, "grad_norm": 2.607147216796875, "learning_rate": 0.0002, "loss": 1.5348, "step": 96280 }, { "epoch": 0.39, "grad_norm": 2.80560564994812, "learning_rate": 0.0002, "loss": 1.5124, "step": 96290 }, { "epoch": 0.39, "grad_norm": 1.4285651445388794, "learning_rate": 0.0002, "loss": 1.583, "step": 96300 }, { "epoch": 0.39, "grad_norm": 3.101126194000244, "learning_rate": 0.0002, "loss": 1.4247, "step": 96310 }, { "epoch": 0.39, "grad_norm": 3.8159687519073486, "learning_rate": 0.0002, "loss": 1.7489, "step": 96320 }, { "epoch": 0.39, "grad_norm": 2.4360148906707764, "learning_rate": 0.0002, "loss": 1.5475, "step": 96330 }, { "epoch": 0.39, "grad_norm": 2.907445192337036, "learning_rate": 0.0002, "loss": 1.5896, "step": 96340 }, { "epoch": 0.39, "grad_norm": 4.253844261169434, "learning_rate": 0.0002, "loss": 1.5009, "step": 96350 }, { "epoch": 0.39, "grad_norm": 4.2242817878723145, "learning_rate": 0.0002, "loss": 1.5888, "step": 96360 }, { "epoch": 0.39, "grad_norm": 2.9414825439453125, "learning_rate": 0.0002, "loss": 1.4554, "step": 96370 }, { "epoch": 0.39, "grad_norm": 4.516598224639893, "learning_rate": 0.0002, "loss": 1.5562, "step": 96380 }, { "epoch": 0.39, "grad_norm": 3.265974283218384, "learning_rate": 0.0002, "loss": 1.392, "step": 96390 }, { "epoch": 0.39, "grad_norm": 3.1943283081054688, "learning_rate": 0.0002, "loss": 1.4794, "step": 96400 }, { "epoch": 0.39, "grad_norm": 3.6205532550811768, "learning_rate": 0.0002, "loss": 1.7741, "step": 96410 }, { "epoch": 0.39, "grad_norm": 1.873231291770935, "learning_rate": 0.0002, "loss": 1.6177, "step": 96420 }, { "epoch": 0.39, "grad_norm": 3.811310052871704, "learning_rate": 0.0002, "loss": 1.6776, "step": 96430 }, { "epoch": 0.39, "grad_norm": 3.3504343032836914, "learning_rate": 0.0002, "loss": 1.5249, "step": 96440 }, { "epoch": 0.39, "grad_norm": 2.7703030109405518, "learning_rate": 0.0002, "loss": 1.4584, "step": 96450 }, { "epoch": 0.39, "grad_norm": 5.369021415710449, "learning_rate": 0.0002, "loss": 1.7401, "step": 96460 }, { "epoch": 0.39, "grad_norm": 3.1920087337493896, "learning_rate": 0.0002, "loss": 1.6176, "step": 96470 }, { "epoch": 0.39, "grad_norm": 2.1934611797332764, "learning_rate": 0.0002, "loss": 1.6131, "step": 96480 }, { "epoch": 0.39, "grad_norm": 4.41170597076416, "learning_rate": 0.0002, "loss": 1.5926, "step": 96490 }, { "epoch": 0.39, "grad_norm": 2.3318302631378174, "learning_rate": 0.0002, "loss": 1.6297, "step": 96500 }, { "epoch": 0.39, "grad_norm": 2.843165636062622, "learning_rate": 0.0002, "loss": 1.5811, "step": 96510 }, { "epoch": 0.39, "grad_norm": 3.542726516723633, "learning_rate": 0.0002, "loss": 1.5131, "step": 96520 }, { "epoch": 0.39, "grad_norm": 2.487330198287964, "learning_rate": 0.0002, "loss": 1.9663, "step": 96530 }, { "epoch": 0.39, "grad_norm": 3.1286628246307373, "learning_rate": 0.0002, "loss": 1.2732, "step": 96540 }, { "epoch": 0.39, "grad_norm": 2.4749038219451904, "learning_rate": 0.0002, "loss": 1.64, "step": 96550 }, { "epoch": 0.39, "grad_norm": 2.8541598320007324, "learning_rate": 0.0002, "loss": 1.4947, "step": 96560 }, { "epoch": 0.39, "grad_norm": 2.6755318641662598, "learning_rate": 0.0002, "loss": 1.3618, "step": 96570 }, { "epoch": 0.39, "grad_norm": 2.7295970916748047, "learning_rate": 0.0002, "loss": 1.4701, "step": 96580 }, { "epoch": 0.39, "grad_norm": 3.8415937423706055, "learning_rate": 0.0002, "loss": 1.8732, "step": 96590 }, { "epoch": 0.39, "grad_norm": 2.933098554611206, "learning_rate": 0.0002, "loss": 1.677, "step": 96600 }, { "epoch": 0.39, "grad_norm": 2.906865119934082, "learning_rate": 0.0002, "loss": 1.427, "step": 96610 }, { "epoch": 0.39, "grad_norm": 2.901689291000366, "learning_rate": 0.0002, "loss": 1.565, "step": 96620 }, { "epoch": 0.39, "grad_norm": 5.4504475593566895, "learning_rate": 0.0002, "loss": 1.591, "step": 96630 }, { "epoch": 0.39, "grad_norm": 3.229902505874634, "learning_rate": 0.0002, "loss": 1.4506, "step": 96640 }, { "epoch": 0.39, "grad_norm": 4.3553547859191895, "learning_rate": 0.0002, "loss": 1.4859, "step": 96650 }, { "epoch": 0.39, "grad_norm": 3.4340660572052, "learning_rate": 0.0002, "loss": 1.5606, "step": 96660 }, { "epoch": 0.39, "grad_norm": 3.178999900817871, "learning_rate": 0.0002, "loss": 1.6185, "step": 96670 }, { "epoch": 0.39, "grad_norm": 3.32066011428833, "learning_rate": 0.0002, "loss": 1.4675, "step": 96680 }, { "epoch": 0.39, "grad_norm": 2.6328306198120117, "learning_rate": 0.0002, "loss": 1.5456, "step": 96690 }, { "epoch": 0.39, "grad_norm": 2.609677314758301, "learning_rate": 0.0002, "loss": 1.7136, "step": 96700 }, { "epoch": 0.39, "grad_norm": 3.8970279693603516, "learning_rate": 0.0002, "loss": 1.449, "step": 96710 }, { "epoch": 0.39, "grad_norm": 2.2081120014190674, "learning_rate": 0.0002, "loss": 1.4865, "step": 96720 }, { "epoch": 0.39, "grad_norm": 4.819793224334717, "learning_rate": 0.0002, "loss": 1.5409, "step": 96730 }, { "epoch": 0.39, "grad_norm": 7.597283840179443, "learning_rate": 0.0002, "loss": 1.6789, "step": 96740 }, { "epoch": 0.39, "grad_norm": 3.192809581756592, "learning_rate": 0.0002, "loss": 1.874, "step": 96750 }, { "epoch": 0.39, "grad_norm": 2.659421682357788, "learning_rate": 0.0002, "loss": 1.6105, "step": 96760 }, { "epoch": 0.39, "grad_norm": 1.7046316862106323, "learning_rate": 0.0002, "loss": 1.5557, "step": 96770 }, { "epoch": 0.39, "grad_norm": 3.009946584701538, "learning_rate": 0.0002, "loss": 1.8122, "step": 96780 }, { "epoch": 0.39, "grad_norm": 6.017276763916016, "learning_rate": 0.0002, "loss": 1.5067, "step": 96790 }, { "epoch": 0.39, "grad_norm": 2.8177552223205566, "learning_rate": 0.0002, "loss": 1.5425, "step": 96800 }, { "epoch": 0.39, "grad_norm": 2.8993515968322754, "learning_rate": 0.0002, "loss": 1.5468, "step": 96810 }, { "epoch": 0.39, "grad_norm": 2.88873028755188, "learning_rate": 0.0002, "loss": 1.8326, "step": 96820 }, { "epoch": 0.39, "grad_norm": 3.72027850151062, "learning_rate": 0.0002, "loss": 1.6349, "step": 96830 }, { "epoch": 0.39, "grad_norm": 2.7180402278900146, "learning_rate": 0.0002, "loss": 1.5092, "step": 96840 }, { "epoch": 0.39, "grad_norm": 2.990466833114624, "learning_rate": 0.0002, "loss": 1.6683, "step": 96850 }, { "epoch": 0.39, "grad_norm": 3.1436502933502197, "learning_rate": 0.0002, "loss": 1.5558, "step": 96860 }, { "epoch": 0.39, "grad_norm": 2.5290729999542236, "learning_rate": 0.0002, "loss": 1.5211, "step": 96870 }, { "epoch": 0.39, "grad_norm": 2.414032220840454, "learning_rate": 0.0002, "loss": 1.7132, "step": 96880 }, { "epoch": 0.39, "grad_norm": 7.595341205596924, "learning_rate": 0.0002, "loss": 1.806, "step": 96890 }, { "epoch": 0.39, "grad_norm": 2.563256025314331, "learning_rate": 0.0002, "loss": 1.5701, "step": 96900 }, { "epoch": 0.39, "grad_norm": 1.6046146154403687, "learning_rate": 0.0002, "loss": 1.7279, "step": 96910 }, { "epoch": 0.39, "grad_norm": 5.477295398712158, "learning_rate": 0.0002, "loss": 1.4656, "step": 96920 }, { "epoch": 0.39, "grad_norm": 3.6776628494262695, "learning_rate": 0.0002, "loss": 1.377, "step": 96930 }, { "epoch": 0.39, "grad_norm": 2.3636176586151123, "learning_rate": 0.0002, "loss": 1.9539, "step": 96940 }, { "epoch": 0.39, "grad_norm": 3.5526037216186523, "learning_rate": 0.0002, "loss": 1.4914, "step": 96950 }, { "epoch": 0.39, "grad_norm": 3.087512969970703, "learning_rate": 0.0002, "loss": 1.8138, "step": 96960 }, { "epoch": 0.39, "grad_norm": 3.8640267848968506, "learning_rate": 0.0002, "loss": 1.4103, "step": 96970 }, { "epoch": 0.39, "grad_norm": 2.4802417755126953, "learning_rate": 0.0002, "loss": 1.6712, "step": 96980 }, { "epoch": 0.39, "grad_norm": 2.6162948608398438, "learning_rate": 0.0002, "loss": 1.5615, "step": 96990 }, { "epoch": 0.39, "grad_norm": 2.0880370140075684, "learning_rate": 0.0002, "loss": 1.2918, "step": 97000 }, { "epoch": 0.39, "grad_norm": 6.36289644241333, "learning_rate": 0.0002, "loss": 1.5757, "step": 97010 }, { "epoch": 0.39, "grad_norm": 3.280879020690918, "learning_rate": 0.0002, "loss": 1.3146, "step": 97020 }, { "epoch": 0.4, "grad_norm": 2.4579272270202637, "learning_rate": 0.0002, "loss": 1.3482, "step": 97030 }, { "epoch": 0.4, "grad_norm": 3.3654654026031494, "learning_rate": 0.0002, "loss": 1.5514, "step": 97040 }, { "epoch": 0.4, "grad_norm": 2.4058408737182617, "learning_rate": 0.0002, "loss": 1.5213, "step": 97050 }, { "epoch": 0.4, "grad_norm": 1.65269935131073, "learning_rate": 0.0002, "loss": 1.5919, "step": 97060 }, { "epoch": 0.4, "grad_norm": 4.124375343322754, "learning_rate": 0.0002, "loss": 1.5237, "step": 97070 }, { "epoch": 0.4, "grad_norm": 3.4638023376464844, "learning_rate": 0.0002, "loss": 1.5281, "step": 97080 }, { "epoch": 0.4, "grad_norm": 2.8363282680511475, "learning_rate": 0.0002, "loss": 1.4398, "step": 97090 }, { "epoch": 0.4, "grad_norm": 2.800793170928955, "learning_rate": 0.0002, "loss": 1.5504, "step": 97100 }, { "epoch": 0.4, "grad_norm": 3.708772897720337, "learning_rate": 0.0002, "loss": 1.5563, "step": 97110 }, { "epoch": 0.4, "grad_norm": 3.3485329151153564, "learning_rate": 0.0002, "loss": 1.4248, "step": 97120 }, { "epoch": 0.4, "grad_norm": 4.379899501800537, "learning_rate": 0.0002, "loss": 1.6219, "step": 97130 }, { "epoch": 0.4, "grad_norm": 4.8371782302856445, "learning_rate": 0.0002, "loss": 1.6842, "step": 97140 }, { "epoch": 0.4, "grad_norm": 2.8687551021575928, "learning_rate": 0.0002, "loss": 1.46, "step": 97150 }, { "epoch": 0.4, "grad_norm": 1.900855541229248, "learning_rate": 0.0002, "loss": 1.4924, "step": 97160 }, { "epoch": 0.4, "grad_norm": 2.662604331970215, "learning_rate": 0.0002, "loss": 1.6275, "step": 97170 }, { "epoch": 0.4, "grad_norm": 3.125725030899048, "learning_rate": 0.0002, "loss": 1.565, "step": 97180 }, { "epoch": 0.4, "grad_norm": 2.8226351737976074, "learning_rate": 0.0002, "loss": 1.5337, "step": 97190 }, { "epoch": 0.4, "grad_norm": 6.854203701019287, "learning_rate": 0.0002, "loss": 1.8156, "step": 97200 }, { "epoch": 0.4, "grad_norm": 2.97957181930542, "learning_rate": 0.0002, "loss": 1.3445, "step": 97210 }, { "epoch": 0.4, "grad_norm": 2.2522995471954346, "learning_rate": 0.0002, "loss": 1.6021, "step": 97220 }, { "epoch": 0.4, "grad_norm": 2.5759923458099365, "learning_rate": 0.0002, "loss": 1.446, "step": 97230 }, { "epoch": 0.4, "grad_norm": 3.816732406616211, "learning_rate": 0.0002, "loss": 1.5175, "step": 97240 }, { "epoch": 0.4, "grad_norm": 1.931429386138916, "learning_rate": 0.0002, "loss": 1.8113, "step": 97250 }, { "epoch": 0.4, "grad_norm": 2.707066774368286, "learning_rate": 0.0002, "loss": 1.4109, "step": 97260 }, { "epoch": 0.4, "grad_norm": 2.6754496097564697, "learning_rate": 0.0002, "loss": 1.5411, "step": 97270 }, { "epoch": 0.4, "grad_norm": 2.8963963985443115, "learning_rate": 0.0002, "loss": 1.7091, "step": 97280 }, { "epoch": 0.4, "grad_norm": 3.199634075164795, "learning_rate": 0.0002, "loss": 1.6711, "step": 97290 }, { "epoch": 0.4, "grad_norm": 6.406696796417236, "learning_rate": 0.0002, "loss": 1.7939, "step": 97300 }, { "epoch": 0.4, "grad_norm": 4.189337730407715, "learning_rate": 0.0002, "loss": 1.6838, "step": 97310 }, { "epoch": 0.4, "grad_norm": 3.2193310260772705, "learning_rate": 0.0002, "loss": 1.7077, "step": 97320 }, { "epoch": 0.4, "grad_norm": 5.455179691314697, "learning_rate": 0.0002, "loss": 1.5332, "step": 97330 }, { "epoch": 0.4, "grad_norm": 3.1194021701812744, "learning_rate": 0.0002, "loss": 1.5938, "step": 97340 }, { "epoch": 0.4, "grad_norm": 1.8647650480270386, "learning_rate": 0.0002, "loss": 1.3769, "step": 97350 }, { "epoch": 0.4, "grad_norm": 3.1524171829223633, "learning_rate": 0.0002, "loss": 1.3063, "step": 97360 }, { "epoch": 0.4, "grad_norm": 5.472466945648193, "learning_rate": 0.0002, "loss": 1.5823, "step": 97370 }, { "epoch": 0.4, "grad_norm": 4.389545440673828, "learning_rate": 0.0002, "loss": 1.3422, "step": 97380 }, { "epoch": 0.4, "grad_norm": 3.419080972671509, "learning_rate": 0.0002, "loss": 1.4997, "step": 97390 }, { "epoch": 0.4, "grad_norm": 2.218618154525757, "learning_rate": 0.0002, "loss": 1.6931, "step": 97400 }, { "epoch": 0.4, "grad_norm": 4.583892822265625, "learning_rate": 0.0002, "loss": 1.4267, "step": 97410 }, { "epoch": 0.4, "grad_norm": 1.8616842031478882, "learning_rate": 0.0002, "loss": 1.6637, "step": 97420 }, { "epoch": 0.4, "grad_norm": 3.1622121334075928, "learning_rate": 0.0002, "loss": 1.7829, "step": 97430 }, { "epoch": 0.4, "grad_norm": 1.8528387546539307, "learning_rate": 0.0002, "loss": 1.7204, "step": 97440 }, { "epoch": 0.4, "grad_norm": 2.72868013381958, "learning_rate": 0.0002, "loss": 1.6967, "step": 97450 }, { "epoch": 0.4, "grad_norm": 2.50476336479187, "learning_rate": 0.0002, "loss": 1.7166, "step": 97460 }, { "epoch": 0.4, "grad_norm": 2.222278118133545, "learning_rate": 0.0002, "loss": 1.469, "step": 97470 }, { "epoch": 0.4, "grad_norm": 1.8168340921401978, "learning_rate": 0.0002, "loss": 1.4467, "step": 97480 }, { "epoch": 0.4, "grad_norm": 3.411614418029785, "learning_rate": 0.0002, "loss": 1.7316, "step": 97490 }, { "epoch": 0.4, "grad_norm": 2.7520904541015625, "learning_rate": 0.0002, "loss": 1.5459, "step": 97500 }, { "epoch": 0.4, "grad_norm": 12.57774543762207, "learning_rate": 0.0002, "loss": 1.4633, "step": 97510 }, { "epoch": 0.4, "grad_norm": 3.486903667449951, "learning_rate": 0.0002, "loss": 1.5982, "step": 97520 }, { "epoch": 0.4, "grad_norm": 2.807931423187256, "learning_rate": 0.0002, "loss": 1.5332, "step": 97530 }, { "epoch": 0.4, "grad_norm": 3.537353277206421, "learning_rate": 0.0002, "loss": 1.7934, "step": 97540 }, { "epoch": 0.4, "grad_norm": 2.5200910568237305, "learning_rate": 0.0002, "loss": 1.9748, "step": 97550 }, { "epoch": 0.4, "grad_norm": 2.7830393314361572, "learning_rate": 0.0002, "loss": 1.6627, "step": 97560 }, { "epoch": 0.4, "grad_norm": 2.5713469982147217, "learning_rate": 0.0002, "loss": 1.8792, "step": 97570 }, { "epoch": 0.4, "grad_norm": 3.511871576309204, "learning_rate": 0.0002, "loss": 1.4893, "step": 97580 }, { "epoch": 0.4, "grad_norm": 2.4227726459503174, "learning_rate": 0.0002, "loss": 1.6642, "step": 97590 }, { "epoch": 0.4, "grad_norm": 3.023442029953003, "learning_rate": 0.0002, "loss": 1.501, "step": 97600 }, { "epoch": 0.4, "grad_norm": 2.5385758876800537, "learning_rate": 0.0002, "loss": 1.3573, "step": 97610 }, { "epoch": 0.4, "grad_norm": 2.7341654300689697, "learning_rate": 0.0002, "loss": 1.554, "step": 97620 }, { "epoch": 0.4, "grad_norm": 2.1645023822784424, "learning_rate": 0.0002, "loss": 1.4386, "step": 97630 }, { "epoch": 0.4, "grad_norm": 3.1568360328674316, "learning_rate": 0.0002, "loss": 1.8319, "step": 97640 }, { "epoch": 0.4, "grad_norm": 4.360036849975586, "learning_rate": 0.0002, "loss": 1.6115, "step": 97650 }, { "epoch": 0.4, "grad_norm": 2.2094457149505615, "learning_rate": 0.0002, "loss": 1.5346, "step": 97660 }, { "epoch": 0.4, "grad_norm": 3.0025084018707275, "learning_rate": 0.0002, "loss": 1.6443, "step": 97670 }, { "epoch": 0.4, "grad_norm": 2.927191734313965, "learning_rate": 0.0002, "loss": 1.7875, "step": 97680 }, { "epoch": 0.4, "grad_norm": 1.8411064147949219, "learning_rate": 0.0002, "loss": 1.5187, "step": 97690 }, { "epoch": 0.4, "grad_norm": 2.7180233001708984, "learning_rate": 0.0002, "loss": 1.6416, "step": 97700 }, { "epoch": 0.4, "grad_norm": 3.475105047225952, "learning_rate": 0.0002, "loss": 1.419, "step": 97710 }, { "epoch": 0.4, "grad_norm": 2.1513595581054688, "learning_rate": 0.0002, "loss": 1.633, "step": 97720 }, { "epoch": 0.4, "grad_norm": 2.37324857711792, "learning_rate": 0.0002, "loss": 1.7513, "step": 97730 }, { "epoch": 0.4, "grad_norm": 2.752378463745117, "learning_rate": 0.0002, "loss": 1.5172, "step": 97740 }, { "epoch": 0.4, "grad_norm": 1.7141364812850952, "learning_rate": 0.0002, "loss": 1.6847, "step": 97750 }, { "epoch": 0.4, "grad_norm": 3.569913864135742, "learning_rate": 0.0002, "loss": 1.6665, "step": 97760 }, { "epoch": 0.4, "grad_norm": 2.248884916305542, "learning_rate": 0.0002, "loss": 1.5404, "step": 97770 }, { "epoch": 0.4, "grad_norm": 3.6962833404541016, "learning_rate": 0.0002, "loss": 1.6054, "step": 97780 }, { "epoch": 0.4, "grad_norm": 3.4199812412261963, "learning_rate": 0.0002, "loss": 1.5567, "step": 97790 }, { "epoch": 0.4, "grad_norm": 3.216512441635132, "learning_rate": 0.0002, "loss": 1.6371, "step": 97800 }, { "epoch": 0.4, "grad_norm": 3.214792013168335, "learning_rate": 0.0002, "loss": 1.7221, "step": 97810 }, { "epoch": 0.4, "grad_norm": 2.7306265830993652, "learning_rate": 0.0002, "loss": 1.4988, "step": 97820 }, { "epoch": 0.4, "grad_norm": 6.6963372230529785, "learning_rate": 0.0002, "loss": 1.5504, "step": 97830 }, { "epoch": 0.4, "grad_norm": 3.4860196113586426, "learning_rate": 0.0002, "loss": 1.5555, "step": 97840 }, { "epoch": 0.4, "grad_norm": 3.263303756713867, "learning_rate": 0.0002, "loss": 1.583, "step": 97850 }, { "epoch": 0.4, "grad_norm": 2.659344434738159, "learning_rate": 0.0002, "loss": 1.5878, "step": 97860 }, { "epoch": 0.4, "grad_norm": 2.9774091243743896, "learning_rate": 0.0002, "loss": 1.636, "step": 97870 }, { "epoch": 0.4, "grad_norm": 3.071101665496826, "learning_rate": 0.0002, "loss": 1.762, "step": 97880 }, { "epoch": 0.4, "grad_norm": 1.8264917135238647, "learning_rate": 0.0002, "loss": 1.6744, "step": 97890 }, { "epoch": 0.4, "grad_norm": 6.4885149002075195, "learning_rate": 0.0002, "loss": 1.6465, "step": 97900 }, { "epoch": 0.4, "grad_norm": 3.3376431465148926, "learning_rate": 0.0002, "loss": 1.6576, "step": 97910 }, { "epoch": 0.4, "grad_norm": 3.894129514694214, "learning_rate": 0.0002, "loss": 1.5322, "step": 97920 }, { "epoch": 0.4, "grad_norm": 2.259941816329956, "learning_rate": 0.0002, "loss": 1.4734, "step": 97930 }, { "epoch": 0.4, "grad_norm": 2.4728338718414307, "learning_rate": 0.0002, "loss": 1.5819, "step": 97940 }, { "epoch": 0.4, "grad_norm": 2.8120100498199463, "learning_rate": 0.0002, "loss": 1.5128, "step": 97950 }, { "epoch": 0.4, "grad_norm": 2.073009967803955, "learning_rate": 0.0002, "loss": 1.6699, "step": 97960 }, { "epoch": 0.4, "grad_norm": 4.753398895263672, "learning_rate": 0.0002, "loss": 1.5788, "step": 97970 }, { "epoch": 0.4, "grad_norm": 2.427513360977173, "learning_rate": 0.0002, "loss": 1.5613, "step": 97980 }, { "epoch": 0.4, "grad_norm": 2.327728748321533, "learning_rate": 0.0002, "loss": 1.5824, "step": 97990 }, { "epoch": 0.4, "grad_norm": 2.524522304534912, "learning_rate": 0.0002, "loss": 1.5982, "step": 98000 }, { "epoch": 0.4, "grad_norm": 2.6232681274414062, "learning_rate": 0.0002, "loss": 1.5613, "step": 98010 }, { "epoch": 0.4, "grad_norm": 2.043146848678589, "learning_rate": 0.0002, "loss": 1.465, "step": 98020 }, { "epoch": 0.4, "grad_norm": 3.6555821895599365, "learning_rate": 0.0002, "loss": 1.6377, "step": 98030 }, { "epoch": 0.4, "grad_norm": 2.6853644847869873, "learning_rate": 0.0002, "loss": 1.6395, "step": 98040 }, { "epoch": 0.4, "grad_norm": 2.7167437076568604, "learning_rate": 0.0002, "loss": 1.6213, "step": 98050 }, { "epoch": 0.4, "grad_norm": 2.4359076023101807, "learning_rate": 0.0002, "loss": 1.4795, "step": 98060 }, { "epoch": 0.4, "grad_norm": 4.123021125793457, "learning_rate": 0.0002, "loss": 1.6768, "step": 98070 }, { "epoch": 0.4, "grad_norm": 2.9443392753601074, "learning_rate": 0.0002, "loss": 1.6009, "step": 98080 }, { "epoch": 0.4, "grad_norm": 2.958775520324707, "learning_rate": 0.0002, "loss": 1.2477, "step": 98090 }, { "epoch": 0.4, "grad_norm": 2.3643851280212402, "learning_rate": 0.0002, "loss": 1.5685, "step": 98100 }, { "epoch": 0.4, "grad_norm": 5.016831874847412, "learning_rate": 0.0002, "loss": 1.6085, "step": 98110 }, { "epoch": 0.4, "grad_norm": 2.7968804836273193, "learning_rate": 0.0002, "loss": 1.761, "step": 98120 }, { "epoch": 0.4, "grad_norm": 2.0239367485046387, "learning_rate": 0.0002, "loss": 1.6821, "step": 98130 }, { "epoch": 0.4, "grad_norm": 2.7286808490753174, "learning_rate": 0.0002, "loss": 1.2598, "step": 98140 }, { "epoch": 0.4, "grad_norm": 6.781245231628418, "learning_rate": 0.0002, "loss": 1.5005, "step": 98150 }, { "epoch": 0.4, "grad_norm": 3.0773069858551025, "learning_rate": 0.0002, "loss": 1.5172, "step": 98160 }, { "epoch": 0.4, "grad_norm": 2.863393783569336, "learning_rate": 0.0002, "loss": 1.5088, "step": 98170 }, { "epoch": 0.4, "grad_norm": 2.4669554233551025, "learning_rate": 0.0002, "loss": 1.7847, "step": 98180 }, { "epoch": 0.4, "grad_norm": 2.914417266845703, "learning_rate": 0.0002, "loss": 1.6855, "step": 98190 }, { "epoch": 0.4, "grad_norm": 3.2987475395202637, "learning_rate": 0.0002, "loss": 1.8533, "step": 98200 }, { "epoch": 0.4, "grad_norm": 3.46170711517334, "learning_rate": 0.0002, "loss": 1.482, "step": 98210 }, { "epoch": 0.4, "grad_norm": 3.2278411388397217, "learning_rate": 0.0002, "loss": 1.7162, "step": 98220 }, { "epoch": 0.4, "grad_norm": 4.931420803070068, "learning_rate": 0.0002, "loss": 1.5816, "step": 98230 }, { "epoch": 0.4, "grad_norm": 2.8026559352874756, "learning_rate": 0.0002, "loss": 1.6937, "step": 98240 }, { "epoch": 0.4, "grad_norm": 2.3193206787109375, "learning_rate": 0.0002, "loss": 1.767, "step": 98250 }, { "epoch": 0.4, "grad_norm": 3.5814156532287598, "learning_rate": 0.0002, "loss": 1.934, "step": 98260 }, { "epoch": 0.4, "grad_norm": 2.9143214225769043, "learning_rate": 0.0002, "loss": 1.3895, "step": 98270 }, { "epoch": 0.4, "grad_norm": 2.8324406147003174, "learning_rate": 0.0002, "loss": 1.185, "step": 98280 }, { "epoch": 0.4, "grad_norm": 2.806713819503784, "learning_rate": 0.0002, "loss": 1.6948, "step": 98290 }, { "epoch": 0.4, "grad_norm": 2.1170248985290527, "learning_rate": 0.0002, "loss": 1.7152, "step": 98300 }, { "epoch": 0.4, "grad_norm": 2.523841381072998, "learning_rate": 0.0002, "loss": 1.5656, "step": 98310 }, { "epoch": 0.4, "grad_norm": 2.839982509613037, "learning_rate": 0.0002, "loss": 1.8228, "step": 98320 }, { "epoch": 0.4, "grad_norm": 2.9050803184509277, "learning_rate": 0.0002, "loss": 1.5227, "step": 98330 }, { "epoch": 0.4, "grad_norm": 2.4240734577178955, "learning_rate": 0.0002, "loss": 1.5589, "step": 98340 }, { "epoch": 0.4, "grad_norm": 1.7703986167907715, "learning_rate": 0.0002, "loss": 1.768, "step": 98350 }, { "epoch": 0.4, "grad_norm": 2.863088369369507, "learning_rate": 0.0002, "loss": 1.4963, "step": 98360 }, { "epoch": 0.4, "grad_norm": 3.499767541885376, "learning_rate": 0.0002, "loss": 1.57, "step": 98370 }, { "epoch": 0.4, "grad_norm": 4.534446716308594, "learning_rate": 0.0002, "loss": 1.5184, "step": 98380 }, { "epoch": 0.4, "grad_norm": 2.874135971069336, "learning_rate": 0.0002, "loss": 1.4639, "step": 98390 }, { "epoch": 0.4, "grad_norm": 1.7940000295639038, "learning_rate": 0.0002, "loss": 1.552, "step": 98400 }, { "epoch": 0.4, "grad_norm": 1.9661791324615479, "learning_rate": 0.0002, "loss": 1.5462, "step": 98410 }, { "epoch": 0.4, "grad_norm": 2.838022232055664, "learning_rate": 0.0002, "loss": 1.5217, "step": 98420 }, { "epoch": 0.4, "grad_norm": 5.449462413787842, "learning_rate": 0.0002, "loss": 1.8801, "step": 98430 }, { "epoch": 0.4, "grad_norm": 2.96616530418396, "learning_rate": 0.0002, "loss": 1.4996, "step": 98440 }, { "epoch": 0.4, "grad_norm": 3.3567066192626953, "learning_rate": 0.0002, "loss": 1.3415, "step": 98450 }, { "epoch": 0.4, "grad_norm": 3.834150791168213, "learning_rate": 0.0002, "loss": 1.7454, "step": 98460 }, { "epoch": 0.4, "grad_norm": 3.0267059803009033, "learning_rate": 0.0002, "loss": 1.6449, "step": 98470 }, { "epoch": 0.4, "grad_norm": 2.917189121246338, "learning_rate": 0.0002, "loss": 1.5673, "step": 98480 }, { "epoch": 0.4, "grad_norm": 5.149128437042236, "learning_rate": 0.0002, "loss": 1.7939, "step": 98490 }, { "epoch": 0.4, "grad_norm": 2.10027813911438, "learning_rate": 0.0002, "loss": 1.5825, "step": 98500 }, { "epoch": 0.4, "grad_norm": 2.0036747455596924, "learning_rate": 0.0002, "loss": 1.707, "step": 98510 }, { "epoch": 0.4, "grad_norm": 5.028010368347168, "learning_rate": 0.0002, "loss": 1.8375, "step": 98520 }, { "epoch": 0.4, "grad_norm": 3.2095799446105957, "learning_rate": 0.0002, "loss": 1.5454, "step": 98530 }, { "epoch": 0.4, "grad_norm": 2.9583613872528076, "learning_rate": 0.0002, "loss": 1.684, "step": 98540 }, { "epoch": 0.4, "grad_norm": 3.6391303539276123, "learning_rate": 0.0002, "loss": 1.7028, "step": 98550 }, { "epoch": 0.4, "grad_norm": 3.801182270050049, "learning_rate": 0.0002, "loss": 1.662, "step": 98560 }, { "epoch": 0.4, "grad_norm": 3.860456943511963, "learning_rate": 0.0002, "loss": 1.6006, "step": 98570 }, { "epoch": 0.4, "grad_norm": 2.5107369422912598, "learning_rate": 0.0002, "loss": 1.4642, "step": 98580 }, { "epoch": 0.4, "grad_norm": 4.03475284576416, "learning_rate": 0.0002, "loss": 1.2321, "step": 98590 }, { "epoch": 0.4, "grad_norm": 2.985865831375122, "learning_rate": 0.0002, "loss": 2.0449, "step": 98600 }, { "epoch": 0.4, "grad_norm": 4.161733627319336, "learning_rate": 0.0002, "loss": 1.699, "step": 98610 }, { "epoch": 0.4, "grad_norm": 3.0205132961273193, "learning_rate": 0.0002, "loss": 1.6307, "step": 98620 }, { "epoch": 0.4, "grad_norm": 3.7775378227233887, "learning_rate": 0.0002, "loss": 1.6368, "step": 98630 }, { "epoch": 0.4, "grad_norm": 3.319149971008301, "learning_rate": 0.0002, "loss": 1.74, "step": 98640 }, { "epoch": 0.4, "grad_norm": 7.4420976638793945, "learning_rate": 0.0002, "loss": 1.498, "step": 98650 }, { "epoch": 0.4, "grad_norm": 6.913822174072266, "learning_rate": 0.0002, "loss": 1.2894, "step": 98660 }, { "epoch": 0.4, "grad_norm": 1.9755244255065918, "learning_rate": 0.0002, "loss": 1.4185, "step": 98670 }, { "epoch": 0.4, "grad_norm": 4.076263904571533, "learning_rate": 0.0002, "loss": 1.7962, "step": 98680 }, { "epoch": 0.4, "grad_norm": 8.767325401306152, "learning_rate": 0.0002, "loss": 1.6869, "step": 98690 }, { "epoch": 0.4, "grad_norm": 2.5011401176452637, "learning_rate": 0.0002, "loss": 1.5493, "step": 98700 }, { "epoch": 0.4, "grad_norm": 3.1782584190368652, "learning_rate": 0.0002, "loss": 1.7969, "step": 98710 }, { "epoch": 0.4, "grad_norm": 3.7333693504333496, "learning_rate": 0.0002, "loss": 1.5662, "step": 98720 }, { "epoch": 0.4, "grad_norm": 2.5761592388153076, "learning_rate": 0.0002, "loss": 1.5976, "step": 98730 }, { "epoch": 0.4, "grad_norm": 3.752333402633667, "learning_rate": 0.0002, "loss": 1.6115, "step": 98740 }, { "epoch": 0.4, "grad_norm": 3.5362260341644287, "learning_rate": 0.0002, "loss": 1.5859, "step": 98750 }, { "epoch": 0.4, "grad_norm": 2.484100341796875, "learning_rate": 0.0002, "loss": 1.4271, "step": 98760 }, { "epoch": 0.4, "grad_norm": 3.3619930744171143, "learning_rate": 0.0002, "loss": 1.6512, "step": 98770 }, { "epoch": 0.4, "grad_norm": 4.1094465255737305, "learning_rate": 0.0002, "loss": 1.4659, "step": 98780 }, { "epoch": 0.4, "grad_norm": 2.227393388748169, "learning_rate": 0.0002, "loss": 1.5301, "step": 98790 }, { "epoch": 0.4, "grad_norm": 2.092784881591797, "learning_rate": 0.0002, "loss": 1.5268, "step": 98800 }, { "epoch": 0.4, "grad_norm": 2.0978050231933594, "learning_rate": 0.0002, "loss": 1.4845, "step": 98810 }, { "epoch": 0.4, "grad_norm": 1.614006519317627, "learning_rate": 0.0002, "loss": 1.6623, "step": 98820 }, { "epoch": 0.4, "grad_norm": 1.8617228269577026, "learning_rate": 0.0002, "loss": 1.6739, "step": 98830 }, { "epoch": 0.4, "grad_norm": 2.8233861923217773, "learning_rate": 0.0002, "loss": 1.4802, "step": 98840 }, { "epoch": 0.4, "grad_norm": 2.8368825912475586, "learning_rate": 0.0002, "loss": 1.6827, "step": 98850 }, { "epoch": 0.4, "grad_norm": 3.389819622039795, "learning_rate": 0.0002, "loss": 1.6949, "step": 98860 }, { "epoch": 0.4, "grad_norm": 2.801847219467163, "learning_rate": 0.0002, "loss": 1.8039, "step": 98870 }, { "epoch": 0.4, "grad_norm": 3.6431286334991455, "learning_rate": 0.0002, "loss": 1.3592, "step": 98880 }, { "epoch": 0.4, "grad_norm": 4.174862384796143, "learning_rate": 0.0002, "loss": 1.5911, "step": 98890 }, { "epoch": 0.4, "grad_norm": 2.7381865978240967, "learning_rate": 0.0002, "loss": 1.644, "step": 98900 }, { "epoch": 0.4, "grad_norm": 3.993380308151245, "learning_rate": 0.0002, "loss": 1.9338, "step": 98910 }, { "epoch": 0.4, "grad_norm": 3.5199167728424072, "learning_rate": 0.0002, "loss": 1.594, "step": 98920 }, { "epoch": 0.4, "grad_norm": 2.1258904933929443, "learning_rate": 0.0002, "loss": 1.6908, "step": 98930 }, { "epoch": 0.4, "grad_norm": 2.7700979709625244, "learning_rate": 0.0002, "loss": 1.7532, "step": 98940 }, { "epoch": 0.4, "grad_norm": 2.981355905532837, "learning_rate": 0.0002, "loss": 1.4938, "step": 98950 }, { "epoch": 0.4, "grad_norm": 2.0935192108154297, "learning_rate": 0.0002, "loss": 1.6535, "step": 98960 }, { "epoch": 0.4, "grad_norm": 2.667959213256836, "learning_rate": 0.0002, "loss": 1.8129, "step": 98970 }, { "epoch": 0.4, "grad_norm": 2.642563819885254, "learning_rate": 0.0002, "loss": 1.448, "step": 98980 }, { "epoch": 0.4, "grad_norm": 3.0826714038848877, "learning_rate": 0.0002, "loss": 1.5825, "step": 98990 }, { "epoch": 0.4, "grad_norm": 4.4246392250061035, "learning_rate": 0.0002, "loss": 1.6438, "step": 99000 }, { "epoch": 0.4, "grad_norm": 2.373892068862915, "learning_rate": 0.0002, "loss": 1.4083, "step": 99010 }, { "epoch": 0.4, "grad_norm": 2.7015256881713867, "learning_rate": 0.0002, "loss": 1.5772, "step": 99020 }, { "epoch": 0.4, "grad_norm": 4.3811516761779785, "learning_rate": 0.0002, "loss": 1.5653, "step": 99030 }, { "epoch": 0.4, "grad_norm": 3.527117967605591, "learning_rate": 0.0002, "loss": 1.7368, "step": 99040 }, { "epoch": 0.4, "grad_norm": 3.3789870738983154, "learning_rate": 0.0002, "loss": 1.8069, "step": 99050 }, { "epoch": 0.4, "grad_norm": 2.596458673477173, "learning_rate": 0.0002, "loss": 1.8904, "step": 99060 }, { "epoch": 0.4, "grad_norm": 2.4820363521575928, "learning_rate": 0.0002, "loss": 1.6747, "step": 99070 }, { "epoch": 0.4, "grad_norm": 3.4792661666870117, "learning_rate": 0.0002, "loss": 1.4903, "step": 99080 }, { "epoch": 0.4, "grad_norm": 3.1200101375579834, "learning_rate": 0.0002, "loss": 1.9293, "step": 99090 }, { "epoch": 0.4, "grad_norm": 1.99510657787323, "learning_rate": 0.0002, "loss": 1.6606, "step": 99100 }, { "epoch": 0.4, "grad_norm": 1.59540855884552, "learning_rate": 0.0002, "loss": 1.6807, "step": 99110 }, { "epoch": 0.4, "grad_norm": 2.6320743560791016, "learning_rate": 0.0002, "loss": 1.6617, "step": 99120 }, { "epoch": 0.4, "grad_norm": 3.4627552032470703, "learning_rate": 0.0002, "loss": 1.6996, "step": 99130 }, { "epoch": 0.4, "grad_norm": 2.363060712814331, "learning_rate": 0.0002, "loss": 1.5974, "step": 99140 }, { "epoch": 0.4, "grad_norm": 2.629403591156006, "learning_rate": 0.0002, "loss": 1.4749, "step": 99150 }, { "epoch": 0.4, "grad_norm": 2.023473024368286, "learning_rate": 0.0002, "loss": 1.6165, "step": 99160 }, { "epoch": 0.4, "grad_norm": 1.4258219003677368, "learning_rate": 0.0002, "loss": 1.5591, "step": 99170 }, { "epoch": 0.4, "grad_norm": 2.3966798782348633, "learning_rate": 0.0002, "loss": 1.5945, "step": 99180 }, { "epoch": 0.4, "grad_norm": 4.079476356506348, "learning_rate": 0.0002, "loss": 1.684, "step": 99190 }, { "epoch": 0.4, "grad_norm": 3.3454630374908447, "learning_rate": 0.0002, "loss": 1.5982, "step": 99200 }, { "epoch": 0.4, "grad_norm": 3.418184518814087, "learning_rate": 0.0002, "loss": 1.445, "step": 99210 }, { "epoch": 0.4, "grad_norm": 5.014179706573486, "learning_rate": 0.0002, "loss": 1.5512, "step": 99220 }, { "epoch": 0.4, "grad_norm": 5.113932132720947, "learning_rate": 0.0002, "loss": 1.5848, "step": 99230 }, { "epoch": 0.4, "grad_norm": 2.679976463317871, "learning_rate": 0.0002, "loss": 1.7265, "step": 99240 }, { "epoch": 0.4, "grad_norm": 2.656090497970581, "learning_rate": 0.0002, "loss": 1.7687, "step": 99250 }, { "epoch": 0.4, "grad_norm": 4.556119441986084, "learning_rate": 0.0002, "loss": 1.7682, "step": 99260 }, { "epoch": 0.4, "grad_norm": 2.578343152999878, "learning_rate": 0.0002, "loss": 1.5387, "step": 99270 }, { "epoch": 0.4, "grad_norm": 4.438445091247559, "learning_rate": 0.0002, "loss": 1.6854, "step": 99280 }, { "epoch": 0.4, "grad_norm": 2.662038803100586, "learning_rate": 0.0002, "loss": 1.7264, "step": 99290 }, { "epoch": 0.4, "grad_norm": 2.132490873336792, "learning_rate": 0.0002, "loss": 1.3957, "step": 99300 }, { "epoch": 0.4, "grad_norm": 3.535438299179077, "learning_rate": 0.0002, "loss": 1.5834, "step": 99310 }, { "epoch": 0.4, "grad_norm": 3.573202610015869, "learning_rate": 0.0002, "loss": 1.7392, "step": 99320 }, { "epoch": 0.4, "grad_norm": 4.611422061920166, "learning_rate": 0.0002, "loss": 1.4989, "step": 99330 }, { "epoch": 0.4, "grad_norm": 3.035581111907959, "learning_rate": 0.0002, "loss": 1.7284, "step": 99340 }, { "epoch": 0.4, "grad_norm": 2.9957685470581055, "learning_rate": 0.0002, "loss": 1.5924, "step": 99350 }, { "epoch": 0.4, "grad_norm": 3.284578800201416, "learning_rate": 0.0002, "loss": 1.4692, "step": 99360 }, { "epoch": 0.4, "grad_norm": 2.529088258743286, "learning_rate": 0.0002, "loss": 1.4993, "step": 99370 }, { "epoch": 0.4, "grad_norm": 5.403666019439697, "learning_rate": 0.0002, "loss": 1.3788, "step": 99380 }, { "epoch": 0.4, "grad_norm": 3.7262039184570312, "learning_rate": 0.0002, "loss": 1.5615, "step": 99390 }, { "epoch": 0.4, "grad_norm": 4.470466136932373, "learning_rate": 0.0002, "loss": 1.6047, "step": 99400 }, { "epoch": 0.4, "grad_norm": 2.7961361408233643, "learning_rate": 0.0002, "loss": 1.6323, "step": 99410 }, { "epoch": 0.4, "grad_norm": 2.566948652267456, "learning_rate": 0.0002, "loss": 1.494, "step": 99420 }, { "epoch": 0.4, "grad_norm": 2.6938068866729736, "learning_rate": 0.0002, "loss": 1.4327, "step": 99430 }, { "epoch": 0.4, "grad_norm": 3.882488489151001, "learning_rate": 0.0002, "loss": 1.491, "step": 99440 }, { "epoch": 0.4, "grad_norm": 6.97109317779541, "learning_rate": 0.0002, "loss": 1.6637, "step": 99450 }, { "epoch": 0.4, "grad_norm": 2.9754600524902344, "learning_rate": 0.0002, "loss": 1.6953, "step": 99460 }, { "epoch": 0.4, "grad_norm": 3.948108434677124, "learning_rate": 0.0002, "loss": 1.4358, "step": 99470 }, { "epoch": 0.4, "grad_norm": 1.9333500862121582, "learning_rate": 0.0002, "loss": 1.3413, "step": 99480 }, { "epoch": 0.41, "grad_norm": 3.782909870147705, "learning_rate": 0.0002, "loss": 1.4901, "step": 99490 }, { "epoch": 0.41, "grad_norm": 2.9305500984191895, "learning_rate": 0.0002, "loss": 1.5104, "step": 99500 }, { "epoch": 0.41, "grad_norm": 3.631763219833374, "learning_rate": 0.0002, "loss": 1.4931, "step": 99510 }, { "epoch": 0.41, "grad_norm": 2.2740471363067627, "learning_rate": 0.0002, "loss": 1.6771, "step": 99520 }, { "epoch": 0.41, "grad_norm": 2.46205735206604, "learning_rate": 0.0002, "loss": 1.7634, "step": 99530 }, { "epoch": 0.41, "grad_norm": 3.0452094078063965, "learning_rate": 0.0002, "loss": 1.6189, "step": 99540 }, { "epoch": 0.41, "grad_norm": 3.867548942565918, "learning_rate": 0.0002, "loss": 1.5376, "step": 99550 }, { "epoch": 0.41, "grad_norm": 5.233748435974121, "learning_rate": 0.0002, "loss": 1.7243, "step": 99560 }, { "epoch": 0.41, "grad_norm": 4.548920154571533, "learning_rate": 0.0002, "loss": 1.6543, "step": 99570 }, { "epoch": 0.41, "grad_norm": 2.2555716037750244, "learning_rate": 0.0002, "loss": 1.5109, "step": 99580 }, { "epoch": 0.41, "grad_norm": 3.2909369468688965, "learning_rate": 0.0002, "loss": 1.5786, "step": 99590 }, { "epoch": 0.41, "grad_norm": 1.6241860389709473, "learning_rate": 0.0002, "loss": 1.6898, "step": 99600 }, { "epoch": 0.41, "grad_norm": 5.102236747741699, "learning_rate": 0.0002, "loss": 1.5306, "step": 99610 }, { "epoch": 0.41, "grad_norm": 2.5826609134674072, "learning_rate": 0.0002, "loss": 1.5214, "step": 99620 }, { "epoch": 0.41, "grad_norm": 4.006906032562256, "learning_rate": 0.0002, "loss": 1.71, "step": 99630 }, { "epoch": 0.41, "grad_norm": 1.2541481256484985, "learning_rate": 0.0002, "loss": 1.6547, "step": 99640 }, { "epoch": 0.41, "grad_norm": 3.824500322341919, "learning_rate": 0.0002, "loss": 1.5933, "step": 99650 }, { "epoch": 0.41, "grad_norm": 3.6007354259490967, "learning_rate": 0.0002, "loss": 1.591, "step": 99660 }, { "epoch": 0.41, "grad_norm": 3.3486146926879883, "learning_rate": 0.0002, "loss": 1.4091, "step": 99670 }, { "epoch": 0.41, "grad_norm": 3.550581932067871, "learning_rate": 0.0002, "loss": 1.83, "step": 99680 }, { "epoch": 0.41, "grad_norm": 4.560729026794434, "learning_rate": 0.0002, "loss": 1.6021, "step": 99690 }, { "epoch": 0.41, "grad_norm": 4.309304237365723, "learning_rate": 0.0002, "loss": 1.7215, "step": 99700 }, { "epoch": 0.41, "grad_norm": 4.559431076049805, "learning_rate": 0.0002, "loss": 1.5198, "step": 99710 }, { "epoch": 0.41, "grad_norm": 2.8641204833984375, "learning_rate": 0.0002, "loss": 1.5876, "step": 99720 }, { "epoch": 0.41, "grad_norm": 3.1551122665405273, "learning_rate": 0.0002, "loss": 1.4778, "step": 99730 }, { "epoch": 0.41, "grad_norm": 3.364004373550415, "learning_rate": 0.0002, "loss": 1.6147, "step": 99740 }, { "epoch": 0.41, "grad_norm": 2.1814796924591064, "learning_rate": 0.0002, "loss": 1.4809, "step": 99750 }, { "epoch": 0.41, "grad_norm": 2.0453946590423584, "learning_rate": 0.0002, "loss": 1.6866, "step": 99760 }, { "epoch": 0.41, "grad_norm": 2.6443209648132324, "learning_rate": 0.0002, "loss": 1.5799, "step": 99770 }, { "epoch": 0.41, "grad_norm": 2.556997060775757, "learning_rate": 0.0002, "loss": 1.6462, "step": 99780 }, { "epoch": 0.41, "grad_norm": 2.106292486190796, "learning_rate": 0.0002, "loss": 1.5112, "step": 99790 }, { "epoch": 0.41, "grad_norm": 5.092119216918945, "learning_rate": 0.0002, "loss": 1.5792, "step": 99800 }, { "epoch": 0.41, "grad_norm": 2.5207066535949707, "learning_rate": 0.0002, "loss": 1.6872, "step": 99810 }, { "epoch": 0.41, "grad_norm": 2.7570011615753174, "learning_rate": 0.0002, "loss": 1.4946, "step": 99820 }, { "epoch": 0.41, "grad_norm": 2.6353695392608643, "learning_rate": 0.0002, "loss": 1.4637, "step": 99830 }, { "epoch": 0.41, "grad_norm": 4.420652866363525, "learning_rate": 0.0002, "loss": 1.7663, "step": 99840 }, { "epoch": 0.41, "grad_norm": 3.824896812438965, "learning_rate": 0.0002, "loss": 1.4465, "step": 99850 }, { "epoch": 0.41, "grad_norm": 2.0602622032165527, "learning_rate": 0.0002, "loss": 1.5284, "step": 99860 }, { "epoch": 0.41, "grad_norm": 2.3490517139434814, "learning_rate": 0.0002, "loss": 1.7014, "step": 99870 }, { "epoch": 0.41, "grad_norm": 3.342416763305664, "learning_rate": 0.0002, "loss": 1.5857, "step": 99880 }, { "epoch": 0.41, "grad_norm": 2.095817804336548, "learning_rate": 0.0002, "loss": 1.5861, "step": 99890 }, { "epoch": 0.41, "grad_norm": 2.9855899810791016, "learning_rate": 0.0002, "loss": 1.6784, "step": 99900 }, { "epoch": 0.41, "grad_norm": 2.98567795753479, "learning_rate": 0.0002, "loss": 1.5864, "step": 99910 }, { "epoch": 0.41, "grad_norm": 2.433309316635132, "learning_rate": 0.0002, "loss": 1.5401, "step": 99920 }, { "epoch": 0.41, "grad_norm": 2.1750686168670654, "learning_rate": 0.0002, "loss": 1.8735, "step": 99930 }, { "epoch": 0.41, "grad_norm": 2.802171230316162, "learning_rate": 0.0002, "loss": 1.8037, "step": 99940 }, { "epoch": 0.41, "grad_norm": 2.7734215259552, "learning_rate": 0.0002, "loss": 1.4877, "step": 99950 }, { "epoch": 0.41, "grad_norm": 3.9672882556915283, "learning_rate": 0.0002, "loss": 1.7677, "step": 99960 }, { "epoch": 0.41, "grad_norm": 2.389615535736084, "learning_rate": 0.0002, "loss": 1.7927, "step": 99970 }, { "epoch": 0.41, "grad_norm": 3.963796377182007, "learning_rate": 0.0002, "loss": 1.5181, "step": 99980 }, { "epoch": 0.41, "grad_norm": 3.1449217796325684, "learning_rate": 0.0002, "loss": 1.6277, "step": 99990 }, { "epoch": 0.41, "grad_norm": 2.2043633460998535, "learning_rate": 0.0002, "loss": 1.6622, "step": 100000 }, { "epoch": 0.41, "grad_norm": 2.566136121749878, "learning_rate": 0.0002, "loss": 1.6105, "step": 100010 }, { "epoch": 0.41, "grad_norm": 4.1185173988342285, "learning_rate": 0.0002, "loss": 1.7682, "step": 100020 }, { "epoch": 0.41, "grad_norm": 1.8725659847259521, "learning_rate": 0.0002, "loss": 1.7862, "step": 100030 }, { "epoch": 0.41, "grad_norm": 2.4026315212249756, "learning_rate": 0.0002, "loss": 1.6113, "step": 100040 }, { "epoch": 0.41, "grad_norm": 6.189230442047119, "learning_rate": 0.0002, "loss": 1.5678, "step": 100050 }, { "epoch": 0.41, "grad_norm": 3.336526870727539, "learning_rate": 0.0002, "loss": 1.6737, "step": 100060 }, { "epoch": 0.41, "grad_norm": 3.294307231903076, "learning_rate": 0.0002, "loss": 1.6384, "step": 100070 }, { "epoch": 0.41, "grad_norm": 2.660775661468506, "learning_rate": 0.0002, "loss": 1.4852, "step": 100080 }, { "epoch": 0.41, "grad_norm": 3.7227632999420166, "learning_rate": 0.0002, "loss": 1.8215, "step": 100090 }, { "epoch": 0.41, "grad_norm": 2.784395217895508, "learning_rate": 0.0002, "loss": 1.987, "step": 100100 }, { "epoch": 0.41, "grad_norm": 2.5244178771972656, "learning_rate": 0.0002, "loss": 1.531, "step": 100110 }, { "epoch": 0.41, "grad_norm": 2.428870916366577, "learning_rate": 0.0002, "loss": 1.5586, "step": 100120 }, { "epoch": 0.41, "grad_norm": 5.677242279052734, "learning_rate": 0.0002, "loss": 1.6542, "step": 100130 }, { "epoch": 0.41, "grad_norm": 2.3743324279785156, "learning_rate": 0.0002, "loss": 1.7659, "step": 100140 }, { "epoch": 0.41, "grad_norm": 2.1975111961364746, "learning_rate": 0.0002, "loss": 1.5571, "step": 100150 }, { "epoch": 0.41, "grad_norm": 3.680856227874756, "learning_rate": 0.0002, "loss": 1.4157, "step": 100160 }, { "epoch": 0.41, "grad_norm": 1.9425671100616455, "learning_rate": 0.0002, "loss": 1.6351, "step": 100170 }, { "epoch": 0.41, "grad_norm": 2.6065824031829834, "learning_rate": 0.0002, "loss": 1.5581, "step": 100180 }, { "epoch": 0.41, "grad_norm": 2.960465669631958, "learning_rate": 0.0002, "loss": 1.372, "step": 100190 }, { "epoch": 0.41, "grad_norm": 3.596379041671753, "learning_rate": 0.0002, "loss": 1.7874, "step": 100200 }, { "epoch": 0.41, "grad_norm": 1.9504337310791016, "learning_rate": 0.0002, "loss": 1.43, "step": 100210 }, { "epoch": 0.41, "grad_norm": 3.312060594558716, "learning_rate": 0.0002, "loss": 1.6521, "step": 100220 }, { "epoch": 0.41, "grad_norm": 2.4389357566833496, "learning_rate": 0.0002, "loss": 1.7992, "step": 100230 }, { "epoch": 0.41, "grad_norm": 3.0849013328552246, "learning_rate": 0.0002, "loss": 1.7251, "step": 100240 }, { "epoch": 0.41, "grad_norm": 3.9186112880706787, "learning_rate": 0.0002, "loss": 1.3533, "step": 100250 }, { "epoch": 0.41, "grad_norm": 3.8090531826019287, "learning_rate": 0.0002, "loss": 1.5768, "step": 100260 }, { "epoch": 0.41, "grad_norm": 3.1037023067474365, "learning_rate": 0.0002, "loss": 1.552, "step": 100270 }, { "epoch": 0.41, "grad_norm": 2.43500018119812, "learning_rate": 0.0002, "loss": 1.7561, "step": 100280 }, { "epoch": 0.41, "grad_norm": 2.7156741619110107, "learning_rate": 0.0002, "loss": 1.4394, "step": 100290 }, { "epoch": 0.41, "grad_norm": 3.586169719696045, "learning_rate": 0.0002, "loss": 1.4542, "step": 100300 }, { "epoch": 0.41, "grad_norm": 2.8008692264556885, "learning_rate": 0.0002, "loss": 1.3211, "step": 100310 }, { "epoch": 0.41, "grad_norm": 3.2319953441619873, "learning_rate": 0.0002, "loss": 1.7769, "step": 100320 }, { "epoch": 0.41, "grad_norm": 2.6074676513671875, "learning_rate": 0.0002, "loss": 1.4787, "step": 100330 }, { "epoch": 0.41, "grad_norm": 3.309347629547119, "learning_rate": 0.0002, "loss": 1.5549, "step": 100340 }, { "epoch": 0.41, "grad_norm": 2.5628249645233154, "learning_rate": 0.0002, "loss": 1.5923, "step": 100350 }, { "epoch": 0.41, "grad_norm": 3.426953077316284, "learning_rate": 0.0002, "loss": 1.5601, "step": 100360 }, { "epoch": 0.41, "grad_norm": 4.07013463973999, "learning_rate": 0.0002, "loss": 1.3937, "step": 100370 }, { "epoch": 0.41, "grad_norm": 7.408646583557129, "learning_rate": 0.0002, "loss": 1.3643, "step": 100380 }, { "epoch": 0.41, "grad_norm": 2.5119638442993164, "learning_rate": 0.0002, "loss": 1.6509, "step": 100390 }, { "epoch": 0.41, "grad_norm": 3.4142940044403076, "learning_rate": 0.0002, "loss": 1.6515, "step": 100400 }, { "epoch": 0.41, "grad_norm": 2.9390249252319336, "learning_rate": 0.0002, "loss": 1.4708, "step": 100410 }, { "epoch": 0.41, "grad_norm": 2.821976661682129, "learning_rate": 0.0002, "loss": 1.7112, "step": 100420 }, { "epoch": 0.41, "grad_norm": 6.7068657875061035, "learning_rate": 0.0002, "loss": 1.6286, "step": 100430 }, { "epoch": 0.41, "grad_norm": 3.1755754947662354, "learning_rate": 0.0002, "loss": 1.7503, "step": 100440 }, { "epoch": 0.41, "grad_norm": 2.8281915187835693, "learning_rate": 0.0002, "loss": 1.5641, "step": 100450 }, { "epoch": 0.41, "grad_norm": 2.8363089561462402, "learning_rate": 0.0002, "loss": 1.6861, "step": 100460 }, { "epoch": 0.41, "grad_norm": 2.286201238632202, "learning_rate": 0.0002, "loss": 1.5289, "step": 100470 }, { "epoch": 0.41, "grad_norm": 2.4990668296813965, "learning_rate": 0.0002, "loss": 1.7639, "step": 100480 }, { "epoch": 0.41, "grad_norm": 2.0194528102874756, "learning_rate": 0.0002, "loss": 1.2621, "step": 100490 }, { "epoch": 0.41, "grad_norm": 2.753000259399414, "learning_rate": 0.0002, "loss": 1.5783, "step": 100500 }, { "epoch": 0.41, "grad_norm": 2.109914541244507, "learning_rate": 0.0002, "loss": 1.4661, "step": 100510 }, { "epoch": 0.41, "grad_norm": 1.7908402681350708, "learning_rate": 0.0002, "loss": 1.5329, "step": 100520 }, { "epoch": 0.41, "grad_norm": 2.8970787525177, "learning_rate": 0.0002, "loss": 1.3143, "step": 100530 }, { "epoch": 0.41, "grad_norm": 2.8667237758636475, "learning_rate": 0.0002, "loss": 1.5305, "step": 100540 }, { "epoch": 0.41, "grad_norm": 3.060265064239502, "learning_rate": 0.0002, "loss": 1.4999, "step": 100550 }, { "epoch": 0.41, "grad_norm": 2.5143988132476807, "learning_rate": 0.0002, "loss": 1.5318, "step": 100560 }, { "epoch": 0.41, "grad_norm": 2.2930307388305664, "learning_rate": 0.0002, "loss": 1.5019, "step": 100570 }, { "epoch": 0.41, "grad_norm": 1.6409536600112915, "learning_rate": 0.0002, "loss": 1.4672, "step": 100580 }, { "epoch": 0.41, "grad_norm": 2.302333354949951, "learning_rate": 0.0002, "loss": 1.5475, "step": 100590 }, { "epoch": 0.41, "grad_norm": 4.44802713394165, "learning_rate": 0.0002, "loss": 1.7318, "step": 100600 }, { "epoch": 0.41, "grad_norm": 5.655271530151367, "learning_rate": 0.0002, "loss": 1.6625, "step": 100610 }, { "epoch": 0.41, "grad_norm": 3.4657020568847656, "learning_rate": 0.0002, "loss": 1.5789, "step": 100620 }, { "epoch": 0.41, "grad_norm": 2.8995697498321533, "learning_rate": 0.0002, "loss": 1.8626, "step": 100630 }, { "epoch": 0.41, "grad_norm": 4.049042701721191, "learning_rate": 0.0002, "loss": 1.5764, "step": 100640 }, { "epoch": 0.41, "grad_norm": 2.5566599369049072, "learning_rate": 0.0002, "loss": 1.6373, "step": 100650 }, { "epoch": 0.41, "grad_norm": 2.894096851348877, "learning_rate": 0.0002, "loss": 1.444, "step": 100660 }, { "epoch": 0.41, "grad_norm": 3.0219786167144775, "learning_rate": 0.0002, "loss": 1.8994, "step": 100670 }, { "epoch": 0.41, "grad_norm": 4.632587909698486, "learning_rate": 0.0002, "loss": 1.3933, "step": 100680 }, { "epoch": 0.41, "grad_norm": 2.614621639251709, "learning_rate": 0.0002, "loss": 1.4853, "step": 100690 }, { "epoch": 0.41, "grad_norm": 2.1909570693969727, "learning_rate": 0.0002, "loss": 1.4049, "step": 100700 }, { "epoch": 0.41, "grad_norm": 2.0834202766418457, "learning_rate": 0.0002, "loss": 1.6184, "step": 100710 }, { "epoch": 0.41, "grad_norm": 3.2001688480377197, "learning_rate": 0.0002, "loss": 1.501, "step": 100720 }, { "epoch": 0.41, "grad_norm": 3.901632070541382, "learning_rate": 0.0002, "loss": 1.6571, "step": 100730 }, { "epoch": 0.41, "grad_norm": 3.499009609222412, "learning_rate": 0.0002, "loss": 1.5908, "step": 100740 }, { "epoch": 0.41, "grad_norm": 2.6994895935058594, "learning_rate": 0.0002, "loss": 1.6136, "step": 100750 }, { "epoch": 0.41, "grad_norm": 2.7297043800354004, "learning_rate": 0.0002, "loss": 1.6143, "step": 100760 }, { "epoch": 0.41, "grad_norm": 2.2639384269714355, "learning_rate": 0.0002, "loss": 1.6458, "step": 100770 }, { "epoch": 0.41, "grad_norm": 3.306833028793335, "learning_rate": 0.0002, "loss": 1.5881, "step": 100780 }, { "epoch": 0.41, "grad_norm": 1.969022274017334, "learning_rate": 0.0002, "loss": 1.577, "step": 100790 }, { "epoch": 0.41, "grad_norm": 2.880725622177124, "learning_rate": 0.0002, "loss": 1.5026, "step": 100800 }, { "epoch": 0.41, "grad_norm": 2.647732734680176, "learning_rate": 0.0002, "loss": 1.6968, "step": 100810 }, { "epoch": 0.41, "grad_norm": 4.384435176849365, "learning_rate": 0.0002, "loss": 1.7294, "step": 100820 }, { "epoch": 0.41, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.5431, "step": 100830 }, { "epoch": 0.41, "grad_norm": 2.9560563564300537, "learning_rate": 0.0002, "loss": 1.6164, "step": 100840 }, { "epoch": 0.41, "grad_norm": 3.0213356018066406, "learning_rate": 0.0002, "loss": 1.3021, "step": 100850 }, { "epoch": 0.41, "grad_norm": 2.40911865234375, "learning_rate": 0.0002, "loss": 1.4701, "step": 100860 }, { "epoch": 0.41, "grad_norm": 4.083016872406006, "learning_rate": 0.0002, "loss": 1.7637, "step": 100870 }, { "epoch": 0.41, "grad_norm": 1.833417534828186, "learning_rate": 0.0002, "loss": 1.4435, "step": 100880 }, { "epoch": 0.41, "grad_norm": 6.000734806060791, "learning_rate": 0.0002, "loss": 1.7234, "step": 100890 }, { "epoch": 0.41, "grad_norm": 12.903032302856445, "learning_rate": 0.0002, "loss": 1.8014, "step": 100900 }, { "epoch": 0.41, "grad_norm": 3.235747814178467, "learning_rate": 0.0002, "loss": 1.4162, "step": 100910 }, { "epoch": 0.41, "grad_norm": 3.349651336669922, "learning_rate": 0.0002, "loss": 1.8788, "step": 100920 }, { "epoch": 0.41, "grad_norm": 5.359551429748535, "learning_rate": 0.0002, "loss": 1.4666, "step": 100930 }, { "epoch": 0.41, "grad_norm": 2.655566930770874, "learning_rate": 0.0002, "loss": 1.2923, "step": 100940 }, { "epoch": 0.41, "grad_norm": 1.9047505855560303, "learning_rate": 0.0002, "loss": 1.5146, "step": 100950 }, { "epoch": 0.41, "grad_norm": 2.487560510635376, "learning_rate": 0.0002, "loss": 1.6322, "step": 100960 }, { "epoch": 0.41, "grad_norm": 2.423301935195923, "learning_rate": 0.0002, "loss": 1.6023, "step": 100970 }, { "epoch": 0.41, "grad_norm": 1.8263789415359497, "learning_rate": 0.0002, "loss": 1.6762, "step": 100980 }, { "epoch": 0.41, "grad_norm": 2.93440842628479, "learning_rate": 0.0002, "loss": 1.5806, "step": 100990 }, { "epoch": 0.41, "grad_norm": 2.164766550064087, "learning_rate": 0.0002, "loss": 1.4644, "step": 101000 }, { "epoch": 0.41, "grad_norm": 4.793224811553955, "learning_rate": 0.0002, "loss": 1.493, "step": 101010 }, { "epoch": 0.41, "grad_norm": 4.3101887702941895, "learning_rate": 0.0002, "loss": 1.8226, "step": 101020 }, { "epoch": 0.41, "grad_norm": 3.605966567993164, "learning_rate": 0.0002, "loss": 1.7536, "step": 101030 }, { "epoch": 0.41, "grad_norm": 3.039964199066162, "learning_rate": 0.0002, "loss": 1.5955, "step": 101040 }, { "epoch": 0.41, "grad_norm": 4.2144694328308105, "learning_rate": 0.0002, "loss": 1.4506, "step": 101050 }, { "epoch": 0.41, "grad_norm": 6.6074981689453125, "learning_rate": 0.0002, "loss": 1.2949, "step": 101060 }, { "epoch": 0.41, "grad_norm": 3.957944631576538, "learning_rate": 0.0002, "loss": 1.2637, "step": 101070 }, { "epoch": 0.41, "grad_norm": 1.9998419284820557, "learning_rate": 0.0002, "loss": 1.7373, "step": 101080 }, { "epoch": 0.41, "grad_norm": 3.02691650390625, "learning_rate": 0.0002, "loss": 1.472, "step": 101090 }, { "epoch": 0.41, "grad_norm": 2.9004530906677246, "learning_rate": 0.0002, "loss": 1.5417, "step": 101100 }, { "epoch": 0.41, "grad_norm": 2.80850887298584, "learning_rate": 0.0002, "loss": 1.5967, "step": 101110 }, { "epoch": 0.41, "grad_norm": 4.488705635070801, "learning_rate": 0.0002, "loss": 1.3778, "step": 101120 }, { "epoch": 0.41, "grad_norm": 2.0139427185058594, "learning_rate": 0.0002, "loss": 1.3769, "step": 101130 }, { "epoch": 0.41, "grad_norm": 3.322002649307251, "learning_rate": 0.0002, "loss": 1.7708, "step": 101140 }, { "epoch": 0.41, "grad_norm": 5.085222244262695, "learning_rate": 0.0002, "loss": 1.2849, "step": 101150 }, { "epoch": 0.41, "grad_norm": 4.013408184051514, "learning_rate": 0.0002, "loss": 1.5982, "step": 101160 }, { "epoch": 0.41, "grad_norm": 3.1815383434295654, "learning_rate": 0.0002, "loss": 1.5827, "step": 101170 }, { "epoch": 0.41, "grad_norm": 1.4017504453659058, "learning_rate": 0.0002, "loss": 1.5943, "step": 101180 }, { "epoch": 0.41, "grad_norm": 2.3474128246307373, "learning_rate": 0.0002, "loss": 1.4427, "step": 101190 }, { "epoch": 0.41, "grad_norm": 3.1705923080444336, "learning_rate": 0.0002, "loss": 1.5915, "step": 101200 }, { "epoch": 0.41, "grad_norm": 2.597425699234009, "learning_rate": 0.0002, "loss": 1.6344, "step": 101210 }, { "epoch": 0.41, "grad_norm": 2.44907283782959, "learning_rate": 0.0002, "loss": 1.8126, "step": 101220 }, { "epoch": 0.41, "grad_norm": 2.6079511642456055, "learning_rate": 0.0002, "loss": 1.445, "step": 101230 }, { "epoch": 0.41, "grad_norm": 1.9920779466629028, "learning_rate": 0.0002, "loss": 1.5844, "step": 101240 }, { "epoch": 0.41, "grad_norm": 4.637495994567871, "learning_rate": 0.0002, "loss": 1.4466, "step": 101250 }, { "epoch": 0.41, "grad_norm": 2.578158378601074, "learning_rate": 0.0002, "loss": 1.2398, "step": 101260 }, { "epoch": 0.41, "grad_norm": 3.8100786209106445, "learning_rate": 0.0002, "loss": 1.5101, "step": 101270 }, { "epoch": 0.41, "grad_norm": 2.307811737060547, "learning_rate": 0.0002, "loss": 1.7478, "step": 101280 }, { "epoch": 0.41, "grad_norm": 3.0654916763305664, "learning_rate": 0.0002, "loss": 1.7077, "step": 101290 }, { "epoch": 0.41, "grad_norm": 4.257863998413086, "learning_rate": 0.0002, "loss": 1.6346, "step": 101300 }, { "epoch": 0.41, "grad_norm": 2.9963786602020264, "learning_rate": 0.0002, "loss": 1.4497, "step": 101310 }, { "epoch": 0.41, "grad_norm": 5.720396041870117, "learning_rate": 0.0002, "loss": 1.5816, "step": 101320 }, { "epoch": 0.41, "grad_norm": 1.831317663192749, "learning_rate": 0.0002, "loss": 1.7036, "step": 101330 }, { "epoch": 0.41, "grad_norm": 2.4281022548675537, "learning_rate": 0.0002, "loss": 1.5798, "step": 101340 }, { "epoch": 0.41, "grad_norm": 2.749640464782715, "learning_rate": 0.0002, "loss": 1.7463, "step": 101350 }, { "epoch": 0.41, "grad_norm": 2.550525188446045, "learning_rate": 0.0002, "loss": 1.5175, "step": 101360 }, { "epoch": 0.41, "grad_norm": 2.5397512912750244, "learning_rate": 0.0002, "loss": 1.6784, "step": 101370 }, { "epoch": 0.41, "grad_norm": 3.482588529586792, "learning_rate": 0.0002, "loss": 1.4374, "step": 101380 }, { "epoch": 0.41, "grad_norm": 3.0785293579101562, "learning_rate": 0.0002, "loss": 1.6458, "step": 101390 }, { "epoch": 0.41, "grad_norm": 4.074370384216309, "learning_rate": 0.0002, "loss": 1.777, "step": 101400 }, { "epoch": 0.41, "grad_norm": 2.7284634113311768, "learning_rate": 0.0002, "loss": 1.5341, "step": 101410 }, { "epoch": 0.41, "grad_norm": 1.639142632484436, "learning_rate": 0.0002, "loss": 1.4731, "step": 101420 }, { "epoch": 0.41, "grad_norm": 2.331113815307617, "learning_rate": 0.0002, "loss": 1.5058, "step": 101430 }, { "epoch": 0.41, "grad_norm": 2.992952585220337, "learning_rate": 0.0002, "loss": 1.5008, "step": 101440 }, { "epoch": 0.41, "grad_norm": 2.7122507095336914, "learning_rate": 0.0002, "loss": 1.6766, "step": 101450 }, { "epoch": 0.41, "grad_norm": 9.87483024597168, "learning_rate": 0.0002, "loss": 1.6208, "step": 101460 }, { "epoch": 0.41, "grad_norm": 2.535429000854492, "learning_rate": 0.0002, "loss": 1.3384, "step": 101470 }, { "epoch": 0.41, "grad_norm": 2.4999754428863525, "learning_rate": 0.0002, "loss": 1.3778, "step": 101480 }, { "epoch": 0.41, "grad_norm": 2.042003870010376, "learning_rate": 0.0002, "loss": 1.5815, "step": 101490 }, { "epoch": 0.41, "grad_norm": 2.167227029800415, "learning_rate": 0.0002, "loss": 1.6015, "step": 101500 }, { "epoch": 0.41, "grad_norm": 2.6947457790374756, "learning_rate": 0.0002, "loss": 1.7365, "step": 101510 }, { "epoch": 0.41, "grad_norm": 2.8955299854278564, "learning_rate": 0.0002, "loss": 1.8631, "step": 101520 }, { "epoch": 0.41, "grad_norm": 3.508603096008301, "learning_rate": 0.0002, "loss": 1.6105, "step": 101530 }, { "epoch": 0.41, "grad_norm": 4.435288429260254, "learning_rate": 0.0002, "loss": 1.7598, "step": 101540 }, { "epoch": 0.41, "grad_norm": 2.6628975868225098, "learning_rate": 0.0002, "loss": 1.5222, "step": 101550 }, { "epoch": 0.41, "grad_norm": 2.2958502769470215, "learning_rate": 0.0002, "loss": 1.5112, "step": 101560 }, { "epoch": 0.41, "grad_norm": 2.53477144241333, "learning_rate": 0.0002, "loss": 1.3647, "step": 101570 }, { "epoch": 0.41, "grad_norm": 3.7620248794555664, "learning_rate": 0.0002, "loss": 1.5035, "step": 101580 }, { "epoch": 0.41, "grad_norm": 4.310177803039551, "learning_rate": 0.0002, "loss": 1.7302, "step": 101590 }, { "epoch": 0.41, "grad_norm": 3.0484728813171387, "learning_rate": 0.0002, "loss": 1.6536, "step": 101600 }, { "epoch": 0.41, "grad_norm": 3.45694637298584, "learning_rate": 0.0002, "loss": 1.9062, "step": 101610 }, { "epoch": 0.41, "grad_norm": 2.6997101306915283, "learning_rate": 0.0002, "loss": 1.5026, "step": 101620 }, { "epoch": 0.41, "grad_norm": 2.771364688873291, "learning_rate": 0.0002, "loss": 1.4511, "step": 101630 }, { "epoch": 0.41, "grad_norm": 3.7202041149139404, "learning_rate": 0.0002, "loss": 1.719, "step": 101640 }, { "epoch": 0.41, "grad_norm": 2.9861700534820557, "learning_rate": 0.0002, "loss": 1.4616, "step": 101650 }, { "epoch": 0.41, "grad_norm": 3.027733564376831, "learning_rate": 0.0002, "loss": 1.5075, "step": 101660 }, { "epoch": 0.41, "grad_norm": 2.87111759185791, "learning_rate": 0.0002, "loss": 1.5104, "step": 101670 }, { "epoch": 0.41, "grad_norm": 4.356630802154541, "learning_rate": 0.0002, "loss": 1.4721, "step": 101680 }, { "epoch": 0.41, "grad_norm": 1.5439822673797607, "learning_rate": 0.0002, "loss": 1.7186, "step": 101690 }, { "epoch": 0.41, "grad_norm": 4.412297248840332, "learning_rate": 0.0002, "loss": 1.4592, "step": 101700 }, { "epoch": 0.41, "grad_norm": 3.2199082374572754, "learning_rate": 0.0002, "loss": 1.5911, "step": 101710 }, { "epoch": 0.41, "grad_norm": 3.340733766555786, "learning_rate": 0.0002, "loss": 1.579, "step": 101720 }, { "epoch": 0.41, "grad_norm": 2.6866703033447266, "learning_rate": 0.0002, "loss": 1.4237, "step": 101730 }, { "epoch": 0.41, "grad_norm": 1.920526385307312, "learning_rate": 0.0002, "loss": 1.6901, "step": 101740 }, { "epoch": 0.41, "grad_norm": 1.6325007677078247, "learning_rate": 0.0002, "loss": 1.5756, "step": 101750 }, { "epoch": 0.41, "grad_norm": 4.824122905731201, "learning_rate": 0.0002, "loss": 1.4443, "step": 101760 }, { "epoch": 0.41, "grad_norm": 3.6606509685516357, "learning_rate": 0.0002, "loss": 1.697, "step": 101770 }, { "epoch": 0.41, "grad_norm": 3.7056500911712646, "learning_rate": 0.0002, "loss": 1.6274, "step": 101780 }, { "epoch": 0.41, "grad_norm": 2.3881733417510986, "learning_rate": 0.0002, "loss": 1.6475, "step": 101790 }, { "epoch": 0.41, "grad_norm": 2.7099173069000244, "learning_rate": 0.0002, "loss": 1.6929, "step": 101800 }, { "epoch": 0.41, "grad_norm": 2.4245898723602295, "learning_rate": 0.0002, "loss": 1.8136, "step": 101810 }, { "epoch": 0.41, "grad_norm": 2.4524710178375244, "learning_rate": 0.0002, "loss": 1.6541, "step": 101820 }, { "epoch": 0.41, "grad_norm": 1.9040088653564453, "learning_rate": 0.0002, "loss": 1.4737, "step": 101830 }, { "epoch": 0.41, "grad_norm": 2.584529399871826, "learning_rate": 0.0002, "loss": 1.4441, "step": 101840 }, { "epoch": 0.41, "grad_norm": 3.3849754333496094, "learning_rate": 0.0002, "loss": 1.6496, "step": 101850 }, { "epoch": 0.41, "grad_norm": 3.059786558151245, "learning_rate": 0.0002, "loss": 1.484, "step": 101860 }, { "epoch": 0.41, "grad_norm": 3.38387131690979, "learning_rate": 0.0002, "loss": 1.4742, "step": 101870 }, { "epoch": 0.41, "grad_norm": 3.662590742111206, "learning_rate": 0.0002, "loss": 1.7999, "step": 101880 }, { "epoch": 0.41, "grad_norm": 3.0120930671691895, "learning_rate": 0.0002, "loss": 1.6302, "step": 101890 }, { "epoch": 0.41, "grad_norm": 3.6143105030059814, "learning_rate": 0.0002, "loss": 1.8307, "step": 101900 }, { "epoch": 0.41, "grad_norm": 4.50283145904541, "learning_rate": 0.0002, "loss": 1.3273, "step": 101910 }, { "epoch": 0.41, "grad_norm": 3.4658422470092773, "learning_rate": 0.0002, "loss": 1.6158, "step": 101920 }, { "epoch": 0.41, "grad_norm": 2.0907249450683594, "learning_rate": 0.0002, "loss": 1.434, "step": 101930 }, { "epoch": 0.41, "grad_norm": 2.1580514907836914, "learning_rate": 0.0002, "loss": 1.5561, "step": 101940 }, { "epoch": 0.42, "grad_norm": 4.38085412979126, "learning_rate": 0.0002, "loss": 1.7317, "step": 101950 }, { "epoch": 0.42, "grad_norm": 11.493451118469238, "learning_rate": 0.0002, "loss": 1.4893, "step": 101960 }, { "epoch": 0.42, "grad_norm": 5.323681354522705, "learning_rate": 0.0002, "loss": 1.648, "step": 101970 }, { "epoch": 0.42, "grad_norm": 4.130913734436035, "learning_rate": 0.0002, "loss": 1.5611, "step": 101980 }, { "epoch": 0.42, "grad_norm": 4.202375411987305, "learning_rate": 0.0002, "loss": 1.6919, "step": 101990 }, { "epoch": 0.42, "grad_norm": 3.6585707664489746, "learning_rate": 0.0002, "loss": 1.5404, "step": 102000 }, { "epoch": 0.42, "grad_norm": 3.2798168659210205, "learning_rate": 0.0002, "loss": 1.6102, "step": 102010 }, { "epoch": 0.42, "grad_norm": 3.220136880874634, "learning_rate": 0.0002, "loss": 1.6421, "step": 102020 }, { "epoch": 0.42, "grad_norm": 4.1212477684021, "learning_rate": 0.0002, "loss": 1.5515, "step": 102030 }, { "epoch": 0.42, "grad_norm": 2.410133123397827, "learning_rate": 0.0002, "loss": 1.5951, "step": 102040 }, { "epoch": 0.42, "grad_norm": 5.283308029174805, "learning_rate": 0.0002, "loss": 1.4827, "step": 102050 }, { "epoch": 0.42, "grad_norm": 3.122785806655884, "learning_rate": 0.0002, "loss": 1.4459, "step": 102060 }, { "epoch": 0.42, "grad_norm": 3.1097471714019775, "learning_rate": 0.0002, "loss": 1.5907, "step": 102070 }, { "epoch": 0.42, "grad_norm": 3.1432111263275146, "learning_rate": 0.0002, "loss": 1.5302, "step": 102080 }, { "epoch": 0.42, "grad_norm": 4.636699676513672, "learning_rate": 0.0002, "loss": 1.5368, "step": 102090 }, { "epoch": 0.42, "grad_norm": 3.162569761276245, "learning_rate": 0.0002, "loss": 1.5898, "step": 102100 }, { "epoch": 0.42, "grad_norm": 3.302473545074463, "learning_rate": 0.0002, "loss": 1.1035, "step": 102110 }, { "epoch": 0.42, "grad_norm": 5.662713527679443, "learning_rate": 0.0002, "loss": 1.5375, "step": 102120 }, { "epoch": 0.42, "grad_norm": 3.209111213684082, "learning_rate": 0.0002, "loss": 1.5628, "step": 102130 }, { "epoch": 0.42, "grad_norm": 9.741943359375, "learning_rate": 0.0002, "loss": 1.4356, "step": 102140 }, { "epoch": 0.42, "grad_norm": 3.323202133178711, "learning_rate": 0.0002, "loss": 1.5462, "step": 102150 }, { "epoch": 0.42, "grad_norm": 2.27453875541687, "learning_rate": 0.0002, "loss": 1.532, "step": 102160 }, { "epoch": 0.42, "grad_norm": 2.5498085021972656, "learning_rate": 0.0002, "loss": 1.692, "step": 102170 }, { "epoch": 0.42, "grad_norm": 3.7445008754730225, "learning_rate": 0.0002, "loss": 1.5778, "step": 102180 }, { "epoch": 0.42, "grad_norm": 3.1926376819610596, "learning_rate": 0.0002, "loss": 1.4106, "step": 102190 }, { "epoch": 0.42, "grad_norm": 3.1738789081573486, "learning_rate": 0.0002, "loss": 1.5647, "step": 102200 }, { "epoch": 0.42, "grad_norm": 3.7710671424865723, "learning_rate": 0.0002, "loss": 1.5321, "step": 102210 }, { "epoch": 0.42, "grad_norm": 2.129101276397705, "learning_rate": 0.0002, "loss": 1.4767, "step": 102220 }, { "epoch": 0.42, "grad_norm": 2.0343658924102783, "learning_rate": 0.0002, "loss": 1.3319, "step": 102230 }, { "epoch": 0.42, "grad_norm": 2.3750295639038086, "learning_rate": 0.0002, "loss": 1.4744, "step": 102240 }, { "epoch": 0.42, "grad_norm": 2.028266668319702, "learning_rate": 0.0002, "loss": 1.8676, "step": 102250 }, { "epoch": 0.42, "grad_norm": 2.944748640060425, "learning_rate": 0.0002, "loss": 1.6556, "step": 102260 }, { "epoch": 0.42, "grad_norm": 2.0950982570648193, "learning_rate": 0.0002, "loss": 1.6927, "step": 102270 }, { "epoch": 0.42, "grad_norm": 3.9952566623687744, "learning_rate": 0.0002, "loss": 1.3736, "step": 102280 }, { "epoch": 0.42, "grad_norm": 3.692133903503418, "learning_rate": 0.0002, "loss": 1.7784, "step": 102290 }, { "epoch": 0.42, "grad_norm": 3.268488883972168, "learning_rate": 0.0002, "loss": 1.4036, "step": 102300 }, { "epoch": 0.42, "grad_norm": 2.1124472618103027, "learning_rate": 0.0002, "loss": 1.674, "step": 102310 }, { "epoch": 0.42, "grad_norm": 2.181840658187866, "learning_rate": 0.0002, "loss": 1.556, "step": 102320 }, { "epoch": 0.42, "grad_norm": 2.04076886177063, "learning_rate": 0.0002, "loss": 1.3068, "step": 102330 }, { "epoch": 0.42, "grad_norm": 1.8426458835601807, "learning_rate": 0.0002, "loss": 1.6131, "step": 102340 }, { "epoch": 0.42, "grad_norm": 1.8671989440917969, "learning_rate": 0.0002, "loss": 1.3594, "step": 102350 }, { "epoch": 0.42, "grad_norm": 2.3355538845062256, "learning_rate": 0.0002, "loss": 1.4869, "step": 102360 }, { "epoch": 0.42, "grad_norm": 2.079810619354248, "learning_rate": 0.0002, "loss": 1.6164, "step": 102370 }, { "epoch": 0.42, "grad_norm": 1.570622205734253, "learning_rate": 0.0002, "loss": 1.6189, "step": 102380 }, { "epoch": 0.42, "grad_norm": 3.40590763092041, "learning_rate": 0.0002, "loss": 1.952, "step": 102390 }, { "epoch": 0.42, "grad_norm": 2.4620721340179443, "learning_rate": 0.0002, "loss": 1.6379, "step": 102400 }, { "epoch": 0.42, "grad_norm": 5.00417423248291, "learning_rate": 0.0002, "loss": 1.4787, "step": 102410 }, { "epoch": 0.42, "grad_norm": 2.5340428352355957, "learning_rate": 0.0002, "loss": 1.2787, "step": 102420 }, { "epoch": 0.42, "grad_norm": 1.8501296043395996, "learning_rate": 0.0002, "loss": 1.4186, "step": 102430 }, { "epoch": 0.42, "grad_norm": 8.394062995910645, "learning_rate": 0.0002, "loss": 1.6323, "step": 102440 }, { "epoch": 0.42, "grad_norm": 1.8565272092819214, "learning_rate": 0.0002, "loss": 1.6331, "step": 102450 }, { "epoch": 0.42, "grad_norm": 4.310740947723389, "learning_rate": 0.0002, "loss": 1.5167, "step": 102460 }, { "epoch": 0.42, "grad_norm": 2.6769325733184814, "learning_rate": 0.0002, "loss": 1.4634, "step": 102470 }, { "epoch": 0.42, "grad_norm": 2.477405071258545, "learning_rate": 0.0002, "loss": 1.6835, "step": 102480 }, { "epoch": 0.42, "grad_norm": 2.5571696758270264, "learning_rate": 0.0002, "loss": 1.6176, "step": 102490 }, { "epoch": 0.42, "grad_norm": 2.450795888900757, "learning_rate": 0.0002, "loss": 1.5334, "step": 102500 }, { "epoch": 0.42, "grad_norm": 2.3305652141571045, "learning_rate": 0.0002, "loss": 1.504, "step": 102510 }, { "epoch": 0.42, "grad_norm": 2.897064208984375, "learning_rate": 0.0002, "loss": 1.5847, "step": 102520 }, { "epoch": 0.42, "grad_norm": 2.8447954654693604, "learning_rate": 0.0002, "loss": 1.6555, "step": 102530 }, { "epoch": 0.42, "grad_norm": 2.022784948348999, "learning_rate": 0.0002, "loss": 1.6469, "step": 102540 }, { "epoch": 0.42, "grad_norm": 3.058748483657837, "learning_rate": 0.0002, "loss": 1.5974, "step": 102550 }, { "epoch": 0.42, "grad_norm": 3.2582991123199463, "learning_rate": 0.0002, "loss": 1.383, "step": 102560 }, { "epoch": 0.42, "grad_norm": 2.386091947555542, "learning_rate": 0.0002, "loss": 1.4056, "step": 102570 }, { "epoch": 0.42, "grad_norm": 2.544156551361084, "learning_rate": 0.0002, "loss": 1.5464, "step": 102580 }, { "epoch": 0.42, "grad_norm": 2.214834213256836, "learning_rate": 0.0002, "loss": 1.5431, "step": 102590 }, { "epoch": 0.42, "grad_norm": 2.927013635635376, "learning_rate": 0.0002, "loss": 1.6901, "step": 102600 }, { "epoch": 0.42, "grad_norm": 3.5215096473693848, "learning_rate": 0.0002, "loss": 1.3544, "step": 102610 }, { "epoch": 0.42, "grad_norm": 3.060081720352173, "learning_rate": 0.0002, "loss": 1.8317, "step": 102620 }, { "epoch": 0.42, "grad_norm": 4.012922286987305, "learning_rate": 0.0002, "loss": 1.5627, "step": 102630 }, { "epoch": 0.42, "grad_norm": 3.583500385284424, "learning_rate": 0.0002, "loss": 1.7203, "step": 102640 }, { "epoch": 0.42, "grad_norm": 3.1178791522979736, "learning_rate": 0.0002, "loss": 1.5069, "step": 102650 }, { "epoch": 0.42, "grad_norm": 3.188912868499756, "learning_rate": 0.0002, "loss": 1.8283, "step": 102660 }, { "epoch": 0.42, "grad_norm": 2.5709807872772217, "learning_rate": 0.0002, "loss": 1.543, "step": 102670 }, { "epoch": 0.42, "grad_norm": 2.1101884841918945, "learning_rate": 0.0002, "loss": 1.438, "step": 102680 }, { "epoch": 0.42, "grad_norm": 1.2739731073379517, "learning_rate": 0.0002, "loss": 1.658, "step": 102690 }, { "epoch": 0.42, "grad_norm": 3.432827949523926, "learning_rate": 0.0002, "loss": 1.7152, "step": 102700 }, { "epoch": 0.42, "grad_norm": 5.374680519104004, "learning_rate": 0.0002, "loss": 1.5371, "step": 102710 }, { "epoch": 0.42, "grad_norm": 5.844674587249756, "learning_rate": 0.0002, "loss": 1.4848, "step": 102720 }, { "epoch": 0.42, "grad_norm": 3.1663107872009277, "learning_rate": 0.0002, "loss": 1.5955, "step": 102730 }, { "epoch": 0.42, "grad_norm": 4.06532096862793, "learning_rate": 0.0002, "loss": 1.7683, "step": 102740 }, { "epoch": 0.42, "grad_norm": 2.7830727100372314, "learning_rate": 0.0002, "loss": 1.7718, "step": 102750 }, { "epoch": 0.42, "grad_norm": 2.132164239883423, "learning_rate": 0.0002, "loss": 1.5392, "step": 102760 }, { "epoch": 0.42, "grad_norm": 3.878338575363159, "learning_rate": 0.0002, "loss": 1.4822, "step": 102770 }, { "epoch": 0.42, "grad_norm": 2.432659864425659, "learning_rate": 0.0002, "loss": 1.6077, "step": 102780 }, { "epoch": 0.42, "grad_norm": 3.4925296306610107, "learning_rate": 0.0002, "loss": 1.671, "step": 102790 }, { "epoch": 0.42, "grad_norm": 4.87492036819458, "learning_rate": 0.0002, "loss": 1.726, "step": 102800 }, { "epoch": 0.42, "grad_norm": 5.358684539794922, "learning_rate": 0.0002, "loss": 1.7084, "step": 102810 }, { "epoch": 0.42, "grad_norm": 3.5519654750823975, "learning_rate": 0.0002, "loss": 1.7823, "step": 102820 }, { "epoch": 0.42, "grad_norm": 3.0040760040283203, "learning_rate": 0.0002, "loss": 1.6959, "step": 102830 }, { "epoch": 0.42, "grad_norm": 3.353072166442871, "learning_rate": 0.0002, "loss": 1.8388, "step": 102840 }, { "epoch": 0.42, "grad_norm": 1.7696665525436401, "learning_rate": 0.0002, "loss": 1.4582, "step": 102850 }, { "epoch": 0.42, "grad_norm": 4.880384922027588, "learning_rate": 0.0002, "loss": 1.4742, "step": 102860 }, { "epoch": 0.42, "grad_norm": 2.484436511993408, "learning_rate": 0.0002, "loss": 1.5591, "step": 102870 }, { "epoch": 0.42, "grad_norm": 4.229867458343506, "learning_rate": 0.0002, "loss": 1.7226, "step": 102880 }, { "epoch": 0.42, "grad_norm": 3.2267022132873535, "learning_rate": 0.0002, "loss": 1.5877, "step": 102890 }, { "epoch": 0.42, "grad_norm": 3.616044521331787, "learning_rate": 0.0002, "loss": 1.4838, "step": 102900 }, { "epoch": 0.42, "grad_norm": 4.3689961433410645, "learning_rate": 0.0002, "loss": 1.3517, "step": 102910 }, { "epoch": 0.42, "grad_norm": 2.235640287399292, "learning_rate": 0.0002, "loss": 1.352, "step": 102920 }, { "epoch": 0.42, "grad_norm": 2.6178159713745117, "learning_rate": 0.0002, "loss": 1.4562, "step": 102930 }, { "epoch": 0.42, "grad_norm": 1.6774200201034546, "learning_rate": 0.0002, "loss": 1.2975, "step": 102940 }, { "epoch": 0.42, "grad_norm": 2.1668694019317627, "learning_rate": 0.0002, "loss": 1.3626, "step": 102950 }, { "epoch": 0.42, "grad_norm": 5.218659400939941, "learning_rate": 0.0002, "loss": 1.4836, "step": 102960 }, { "epoch": 0.42, "grad_norm": 4.425620079040527, "learning_rate": 0.0002, "loss": 1.6884, "step": 102970 }, { "epoch": 0.42, "grad_norm": 3.0614876747131348, "learning_rate": 0.0002, "loss": 1.7396, "step": 102980 }, { "epoch": 0.42, "grad_norm": 3.6083946228027344, "learning_rate": 0.0002, "loss": 1.838, "step": 102990 }, { "epoch": 0.42, "grad_norm": 2.988320827484131, "learning_rate": 0.0002, "loss": 1.5507, "step": 103000 }, { "epoch": 0.42, "grad_norm": 2.1966190338134766, "learning_rate": 0.0002, "loss": 1.49, "step": 103010 }, { "epoch": 0.42, "grad_norm": 2.464704990386963, "learning_rate": 0.0002, "loss": 1.5153, "step": 103020 }, { "epoch": 0.42, "grad_norm": 2.834726333618164, "learning_rate": 0.0002, "loss": 1.6223, "step": 103030 }, { "epoch": 0.42, "grad_norm": 2.1052706241607666, "learning_rate": 0.0002, "loss": 1.1962, "step": 103040 }, { "epoch": 0.42, "grad_norm": 1.8476178646087646, "learning_rate": 0.0002, "loss": 1.6886, "step": 103050 }, { "epoch": 0.42, "grad_norm": 7.967728137969971, "learning_rate": 0.0002, "loss": 1.7012, "step": 103060 }, { "epoch": 0.42, "grad_norm": 2.5207912921905518, "learning_rate": 0.0002, "loss": 1.545, "step": 103070 }, { "epoch": 0.42, "grad_norm": 2.1982572078704834, "learning_rate": 0.0002, "loss": 1.3852, "step": 103080 }, { "epoch": 0.42, "grad_norm": 2.7975919246673584, "learning_rate": 0.0002, "loss": 1.55, "step": 103090 }, { "epoch": 0.42, "grad_norm": 3.812392234802246, "learning_rate": 0.0002, "loss": 1.2807, "step": 103100 }, { "epoch": 0.42, "grad_norm": 2.605252981185913, "learning_rate": 0.0002, "loss": 1.5354, "step": 103110 }, { "epoch": 0.42, "grad_norm": 2.1277403831481934, "learning_rate": 0.0002, "loss": 1.7264, "step": 103120 }, { "epoch": 0.42, "grad_norm": 3.267209768295288, "learning_rate": 0.0002, "loss": 1.6141, "step": 103130 }, { "epoch": 0.42, "grad_norm": 1.9192620515823364, "learning_rate": 0.0002, "loss": 1.3805, "step": 103140 }, { "epoch": 0.42, "grad_norm": 2.1902735233306885, "learning_rate": 0.0002, "loss": 1.5181, "step": 103150 }, { "epoch": 0.42, "grad_norm": 3.5761466026306152, "learning_rate": 0.0002, "loss": 1.7601, "step": 103160 }, { "epoch": 0.42, "grad_norm": 3.489783525466919, "learning_rate": 0.0002, "loss": 1.5502, "step": 103170 }, { "epoch": 0.42, "grad_norm": 2.084981918334961, "learning_rate": 0.0002, "loss": 1.5441, "step": 103180 }, { "epoch": 0.42, "grad_norm": 3.0125625133514404, "learning_rate": 0.0002, "loss": 1.641, "step": 103190 }, { "epoch": 0.42, "grad_norm": 3.4843828678131104, "learning_rate": 0.0002, "loss": 1.5467, "step": 103200 }, { "epoch": 0.42, "grad_norm": 4.650064945220947, "learning_rate": 0.0002, "loss": 1.5926, "step": 103210 }, { "epoch": 0.42, "grad_norm": 2.0879368782043457, "learning_rate": 0.0002, "loss": 1.6335, "step": 103220 }, { "epoch": 0.42, "grad_norm": 4.074497222900391, "learning_rate": 0.0002, "loss": 1.3447, "step": 103230 }, { "epoch": 0.42, "grad_norm": 3.120434522628784, "learning_rate": 0.0002, "loss": 1.5549, "step": 103240 }, { "epoch": 0.42, "grad_norm": 5.1906843185424805, "learning_rate": 0.0002, "loss": 1.5054, "step": 103250 }, { "epoch": 0.42, "grad_norm": 2.6062028408050537, "learning_rate": 0.0002, "loss": 1.619, "step": 103260 }, { "epoch": 0.42, "grad_norm": 5.798335075378418, "learning_rate": 0.0002, "loss": 1.6975, "step": 103270 }, { "epoch": 0.42, "grad_norm": 2.5350730419158936, "learning_rate": 0.0002, "loss": 1.5067, "step": 103280 }, { "epoch": 0.42, "grad_norm": 2.347041368484497, "learning_rate": 0.0002, "loss": 1.3294, "step": 103290 }, { "epoch": 0.42, "grad_norm": 2.9943721294403076, "learning_rate": 0.0002, "loss": 1.6191, "step": 103300 }, { "epoch": 0.42, "grad_norm": 4.512999057769775, "learning_rate": 0.0002, "loss": 1.7685, "step": 103310 }, { "epoch": 0.42, "grad_norm": 4.357737064361572, "learning_rate": 0.0002, "loss": 1.5736, "step": 103320 }, { "epoch": 0.42, "grad_norm": 1.6027017831802368, "learning_rate": 0.0002, "loss": 1.3125, "step": 103330 }, { "epoch": 0.42, "grad_norm": 2.4805901050567627, "learning_rate": 0.0002, "loss": 1.4889, "step": 103340 }, { "epoch": 0.42, "grad_norm": 3.2277846336364746, "learning_rate": 0.0002, "loss": 1.4184, "step": 103350 }, { "epoch": 0.42, "grad_norm": 3.0099501609802246, "learning_rate": 0.0002, "loss": 1.3049, "step": 103360 }, { "epoch": 0.42, "grad_norm": 2.397545099258423, "learning_rate": 0.0002, "loss": 1.6926, "step": 103370 }, { "epoch": 0.42, "grad_norm": 2.8343772888183594, "learning_rate": 0.0002, "loss": 1.6285, "step": 103380 }, { "epoch": 0.42, "grad_norm": 2.8499855995178223, "learning_rate": 0.0002, "loss": 1.7656, "step": 103390 }, { "epoch": 0.42, "grad_norm": 3.7407867908477783, "learning_rate": 0.0002, "loss": 1.7101, "step": 103400 }, { "epoch": 0.42, "grad_norm": 4.797382354736328, "learning_rate": 0.0002, "loss": 1.7273, "step": 103410 }, { "epoch": 0.42, "grad_norm": 2.3810770511627197, "learning_rate": 0.0002, "loss": 1.4615, "step": 103420 }, { "epoch": 0.42, "grad_norm": 2.9867262840270996, "learning_rate": 0.0002, "loss": 1.6181, "step": 103430 }, { "epoch": 0.42, "grad_norm": 2.881763219833374, "learning_rate": 0.0002, "loss": 1.5774, "step": 103440 }, { "epoch": 0.42, "grad_norm": 2.8686349391937256, "learning_rate": 0.0002, "loss": 1.4638, "step": 103450 }, { "epoch": 0.42, "grad_norm": 6.52532434463501, "learning_rate": 0.0002, "loss": 1.6921, "step": 103460 }, { "epoch": 0.42, "grad_norm": 2.197517156600952, "learning_rate": 0.0002, "loss": 1.3982, "step": 103470 }, { "epoch": 0.42, "grad_norm": 2.253321409225464, "learning_rate": 0.0002, "loss": 1.625, "step": 103480 }, { "epoch": 0.42, "grad_norm": 3.2940986156463623, "learning_rate": 0.0002, "loss": 1.455, "step": 103490 }, { "epoch": 0.42, "grad_norm": 4.3843770027160645, "learning_rate": 0.0002, "loss": 1.6766, "step": 103500 }, { "epoch": 0.42, "grad_norm": 2.4724903106689453, "learning_rate": 0.0002, "loss": 1.5999, "step": 103510 }, { "epoch": 0.42, "grad_norm": 2.6652989387512207, "learning_rate": 0.0002, "loss": 1.7363, "step": 103520 }, { "epoch": 0.42, "grad_norm": 2.1439497470855713, "learning_rate": 0.0002, "loss": 1.5769, "step": 103530 }, { "epoch": 0.42, "grad_norm": 2.9367125034332275, "learning_rate": 0.0002, "loss": 1.6333, "step": 103540 }, { "epoch": 0.42, "grad_norm": 3.2810826301574707, "learning_rate": 0.0002, "loss": 1.7076, "step": 103550 }, { "epoch": 0.42, "grad_norm": 3.084873914718628, "learning_rate": 0.0002, "loss": 1.6631, "step": 103560 }, { "epoch": 0.42, "grad_norm": 2.731755256652832, "learning_rate": 0.0002, "loss": 1.4626, "step": 103570 }, { "epoch": 0.42, "grad_norm": 2.7125701904296875, "learning_rate": 0.0002, "loss": 1.5704, "step": 103580 }, { "epoch": 0.42, "grad_norm": 3.1749136447906494, "learning_rate": 0.0002, "loss": 1.6485, "step": 103590 }, { "epoch": 0.42, "grad_norm": 2.0754146575927734, "learning_rate": 0.0002, "loss": 1.3428, "step": 103600 }, { "epoch": 0.42, "grad_norm": 3.6741275787353516, "learning_rate": 0.0002, "loss": 1.6642, "step": 103610 }, { "epoch": 0.42, "grad_norm": 2.6473019123077393, "learning_rate": 0.0002, "loss": 1.7259, "step": 103620 }, { "epoch": 0.42, "grad_norm": 4.48750638961792, "learning_rate": 0.0002, "loss": 1.5069, "step": 103630 }, { "epoch": 0.42, "grad_norm": 2.1373424530029297, "learning_rate": 0.0002, "loss": 1.4155, "step": 103640 }, { "epoch": 0.42, "grad_norm": 3.617985486984253, "learning_rate": 0.0002, "loss": 1.5345, "step": 103650 }, { "epoch": 0.42, "grad_norm": 3.9724550247192383, "learning_rate": 0.0002, "loss": 1.7601, "step": 103660 }, { "epoch": 0.42, "grad_norm": 1.57110595703125, "learning_rate": 0.0002, "loss": 1.5408, "step": 103670 }, { "epoch": 0.42, "grad_norm": 4.910722255706787, "learning_rate": 0.0002, "loss": 1.7783, "step": 103680 }, { "epoch": 0.42, "grad_norm": 4.149409294128418, "learning_rate": 0.0002, "loss": 1.5144, "step": 103690 }, { "epoch": 0.42, "grad_norm": 2.71885347366333, "learning_rate": 0.0002, "loss": 1.5496, "step": 103700 }, { "epoch": 0.42, "grad_norm": 2.5184242725372314, "learning_rate": 0.0002, "loss": 1.4883, "step": 103710 }, { "epoch": 0.42, "grad_norm": 5.393368721008301, "learning_rate": 0.0002, "loss": 1.6547, "step": 103720 }, { "epoch": 0.42, "grad_norm": 2.5471713542938232, "learning_rate": 0.0002, "loss": 1.4497, "step": 103730 }, { "epoch": 0.42, "grad_norm": 1.8735378980636597, "learning_rate": 0.0002, "loss": 1.6351, "step": 103740 }, { "epoch": 0.42, "grad_norm": 1.5904262065887451, "learning_rate": 0.0002, "loss": 1.4965, "step": 103750 }, { "epoch": 0.42, "grad_norm": 2.991654872894287, "learning_rate": 0.0002, "loss": 1.5065, "step": 103760 }, { "epoch": 0.42, "grad_norm": 3.2309653759002686, "learning_rate": 0.0002, "loss": 1.4931, "step": 103770 }, { "epoch": 0.42, "grad_norm": 4.907064437866211, "learning_rate": 0.0002, "loss": 1.5991, "step": 103780 }, { "epoch": 0.42, "grad_norm": 3.5090692043304443, "learning_rate": 0.0002, "loss": 1.8432, "step": 103790 }, { "epoch": 0.42, "grad_norm": 3.7943921089172363, "learning_rate": 0.0002, "loss": 1.6517, "step": 103800 }, { "epoch": 0.42, "grad_norm": 2.8267462253570557, "learning_rate": 0.0002, "loss": 1.8039, "step": 103810 }, { "epoch": 0.42, "grad_norm": 2.6675398349761963, "learning_rate": 0.0002, "loss": 1.5667, "step": 103820 }, { "epoch": 0.42, "grad_norm": 4.787604331970215, "learning_rate": 0.0002, "loss": 1.63, "step": 103830 }, { "epoch": 0.42, "grad_norm": 4.304493427276611, "learning_rate": 0.0002, "loss": 1.3783, "step": 103840 }, { "epoch": 0.42, "grad_norm": 3.1791183948516846, "learning_rate": 0.0002, "loss": 1.4564, "step": 103850 }, { "epoch": 0.42, "grad_norm": 6.763660430908203, "learning_rate": 0.0002, "loss": 1.6869, "step": 103860 }, { "epoch": 0.42, "grad_norm": 2.42742919921875, "learning_rate": 0.0002, "loss": 1.3035, "step": 103870 }, { "epoch": 0.42, "grad_norm": 3.511258363723755, "learning_rate": 0.0002, "loss": 1.4494, "step": 103880 }, { "epoch": 0.42, "grad_norm": 2.7123892307281494, "learning_rate": 0.0002, "loss": 1.3188, "step": 103890 }, { "epoch": 0.42, "grad_norm": 3.0857460498809814, "learning_rate": 0.0002, "loss": 1.6874, "step": 103900 }, { "epoch": 0.42, "grad_norm": 3.475234270095825, "learning_rate": 0.0002, "loss": 1.6399, "step": 103910 }, { "epoch": 0.42, "grad_norm": 4.486386775970459, "learning_rate": 0.0002, "loss": 1.3457, "step": 103920 }, { "epoch": 0.42, "grad_norm": 1.8061579465866089, "learning_rate": 0.0002, "loss": 1.6948, "step": 103930 }, { "epoch": 0.42, "grad_norm": 6.463961601257324, "learning_rate": 0.0002, "loss": 1.8541, "step": 103940 }, { "epoch": 0.42, "grad_norm": 2.1134793758392334, "learning_rate": 0.0002, "loss": 1.5922, "step": 103950 }, { "epoch": 0.42, "grad_norm": 3.430868148803711, "learning_rate": 0.0002, "loss": 1.6107, "step": 103960 }, { "epoch": 0.42, "grad_norm": 3.067375421524048, "learning_rate": 0.0002, "loss": 1.3651, "step": 103970 }, { "epoch": 0.42, "grad_norm": 2.7640573978424072, "learning_rate": 0.0002, "loss": 1.5635, "step": 103980 }, { "epoch": 0.42, "grad_norm": 20.501300811767578, "learning_rate": 0.0002, "loss": 1.4369, "step": 103990 }, { "epoch": 0.42, "grad_norm": 2.2619543075561523, "learning_rate": 0.0002, "loss": 1.5344, "step": 104000 }, { "epoch": 0.42, "grad_norm": 3.970075845718384, "learning_rate": 0.0002, "loss": 1.6132, "step": 104010 }, { "epoch": 0.42, "grad_norm": 3.385646343231201, "learning_rate": 0.0002, "loss": 1.6369, "step": 104020 }, { "epoch": 0.42, "grad_norm": 2.3520760536193848, "learning_rate": 0.0002, "loss": 1.4561, "step": 104030 }, { "epoch": 0.42, "grad_norm": 4.509494304656982, "learning_rate": 0.0002, "loss": 1.3766, "step": 104040 }, { "epoch": 0.42, "grad_norm": 3.905580520629883, "learning_rate": 0.0002, "loss": 1.7601, "step": 104050 }, { "epoch": 0.42, "grad_norm": 1.5342003107070923, "learning_rate": 0.0002, "loss": 1.4172, "step": 104060 }, { "epoch": 0.42, "grad_norm": 4.114332675933838, "learning_rate": 0.0002, "loss": 1.5737, "step": 104070 }, { "epoch": 0.42, "grad_norm": 2.0530807971954346, "learning_rate": 0.0002, "loss": 1.5433, "step": 104080 }, { "epoch": 0.42, "grad_norm": 3.0200066566467285, "learning_rate": 0.0002, "loss": 1.5196, "step": 104090 }, { "epoch": 0.42, "grad_norm": 5.174947261810303, "learning_rate": 0.0002, "loss": 1.5063, "step": 104100 }, { "epoch": 0.42, "grad_norm": 3.03719162940979, "learning_rate": 0.0002, "loss": 1.6231, "step": 104110 }, { "epoch": 0.42, "grad_norm": 3.365708112716675, "learning_rate": 0.0002, "loss": 1.6831, "step": 104120 }, { "epoch": 0.42, "grad_norm": 2.3895223140716553, "learning_rate": 0.0002, "loss": 1.6634, "step": 104130 }, { "epoch": 0.42, "grad_norm": 3.123760938644409, "learning_rate": 0.0002, "loss": 1.7893, "step": 104140 }, { "epoch": 0.42, "grad_norm": 2.3352091312408447, "learning_rate": 0.0002, "loss": 1.7047, "step": 104150 }, { "epoch": 0.42, "grad_norm": 2.7094199657440186, "learning_rate": 0.0002, "loss": 1.4424, "step": 104160 }, { "epoch": 0.42, "grad_norm": 4.242899417877197, "learning_rate": 0.0002, "loss": 1.8081, "step": 104170 }, { "epoch": 0.42, "grad_norm": 2.4572904109954834, "learning_rate": 0.0002, "loss": 1.5511, "step": 104180 }, { "epoch": 0.42, "grad_norm": 2.8570950031280518, "learning_rate": 0.0002, "loss": 1.5759, "step": 104190 }, { "epoch": 0.42, "grad_norm": 3.60577654838562, "learning_rate": 0.0002, "loss": 1.467, "step": 104200 }, { "epoch": 0.42, "grad_norm": 3.5162925720214844, "learning_rate": 0.0002, "loss": 1.4785, "step": 104210 }, { "epoch": 0.42, "grad_norm": 3.018490791320801, "learning_rate": 0.0002, "loss": 1.6079, "step": 104220 }, { "epoch": 0.42, "grad_norm": 3.7690980434417725, "learning_rate": 0.0002, "loss": 1.5035, "step": 104230 }, { "epoch": 0.42, "grad_norm": 2.7462141513824463, "learning_rate": 0.0002, "loss": 1.6435, "step": 104240 }, { "epoch": 0.42, "grad_norm": 1.5806885957717896, "learning_rate": 0.0002, "loss": 1.5866, "step": 104250 }, { "epoch": 0.42, "grad_norm": 2.7080516815185547, "learning_rate": 0.0002, "loss": 1.4809, "step": 104260 }, { "epoch": 0.42, "grad_norm": 3.169149398803711, "learning_rate": 0.0002, "loss": 1.5999, "step": 104270 }, { "epoch": 0.42, "grad_norm": 2.9643609523773193, "learning_rate": 0.0002, "loss": 1.5523, "step": 104280 }, { "epoch": 0.42, "grad_norm": 2.70509934425354, "learning_rate": 0.0002, "loss": 1.5881, "step": 104290 }, { "epoch": 0.42, "grad_norm": 2.3901846408843994, "learning_rate": 0.0002, "loss": 1.742, "step": 104300 }, { "epoch": 0.42, "grad_norm": 2.869950532913208, "learning_rate": 0.0002, "loss": 1.6948, "step": 104310 }, { "epoch": 0.42, "grad_norm": 2.9112555980682373, "learning_rate": 0.0002, "loss": 1.6788, "step": 104320 }, { "epoch": 0.42, "grad_norm": 3.6757400035858154, "learning_rate": 0.0002, "loss": 1.5549, "step": 104330 }, { "epoch": 0.42, "grad_norm": 3.4336187839508057, "learning_rate": 0.0002, "loss": 1.5045, "step": 104340 }, { "epoch": 0.42, "grad_norm": 3.1739838123321533, "learning_rate": 0.0002, "loss": 1.7551, "step": 104350 }, { "epoch": 0.42, "grad_norm": 4.74634313583374, "learning_rate": 0.0002, "loss": 1.4549, "step": 104360 }, { "epoch": 0.42, "grad_norm": 4.221538543701172, "learning_rate": 0.0002, "loss": 1.2661, "step": 104370 }, { "epoch": 0.42, "grad_norm": 2.014404535293579, "learning_rate": 0.0002, "loss": 1.5406, "step": 104380 }, { "epoch": 0.42, "grad_norm": 2.7966926097869873, "learning_rate": 0.0002, "loss": 1.4073, "step": 104390 }, { "epoch": 0.43, "grad_norm": 3.662231206893921, "learning_rate": 0.0002, "loss": 1.758, "step": 104400 }, { "epoch": 0.43, "grad_norm": 4.579889297485352, "learning_rate": 0.0002, "loss": 1.5988, "step": 104410 }, { "epoch": 0.43, "grad_norm": 4.183000564575195, "learning_rate": 0.0002, "loss": 1.4859, "step": 104420 }, { "epoch": 0.43, "grad_norm": 2.7466068267822266, "learning_rate": 0.0002, "loss": 1.748, "step": 104430 }, { "epoch": 0.43, "grad_norm": 3.316596031188965, "learning_rate": 0.0002, "loss": 1.4684, "step": 104440 }, { "epoch": 0.43, "grad_norm": 2.515033006668091, "learning_rate": 0.0002, "loss": 1.6456, "step": 104450 }, { "epoch": 0.43, "grad_norm": 2.0703961849212646, "learning_rate": 0.0002, "loss": 1.6334, "step": 104460 }, { "epoch": 0.43, "grad_norm": 2.2373571395874023, "learning_rate": 0.0002, "loss": 1.8031, "step": 104470 }, { "epoch": 0.43, "grad_norm": 4.263761043548584, "learning_rate": 0.0002, "loss": 1.5959, "step": 104480 }, { "epoch": 0.43, "grad_norm": 2.092604637145996, "learning_rate": 0.0002, "loss": 1.7837, "step": 104490 }, { "epoch": 0.43, "grad_norm": 2.4235990047454834, "learning_rate": 0.0002, "loss": 1.4791, "step": 104500 }, { "epoch": 0.43, "grad_norm": 2.9384829998016357, "learning_rate": 0.0002, "loss": 1.4315, "step": 104510 }, { "epoch": 0.43, "grad_norm": 2.3227736949920654, "learning_rate": 0.0002, "loss": 1.6686, "step": 104520 }, { "epoch": 0.43, "grad_norm": 3.4673049449920654, "learning_rate": 0.0002, "loss": 1.6389, "step": 104530 }, { "epoch": 0.43, "grad_norm": 3.41507887840271, "learning_rate": 0.0002, "loss": 1.7683, "step": 104540 }, { "epoch": 0.43, "grad_norm": 3.1720082759857178, "learning_rate": 0.0002, "loss": 1.2484, "step": 104550 }, { "epoch": 0.43, "grad_norm": 3.399451971054077, "learning_rate": 0.0002, "loss": 1.6556, "step": 104560 }, { "epoch": 0.43, "grad_norm": 2.6420602798461914, "learning_rate": 0.0002, "loss": 1.6219, "step": 104570 }, { "epoch": 0.43, "grad_norm": 2.9903197288513184, "learning_rate": 0.0002, "loss": 1.6067, "step": 104580 }, { "epoch": 0.43, "grad_norm": 3.3553624153137207, "learning_rate": 0.0002, "loss": 1.4333, "step": 104590 }, { "epoch": 0.43, "grad_norm": 4.018752098083496, "learning_rate": 0.0002, "loss": 1.4978, "step": 104600 }, { "epoch": 0.43, "grad_norm": 3.562045097351074, "learning_rate": 0.0002, "loss": 1.5181, "step": 104610 }, { "epoch": 0.43, "grad_norm": 2.3689231872558594, "learning_rate": 0.0002, "loss": 1.3747, "step": 104620 }, { "epoch": 0.43, "grad_norm": 5.935523509979248, "learning_rate": 0.0002, "loss": 1.5274, "step": 104630 }, { "epoch": 0.43, "grad_norm": 7.494073390960693, "learning_rate": 0.0002, "loss": 1.6404, "step": 104640 }, { "epoch": 0.43, "grad_norm": 4.335690498352051, "learning_rate": 0.0002, "loss": 1.6595, "step": 104650 }, { "epoch": 0.43, "grad_norm": 3.654238224029541, "learning_rate": 0.0002, "loss": 1.6747, "step": 104660 }, { "epoch": 0.43, "grad_norm": 3.2441391944885254, "learning_rate": 0.0002, "loss": 1.779, "step": 104670 }, { "epoch": 0.43, "grad_norm": 3.28922700881958, "learning_rate": 0.0002, "loss": 1.7531, "step": 104680 }, { "epoch": 0.43, "grad_norm": 3.360893487930298, "learning_rate": 0.0002, "loss": 1.6599, "step": 104690 }, { "epoch": 0.43, "grad_norm": 2.8017466068267822, "learning_rate": 0.0002, "loss": 1.7116, "step": 104700 }, { "epoch": 0.43, "grad_norm": 3.611896276473999, "learning_rate": 0.0002, "loss": 1.5088, "step": 104710 }, { "epoch": 0.43, "grad_norm": 2.1182520389556885, "learning_rate": 0.0002, "loss": 1.3552, "step": 104720 }, { "epoch": 0.43, "grad_norm": 3.119485855102539, "learning_rate": 0.0002, "loss": 1.851, "step": 104730 }, { "epoch": 0.43, "grad_norm": 2.4237189292907715, "learning_rate": 0.0002, "loss": 1.5206, "step": 104740 }, { "epoch": 0.43, "grad_norm": 2.8725719451904297, "learning_rate": 0.0002, "loss": 1.4187, "step": 104750 }, { "epoch": 0.43, "grad_norm": 3.207566261291504, "learning_rate": 0.0002, "loss": 1.7007, "step": 104760 }, { "epoch": 0.43, "grad_norm": 1.8716450929641724, "learning_rate": 0.0002, "loss": 1.3711, "step": 104770 }, { "epoch": 0.43, "grad_norm": 1.8985087871551514, "learning_rate": 0.0002, "loss": 1.7812, "step": 104780 }, { "epoch": 0.43, "grad_norm": 3.9474854469299316, "learning_rate": 0.0002, "loss": 1.6121, "step": 104790 }, { "epoch": 0.43, "grad_norm": 1.3121073246002197, "learning_rate": 0.0002, "loss": 1.6274, "step": 104800 }, { "epoch": 0.43, "grad_norm": 2.644965887069702, "learning_rate": 0.0002, "loss": 1.6484, "step": 104810 }, { "epoch": 0.43, "grad_norm": 2.577759027481079, "learning_rate": 0.0002, "loss": 1.8065, "step": 104820 }, { "epoch": 0.43, "grad_norm": 3.6175146102905273, "learning_rate": 0.0002, "loss": 1.9314, "step": 104830 }, { "epoch": 0.43, "grad_norm": 1.964019536972046, "learning_rate": 0.0002, "loss": 1.5523, "step": 104840 }, { "epoch": 0.43, "grad_norm": 3.9032256603240967, "learning_rate": 0.0002, "loss": 1.6684, "step": 104850 }, { "epoch": 0.43, "grad_norm": 3.7109296321868896, "learning_rate": 0.0002, "loss": 1.7264, "step": 104860 }, { "epoch": 0.43, "grad_norm": 3.2210278511047363, "learning_rate": 0.0002, "loss": 1.5319, "step": 104870 }, { "epoch": 0.43, "grad_norm": 2.7875161170959473, "learning_rate": 0.0002, "loss": 1.6255, "step": 104880 }, { "epoch": 0.43, "grad_norm": 3.7111103534698486, "learning_rate": 0.0002, "loss": 1.5828, "step": 104890 }, { "epoch": 0.43, "grad_norm": 2.722224473953247, "learning_rate": 0.0002, "loss": 1.6122, "step": 104900 }, { "epoch": 0.43, "grad_norm": 1.9467695951461792, "learning_rate": 0.0002, "loss": 1.4194, "step": 104910 }, { "epoch": 0.43, "grad_norm": 3.932892084121704, "learning_rate": 0.0002, "loss": 1.7665, "step": 104920 }, { "epoch": 0.43, "grad_norm": 4.84856653213501, "learning_rate": 0.0002, "loss": 1.5875, "step": 104930 }, { "epoch": 0.43, "grad_norm": 1.8660048246383667, "learning_rate": 0.0002, "loss": 1.6934, "step": 104940 }, { "epoch": 0.43, "grad_norm": 2.658061981201172, "learning_rate": 0.0002, "loss": 1.6375, "step": 104950 }, { "epoch": 0.43, "grad_norm": 2.2606823444366455, "learning_rate": 0.0002, "loss": 1.4714, "step": 104960 }, { "epoch": 0.43, "grad_norm": 3.4386942386627197, "learning_rate": 0.0002, "loss": 1.5804, "step": 104970 }, { "epoch": 0.43, "grad_norm": 3.5226235389709473, "learning_rate": 0.0002, "loss": 1.7253, "step": 104980 }, { "epoch": 0.43, "grad_norm": 2.428950548171997, "learning_rate": 0.0002, "loss": 1.5137, "step": 104990 }, { "epoch": 0.43, "grad_norm": 2.8952994346618652, "learning_rate": 0.0002, "loss": 1.5545, "step": 105000 }, { "epoch": 0.43, "grad_norm": 1.5820648670196533, "learning_rate": 0.0002, "loss": 1.5742, "step": 105010 }, { "epoch": 0.43, "grad_norm": 2.538445234298706, "learning_rate": 0.0002, "loss": 1.4162, "step": 105020 }, { "epoch": 0.43, "grad_norm": 1.9478181600570679, "learning_rate": 0.0002, "loss": 1.6324, "step": 105030 }, { "epoch": 0.43, "grad_norm": 3.8042516708374023, "learning_rate": 0.0002, "loss": 1.7015, "step": 105040 }, { "epoch": 0.43, "grad_norm": 2.7399933338165283, "learning_rate": 0.0002, "loss": 1.7184, "step": 105050 }, { "epoch": 0.43, "grad_norm": 5.400908946990967, "learning_rate": 0.0002, "loss": 1.5801, "step": 105060 }, { "epoch": 0.43, "grad_norm": 4.357347011566162, "learning_rate": 0.0002, "loss": 1.5693, "step": 105070 }, { "epoch": 0.43, "grad_norm": 3.3405609130859375, "learning_rate": 0.0002, "loss": 1.5896, "step": 105080 }, { "epoch": 0.43, "grad_norm": 3.750795364379883, "learning_rate": 0.0002, "loss": 1.4989, "step": 105090 }, { "epoch": 0.43, "grad_norm": 3.89328932762146, "learning_rate": 0.0002, "loss": 1.4779, "step": 105100 }, { "epoch": 0.43, "grad_norm": 3.0223548412323, "learning_rate": 0.0002, "loss": 1.6215, "step": 105110 }, { "epoch": 0.43, "grad_norm": 2.450993061065674, "learning_rate": 0.0002, "loss": 1.4179, "step": 105120 }, { "epoch": 0.43, "grad_norm": 2.861539840698242, "learning_rate": 0.0002, "loss": 1.7099, "step": 105130 }, { "epoch": 0.43, "grad_norm": 2.076781988143921, "learning_rate": 0.0002, "loss": 1.6571, "step": 105140 }, { "epoch": 0.43, "grad_norm": 3.133410692214966, "learning_rate": 0.0002, "loss": 1.5644, "step": 105150 }, { "epoch": 0.43, "grad_norm": 5.350871562957764, "learning_rate": 0.0002, "loss": 1.4484, "step": 105160 }, { "epoch": 0.43, "grad_norm": 2.741652488708496, "learning_rate": 0.0002, "loss": 1.5028, "step": 105170 }, { "epoch": 0.43, "grad_norm": 1.6862105131149292, "learning_rate": 0.0002, "loss": 1.6521, "step": 105180 }, { "epoch": 0.43, "grad_norm": 2.5133979320526123, "learning_rate": 0.0002, "loss": 1.3403, "step": 105190 }, { "epoch": 0.43, "grad_norm": 2.7929883003234863, "learning_rate": 0.0002, "loss": 1.6106, "step": 105200 }, { "epoch": 0.43, "grad_norm": 4.663041114807129, "learning_rate": 0.0002, "loss": 1.5554, "step": 105210 }, { "epoch": 0.43, "grad_norm": 3.7004730701446533, "learning_rate": 0.0002, "loss": 1.5628, "step": 105220 }, { "epoch": 0.43, "grad_norm": 6.840669631958008, "learning_rate": 0.0002, "loss": 1.7747, "step": 105230 }, { "epoch": 0.43, "grad_norm": 2.6565046310424805, "learning_rate": 0.0002, "loss": 1.3399, "step": 105240 }, { "epoch": 0.43, "grad_norm": 2.313096761703491, "learning_rate": 0.0002, "loss": 1.7816, "step": 105250 }, { "epoch": 0.43, "grad_norm": 2.724039077758789, "learning_rate": 0.0002, "loss": 1.5889, "step": 105260 }, { "epoch": 0.43, "grad_norm": 3.862316846847534, "learning_rate": 0.0002, "loss": 1.6038, "step": 105270 }, { "epoch": 0.43, "grad_norm": 3.2871310710906982, "learning_rate": 0.0002, "loss": 1.4758, "step": 105280 }, { "epoch": 0.43, "grad_norm": 3.445208787918091, "learning_rate": 0.0002, "loss": 1.6475, "step": 105290 }, { "epoch": 0.43, "grad_norm": 3.0126023292541504, "learning_rate": 0.0002, "loss": 1.635, "step": 105300 }, { "epoch": 0.43, "grad_norm": 1.9166665077209473, "learning_rate": 0.0002, "loss": 1.4654, "step": 105310 }, { "epoch": 0.43, "grad_norm": 3.026477336883545, "learning_rate": 0.0002, "loss": 1.5187, "step": 105320 }, { "epoch": 0.43, "grad_norm": 3.2683284282684326, "learning_rate": 0.0002, "loss": 1.596, "step": 105330 }, { "epoch": 0.43, "grad_norm": 3.628504753112793, "learning_rate": 0.0002, "loss": 1.3826, "step": 105340 }, { "epoch": 0.43, "grad_norm": 2.0302510261535645, "learning_rate": 0.0002, "loss": 1.6744, "step": 105350 }, { "epoch": 0.43, "grad_norm": 3.050147533416748, "learning_rate": 0.0002, "loss": 1.7724, "step": 105360 }, { "epoch": 0.43, "grad_norm": 4.936948299407959, "learning_rate": 0.0002, "loss": 1.6689, "step": 105370 }, { "epoch": 0.43, "grad_norm": 4.590659141540527, "learning_rate": 0.0002, "loss": 1.3844, "step": 105380 }, { "epoch": 0.43, "grad_norm": 2.7657761573791504, "learning_rate": 0.0002, "loss": 1.5846, "step": 105390 }, { "epoch": 0.43, "grad_norm": 3.7906270027160645, "learning_rate": 0.0002, "loss": 1.7947, "step": 105400 }, { "epoch": 0.43, "grad_norm": 3.482110023498535, "learning_rate": 0.0002, "loss": 1.6187, "step": 105410 }, { "epoch": 0.43, "grad_norm": 2.932879686355591, "learning_rate": 0.0002, "loss": 1.6944, "step": 105420 }, { "epoch": 0.43, "grad_norm": 2.846493721008301, "learning_rate": 0.0002, "loss": 1.6082, "step": 105430 }, { "epoch": 0.43, "grad_norm": 3.4326164722442627, "learning_rate": 0.0002, "loss": 1.8254, "step": 105440 }, { "epoch": 0.43, "grad_norm": 1.7831999063491821, "learning_rate": 0.0002, "loss": 1.7931, "step": 105450 }, { "epoch": 0.43, "grad_norm": 3.154442548751831, "learning_rate": 0.0002, "loss": 1.4197, "step": 105460 }, { "epoch": 0.43, "grad_norm": 2.918799638748169, "learning_rate": 0.0002, "loss": 1.857, "step": 105470 }, { "epoch": 0.43, "grad_norm": 5.676022529602051, "learning_rate": 0.0002, "loss": 1.495, "step": 105480 }, { "epoch": 0.43, "grad_norm": 2.1302707195281982, "learning_rate": 0.0002, "loss": 1.3831, "step": 105490 }, { "epoch": 0.43, "grad_norm": 2.0801045894622803, "learning_rate": 0.0002, "loss": 1.6486, "step": 105500 }, { "epoch": 0.43, "grad_norm": 1.4755324125289917, "learning_rate": 0.0002, "loss": 1.3638, "step": 105510 }, { "epoch": 0.43, "grad_norm": 3.6551034450531006, "learning_rate": 0.0002, "loss": 1.5506, "step": 105520 }, { "epoch": 0.43, "grad_norm": 5.081961154937744, "learning_rate": 0.0002, "loss": 1.4736, "step": 105530 }, { "epoch": 0.43, "grad_norm": 2.2390925884246826, "learning_rate": 0.0002, "loss": 1.707, "step": 105540 }, { "epoch": 0.43, "grad_norm": 2.55643630027771, "learning_rate": 0.0002, "loss": 1.4475, "step": 105550 }, { "epoch": 0.43, "grad_norm": 1.9898256063461304, "learning_rate": 0.0002, "loss": 1.4487, "step": 105560 }, { "epoch": 0.43, "grad_norm": 2.389045476913452, "learning_rate": 0.0002, "loss": 1.4502, "step": 105570 }, { "epoch": 0.43, "grad_norm": 1.9488353729248047, "learning_rate": 0.0002, "loss": 1.4048, "step": 105580 }, { "epoch": 0.43, "grad_norm": 1.8784515857696533, "learning_rate": 0.0002, "loss": 1.5205, "step": 105590 }, { "epoch": 0.43, "grad_norm": 4.227440357208252, "learning_rate": 0.0002, "loss": 1.502, "step": 105600 }, { "epoch": 0.43, "grad_norm": 2.5297255516052246, "learning_rate": 0.0002, "loss": 1.5942, "step": 105610 }, { "epoch": 0.43, "grad_norm": 2.775383234024048, "learning_rate": 0.0002, "loss": 1.6258, "step": 105620 }, { "epoch": 0.43, "grad_norm": 3.6426491737365723, "learning_rate": 0.0002, "loss": 1.6133, "step": 105630 }, { "epoch": 0.43, "grad_norm": 4.125016212463379, "learning_rate": 0.0002, "loss": 1.4064, "step": 105640 }, { "epoch": 0.43, "grad_norm": 3.6959118843078613, "learning_rate": 0.0002, "loss": 1.6864, "step": 105650 }, { "epoch": 0.43, "grad_norm": 4.475645065307617, "learning_rate": 0.0002, "loss": 1.7067, "step": 105660 }, { "epoch": 0.43, "grad_norm": 2.3048665523529053, "learning_rate": 0.0002, "loss": 1.8316, "step": 105670 }, { "epoch": 0.43, "grad_norm": 3.455524206161499, "learning_rate": 0.0002, "loss": 1.6445, "step": 105680 }, { "epoch": 0.43, "grad_norm": 2.2001349925994873, "learning_rate": 0.0002, "loss": 1.4082, "step": 105690 }, { "epoch": 0.43, "grad_norm": 2.626863718032837, "learning_rate": 0.0002, "loss": 1.4584, "step": 105700 }, { "epoch": 0.43, "grad_norm": 3.3809444904327393, "learning_rate": 0.0002, "loss": 1.2767, "step": 105710 }, { "epoch": 0.43, "grad_norm": 3.616567611694336, "learning_rate": 0.0002, "loss": 1.6292, "step": 105720 }, { "epoch": 0.43, "grad_norm": 2.5792078971862793, "learning_rate": 0.0002, "loss": 1.9308, "step": 105730 }, { "epoch": 0.43, "grad_norm": 2.2638328075408936, "learning_rate": 0.0002, "loss": 1.5965, "step": 105740 }, { "epoch": 0.43, "grad_norm": 2.357461452484131, "learning_rate": 0.0002, "loss": 1.5894, "step": 105750 }, { "epoch": 0.43, "grad_norm": 2.08610463142395, "learning_rate": 0.0002, "loss": 1.4741, "step": 105760 }, { "epoch": 0.43, "grad_norm": 1.9478285312652588, "learning_rate": 0.0002, "loss": 1.6912, "step": 105770 }, { "epoch": 0.43, "grad_norm": 4.197672367095947, "learning_rate": 0.0002, "loss": 1.455, "step": 105780 }, { "epoch": 0.43, "grad_norm": 1.9481998682022095, "learning_rate": 0.0002, "loss": 1.4967, "step": 105790 }, { "epoch": 0.43, "grad_norm": 2.023766040802002, "learning_rate": 0.0002, "loss": 1.4576, "step": 105800 }, { "epoch": 0.43, "grad_norm": 4.989976406097412, "learning_rate": 0.0002, "loss": 1.5944, "step": 105810 }, { "epoch": 0.43, "grad_norm": 5.361483573913574, "learning_rate": 0.0002, "loss": 1.5108, "step": 105820 }, { "epoch": 0.43, "grad_norm": 2.5067694187164307, "learning_rate": 0.0002, "loss": 1.6326, "step": 105830 }, { "epoch": 0.43, "grad_norm": 2.9346344470977783, "learning_rate": 0.0002, "loss": 1.4801, "step": 105840 }, { "epoch": 0.43, "grad_norm": 2.8686180114746094, "learning_rate": 0.0002, "loss": 1.4729, "step": 105850 }, { "epoch": 0.43, "grad_norm": 2.8080320358276367, "learning_rate": 0.0002, "loss": 1.7862, "step": 105860 }, { "epoch": 0.43, "grad_norm": 1.4621963500976562, "learning_rate": 0.0002, "loss": 1.8597, "step": 105870 }, { "epoch": 0.43, "grad_norm": 2.5196120738983154, "learning_rate": 0.0002, "loss": 1.3508, "step": 105880 }, { "epoch": 0.43, "grad_norm": 2.4171135425567627, "learning_rate": 0.0002, "loss": 1.5271, "step": 105890 }, { "epoch": 0.43, "grad_norm": 2.169055938720703, "learning_rate": 0.0002, "loss": 1.5541, "step": 105900 }, { "epoch": 0.43, "grad_norm": 3.6213390827178955, "learning_rate": 0.0002, "loss": 1.6283, "step": 105910 }, { "epoch": 0.43, "grad_norm": 4.239074230194092, "learning_rate": 0.0002, "loss": 1.7979, "step": 105920 }, { "epoch": 0.43, "grad_norm": 3.754574775695801, "learning_rate": 0.0002, "loss": 1.6632, "step": 105930 }, { "epoch": 0.43, "grad_norm": 3.0500214099884033, "learning_rate": 0.0002, "loss": 1.6674, "step": 105940 }, { "epoch": 0.43, "grad_norm": 3.3887157440185547, "learning_rate": 0.0002, "loss": 1.7471, "step": 105950 }, { "epoch": 0.43, "grad_norm": 3.4116477966308594, "learning_rate": 0.0002, "loss": 1.3061, "step": 105960 }, { "epoch": 0.43, "grad_norm": 4.837768077850342, "learning_rate": 0.0002, "loss": 1.587, "step": 105970 }, { "epoch": 0.43, "grad_norm": 2.122295618057251, "learning_rate": 0.0002, "loss": 1.4459, "step": 105980 }, { "epoch": 0.43, "grad_norm": 3.2127344608306885, "learning_rate": 0.0002, "loss": 1.6095, "step": 105990 }, { "epoch": 0.43, "grad_norm": 2.347391128540039, "learning_rate": 0.0002, "loss": 1.363, "step": 106000 }, { "epoch": 0.43, "grad_norm": 4.194771766662598, "learning_rate": 0.0002, "loss": 1.5162, "step": 106010 }, { "epoch": 0.43, "grad_norm": 3.8281681537628174, "learning_rate": 0.0002, "loss": 1.5446, "step": 106020 }, { "epoch": 0.43, "grad_norm": 3.1354269981384277, "learning_rate": 0.0002, "loss": 1.6844, "step": 106030 }, { "epoch": 0.43, "grad_norm": 2.6286733150482178, "learning_rate": 0.0002, "loss": 1.5322, "step": 106040 }, { "epoch": 0.43, "grad_norm": 2.1198017597198486, "learning_rate": 0.0002, "loss": 1.3409, "step": 106050 }, { "epoch": 0.43, "grad_norm": 2.732998847961426, "learning_rate": 0.0002, "loss": 1.3478, "step": 106060 }, { "epoch": 0.43, "grad_norm": 2.903514862060547, "learning_rate": 0.0002, "loss": 1.6372, "step": 106070 }, { "epoch": 0.43, "grad_norm": 5.716491222381592, "learning_rate": 0.0002, "loss": 1.6231, "step": 106080 }, { "epoch": 0.43, "grad_norm": 2.2734627723693848, "learning_rate": 0.0002, "loss": 1.4876, "step": 106090 }, { "epoch": 0.43, "grad_norm": 3.480517625808716, "learning_rate": 0.0002, "loss": 1.5062, "step": 106100 }, { "epoch": 0.43, "grad_norm": 3.4769020080566406, "learning_rate": 0.0002, "loss": 1.4486, "step": 106110 }, { "epoch": 0.43, "grad_norm": 2.174656629562378, "learning_rate": 0.0002, "loss": 1.5624, "step": 106120 }, { "epoch": 0.43, "grad_norm": 2.5610740184783936, "learning_rate": 0.0002, "loss": 1.848, "step": 106130 }, { "epoch": 0.43, "grad_norm": 2.941967487335205, "learning_rate": 0.0002, "loss": 1.699, "step": 106140 }, { "epoch": 0.43, "grad_norm": 1.7545666694641113, "learning_rate": 0.0002, "loss": 1.5383, "step": 106150 }, { "epoch": 0.43, "grad_norm": 1.9079855680465698, "learning_rate": 0.0002, "loss": 1.62, "step": 106160 }, { "epoch": 0.43, "grad_norm": 3.989985466003418, "learning_rate": 0.0002, "loss": 1.7131, "step": 106170 }, { "epoch": 0.43, "grad_norm": 2.5451414585113525, "learning_rate": 0.0002, "loss": 1.5045, "step": 106180 }, { "epoch": 0.43, "grad_norm": 1.8969563245773315, "learning_rate": 0.0002, "loss": 1.5426, "step": 106190 }, { "epoch": 0.43, "grad_norm": 3.704042911529541, "learning_rate": 0.0002, "loss": 1.2751, "step": 106200 }, { "epoch": 0.43, "grad_norm": 1.555167317390442, "learning_rate": 0.0002, "loss": 1.6005, "step": 106210 }, { "epoch": 0.43, "grad_norm": 4.04355001449585, "learning_rate": 0.0002, "loss": 1.5917, "step": 106220 }, { "epoch": 0.43, "grad_norm": 2.564619302749634, "learning_rate": 0.0002, "loss": 1.6747, "step": 106230 }, { "epoch": 0.43, "grad_norm": 2.8647730350494385, "learning_rate": 0.0002, "loss": 1.5246, "step": 106240 }, { "epoch": 0.43, "grad_norm": 4.857524394989014, "learning_rate": 0.0002, "loss": 1.243, "step": 106250 }, { "epoch": 0.43, "grad_norm": 2.1486802101135254, "learning_rate": 0.0002, "loss": 1.457, "step": 106260 }, { "epoch": 0.43, "grad_norm": 3.0982491970062256, "learning_rate": 0.0002, "loss": 1.592, "step": 106270 }, { "epoch": 0.43, "grad_norm": 2.957439422607422, "learning_rate": 0.0002, "loss": 1.4782, "step": 106280 }, { "epoch": 0.43, "grad_norm": 2.1688899993896484, "learning_rate": 0.0002, "loss": 1.5916, "step": 106290 }, { "epoch": 0.43, "grad_norm": 2.928105592727661, "learning_rate": 0.0002, "loss": 1.46, "step": 106300 }, { "epoch": 0.43, "grad_norm": 3.011510133743286, "learning_rate": 0.0002, "loss": 1.4785, "step": 106310 }, { "epoch": 0.43, "grad_norm": 2.843799352645874, "learning_rate": 0.0002, "loss": 1.3156, "step": 106320 }, { "epoch": 0.43, "grad_norm": 4.685025691986084, "learning_rate": 0.0002, "loss": 1.5753, "step": 106330 }, { "epoch": 0.43, "grad_norm": 3.3548412322998047, "learning_rate": 0.0002, "loss": 1.7287, "step": 106340 }, { "epoch": 0.43, "grad_norm": 1.8597623109817505, "learning_rate": 0.0002, "loss": 1.832, "step": 106350 }, { "epoch": 0.43, "grad_norm": 4.211085796356201, "learning_rate": 0.0002, "loss": 1.4997, "step": 106360 }, { "epoch": 0.43, "grad_norm": 2.1004674434661865, "learning_rate": 0.0002, "loss": 1.5224, "step": 106370 }, { "epoch": 0.43, "grad_norm": 1.9830108880996704, "learning_rate": 0.0002, "loss": 1.5208, "step": 106380 }, { "epoch": 0.43, "grad_norm": 3.221346139907837, "learning_rate": 0.0002, "loss": 1.7945, "step": 106390 }, { "epoch": 0.43, "grad_norm": 4.978363990783691, "learning_rate": 0.0002, "loss": 1.4185, "step": 106400 }, { "epoch": 0.43, "grad_norm": 3.835336685180664, "learning_rate": 0.0002, "loss": 1.4542, "step": 106410 }, { "epoch": 0.43, "grad_norm": 3.4706993103027344, "learning_rate": 0.0002, "loss": 1.5483, "step": 106420 }, { "epoch": 0.43, "grad_norm": 3.1831343173980713, "learning_rate": 0.0002, "loss": 1.6963, "step": 106430 }, { "epoch": 0.43, "grad_norm": 5.788822174072266, "learning_rate": 0.0002, "loss": 1.5322, "step": 106440 }, { "epoch": 0.43, "grad_norm": 3.8795065879821777, "learning_rate": 0.0002, "loss": 1.609, "step": 106450 }, { "epoch": 0.43, "grad_norm": 4.000240325927734, "learning_rate": 0.0002, "loss": 1.372, "step": 106460 }, { "epoch": 0.43, "grad_norm": 4.198722839355469, "learning_rate": 0.0002, "loss": 1.3873, "step": 106470 }, { "epoch": 0.43, "grad_norm": 2.1944546699523926, "learning_rate": 0.0002, "loss": 1.539, "step": 106480 }, { "epoch": 0.43, "grad_norm": 2.7968404293060303, "learning_rate": 0.0002, "loss": 1.5251, "step": 106490 }, { "epoch": 0.43, "grad_norm": 2.1913907527923584, "learning_rate": 0.0002, "loss": 1.6658, "step": 106500 }, { "epoch": 0.43, "grad_norm": 4.408627986907959, "learning_rate": 0.0002, "loss": 1.4188, "step": 106510 }, { "epoch": 0.43, "grad_norm": 2.566896438598633, "learning_rate": 0.0002, "loss": 1.6922, "step": 106520 }, { "epoch": 0.43, "grad_norm": 8.398991584777832, "learning_rate": 0.0002, "loss": 1.4967, "step": 106530 }, { "epoch": 0.43, "grad_norm": 2.7542362213134766, "learning_rate": 0.0002, "loss": 1.772, "step": 106540 }, { "epoch": 0.43, "grad_norm": 2.3844175338745117, "learning_rate": 0.0002, "loss": 1.4075, "step": 106550 }, { "epoch": 0.43, "grad_norm": 3.014322280883789, "learning_rate": 0.0002, "loss": 1.6967, "step": 106560 }, { "epoch": 0.43, "grad_norm": 2.281874418258667, "learning_rate": 0.0002, "loss": 1.5863, "step": 106570 }, { "epoch": 0.43, "grad_norm": 2.521470785140991, "learning_rate": 0.0002, "loss": 1.2864, "step": 106580 }, { "epoch": 0.43, "grad_norm": 4.714476108551025, "learning_rate": 0.0002, "loss": 1.6414, "step": 106590 }, { "epoch": 0.43, "grad_norm": 3.293081521987915, "learning_rate": 0.0002, "loss": 1.458, "step": 106600 }, { "epoch": 0.43, "grad_norm": 3.2481846809387207, "learning_rate": 0.0002, "loss": 1.8197, "step": 106610 }, { "epoch": 0.43, "grad_norm": 2.3951849937438965, "learning_rate": 0.0002, "loss": 1.4144, "step": 106620 }, { "epoch": 0.43, "grad_norm": 3.392268419265747, "learning_rate": 0.0002, "loss": 1.51, "step": 106630 }, { "epoch": 0.43, "grad_norm": 3.3564610481262207, "learning_rate": 0.0002, "loss": 1.5849, "step": 106640 }, { "epoch": 0.43, "grad_norm": 3.0285491943359375, "learning_rate": 0.0002, "loss": 1.7221, "step": 106650 }, { "epoch": 0.43, "grad_norm": 2.8427224159240723, "learning_rate": 0.0002, "loss": 1.5765, "step": 106660 }, { "epoch": 0.43, "grad_norm": 3.876697540283203, "learning_rate": 0.0002, "loss": 1.7355, "step": 106670 }, { "epoch": 0.43, "grad_norm": 2.707033157348633, "learning_rate": 0.0002, "loss": 1.5751, "step": 106680 }, { "epoch": 0.43, "grad_norm": 4.322067737579346, "learning_rate": 0.0002, "loss": 2.0162, "step": 106690 }, { "epoch": 0.43, "grad_norm": 5.46891975402832, "learning_rate": 0.0002, "loss": 2.0092, "step": 106700 }, { "epoch": 0.43, "grad_norm": 3.6566693782806396, "learning_rate": 0.0002, "loss": 1.5793, "step": 106710 }, { "epoch": 0.43, "grad_norm": 3.9929232597351074, "learning_rate": 0.0002, "loss": 1.5654, "step": 106720 }, { "epoch": 0.43, "grad_norm": 2.633882761001587, "learning_rate": 0.0002, "loss": 1.4667, "step": 106730 }, { "epoch": 0.43, "grad_norm": 2.688128709793091, "learning_rate": 0.0002, "loss": 1.5235, "step": 106740 }, { "epoch": 0.43, "grad_norm": 2.469353199005127, "learning_rate": 0.0002, "loss": 1.6666, "step": 106750 }, { "epoch": 0.43, "grad_norm": 2.181870698928833, "learning_rate": 0.0002, "loss": 1.5555, "step": 106760 }, { "epoch": 0.43, "grad_norm": 2.8462090492248535, "learning_rate": 0.0002, "loss": 1.6369, "step": 106770 }, { "epoch": 0.43, "grad_norm": 3.3457322120666504, "learning_rate": 0.0002, "loss": 1.44, "step": 106780 }, { "epoch": 0.43, "grad_norm": 3.4946365356445312, "learning_rate": 0.0002, "loss": 1.6144, "step": 106790 }, { "epoch": 0.43, "grad_norm": 1.9508473873138428, "learning_rate": 0.0002, "loss": 1.5043, "step": 106800 }, { "epoch": 0.43, "grad_norm": 4.707062244415283, "learning_rate": 0.0002, "loss": 1.5902, "step": 106810 }, { "epoch": 0.43, "grad_norm": 3.341505765914917, "learning_rate": 0.0002, "loss": 1.4336, "step": 106820 }, { "epoch": 0.43, "grad_norm": 2.6688311100006104, "learning_rate": 0.0002, "loss": 1.4908, "step": 106830 }, { "epoch": 0.43, "grad_norm": 2.742415189743042, "learning_rate": 0.0002, "loss": 1.5558, "step": 106840 }, { "epoch": 0.43, "grad_norm": 2.317958354949951, "learning_rate": 0.0002, "loss": 1.41, "step": 106850 }, { "epoch": 0.44, "grad_norm": 5.535280704498291, "learning_rate": 0.0002, "loss": 1.549, "step": 106860 }, { "epoch": 0.44, "grad_norm": 3.4279356002807617, "learning_rate": 0.0002, "loss": 1.5299, "step": 106870 }, { "epoch": 0.44, "grad_norm": 5.427247047424316, "learning_rate": 0.0002, "loss": 1.4307, "step": 106880 }, { "epoch": 0.44, "grad_norm": 1.912236213684082, "learning_rate": 0.0002, "loss": 1.3796, "step": 106890 }, { "epoch": 0.44, "grad_norm": 2.25373911857605, "learning_rate": 0.0002, "loss": 1.6391, "step": 106900 }, { "epoch": 0.44, "grad_norm": 3.151716470718384, "learning_rate": 0.0002, "loss": 1.6959, "step": 106910 }, { "epoch": 0.44, "grad_norm": 4.29671573638916, "learning_rate": 0.0002, "loss": 1.4174, "step": 106920 }, { "epoch": 0.44, "grad_norm": 2.7827889919281006, "learning_rate": 0.0002, "loss": 1.6652, "step": 106930 }, { "epoch": 0.44, "grad_norm": 3.6060235500335693, "learning_rate": 0.0002, "loss": 1.4967, "step": 106940 }, { "epoch": 0.44, "grad_norm": 3.35183048248291, "learning_rate": 0.0002, "loss": 1.6491, "step": 106950 }, { "epoch": 0.44, "grad_norm": 2.6065657138824463, "learning_rate": 0.0002, "loss": 1.4686, "step": 106960 }, { "epoch": 0.44, "grad_norm": 4.935576438903809, "learning_rate": 0.0002, "loss": 1.4972, "step": 106970 }, { "epoch": 0.44, "grad_norm": 2.8218753337860107, "learning_rate": 0.0002, "loss": 1.5271, "step": 106980 }, { "epoch": 0.44, "grad_norm": 4.073293209075928, "learning_rate": 0.0002, "loss": 1.3746, "step": 106990 }, { "epoch": 0.44, "grad_norm": 1.6850258111953735, "learning_rate": 0.0002, "loss": 1.7449, "step": 107000 }, { "epoch": 0.44, "grad_norm": 3.47424578666687, "learning_rate": 0.0002, "loss": 1.6142, "step": 107010 }, { "epoch": 0.44, "grad_norm": 2.9950528144836426, "learning_rate": 0.0002, "loss": 1.6643, "step": 107020 }, { "epoch": 0.44, "grad_norm": 6.601385593414307, "learning_rate": 0.0002, "loss": 1.617, "step": 107030 }, { "epoch": 0.44, "grad_norm": 2.2615203857421875, "learning_rate": 0.0002, "loss": 1.6921, "step": 107040 }, { "epoch": 0.44, "grad_norm": 1.7440578937530518, "learning_rate": 0.0002, "loss": 1.5383, "step": 107050 }, { "epoch": 0.44, "grad_norm": 2.517155170440674, "learning_rate": 0.0002, "loss": 1.4723, "step": 107060 }, { "epoch": 0.44, "grad_norm": 4.603586196899414, "learning_rate": 0.0002, "loss": 1.5232, "step": 107070 }, { "epoch": 0.44, "grad_norm": 3.194138526916504, "learning_rate": 0.0002, "loss": 1.5914, "step": 107080 }, { "epoch": 0.44, "grad_norm": 4.277865886688232, "learning_rate": 0.0002, "loss": 1.5628, "step": 107090 }, { "epoch": 0.44, "grad_norm": 3.1688039302825928, "learning_rate": 0.0002, "loss": 1.3447, "step": 107100 }, { "epoch": 0.44, "grad_norm": 3.9446349143981934, "learning_rate": 0.0002, "loss": 1.3759, "step": 107110 }, { "epoch": 0.44, "grad_norm": 2.717107057571411, "learning_rate": 0.0002, "loss": 1.463, "step": 107120 }, { "epoch": 0.44, "grad_norm": 2.8041019439697266, "learning_rate": 0.0002, "loss": 1.7074, "step": 107130 }, { "epoch": 0.44, "grad_norm": 4.074114799499512, "learning_rate": 0.0002, "loss": 1.3085, "step": 107140 }, { "epoch": 0.44, "grad_norm": 2.541529655456543, "learning_rate": 0.0002, "loss": 1.6073, "step": 107150 }, { "epoch": 0.44, "grad_norm": 1.9169859886169434, "learning_rate": 0.0002, "loss": 1.4252, "step": 107160 }, { "epoch": 0.44, "grad_norm": 2.8746042251586914, "learning_rate": 0.0002, "loss": 1.7293, "step": 107170 }, { "epoch": 0.44, "grad_norm": 2.87565279006958, "learning_rate": 0.0002, "loss": 1.8935, "step": 107180 }, { "epoch": 0.44, "grad_norm": 2.0191664695739746, "learning_rate": 0.0002, "loss": 1.7948, "step": 107190 }, { "epoch": 0.44, "grad_norm": 2.2200655937194824, "learning_rate": 0.0002, "loss": 1.5569, "step": 107200 }, { "epoch": 0.44, "grad_norm": 4.449750900268555, "learning_rate": 0.0002, "loss": 1.6022, "step": 107210 }, { "epoch": 0.44, "grad_norm": 1.8761705160140991, "learning_rate": 0.0002, "loss": 1.3193, "step": 107220 }, { "epoch": 0.44, "grad_norm": 2.458420991897583, "learning_rate": 0.0002, "loss": 1.5972, "step": 107230 }, { "epoch": 0.44, "grad_norm": 3.08198881149292, "learning_rate": 0.0002, "loss": 1.5059, "step": 107240 }, { "epoch": 0.44, "grad_norm": 2.0423622131347656, "learning_rate": 0.0002, "loss": 1.4294, "step": 107250 }, { "epoch": 0.44, "grad_norm": 2.0711193084716797, "learning_rate": 0.0002, "loss": 1.5228, "step": 107260 }, { "epoch": 0.44, "grad_norm": 3.112903118133545, "learning_rate": 0.0002, "loss": 1.7275, "step": 107270 }, { "epoch": 0.44, "grad_norm": 3.3183109760284424, "learning_rate": 0.0002, "loss": 1.4531, "step": 107280 }, { "epoch": 0.44, "grad_norm": 2.570143938064575, "learning_rate": 0.0002, "loss": 1.6427, "step": 107290 }, { "epoch": 0.44, "grad_norm": 3.1393072605133057, "learning_rate": 0.0002, "loss": 1.5112, "step": 107300 }, { "epoch": 0.44, "grad_norm": 2.4247312545776367, "learning_rate": 0.0002, "loss": 1.4867, "step": 107310 }, { "epoch": 0.44, "grad_norm": 3.878542184829712, "learning_rate": 0.0002, "loss": 1.3987, "step": 107320 }, { "epoch": 0.44, "grad_norm": 4.720030784606934, "learning_rate": 0.0002, "loss": 1.6858, "step": 107330 }, { "epoch": 0.44, "grad_norm": 1.6369132995605469, "learning_rate": 0.0002, "loss": 1.5792, "step": 107340 }, { "epoch": 0.44, "grad_norm": 1.883718729019165, "learning_rate": 0.0002, "loss": 1.7236, "step": 107350 }, { "epoch": 0.44, "grad_norm": 2.1853036880493164, "learning_rate": 0.0002, "loss": 1.587, "step": 107360 }, { "epoch": 0.44, "grad_norm": 2.5772385597229004, "learning_rate": 0.0002, "loss": 1.4939, "step": 107370 }, { "epoch": 0.44, "grad_norm": 2.2786619663238525, "learning_rate": 0.0002, "loss": 1.4086, "step": 107380 }, { "epoch": 0.44, "grad_norm": 3.416743040084839, "learning_rate": 0.0002, "loss": 1.2647, "step": 107390 }, { "epoch": 0.44, "grad_norm": 2.14786696434021, "learning_rate": 0.0002, "loss": 1.6466, "step": 107400 }, { "epoch": 0.44, "grad_norm": 4.78232479095459, "learning_rate": 0.0002, "loss": 1.7083, "step": 107410 }, { "epoch": 0.44, "grad_norm": 2.5042037963867188, "learning_rate": 0.0002, "loss": 1.6843, "step": 107420 }, { "epoch": 0.44, "grad_norm": 3.4697649478912354, "learning_rate": 0.0002, "loss": 1.6758, "step": 107430 }, { "epoch": 0.44, "grad_norm": 3.2591888904571533, "learning_rate": 0.0002, "loss": 1.7101, "step": 107440 }, { "epoch": 0.44, "grad_norm": 2.1322731971740723, "learning_rate": 0.0002, "loss": 1.6759, "step": 107450 }, { "epoch": 0.44, "grad_norm": 3.833923101425171, "learning_rate": 0.0002, "loss": 1.2716, "step": 107460 }, { "epoch": 0.44, "grad_norm": 1.8611795902252197, "learning_rate": 0.0002, "loss": 1.3932, "step": 107470 }, { "epoch": 0.44, "grad_norm": 2.944896936416626, "learning_rate": 0.0002, "loss": 1.5781, "step": 107480 }, { "epoch": 0.44, "grad_norm": 4.451555252075195, "learning_rate": 0.0002, "loss": 1.5136, "step": 107490 }, { "epoch": 0.44, "grad_norm": 3.0958304405212402, "learning_rate": 0.0002, "loss": 1.6067, "step": 107500 }, { "epoch": 0.44, "grad_norm": 3.0656251907348633, "learning_rate": 0.0002, "loss": 1.4532, "step": 107510 }, { "epoch": 0.44, "grad_norm": 4.898759365081787, "learning_rate": 0.0002, "loss": 1.48, "step": 107520 }, { "epoch": 0.44, "grad_norm": 6.90534782409668, "learning_rate": 0.0002, "loss": 1.7243, "step": 107530 }, { "epoch": 0.44, "grad_norm": 6.746105670928955, "learning_rate": 0.0002, "loss": 1.7029, "step": 107540 }, { "epoch": 0.44, "grad_norm": 2.7105093002319336, "learning_rate": 0.0002, "loss": 1.4672, "step": 107550 }, { "epoch": 0.44, "grad_norm": 2.2528445720672607, "learning_rate": 0.0002, "loss": 1.8, "step": 107560 }, { "epoch": 0.44, "grad_norm": 2.8030247688293457, "learning_rate": 0.0002, "loss": 1.5454, "step": 107570 }, { "epoch": 0.44, "grad_norm": 3.377614736557007, "learning_rate": 0.0002, "loss": 1.3816, "step": 107580 }, { "epoch": 0.44, "grad_norm": 3.130657911300659, "learning_rate": 0.0002, "loss": 1.7935, "step": 107590 }, { "epoch": 0.44, "grad_norm": 4.544848918914795, "learning_rate": 0.0002, "loss": 1.5833, "step": 107600 }, { "epoch": 0.44, "grad_norm": 3.445732593536377, "learning_rate": 0.0002, "loss": 1.5675, "step": 107610 }, { "epoch": 0.44, "grad_norm": 2.4076051712036133, "learning_rate": 0.0002, "loss": 1.4701, "step": 107620 }, { "epoch": 0.44, "grad_norm": 2.6366353034973145, "learning_rate": 0.0002, "loss": 1.6304, "step": 107630 }, { "epoch": 0.44, "grad_norm": 2.1451730728149414, "learning_rate": 0.0002, "loss": 1.4748, "step": 107640 }, { "epoch": 0.44, "grad_norm": 3.0542984008789062, "learning_rate": 0.0002, "loss": 1.6734, "step": 107650 }, { "epoch": 0.44, "grad_norm": 3.455345630645752, "learning_rate": 0.0002, "loss": 1.6351, "step": 107660 }, { "epoch": 0.44, "grad_norm": 1.8628442287445068, "learning_rate": 0.0002, "loss": 1.3827, "step": 107670 }, { "epoch": 0.44, "grad_norm": 2.930729389190674, "learning_rate": 0.0002, "loss": 1.4958, "step": 107680 }, { "epoch": 0.44, "grad_norm": 3.6304023265838623, "learning_rate": 0.0002, "loss": 1.6403, "step": 107690 }, { "epoch": 0.44, "grad_norm": 3.309522867202759, "learning_rate": 0.0002, "loss": 1.5649, "step": 107700 }, { "epoch": 0.44, "grad_norm": 3.9495842456817627, "learning_rate": 0.0002, "loss": 1.619, "step": 107710 }, { "epoch": 0.44, "grad_norm": 3.2836246490478516, "learning_rate": 0.0002, "loss": 1.5525, "step": 107720 }, { "epoch": 0.44, "grad_norm": 2.2642173767089844, "learning_rate": 0.0002, "loss": 1.3188, "step": 107730 }, { "epoch": 0.44, "grad_norm": 2.9021072387695312, "learning_rate": 0.0002, "loss": 1.6296, "step": 107740 }, { "epoch": 0.44, "grad_norm": 2.4932963848114014, "learning_rate": 0.0002, "loss": 1.8356, "step": 107750 }, { "epoch": 0.44, "grad_norm": 2.9012038707733154, "learning_rate": 0.0002, "loss": 1.5414, "step": 107760 }, { "epoch": 0.44, "grad_norm": 2.8066976070404053, "learning_rate": 0.0002, "loss": 1.5826, "step": 107770 }, { "epoch": 0.44, "grad_norm": 3.0305845737457275, "learning_rate": 0.0002, "loss": 1.6089, "step": 107780 }, { "epoch": 0.44, "grad_norm": 3.219449520111084, "learning_rate": 0.0002, "loss": 1.7304, "step": 107790 }, { "epoch": 0.44, "grad_norm": 3.7960362434387207, "learning_rate": 0.0002, "loss": 1.3947, "step": 107800 }, { "epoch": 0.44, "grad_norm": 5.562304973602295, "learning_rate": 0.0002, "loss": 1.6763, "step": 107810 }, { "epoch": 0.44, "grad_norm": 3.3740646839141846, "learning_rate": 0.0002, "loss": 1.6725, "step": 107820 }, { "epoch": 0.44, "grad_norm": 3.8564934730529785, "learning_rate": 0.0002, "loss": 1.719, "step": 107830 }, { "epoch": 0.44, "grad_norm": 2.2909796237945557, "learning_rate": 0.0002, "loss": 1.4556, "step": 107840 }, { "epoch": 0.44, "grad_norm": 4.180943965911865, "learning_rate": 0.0002, "loss": 1.6036, "step": 107850 }, { "epoch": 0.44, "grad_norm": 4.199794769287109, "learning_rate": 0.0002, "loss": 1.7306, "step": 107860 }, { "epoch": 0.44, "grad_norm": 1.589724063873291, "learning_rate": 0.0002, "loss": 1.5654, "step": 107870 }, { "epoch": 0.44, "grad_norm": 2.0093326568603516, "learning_rate": 0.0002, "loss": 1.45, "step": 107880 }, { "epoch": 0.44, "grad_norm": 2.955047845840454, "learning_rate": 0.0002, "loss": 1.5489, "step": 107890 }, { "epoch": 0.44, "grad_norm": 4.205031394958496, "learning_rate": 0.0002, "loss": 1.4167, "step": 107900 }, { "epoch": 0.44, "grad_norm": 3.7892186641693115, "learning_rate": 0.0002, "loss": 1.4991, "step": 107910 }, { "epoch": 0.44, "grad_norm": 5.979318618774414, "learning_rate": 0.0002, "loss": 1.9149, "step": 107920 }, { "epoch": 0.44, "grad_norm": 2.4750897884368896, "learning_rate": 0.0002, "loss": 1.6121, "step": 107930 }, { "epoch": 0.44, "grad_norm": 3.2640984058380127, "learning_rate": 0.0002, "loss": 1.6506, "step": 107940 }, { "epoch": 0.44, "grad_norm": 4.43555212020874, "learning_rate": 0.0002, "loss": 1.6254, "step": 107950 }, { "epoch": 0.44, "grad_norm": 2.5753333568573, "learning_rate": 0.0002, "loss": 1.4089, "step": 107960 }, { "epoch": 0.44, "grad_norm": 2.617393732070923, "learning_rate": 0.0002, "loss": 1.6706, "step": 107970 }, { "epoch": 0.44, "grad_norm": 3.1869900226593018, "learning_rate": 0.0002, "loss": 1.6769, "step": 107980 }, { "epoch": 0.44, "grad_norm": 1.8251832723617554, "learning_rate": 0.0002, "loss": 1.7329, "step": 107990 }, { "epoch": 0.44, "grad_norm": 2.6378893852233887, "learning_rate": 0.0002, "loss": 1.506, "step": 108000 }, { "epoch": 0.44, "grad_norm": 3.6106207370758057, "learning_rate": 0.0002, "loss": 1.5895, "step": 108010 }, { "epoch": 0.44, "grad_norm": 2.354031562805176, "learning_rate": 0.0002, "loss": 1.5622, "step": 108020 }, { "epoch": 0.44, "grad_norm": 2.0765955448150635, "learning_rate": 0.0002, "loss": 1.3986, "step": 108030 }, { "epoch": 0.44, "grad_norm": 2.511802911758423, "learning_rate": 0.0002, "loss": 1.2846, "step": 108040 }, { "epoch": 0.44, "grad_norm": 3.2609364986419678, "learning_rate": 0.0002, "loss": 1.2365, "step": 108050 }, { "epoch": 0.44, "grad_norm": 2.8038833141326904, "learning_rate": 0.0002, "loss": 1.5617, "step": 108060 }, { "epoch": 0.44, "grad_norm": 3.9749927520751953, "learning_rate": 0.0002, "loss": 1.7268, "step": 108070 }, { "epoch": 0.44, "grad_norm": 4.29231071472168, "learning_rate": 0.0002, "loss": 1.4247, "step": 108080 }, { "epoch": 0.44, "grad_norm": 3.4637250900268555, "learning_rate": 0.0002, "loss": 1.7933, "step": 108090 }, { "epoch": 0.44, "grad_norm": 2.7306671142578125, "learning_rate": 0.0002, "loss": 1.6005, "step": 108100 }, { "epoch": 0.44, "grad_norm": 2.9023661613464355, "learning_rate": 0.0002, "loss": 1.5987, "step": 108110 }, { "epoch": 0.44, "grad_norm": 3.824124336242676, "learning_rate": 0.0002, "loss": 1.5441, "step": 108120 }, { "epoch": 0.44, "grad_norm": 4.171998023986816, "learning_rate": 0.0002, "loss": 1.4047, "step": 108130 }, { "epoch": 0.44, "grad_norm": 2.544400930404663, "learning_rate": 0.0002, "loss": 1.758, "step": 108140 }, { "epoch": 0.44, "grad_norm": 2.9482717514038086, "learning_rate": 0.0002, "loss": 1.5966, "step": 108150 }, { "epoch": 0.44, "grad_norm": 2.766340494155884, "learning_rate": 0.0002, "loss": 1.6535, "step": 108160 }, { "epoch": 0.44, "grad_norm": 3.045879602432251, "learning_rate": 0.0002, "loss": 1.7371, "step": 108170 }, { "epoch": 0.44, "grad_norm": 2.9030938148498535, "learning_rate": 0.0002, "loss": 1.4969, "step": 108180 }, { "epoch": 0.44, "grad_norm": 2.199479103088379, "learning_rate": 0.0002, "loss": 1.6474, "step": 108190 }, { "epoch": 0.44, "grad_norm": 3.212268829345703, "learning_rate": 0.0002, "loss": 1.5983, "step": 108200 }, { "epoch": 0.44, "grad_norm": 2.956993818283081, "learning_rate": 0.0002, "loss": 1.4756, "step": 108210 }, { "epoch": 0.44, "grad_norm": 3.796182155609131, "learning_rate": 0.0002, "loss": 1.6734, "step": 108220 }, { "epoch": 0.44, "grad_norm": 2.8372273445129395, "learning_rate": 0.0002, "loss": 1.6134, "step": 108230 }, { "epoch": 0.44, "grad_norm": 4.6494245529174805, "learning_rate": 0.0002, "loss": 1.6596, "step": 108240 }, { "epoch": 0.44, "grad_norm": 2.4244210720062256, "learning_rate": 0.0002, "loss": 1.5171, "step": 108250 }, { "epoch": 0.44, "grad_norm": 4.5690789222717285, "learning_rate": 0.0002, "loss": 1.5301, "step": 108260 }, { "epoch": 0.44, "grad_norm": 2.21164608001709, "learning_rate": 0.0002, "loss": 1.6871, "step": 108270 }, { "epoch": 0.44, "grad_norm": 3.482372283935547, "learning_rate": 0.0002, "loss": 1.438, "step": 108280 }, { "epoch": 0.44, "grad_norm": 3.5041511058807373, "learning_rate": 0.0002, "loss": 1.6821, "step": 108290 }, { "epoch": 0.44, "grad_norm": 2.997004985809326, "learning_rate": 0.0002, "loss": 1.547, "step": 108300 }, { "epoch": 0.44, "grad_norm": 3.633176565170288, "learning_rate": 0.0002, "loss": 1.3523, "step": 108310 }, { "epoch": 0.44, "grad_norm": 2.627155303955078, "learning_rate": 0.0002, "loss": 1.3483, "step": 108320 }, { "epoch": 0.44, "grad_norm": 3.30014967918396, "learning_rate": 0.0002, "loss": 1.5518, "step": 108330 }, { "epoch": 0.44, "grad_norm": 2.447524070739746, "learning_rate": 0.0002, "loss": 1.6612, "step": 108340 }, { "epoch": 0.44, "grad_norm": 2.3918962478637695, "learning_rate": 0.0002, "loss": 1.6099, "step": 108350 }, { "epoch": 0.44, "grad_norm": 2.1772711277008057, "learning_rate": 0.0002, "loss": 1.6504, "step": 108360 }, { "epoch": 0.44, "grad_norm": 3.2128849029541016, "learning_rate": 0.0002, "loss": 1.7466, "step": 108370 }, { "epoch": 0.44, "grad_norm": 3.92172908782959, "learning_rate": 0.0002, "loss": 1.8976, "step": 108380 }, { "epoch": 0.44, "grad_norm": 2.7312145233154297, "learning_rate": 0.0002, "loss": 1.5891, "step": 108390 }, { "epoch": 0.44, "grad_norm": 3.017564296722412, "learning_rate": 0.0002, "loss": 1.5405, "step": 108400 }, { "epoch": 0.44, "grad_norm": 3.168259859085083, "learning_rate": 0.0002, "loss": 1.4518, "step": 108410 }, { "epoch": 0.44, "grad_norm": 8.000596046447754, "learning_rate": 0.0002, "loss": 1.4192, "step": 108420 }, { "epoch": 0.44, "grad_norm": 2.2452077865600586, "learning_rate": 0.0002, "loss": 1.4654, "step": 108430 }, { "epoch": 0.44, "grad_norm": 3.2301082611083984, "learning_rate": 0.0002, "loss": 1.6578, "step": 108440 }, { "epoch": 0.44, "grad_norm": 2.221123456954956, "learning_rate": 0.0002, "loss": 1.5851, "step": 108450 }, { "epoch": 0.44, "grad_norm": 2.9324874877929688, "learning_rate": 0.0002, "loss": 1.5553, "step": 108460 }, { "epoch": 0.44, "grad_norm": 3.0449137687683105, "learning_rate": 0.0002, "loss": 1.7852, "step": 108470 }, { "epoch": 0.44, "grad_norm": 2.849947214126587, "learning_rate": 0.0002, "loss": 1.638, "step": 108480 }, { "epoch": 0.44, "grad_norm": 2.2478699684143066, "learning_rate": 0.0002, "loss": 1.6533, "step": 108490 }, { "epoch": 0.44, "grad_norm": 3.3989741802215576, "learning_rate": 0.0002, "loss": 1.4053, "step": 108500 }, { "epoch": 0.44, "grad_norm": 2.1240339279174805, "learning_rate": 0.0002, "loss": 1.5653, "step": 108510 }, { "epoch": 0.44, "grad_norm": 3.100820779800415, "learning_rate": 0.0002, "loss": 1.6903, "step": 108520 }, { "epoch": 0.44, "grad_norm": 1.9986138343811035, "learning_rate": 0.0002, "loss": 1.7174, "step": 108530 }, { "epoch": 0.44, "grad_norm": 1.979573130607605, "learning_rate": 0.0002, "loss": 1.6312, "step": 108540 }, { "epoch": 0.44, "grad_norm": 2.8055248260498047, "learning_rate": 0.0002, "loss": 1.5106, "step": 108550 }, { "epoch": 0.44, "grad_norm": 2.691779136657715, "learning_rate": 0.0002, "loss": 1.4833, "step": 108560 }, { "epoch": 0.44, "grad_norm": 2.794886589050293, "learning_rate": 0.0002, "loss": 1.698, "step": 108570 }, { "epoch": 0.44, "grad_norm": 2.238616943359375, "learning_rate": 0.0002, "loss": 1.8236, "step": 108580 }, { "epoch": 0.44, "grad_norm": 1.3373734951019287, "learning_rate": 0.0002, "loss": 1.7239, "step": 108590 }, { "epoch": 0.44, "grad_norm": 3.010352611541748, "learning_rate": 0.0002, "loss": 1.6443, "step": 108600 }, { "epoch": 0.44, "grad_norm": 3.3955156803131104, "learning_rate": 0.0002, "loss": 1.6153, "step": 108610 }, { "epoch": 0.44, "grad_norm": 2.265069007873535, "learning_rate": 0.0002, "loss": 1.6274, "step": 108620 }, { "epoch": 0.44, "grad_norm": 2.304900884628296, "learning_rate": 0.0002, "loss": 1.4389, "step": 108630 }, { "epoch": 0.44, "grad_norm": 1.513537883758545, "learning_rate": 0.0002, "loss": 1.2357, "step": 108640 }, { "epoch": 0.44, "grad_norm": 3.1315648555755615, "learning_rate": 0.0002, "loss": 1.4654, "step": 108650 }, { "epoch": 0.44, "grad_norm": 3.668898344039917, "learning_rate": 0.0002, "loss": 1.6344, "step": 108660 }, { "epoch": 0.44, "grad_norm": 3.0674209594726562, "learning_rate": 0.0002, "loss": 1.6043, "step": 108670 }, { "epoch": 0.44, "grad_norm": 3.82859468460083, "learning_rate": 0.0002, "loss": 1.5435, "step": 108680 }, { "epoch": 0.44, "grad_norm": 2.698381185531616, "learning_rate": 0.0002, "loss": 1.4775, "step": 108690 }, { "epoch": 0.44, "grad_norm": 2.8840177059173584, "learning_rate": 0.0002, "loss": 1.6056, "step": 108700 }, { "epoch": 0.44, "grad_norm": 7.398564338684082, "learning_rate": 0.0002, "loss": 1.5996, "step": 108710 }, { "epoch": 0.44, "grad_norm": 2.586855888366699, "learning_rate": 0.0002, "loss": 1.6975, "step": 108720 }, { "epoch": 0.44, "grad_norm": 2.576366662979126, "learning_rate": 0.0002, "loss": 1.7886, "step": 108730 }, { "epoch": 0.44, "grad_norm": 2.299304723739624, "learning_rate": 0.0002, "loss": 1.6027, "step": 108740 }, { "epoch": 0.44, "grad_norm": 3.493884563446045, "learning_rate": 0.0002, "loss": 1.8308, "step": 108750 }, { "epoch": 0.44, "grad_norm": 2.5164635181427, "learning_rate": 0.0002, "loss": 1.2847, "step": 108760 }, { "epoch": 0.44, "grad_norm": 3.0057311058044434, "learning_rate": 0.0002, "loss": 1.4131, "step": 108770 }, { "epoch": 0.44, "grad_norm": 1.87458074092865, "learning_rate": 0.0002, "loss": 1.5687, "step": 108780 }, { "epoch": 0.44, "grad_norm": 8.927760124206543, "learning_rate": 0.0002, "loss": 1.6265, "step": 108790 }, { "epoch": 0.44, "grad_norm": 4.101227760314941, "learning_rate": 0.0002, "loss": 1.6886, "step": 108800 }, { "epoch": 0.44, "grad_norm": 5.249675750732422, "learning_rate": 0.0002, "loss": 1.3251, "step": 108810 }, { "epoch": 0.44, "grad_norm": 3.9896838665008545, "learning_rate": 0.0002, "loss": 1.5052, "step": 108820 }, { "epoch": 0.44, "grad_norm": 4.857518672943115, "learning_rate": 0.0002, "loss": 1.5218, "step": 108830 }, { "epoch": 0.44, "grad_norm": 2.43546462059021, "learning_rate": 0.0002, "loss": 1.5251, "step": 108840 }, { "epoch": 0.44, "grad_norm": 4.147271156311035, "learning_rate": 0.0002, "loss": 1.6608, "step": 108850 }, { "epoch": 0.44, "grad_norm": 3.116053581237793, "learning_rate": 0.0002, "loss": 1.499, "step": 108860 }, { "epoch": 0.44, "grad_norm": 2.1270241737365723, "learning_rate": 0.0002, "loss": 1.7333, "step": 108870 }, { "epoch": 0.44, "grad_norm": 3.3722376823425293, "learning_rate": 0.0002, "loss": 1.784, "step": 108880 }, { "epoch": 0.44, "grad_norm": 3.153653621673584, "learning_rate": 0.0002, "loss": 1.6591, "step": 108890 }, { "epoch": 0.44, "grad_norm": 2.7753307819366455, "learning_rate": 0.0002, "loss": 1.6562, "step": 108900 }, { "epoch": 0.44, "grad_norm": 3.1047346591949463, "learning_rate": 0.0002, "loss": 1.6359, "step": 108910 }, { "epoch": 0.44, "grad_norm": 3.4983036518096924, "learning_rate": 0.0002, "loss": 1.8537, "step": 108920 }, { "epoch": 0.44, "grad_norm": 1.3173277378082275, "learning_rate": 0.0002, "loss": 1.627, "step": 108930 }, { "epoch": 0.44, "grad_norm": 2.85019588470459, "learning_rate": 0.0002, "loss": 1.514, "step": 108940 }, { "epoch": 0.44, "grad_norm": 2.425948143005371, "learning_rate": 0.0002, "loss": 1.583, "step": 108950 }, { "epoch": 0.44, "grad_norm": 2.8180770874023438, "learning_rate": 0.0002, "loss": 1.8005, "step": 108960 }, { "epoch": 0.44, "grad_norm": 2.6316611766815186, "learning_rate": 0.0002, "loss": 1.4635, "step": 108970 }, { "epoch": 0.44, "grad_norm": 3.995426893234253, "learning_rate": 0.0002, "loss": 1.4142, "step": 108980 }, { "epoch": 0.44, "grad_norm": 3.943267583847046, "learning_rate": 0.0002, "loss": 1.6004, "step": 108990 }, { "epoch": 0.44, "grad_norm": 2.6353137493133545, "learning_rate": 0.0002, "loss": 1.5542, "step": 109000 }, { "epoch": 0.44, "grad_norm": 3.3814446926116943, "learning_rate": 0.0002, "loss": 1.7141, "step": 109010 }, { "epoch": 0.44, "grad_norm": 2.4118854999542236, "learning_rate": 0.0002, "loss": 1.5103, "step": 109020 }, { "epoch": 0.44, "grad_norm": 5.133961200714111, "learning_rate": 0.0002, "loss": 1.8735, "step": 109030 }, { "epoch": 0.44, "grad_norm": 2.621243715286255, "learning_rate": 0.0002, "loss": 1.6804, "step": 109040 }, { "epoch": 0.44, "grad_norm": 2.252762794494629, "learning_rate": 0.0002, "loss": 1.7451, "step": 109050 }, { "epoch": 0.44, "grad_norm": 5.404964923858643, "learning_rate": 0.0002, "loss": 1.5258, "step": 109060 }, { "epoch": 0.44, "grad_norm": 3.2025516033172607, "learning_rate": 0.0002, "loss": 1.682, "step": 109070 }, { "epoch": 0.44, "grad_norm": 2.404066324234009, "learning_rate": 0.0002, "loss": 1.731, "step": 109080 }, { "epoch": 0.44, "grad_norm": 3.1964213848114014, "learning_rate": 0.0002, "loss": 1.4675, "step": 109090 }, { "epoch": 0.44, "grad_norm": 3.059636354446411, "learning_rate": 0.0002, "loss": 1.7505, "step": 109100 }, { "epoch": 0.44, "grad_norm": 2.7662227153778076, "learning_rate": 0.0002, "loss": 1.6409, "step": 109110 }, { "epoch": 0.44, "grad_norm": 6.795945644378662, "learning_rate": 0.0002, "loss": 1.5426, "step": 109120 }, { "epoch": 0.44, "grad_norm": 2.960007429122925, "learning_rate": 0.0002, "loss": 1.4724, "step": 109130 }, { "epoch": 0.44, "grad_norm": 2.4962306022644043, "learning_rate": 0.0002, "loss": 1.5347, "step": 109140 }, { "epoch": 0.44, "grad_norm": 2.184758424758911, "learning_rate": 0.0002, "loss": 1.7313, "step": 109150 }, { "epoch": 0.44, "grad_norm": 2.4310660362243652, "learning_rate": 0.0002, "loss": 1.5735, "step": 109160 }, { "epoch": 0.44, "grad_norm": 3.153491735458374, "learning_rate": 0.0002, "loss": 1.5546, "step": 109170 }, { "epoch": 0.44, "grad_norm": 2.2822234630584717, "learning_rate": 0.0002, "loss": 1.635, "step": 109180 }, { "epoch": 0.44, "grad_norm": 2.323270320892334, "learning_rate": 0.0002, "loss": 1.724, "step": 109190 }, { "epoch": 0.44, "grad_norm": 3.0507960319519043, "learning_rate": 0.0002, "loss": 1.6399, "step": 109200 }, { "epoch": 0.44, "grad_norm": 2.252147674560547, "learning_rate": 0.0002, "loss": 1.552, "step": 109210 }, { "epoch": 0.44, "grad_norm": 8.609496116638184, "learning_rate": 0.0002, "loss": 1.5255, "step": 109220 }, { "epoch": 0.44, "grad_norm": 3.9526402950286865, "learning_rate": 0.0002, "loss": 1.6986, "step": 109230 }, { "epoch": 0.44, "grad_norm": 2.621835708618164, "learning_rate": 0.0002, "loss": 1.4406, "step": 109240 }, { "epoch": 0.44, "grad_norm": 2.2664613723754883, "learning_rate": 0.0002, "loss": 1.6932, "step": 109250 }, { "epoch": 0.44, "grad_norm": 2.917907238006592, "learning_rate": 0.0002, "loss": 1.4738, "step": 109260 }, { "epoch": 0.44, "grad_norm": 3.725531578063965, "learning_rate": 0.0002, "loss": 1.5741, "step": 109270 }, { "epoch": 0.44, "grad_norm": 1.6258783340454102, "learning_rate": 0.0002, "loss": 1.5328, "step": 109280 }, { "epoch": 0.44, "grad_norm": 2.984238862991333, "learning_rate": 0.0002, "loss": 1.5696, "step": 109290 }, { "epoch": 0.44, "grad_norm": 2.730564832687378, "learning_rate": 0.0002, "loss": 1.5051, "step": 109300 }, { "epoch": 0.44, "grad_norm": 4.17752742767334, "learning_rate": 0.0002, "loss": 1.8176, "step": 109310 }, { "epoch": 0.45, "grad_norm": 3.494572401046753, "learning_rate": 0.0002, "loss": 1.4884, "step": 109320 }, { "epoch": 0.45, "grad_norm": 1.4412816762924194, "learning_rate": 0.0002, "loss": 1.5293, "step": 109330 }, { "epoch": 0.45, "grad_norm": 3.4135239124298096, "learning_rate": 0.0002, "loss": 1.3123, "step": 109340 }, { "epoch": 0.45, "grad_norm": 3.0307984352111816, "learning_rate": 0.0002, "loss": 1.522, "step": 109350 }, { "epoch": 0.45, "grad_norm": 3.2273330688476562, "learning_rate": 0.0002, "loss": 1.6261, "step": 109360 }, { "epoch": 0.45, "grad_norm": 3.9818036556243896, "learning_rate": 0.0002, "loss": 1.7459, "step": 109370 }, { "epoch": 0.45, "grad_norm": 4.814120292663574, "learning_rate": 0.0002, "loss": 1.7381, "step": 109380 }, { "epoch": 0.45, "grad_norm": 3.3351070880889893, "learning_rate": 0.0002, "loss": 1.8787, "step": 109390 }, { "epoch": 0.45, "grad_norm": 3.778369426727295, "learning_rate": 0.0002, "loss": 1.7761, "step": 109400 }, { "epoch": 0.45, "grad_norm": 2.7699508666992188, "learning_rate": 0.0002, "loss": 1.8545, "step": 109410 }, { "epoch": 0.45, "grad_norm": 4.240338325500488, "learning_rate": 0.0002, "loss": 1.6788, "step": 109420 }, { "epoch": 0.45, "grad_norm": 2.9904699325561523, "learning_rate": 0.0002, "loss": 1.5768, "step": 109430 }, { "epoch": 0.45, "grad_norm": 2.8231582641601562, "learning_rate": 0.0002, "loss": 1.6806, "step": 109440 }, { "epoch": 0.45, "grad_norm": 2.998854398727417, "learning_rate": 0.0002, "loss": 1.3002, "step": 109450 }, { "epoch": 0.45, "grad_norm": 4.0648393630981445, "learning_rate": 0.0002, "loss": 1.5532, "step": 109460 }, { "epoch": 0.45, "grad_norm": 2.8509361743927, "learning_rate": 0.0002, "loss": 1.7937, "step": 109470 }, { "epoch": 0.45, "grad_norm": 1.7519947290420532, "learning_rate": 0.0002, "loss": 1.6064, "step": 109480 }, { "epoch": 0.45, "grad_norm": 3.5015666484832764, "learning_rate": 0.0002, "loss": 1.4824, "step": 109490 }, { "epoch": 0.45, "grad_norm": 2.5262956619262695, "learning_rate": 0.0002, "loss": 1.8707, "step": 109500 }, { "epoch": 0.45, "grad_norm": 2.973142147064209, "learning_rate": 0.0002, "loss": 1.8372, "step": 109510 }, { "epoch": 0.45, "grad_norm": 2.8372015953063965, "learning_rate": 0.0002, "loss": 1.6551, "step": 109520 }, { "epoch": 0.45, "grad_norm": 4.930442810058594, "learning_rate": 0.0002, "loss": 1.6351, "step": 109530 }, { "epoch": 0.45, "grad_norm": 2.202122211456299, "learning_rate": 0.0002, "loss": 1.7349, "step": 109540 }, { "epoch": 0.45, "grad_norm": 2.9074108600616455, "learning_rate": 0.0002, "loss": 1.7099, "step": 109550 }, { "epoch": 0.45, "grad_norm": 2.9364993572235107, "learning_rate": 0.0002, "loss": 1.6179, "step": 109560 }, { "epoch": 0.45, "grad_norm": 2.2397730350494385, "learning_rate": 0.0002, "loss": 1.7693, "step": 109570 }, { "epoch": 0.45, "grad_norm": 2.433051109313965, "learning_rate": 0.0002, "loss": 1.6142, "step": 109580 }, { "epoch": 0.45, "grad_norm": 1.2339041233062744, "learning_rate": 0.0002, "loss": 1.3442, "step": 109590 }, { "epoch": 0.45, "grad_norm": 4.195013999938965, "learning_rate": 0.0002, "loss": 1.6379, "step": 109600 }, { "epoch": 0.45, "grad_norm": 3.2941370010375977, "learning_rate": 0.0002, "loss": 1.5671, "step": 109610 }, { "epoch": 0.45, "grad_norm": 3.1132795810699463, "learning_rate": 0.0002, "loss": 1.2959, "step": 109620 }, { "epoch": 0.45, "grad_norm": 2.0482513904571533, "learning_rate": 0.0002, "loss": 1.5562, "step": 109630 }, { "epoch": 0.45, "grad_norm": 5.022053241729736, "learning_rate": 0.0002, "loss": 1.5786, "step": 109640 }, { "epoch": 0.45, "grad_norm": 3.3081259727478027, "learning_rate": 0.0002, "loss": 1.3235, "step": 109650 }, { "epoch": 0.45, "grad_norm": 2.2452380657196045, "learning_rate": 0.0002, "loss": 1.371, "step": 109660 }, { "epoch": 0.45, "grad_norm": 5.675369739532471, "learning_rate": 0.0002, "loss": 1.5876, "step": 109670 }, { "epoch": 0.45, "grad_norm": 2.4293386936187744, "learning_rate": 0.0002, "loss": 1.7078, "step": 109680 }, { "epoch": 0.45, "grad_norm": 2.221627950668335, "learning_rate": 0.0002, "loss": 1.4513, "step": 109690 }, { "epoch": 0.45, "grad_norm": 3.0141870975494385, "learning_rate": 0.0002, "loss": 1.613, "step": 109700 }, { "epoch": 0.45, "grad_norm": 4.096319675445557, "learning_rate": 0.0002, "loss": 1.5677, "step": 109710 }, { "epoch": 0.45, "grad_norm": 3.7102930545806885, "learning_rate": 0.0002, "loss": 1.6047, "step": 109720 }, { "epoch": 0.45, "grad_norm": 3.7947115898132324, "learning_rate": 0.0002, "loss": 1.506, "step": 109730 }, { "epoch": 0.45, "grad_norm": 2.9602184295654297, "learning_rate": 0.0002, "loss": 1.4571, "step": 109740 }, { "epoch": 0.45, "grad_norm": 3.844313859939575, "learning_rate": 0.0002, "loss": 1.8243, "step": 109750 }, { "epoch": 0.45, "grad_norm": 3.277494430541992, "learning_rate": 0.0002, "loss": 1.5473, "step": 109760 }, { "epoch": 0.45, "grad_norm": 2.35612416267395, "learning_rate": 0.0002, "loss": 1.7014, "step": 109770 }, { "epoch": 0.45, "grad_norm": 3.003559112548828, "learning_rate": 0.0002, "loss": 1.7319, "step": 109780 }, { "epoch": 0.45, "grad_norm": 2.9838786125183105, "learning_rate": 0.0002, "loss": 1.651, "step": 109790 }, { "epoch": 0.45, "grad_norm": 3.3361053466796875, "learning_rate": 0.0002, "loss": 1.8142, "step": 109800 }, { "epoch": 0.45, "grad_norm": 3.1853036880493164, "learning_rate": 0.0002, "loss": 1.9581, "step": 109810 }, { "epoch": 0.45, "grad_norm": 2.6631574630737305, "learning_rate": 0.0002, "loss": 1.7017, "step": 109820 }, { "epoch": 0.45, "grad_norm": 3.972158432006836, "learning_rate": 0.0002, "loss": 1.6253, "step": 109830 }, { "epoch": 0.45, "grad_norm": 2.912057399749756, "learning_rate": 0.0002, "loss": 1.8, "step": 109840 }, { "epoch": 0.45, "grad_norm": 3.7065606117248535, "learning_rate": 0.0002, "loss": 1.5864, "step": 109850 }, { "epoch": 0.45, "grad_norm": 1.8252549171447754, "learning_rate": 0.0002, "loss": 1.4286, "step": 109860 }, { "epoch": 0.45, "grad_norm": 1.549896478652954, "learning_rate": 0.0002, "loss": 1.3137, "step": 109870 }, { "epoch": 0.45, "grad_norm": 2.6501805782318115, "learning_rate": 0.0002, "loss": 1.6016, "step": 109880 }, { "epoch": 0.45, "grad_norm": 4.726177215576172, "learning_rate": 0.0002, "loss": 1.7022, "step": 109890 }, { "epoch": 0.45, "grad_norm": 3.546422243118286, "learning_rate": 0.0002, "loss": 1.5035, "step": 109900 }, { "epoch": 0.45, "grad_norm": 3.0396997928619385, "learning_rate": 0.0002, "loss": 1.763, "step": 109910 }, { "epoch": 0.45, "grad_norm": 3.0139896869659424, "learning_rate": 0.0002, "loss": 1.6314, "step": 109920 }, { "epoch": 0.45, "grad_norm": 2.8925843238830566, "learning_rate": 0.0002, "loss": 1.5687, "step": 109930 }, { "epoch": 0.45, "grad_norm": 4.7516350746154785, "learning_rate": 0.0002, "loss": 1.6444, "step": 109940 }, { "epoch": 0.45, "grad_norm": 2.7067456245422363, "learning_rate": 0.0002, "loss": 1.7131, "step": 109950 }, { "epoch": 0.45, "grad_norm": 2.8836758136749268, "learning_rate": 0.0002, "loss": 1.6203, "step": 109960 }, { "epoch": 0.45, "grad_norm": 2.7770442962646484, "learning_rate": 0.0002, "loss": 1.7272, "step": 109970 }, { "epoch": 0.45, "grad_norm": 3.6736810207366943, "learning_rate": 0.0002, "loss": 1.4998, "step": 109980 }, { "epoch": 0.45, "grad_norm": 2.4425768852233887, "learning_rate": 0.0002, "loss": 1.8202, "step": 109990 }, { "epoch": 0.45, "grad_norm": 3.885917901992798, "learning_rate": 0.0002, "loss": 1.6613, "step": 110000 }, { "epoch": 0.45, "grad_norm": 4.203155994415283, "learning_rate": 0.0002, "loss": 1.9433, "step": 110010 }, { "epoch": 0.45, "grad_norm": 1.9785581827163696, "learning_rate": 0.0002, "loss": 1.6256, "step": 110020 }, { "epoch": 0.45, "grad_norm": 2.6113643646240234, "learning_rate": 0.0002, "loss": 1.7172, "step": 110030 }, { "epoch": 0.45, "grad_norm": 2.728151798248291, "learning_rate": 0.0002, "loss": 1.604, "step": 110040 }, { "epoch": 0.45, "grad_norm": 3.425313711166382, "learning_rate": 0.0002, "loss": 1.7706, "step": 110050 }, { "epoch": 0.45, "grad_norm": 3.9548604488372803, "learning_rate": 0.0002, "loss": 1.6878, "step": 110060 }, { "epoch": 0.45, "grad_norm": 2.2193641662597656, "learning_rate": 0.0002, "loss": 1.4727, "step": 110070 }, { "epoch": 0.45, "grad_norm": 1.8644710779190063, "learning_rate": 0.0002, "loss": 1.7928, "step": 110080 }, { "epoch": 0.45, "grad_norm": 2.6904420852661133, "learning_rate": 0.0002, "loss": 1.7596, "step": 110090 }, { "epoch": 0.45, "grad_norm": 2.988085985183716, "learning_rate": 0.0002, "loss": 1.5477, "step": 110100 }, { "epoch": 0.45, "grad_norm": 4.903970718383789, "learning_rate": 0.0002, "loss": 1.7261, "step": 110110 }, { "epoch": 0.45, "grad_norm": 2.9377634525299072, "learning_rate": 0.0002, "loss": 1.5557, "step": 110120 }, { "epoch": 0.45, "grad_norm": 2.988525152206421, "learning_rate": 0.0002, "loss": 1.544, "step": 110130 }, { "epoch": 0.45, "grad_norm": 2.1451404094696045, "learning_rate": 0.0002, "loss": 1.5681, "step": 110140 }, { "epoch": 0.45, "grad_norm": 2.839646100997925, "learning_rate": 0.0002, "loss": 1.4492, "step": 110150 }, { "epoch": 0.45, "grad_norm": 3.5847623348236084, "learning_rate": 0.0002, "loss": 1.6141, "step": 110160 }, { "epoch": 0.45, "grad_norm": 3.776017427444458, "learning_rate": 0.0002, "loss": 1.5008, "step": 110170 }, { "epoch": 0.45, "grad_norm": 2.479092836380005, "learning_rate": 0.0002, "loss": 1.3687, "step": 110180 }, { "epoch": 0.45, "grad_norm": 2.9516756534576416, "learning_rate": 0.0002, "loss": 1.4596, "step": 110190 }, { "epoch": 0.45, "grad_norm": 2.588895082473755, "learning_rate": 0.0002, "loss": 1.6843, "step": 110200 }, { "epoch": 0.45, "grad_norm": 2.809427261352539, "learning_rate": 0.0002, "loss": 1.5261, "step": 110210 }, { "epoch": 0.45, "grad_norm": 3.204845666885376, "learning_rate": 0.0002, "loss": 1.6004, "step": 110220 }, { "epoch": 0.45, "grad_norm": 4.439894676208496, "learning_rate": 0.0002, "loss": 1.473, "step": 110230 }, { "epoch": 0.45, "grad_norm": 2.3966116905212402, "learning_rate": 0.0002, "loss": 1.6061, "step": 110240 }, { "epoch": 0.45, "grad_norm": 2.789769411087036, "learning_rate": 0.0002, "loss": 1.5401, "step": 110250 }, { "epoch": 0.45, "grad_norm": 4.668115615844727, "learning_rate": 0.0002, "loss": 1.352, "step": 110260 }, { "epoch": 0.45, "grad_norm": 3.89326810836792, "learning_rate": 0.0002, "loss": 1.3582, "step": 110270 }, { "epoch": 0.45, "grad_norm": 2.77496337890625, "learning_rate": 0.0002, "loss": 1.5094, "step": 110280 }, { "epoch": 0.45, "grad_norm": 3.351257801055908, "learning_rate": 0.0002, "loss": 1.6989, "step": 110290 }, { "epoch": 0.45, "grad_norm": 3.3381268978118896, "learning_rate": 0.0002, "loss": 1.4027, "step": 110300 }, { "epoch": 0.45, "grad_norm": 4.402015686035156, "learning_rate": 0.0002, "loss": 1.6218, "step": 110310 }, { "epoch": 0.45, "grad_norm": 3.3627920150756836, "learning_rate": 0.0002, "loss": 1.5053, "step": 110320 }, { "epoch": 0.45, "grad_norm": 1.8479845523834229, "learning_rate": 0.0002, "loss": 1.9429, "step": 110330 }, { "epoch": 0.45, "grad_norm": 2.1101367473602295, "learning_rate": 0.0002, "loss": 1.3386, "step": 110340 }, { "epoch": 0.45, "grad_norm": 4.00771427154541, "learning_rate": 0.0002, "loss": 1.754, "step": 110350 }, { "epoch": 0.45, "grad_norm": 10.267203330993652, "learning_rate": 0.0002, "loss": 1.4854, "step": 110360 }, { "epoch": 0.45, "grad_norm": 3.041640043258667, "learning_rate": 0.0002, "loss": 1.3979, "step": 110370 }, { "epoch": 0.45, "grad_norm": 2.8740880489349365, "learning_rate": 0.0002, "loss": 1.4599, "step": 110380 }, { "epoch": 0.45, "grad_norm": 2.72259783744812, "learning_rate": 0.0002, "loss": 1.5205, "step": 110390 }, { "epoch": 0.45, "grad_norm": 1.7321029901504517, "learning_rate": 0.0002, "loss": 1.7283, "step": 110400 }, { "epoch": 0.45, "grad_norm": 4.720709800720215, "learning_rate": 0.0002, "loss": 1.6693, "step": 110410 }, { "epoch": 0.45, "grad_norm": 3.5073304176330566, "learning_rate": 0.0002, "loss": 1.6278, "step": 110420 }, { "epoch": 0.45, "grad_norm": 3.2734122276306152, "learning_rate": 0.0002, "loss": 1.5315, "step": 110430 }, { "epoch": 0.45, "grad_norm": 2.969017744064331, "learning_rate": 0.0002, "loss": 1.4301, "step": 110440 }, { "epoch": 0.45, "grad_norm": 1.743672490119934, "learning_rate": 0.0002, "loss": 1.5076, "step": 110450 }, { "epoch": 0.45, "grad_norm": 3.246140718460083, "learning_rate": 0.0002, "loss": 1.358, "step": 110460 }, { "epoch": 0.45, "grad_norm": 3.009993553161621, "learning_rate": 0.0002, "loss": 1.7521, "step": 110470 }, { "epoch": 0.45, "grad_norm": 3.9807519912719727, "learning_rate": 0.0002, "loss": 1.9309, "step": 110480 }, { "epoch": 0.45, "grad_norm": 3.340036392211914, "learning_rate": 0.0002, "loss": 1.4319, "step": 110490 }, { "epoch": 0.45, "grad_norm": 2.964193820953369, "learning_rate": 0.0002, "loss": 1.3485, "step": 110500 }, { "epoch": 0.45, "grad_norm": 3.9381229877471924, "learning_rate": 0.0002, "loss": 1.4836, "step": 110510 }, { "epoch": 0.45, "grad_norm": 2.3407328128814697, "learning_rate": 0.0002, "loss": 1.6338, "step": 110520 }, { "epoch": 0.45, "grad_norm": 3.304837942123413, "learning_rate": 0.0002, "loss": 1.2346, "step": 110530 }, { "epoch": 0.45, "grad_norm": 2.8819773197174072, "learning_rate": 0.0002, "loss": 1.4631, "step": 110540 }, { "epoch": 0.45, "grad_norm": 2.7183117866516113, "learning_rate": 0.0002, "loss": 1.6312, "step": 110550 }, { "epoch": 0.45, "grad_norm": 2.578007936477661, "learning_rate": 0.0002, "loss": 1.5425, "step": 110560 }, { "epoch": 0.45, "grad_norm": 2.199162483215332, "learning_rate": 0.0002, "loss": 1.8019, "step": 110570 }, { "epoch": 0.45, "grad_norm": 3.251500368118286, "learning_rate": 0.0002, "loss": 1.6901, "step": 110580 }, { "epoch": 0.45, "grad_norm": 3.243582248687744, "learning_rate": 0.0002, "loss": 1.7153, "step": 110590 }, { "epoch": 0.45, "grad_norm": 4.167074680328369, "learning_rate": 0.0002, "loss": 1.5754, "step": 110600 }, { "epoch": 0.45, "grad_norm": 2.363518714904785, "learning_rate": 0.0002, "loss": 1.6478, "step": 110610 }, { "epoch": 0.45, "grad_norm": 2.367042303085327, "learning_rate": 0.0002, "loss": 1.4341, "step": 110620 }, { "epoch": 0.45, "grad_norm": 4.008329391479492, "learning_rate": 0.0002, "loss": 1.8148, "step": 110630 }, { "epoch": 0.45, "grad_norm": 1.870026707649231, "learning_rate": 0.0002, "loss": 1.433, "step": 110640 }, { "epoch": 0.45, "grad_norm": 2.734609603881836, "learning_rate": 0.0002, "loss": 1.6835, "step": 110650 }, { "epoch": 0.45, "grad_norm": 3.150540828704834, "learning_rate": 0.0002, "loss": 1.5933, "step": 110660 }, { "epoch": 0.45, "grad_norm": 6.42894983291626, "learning_rate": 0.0002, "loss": 1.471, "step": 110670 }, { "epoch": 0.45, "grad_norm": 3.1736652851104736, "learning_rate": 0.0002, "loss": 1.2315, "step": 110680 }, { "epoch": 0.45, "grad_norm": 2.659722328186035, "learning_rate": 0.0002, "loss": 1.794, "step": 110690 }, { "epoch": 0.45, "grad_norm": 2.4170238971710205, "learning_rate": 0.0002, "loss": 1.599, "step": 110700 }, { "epoch": 0.45, "grad_norm": 3.3404695987701416, "learning_rate": 0.0002, "loss": 1.6436, "step": 110710 }, { "epoch": 0.45, "grad_norm": 2.991823196411133, "learning_rate": 0.0002, "loss": 1.694, "step": 110720 }, { "epoch": 0.45, "grad_norm": 3.5325119495391846, "learning_rate": 0.0002, "loss": 1.6078, "step": 110730 }, { "epoch": 0.45, "grad_norm": 2.4129281044006348, "learning_rate": 0.0002, "loss": 1.6973, "step": 110740 }, { "epoch": 0.45, "grad_norm": 4.82786226272583, "learning_rate": 0.0002, "loss": 1.7256, "step": 110750 }, { "epoch": 0.45, "grad_norm": 2.9848227500915527, "learning_rate": 0.0002, "loss": 1.395, "step": 110760 }, { "epoch": 0.45, "grad_norm": 2.779268980026245, "learning_rate": 0.0002, "loss": 1.6063, "step": 110770 }, { "epoch": 0.45, "grad_norm": 3.1198368072509766, "learning_rate": 0.0002, "loss": 1.8237, "step": 110780 }, { "epoch": 0.45, "grad_norm": 3.6661946773529053, "learning_rate": 0.0002, "loss": 1.4424, "step": 110790 }, { "epoch": 0.45, "grad_norm": 3.1572418212890625, "learning_rate": 0.0002, "loss": 1.41, "step": 110800 }, { "epoch": 0.45, "grad_norm": 1.8422303199768066, "learning_rate": 0.0002, "loss": 1.5442, "step": 110810 }, { "epoch": 0.45, "grad_norm": 2.536968946456909, "learning_rate": 0.0002, "loss": 1.6228, "step": 110820 }, { "epoch": 0.45, "grad_norm": 2.961815595626831, "learning_rate": 0.0002, "loss": 1.7818, "step": 110830 }, { "epoch": 0.45, "grad_norm": 3.8671298027038574, "learning_rate": 0.0002, "loss": 1.6209, "step": 110840 }, { "epoch": 0.45, "grad_norm": 1.51640784740448, "learning_rate": 0.0002, "loss": 1.5375, "step": 110850 }, { "epoch": 0.45, "grad_norm": 2.7664475440979004, "learning_rate": 0.0002, "loss": 1.585, "step": 110860 }, { "epoch": 0.45, "grad_norm": 2.794799327850342, "learning_rate": 0.0002, "loss": 1.344, "step": 110870 }, { "epoch": 0.45, "grad_norm": 3.6683647632598877, "learning_rate": 0.0002, "loss": 1.6808, "step": 110880 }, { "epoch": 0.45, "grad_norm": 1.7891877889633179, "learning_rate": 0.0002, "loss": 1.5438, "step": 110890 }, { "epoch": 0.45, "grad_norm": 2.8034462928771973, "learning_rate": 0.0002, "loss": 1.5688, "step": 110900 }, { "epoch": 0.45, "grad_norm": 3.2564914226531982, "learning_rate": 0.0002, "loss": 1.6216, "step": 110910 }, { "epoch": 0.45, "grad_norm": 1.396925926208496, "learning_rate": 0.0002, "loss": 1.1373, "step": 110920 }, { "epoch": 0.45, "grad_norm": 3.232433319091797, "learning_rate": 0.0002, "loss": 1.635, "step": 110930 }, { "epoch": 0.45, "grad_norm": 1.41679847240448, "learning_rate": 0.0002, "loss": 1.5254, "step": 110940 }, { "epoch": 0.45, "grad_norm": 2.805706024169922, "learning_rate": 0.0002, "loss": 1.8961, "step": 110950 }, { "epoch": 0.45, "grad_norm": 3.9969589710235596, "learning_rate": 0.0002, "loss": 1.7478, "step": 110960 }, { "epoch": 0.45, "grad_norm": 2.2942252159118652, "learning_rate": 0.0002, "loss": 1.4317, "step": 110970 }, { "epoch": 0.45, "grad_norm": 2.1377291679382324, "learning_rate": 0.0002, "loss": 1.4028, "step": 110980 }, { "epoch": 0.45, "grad_norm": 2.4129979610443115, "learning_rate": 0.0002, "loss": 1.5455, "step": 110990 }, { "epoch": 0.45, "grad_norm": 3.6465742588043213, "learning_rate": 0.0002, "loss": 1.587, "step": 111000 }, { "epoch": 0.45, "grad_norm": 2.5972111225128174, "learning_rate": 0.0002, "loss": 1.6579, "step": 111010 }, { "epoch": 0.45, "grad_norm": 2.797097682952881, "learning_rate": 0.0002, "loss": 1.7123, "step": 111020 }, { "epoch": 0.45, "grad_norm": 2.087653875350952, "learning_rate": 0.0002, "loss": 1.6778, "step": 111030 }, { "epoch": 0.45, "grad_norm": 3.0355257987976074, "learning_rate": 0.0002, "loss": 1.5706, "step": 111040 }, { "epoch": 0.45, "grad_norm": 1.5809882879257202, "learning_rate": 0.0002, "loss": 1.5726, "step": 111050 }, { "epoch": 0.45, "grad_norm": 3.130916118621826, "learning_rate": 0.0002, "loss": 1.573, "step": 111060 }, { "epoch": 0.45, "grad_norm": 4.086919784545898, "learning_rate": 0.0002, "loss": 1.2744, "step": 111070 }, { "epoch": 0.45, "grad_norm": 4.2094011306762695, "learning_rate": 0.0002, "loss": 1.6493, "step": 111080 }, { "epoch": 0.45, "grad_norm": 3.2470180988311768, "learning_rate": 0.0002, "loss": 1.4791, "step": 111090 }, { "epoch": 0.45, "grad_norm": 3.335770606994629, "learning_rate": 0.0002, "loss": 1.4834, "step": 111100 }, { "epoch": 0.45, "grad_norm": 2.0709333419799805, "learning_rate": 0.0002, "loss": 1.5207, "step": 111110 }, { "epoch": 0.45, "grad_norm": 2.4931156635284424, "learning_rate": 0.0002, "loss": 1.5175, "step": 111120 }, { "epoch": 0.45, "grad_norm": 3.638975143432617, "learning_rate": 0.0002, "loss": 1.5324, "step": 111130 }, { "epoch": 0.45, "grad_norm": 2.2407567501068115, "learning_rate": 0.0002, "loss": 1.6701, "step": 111140 }, { "epoch": 0.45, "grad_norm": 4.196712493896484, "learning_rate": 0.0002, "loss": 1.5028, "step": 111150 }, { "epoch": 0.45, "grad_norm": 4.093746185302734, "learning_rate": 0.0002, "loss": 1.6407, "step": 111160 }, { "epoch": 0.45, "grad_norm": 2.5054330825805664, "learning_rate": 0.0002, "loss": 1.6614, "step": 111170 }, { "epoch": 0.45, "grad_norm": 1.7954127788543701, "learning_rate": 0.0002, "loss": 1.6141, "step": 111180 }, { "epoch": 0.45, "grad_norm": 6.954748153686523, "learning_rate": 0.0002, "loss": 1.5978, "step": 111190 }, { "epoch": 0.45, "grad_norm": 4.526285648345947, "learning_rate": 0.0002, "loss": 1.7222, "step": 111200 }, { "epoch": 0.45, "grad_norm": 2.1794233322143555, "learning_rate": 0.0002, "loss": 1.5677, "step": 111210 }, { "epoch": 0.45, "grad_norm": 3.0445072650909424, "learning_rate": 0.0002, "loss": 1.776, "step": 111220 }, { "epoch": 0.45, "grad_norm": 4.008513450622559, "learning_rate": 0.0002, "loss": 1.4821, "step": 111230 }, { "epoch": 0.45, "grad_norm": 3.69429087638855, "learning_rate": 0.0002, "loss": 1.4703, "step": 111240 }, { "epoch": 0.45, "grad_norm": 3.1805009841918945, "learning_rate": 0.0002, "loss": 1.4336, "step": 111250 }, { "epoch": 0.45, "grad_norm": 3.183507204055786, "learning_rate": 0.0002, "loss": 1.6675, "step": 111260 }, { "epoch": 0.45, "grad_norm": 3.2341554164886475, "learning_rate": 0.0002, "loss": 1.5246, "step": 111270 }, { "epoch": 0.45, "grad_norm": 3.8603272438049316, "learning_rate": 0.0002, "loss": 1.5552, "step": 111280 }, { "epoch": 0.45, "grad_norm": 2.6947269439697266, "learning_rate": 0.0002, "loss": 1.3575, "step": 111290 }, { "epoch": 0.45, "grad_norm": 2.9164228439331055, "learning_rate": 0.0002, "loss": 1.6142, "step": 111300 }, { "epoch": 0.45, "grad_norm": 2.5485501289367676, "learning_rate": 0.0002, "loss": 1.7829, "step": 111310 }, { "epoch": 0.45, "grad_norm": 2.5700244903564453, "learning_rate": 0.0002, "loss": 1.4954, "step": 111320 }, { "epoch": 0.45, "grad_norm": 2.2123234272003174, "learning_rate": 0.0002, "loss": 1.5054, "step": 111330 }, { "epoch": 0.45, "grad_norm": 4.494832515716553, "learning_rate": 0.0002, "loss": 1.7518, "step": 111340 }, { "epoch": 0.45, "grad_norm": 1.9805065393447876, "learning_rate": 0.0002, "loss": 1.4261, "step": 111350 }, { "epoch": 0.45, "grad_norm": 3.172236442565918, "learning_rate": 0.0002, "loss": 1.6086, "step": 111360 }, { "epoch": 0.45, "grad_norm": 3.2930123805999756, "learning_rate": 0.0002, "loss": 1.5769, "step": 111370 }, { "epoch": 0.45, "grad_norm": 1.5115268230438232, "learning_rate": 0.0002, "loss": 1.4114, "step": 111380 }, { "epoch": 0.45, "grad_norm": 4.243422985076904, "learning_rate": 0.0002, "loss": 1.6289, "step": 111390 }, { "epoch": 0.45, "grad_norm": 2.874990463256836, "learning_rate": 0.0002, "loss": 1.5078, "step": 111400 }, { "epoch": 0.45, "grad_norm": 2.020270347595215, "learning_rate": 0.0002, "loss": 1.5924, "step": 111410 }, { "epoch": 0.45, "grad_norm": 2.7861313819885254, "learning_rate": 0.0002, "loss": 1.7376, "step": 111420 }, { "epoch": 0.45, "grad_norm": 6.25139045715332, "learning_rate": 0.0002, "loss": 1.4353, "step": 111430 }, { "epoch": 0.45, "grad_norm": 3.632244348526001, "learning_rate": 0.0002, "loss": 1.8311, "step": 111440 }, { "epoch": 0.45, "grad_norm": 2.2136390209198, "learning_rate": 0.0002, "loss": 1.6012, "step": 111450 }, { "epoch": 0.45, "grad_norm": 2.3589725494384766, "learning_rate": 0.0002, "loss": 1.4976, "step": 111460 }, { "epoch": 0.45, "grad_norm": 2.816638708114624, "learning_rate": 0.0002, "loss": 1.271, "step": 111470 }, { "epoch": 0.45, "grad_norm": 2.88325834274292, "learning_rate": 0.0002, "loss": 1.516, "step": 111480 }, { "epoch": 0.45, "grad_norm": 3.9924957752227783, "learning_rate": 0.0002, "loss": 1.5521, "step": 111490 }, { "epoch": 0.45, "grad_norm": 2.7174525260925293, "learning_rate": 0.0002, "loss": 1.7917, "step": 111500 }, { "epoch": 0.45, "grad_norm": 1.6369621753692627, "learning_rate": 0.0002, "loss": 1.6394, "step": 111510 }, { "epoch": 0.45, "grad_norm": 3.6429648399353027, "learning_rate": 0.0002, "loss": 1.8309, "step": 111520 }, { "epoch": 0.45, "grad_norm": 3.87707781791687, "learning_rate": 0.0002, "loss": 1.6122, "step": 111530 }, { "epoch": 0.45, "grad_norm": 2.9163095951080322, "learning_rate": 0.0002, "loss": 1.7777, "step": 111540 }, { "epoch": 0.45, "grad_norm": 2.7843689918518066, "learning_rate": 0.0002, "loss": 1.5024, "step": 111550 }, { "epoch": 0.45, "grad_norm": 3.1954526901245117, "learning_rate": 0.0002, "loss": 1.6073, "step": 111560 }, { "epoch": 0.45, "grad_norm": 2.9718360900878906, "learning_rate": 0.0002, "loss": 1.6622, "step": 111570 }, { "epoch": 0.45, "grad_norm": 1.92820405960083, "learning_rate": 0.0002, "loss": 1.7169, "step": 111580 }, { "epoch": 0.45, "grad_norm": 4.557009220123291, "learning_rate": 0.0002, "loss": 1.5737, "step": 111590 }, { "epoch": 0.45, "grad_norm": 2.95100998878479, "learning_rate": 0.0002, "loss": 1.456, "step": 111600 }, { "epoch": 0.45, "grad_norm": 3.3889262676239014, "learning_rate": 0.0002, "loss": 1.8193, "step": 111610 }, { "epoch": 0.45, "grad_norm": 2.0006191730499268, "learning_rate": 0.0002, "loss": 1.5725, "step": 111620 }, { "epoch": 0.45, "grad_norm": 2.268374443054199, "learning_rate": 0.0002, "loss": 1.6017, "step": 111630 }, { "epoch": 0.45, "grad_norm": 4.759651184082031, "learning_rate": 0.0002, "loss": 1.7333, "step": 111640 }, { "epoch": 0.45, "grad_norm": 2.7589809894561768, "learning_rate": 0.0002, "loss": 1.5984, "step": 111650 }, { "epoch": 0.45, "grad_norm": 2.347400665283203, "learning_rate": 0.0002, "loss": 1.5678, "step": 111660 }, { "epoch": 0.45, "grad_norm": 4.955624580383301, "learning_rate": 0.0002, "loss": 1.5956, "step": 111670 }, { "epoch": 0.45, "grad_norm": 3.0385499000549316, "learning_rate": 0.0002, "loss": 1.385, "step": 111680 }, { "epoch": 0.45, "grad_norm": 4.74675989151001, "learning_rate": 0.0002, "loss": 1.6657, "step": 111690 }, { "epoch": 0.45, "grad_norm": 2.4647843837738037, "learning_rate": 0.0002, "loss": 1.4373, "step": 111700 }, { "epoch": 0.45, "grad_norm": 4.8669352531433105, "learning_rate": 0.0002, "loss": 1.5157, "step": 111710 }, { "epoch": 0.45, "grad_norm": 3.029916286468506, "learning_rate": 0.0002, "loss": 1.5515, "step": 111720 }, { "epoch": 0.45, "grad_norm": 2.683018207550049, "learning_rate": 0.0002, "loss": 1.7926, "step": 111730 }, { "epoch": 0.45, "grad_norm": 2.127971649169922, "learning_rate": 0.0002, "loss": 1.5323, "step": 111740 }, { "epoch": 0.45, "grad_norm": 4.720705032348633, "learning_rate": 0.0002, "loss": 1.3345, "step": 111750 }, { "epoch": 0.45, "grad_norm": 1.9503308534622192, "learning_rate": 0.0002, "loss": 1.5644, "step": 111760 }, { "epoch": 0.46, "grad_norm": 3.4913649559020996, "learning_rate": 0.0002, "loss": 1.8399, "step": 111770 }, { "epoch": 0.46, "grad_norm": 3.048234701156616, "learning_rate": 0.0002, "loss": 1.708, "step": 111780 }, { "epoch": 0.46, "grad_norm": 6.315066814422607, "learning_rate": 0.0002, "loss": 1.6836, "step": 111790 }, { "epoch": 0.46, "grad_norm": 4.417503833770752, "learning_rate": 0.0002, "loss": 1.4599, "step": 111800 }, { "epoch": 0.46, "grad_norm": 3.124316453933716, "learning_rate": 0.0002, "loss": 1.4219, "step": 111810 }, { "epoch": 0.46, "grad_norm": 4.146186828613281, "learning_rate": 0.0002, "loss": 1.4126, "step": 111820 }, { "epoch": 0.46, "grad_norm": 4.846954822540283, "learning_rate": 0.0002, "loss": 1.7, "step": 111830 }, { "epoch": 0.46, "grad_norm": 4.578767776489258, "learning_rate": 0.0002, "loss": 1.4976, "step": 111840 }, { "epoch": 0.46, "grad_norm": 3.133507490158081, "learning_rate": 0.0002, "loss": 1.6158, "step": 111850 }, { "epoch": 0.46, "grad_norm": 3.11906361579895, "learning_rate": 0.0002, "loss": 1.6769, "step": 111860 }, { "epoch": 0.46, "grad_norm": 5.763523101806641, "learning_rate": 0.0002, "loss": 1.6364, "step": 111870 }, { "epoch": 0.46, "grad_norm": 3.029366970062256, "learning_rate": 0.0002, "loss": 1.6232, "step": 111880 }, { "epoch": 0.46, "grad_norm": 3.219974994659424, "learning_rate": 0.0002, "loss": 1.4302, "step": 111890 }, { "epoch": 0.46, "grad_norm": 2.740211248397827, "learning_rate": 0.0002, "loss": 1.4624, "step": 111900 }, { "epoch": 0.46, "grad_norm": 3.1050796508789062, "learning_rate": 0.0002, "loss": 1.5113, "step": 111910 }, { "epoch": 0.46, "grad_norm": 2.225127696990967, "learning_rate": 0.0002, "loss": 1.5594, "step": 111920 }, { "epoch": 0.46, "grad_norm": 3.525562286376953, "learning_rate": 0.0002, "loss": 1.3715, "step": 111930 }, { "epoch": 0.46, "grad_norm": 2.624354124069214, "learning_rate": 0.0002, "loss": 1.5829, "step": 111940 }, { "epoch": 0.46, "grad_norm": 3.5854594707489014, "learning_rate": 0.0002, "loss": 1.663, "step": 111950 }, { "epoch": 0.46, "grad_norm": 3.5546352863311768, "learning_rate": 0.0002, "loss": 1.698, "step": 111960 }, { "epoch": 0.46, "grad_norm": 2.715498924255371, "learning_rate": 0.0002, "loss": 1.6919, "step": 111970 }, { "epoch": 0.46, "grad_norm": 2.700410842895508, "learning_rate": 0.0002, "loss": 1.7153, "step": 111980 }, { "epoch": 0.46, "grad_norm": 2.567115068435669, "learning_rate": 0.0002, "loss": 1.4683, "step": 111990 }, { "epoch": 0.46, "grad_norm": 3.320089817047119, "learning_rate": 0.0002, "loss": 1.7754, "step": 112000 }, { "epoch": 0.46, "grad_norm": 1.926796793937683, "learning_rate": 0.0002, "loss": 1.4418, "step": 112010 }, { "epoch": 0.46, "grad_norm": 4.550867557525635, "learning_rate": 0.0002, "loss": 1.8562, "step": 112020 }, { "epoch": 0.46, "grad_norm": 2.7000019550323486, "learning_rate": 0.0002, "loss": 1.695, "step": 112030 }, { "epoch": 0.46, "grad_norm": 1.088634967803955, "learning_rate": 0.0002, "loss": 1.5258, "step": 112040 }, { "epoch": 0.46, "grad_norm": 1.9920029640197754, "learning_rate": 0.0002, "loss": 1.555, "step": 112050 }, { "epoch": 0.46, "grad_norm": 6.298170566558838, "learning_rate": 0.0002, "loss": 1.4036, "step": 112060 }, { "epoch": 0.46, "grad_norm": 3.7829058170318604, "learning_rate": 0.0002, "loss": 1.4991, "step": 112070 }, { "epoch": 0.46, "grad_norm": 3.1863157749176025, "learning_rate": 0.0002, "loss": 1.6226, "step": 112080 }, { "epoch": 0.46, "grad_norm": 4.654996395111084, "learning_rate": 0.0002, "loss": 1.9448, "step": 112090 }, { "epoch": 0.46, "grad_norm": 3.1004199981689453, "learning_rate": 0.0002, "loss": 1.6429, "step": 112100 }, { "epoch": 0.46, "grad_norm": 2.4418985843658447, "learning_rate": 0.0002, "loss": 1.5419, "step": 112110 }, { "epoch": 0.46, "grad_norm": 2.6058762073516846, "learning_rate": 0.0002, "loss": 1.3704, "step": 112120 }, { "epoch": 0.46, "grad_norm": 3.4567108154296875, "learning_rate": 0.0002, "loss": 1.3801, "step": 112130 }, { "epoch": 0.46, "grad_norm": 2.754744291305542, "learning_rate": 0.0002, "loss": 1.4796, "step": 112140 }, { "epoch": 0.46, "grad_norm": 3.4927573204040527, "learning_rate": 0.0002, "loss": 1.4843, "step": 112150 }, { "epoch": 0.46, "grad_norm": 3.343029022216797, "learning_rate": 0.0002, "loss": 1.5504, "step": 112160 }, { "epoch": 0.46, "grad_norm": 4.024998188018799, "learning_rate": 0.0002, "loss": 1.4035, "step": 112170 }, { "epoch": 0.46, "grad_norm": 2.710588216781616, "learning_rate": 0.0002, "loss": 1.7036, "step": 112180 }, { "epoch": 0.46, "grad_norm": 2.619150400161743, "learning_rate": 0.0002, "loss": 1.4078, "step": 112190 }, { "epoch": 0.46, "grad_norm": 2.0511274337768555, "learning_rate": 0.0002, "loss": 1.4758, "step": 112200 }, { "epoch": 0.46, "grad_norm": 2.1895270347595215, "learning_rate": 0.0002, "loss": 1.6204, "step": 112210 }, { "epoch": 0.46, "grad_norm": 3.3104496002197266, "learning_rate": 0.0002, "loss": 1.328, "step": 112220 }, { "epoch": 0.46, "grad_norm": 2.9842941761016846, "learning_rate": 0.0002, "loss": 1.6542, "step": 112230 }, { "epoch": 0.46, "grad_norm": 2.741384267807007, "learning_rate": 0.0002, "loss": 1.6045, "step": 112240 }, { "epoch": 0.46, "grad_norm": 4.177288055419922, "learning_rate": 0.0002, "loss": 1.4662, "step": 112250 }, { "epoch": 0.46, "grad_norm": 2.161888599395752, "learning_rate": 0.0002, "loss": 1.5203, "step": 112260 }, { "epoch": 0.46, "grad_norm": 2.716296672821045, "learning_rate": 0.0002, "loss": 1.6255, "step": 112270 }, { "epoch": 0.46, "grad_norm": 2.0376992225646973, "learning_rate": 0.0002, "loss": 1.4319, "step": 112280 }, { "epoch": 0.46, "grad_norm": 4.9206743240356445, "learning_rate": 0.0002, "loss": 1.5993, "step": 112290 }, { "epoch": 0.46, "grad_norm": 3.8768210411071777, "learning_rate": 0.0002, "loss": 1.8289, "step": 112300 }, { "epoch": 0.46, "grad_norm": 2.9174070358276367, "learning_rate": 0.0002, "loss": 1.6971, "step": 112310 }, { "epoch": 0.46, "grad_norm": 10.962023735046387, "learning_rate": 0.0002, "loss": 1.6859, "step": 112320 }, { "epoch": 0.46, "grad_norm": 4.611273288726807, "learning_rate": 0.0002, "loss": 1.5442, "step": 112330 }, { "epoch": 0.46, "grad_norm": 3.46870756149292, "learning_rate": 0.0002, "loss": 1.6004, "step": 112340 }, { "epoch": 0.46, "grad_norm": 4.949125289916992, "learning_rate": 0.0002, "loss": 1.6736, "step": 112350 }, { "epoch": 0.46, "grad_norm": 3.4215400218963623, "learning_rate": 0.0002, "loss": 1.6299, "step": 112360 }, { "epoch": 0.46, "grad_norm": 4.601654052734375, "learning_rate": 0.0002, "loss": 1.5497, "step": 112370 }, { "epoch": 0.46, "grad_norm": 3.914757490158081, "learning_rate": 0.0002, "loss": 1.5416, "step": 112380 }, { "epoch": 0.46, "grad_norm": 3.5603325366973877, "learning_rate": 0.0002, "loss": 1.5942, "step": 112390 }, { "epoch": 0.46, "grad_norm": 3.9599037170410156, "learning_rate": 0.0002, "loss": 1.4597, "step": 112400 }, { "epoch": 0.46, "grad_norm": 2.1008050441741943, "learning_rate": 0.0002, "loss": 1.3273, "step": 112410 }, { "epoch": 0.46, "grad_norm": 4.332613468170166, "learning_rate": 0.0002, "loss": 1.5948, "step": 112420 }, { "epoch": 0.46, "grad_norm": 1.8476264476776123, "learning_rate": 0.0002, "loss": 1.528, "step": 112430 }, { "epoch": 0.46, "grad_norm": 2.7309582233428955, "learning_rate": 0.0002, "loss": 1.7254, "step": 112440 }, { "epoch": 0.46, "grad_norm": 2.7441537380218506, "learning_rate": 0.0002, "loss": 1.545, "step": 112450 }, { "epoch": 0.46, "grad_norm": 4.539092540740967, "learning_rate": 0.0002, "loss": 1.6498, "step": 112460 }, { "epoch": 0.46, "grad_norm": 2.1684672832489014, "learning_rate": 0.0002, "loss": 1.8984, "step": 112470 }, { "epoch": 0.46, "grad_norm": 3.2650763988494873, "learning_rate": 0.0002, "loss": 1.8266, "step": 112480 }, { "epoch": 0.46, "grad_norm": 2.0794625282287598, "learning_rate": 0.0002, "loss": 1.4594, "step": 112490 }, { "epoch": 0.46, "grad_norm": 2.940223455429077, "learning_rate": 0.0002, "loss": 1.5088, "step": 112500 }, { "epoch": 0.46, "grad_norm": 4.267263412475586, "learning_rate": 0.0002, "loss": 1.567, "step": 112510 }, { "epoch": 0.46, "grad_norm": 2.7147436141967773, "learning_rate": 0.0002, "loss": 1.6367, "step": 112520 }, { "epoch": 0.46, "grad_norm": 2.8644278049468994, "learning_rate": 0.0002, "loss": 1.7774, "step": 112530 }, { "epoch": 0.46, "grad_norm": 3.917158603668213, "learning_rate": 0.0002, "loss": 1.4729, "step": 112540 }, { "epoch": 0.46, "grad_norm": 4.657785415649414, "learning_rate": 0.0002, "loss": 1.6499, "step": 112550 }, { "epoch": 0.46, "grad_norm": 4.016990661621094, "learning_rate": 0.0002, "loss": 1.429, "step": 112560 }, { "epoch": 0.46, "grad_norm": 2.8670153617858887, "learning_rate": 0.0002, "loss": 1.7344, "step": 112570 }, { "epoch": 0.46, "grad_norm": 2.3406713008880615, "learning_rate": 0.0002, "loss": 1.6798, "step": 112580 }, { "epoch": 0.46, "grad_norm": 3.0519134998321533, "learning_rate": 0.0002, "loss": 1.5653, "step": 112590 }, { "epoch": 0.46, "grad_norm": 1.8633450269699097, "learning_rate": 0.0002, "loss": 1.8427, "step": 112600 }, { "epoch": 0.46, "grad_norm": 2.2547922134399414, "learning_rate": 0.0002, "loss": 1.4574, "step": 112610 }, { "epoch": 0.46, "grad_norm": 2.199735403060913, "learning_rate": 0.0002, "loss": 1.6747, "step": 112620 }, { "epoch": 0.46, "grad_norm": 3.508251190185547, "learning_rate": 0.0002, "loss": 1.3081, "step": 112630 }, { "epoch": 0.46, "grad_norm": 4.786989212036133, "learning_rate": 0.0002, "loss": 1.4461, "step": 112640 }, { "epoch": 0.46, "grad_norm": 2.1432862281799316, "learning_rate": 0.0002, "loss": 1.5571, "step": 112650 }, { "epoch": 0.46, "grad_norm": 3.7340877056121826, "learning_rate": 0.0002, "loss": 1.7212, "step": 112660 }, { "epoch": 0.46, "grad_norm": 2.2269198894500732, "learning_rate": 0.0002, "loss": 1.6124, "step": 112670 }, { "epoch": 0.46, "grad_norm": 2.709707260131836, "learning_rate": 0.0002, "loss": 1.5223, "step": 112680 }, { "epoch": 0.46, "grad_norm": 2.3979434967041016, "learning_rate": 0.0002, "loss": 1.5752, "step": 112690 }, { "epoch": 0.46, "grad_norm": 3.9761972427368164, "learning_rate": 0.0002, "loss": 1.6214, "step": 112700 }, { "epoch": 0.46, "grad_norm": 3.90360951423645, "learning_rate": 0.0002, "loss": 1.6129, "step": 112710 }, { "epoch": 0.46, "grad_norm": 3.7784035205841064, "learning_rate": 0.0002, "loss": 1.6235, "step": 112720 }, { "epoch": 0.46, "grad_norm": 3.884516716003418, "learning_rate": 0.0002, "loss": 1.467, "step": 112730 }, { "epoch": 0.46, "grad_norm": 3.625114679336548, "learning_rate": 0.0002, "loss": 1.5923, "step": 112740 }, { "epoch": 0.46, "grad_norm": 3.3531198501586914, "learning_rate": 0.0002, "loss": 1.7124, "step": 112750 }, { "epoch": 0.46, "grad_norm": 3.1877245903015137, "learning_rate": 0.0002, "loss": 1.6741, "step": 112760 }, { "epoch": 0.46, "grad_norm": 2.707350254058838, "learning_rate": 0.0002, "loss": 1.4879, "step": 112770 }, { "epoch": 0.46, "grad_norm": 2.905661106109619, "learning_rate": 0.0002, "loss": 1.5448, "step": 112780 }, { "epoch": 0.46, "grad_norm": 3.841064453125, "learning_rate": 0.0002, "loss": 1.6287, "step": 112790 }, { "epoch": 0.46, "grad_norm": 2.0948338508605957, "learning_rate": 0.0002, "loss": 1.6179, "step": 112800 }, { "epoch": 0.46, "grad_norm": 1.624807596206665, "learning_rate": 0.0002, "loss": 1.5772, "step": 112810 }, { "epoch": 0.46, "grad_norm": 3.6695053577423096, "learning_rate": 0.0002, "loss": 1.5724, "step": 112820 }, { "epoch": 0.46, "grad_norm": 2.050126552581787, "learning_rate": 0.0002, "loss": 1.4415, "step": 112830 }, { "epoch": 0.46, "grad_norm": 2.6560275554656982, "learning_rate": 0.0002, "loss": 1.5252, "step": 112840 }, { "epoch": 0.46, "grad_norm": 2.9057974815368652, "learning_rate": 0.0002, "loss": 1.7358, "step": 112850 }, { "epoch": 0.46, "grad_norm": 3.6585049629211426, "learning_rate": 0.0002, "loss": 1.5782, "step": 112860 }, { "epoch": 0.46, "grad_norm": 2.162536144256592, "learning_rate": 0.0002, "loss": 1.6328, "step": 112870 }, { "epoch": 0.46, "grad_norm": 2.110626697540283, "learning_rate": 0.0002, "loss": 1.5807, "step": 112880 }, { "epoch": 0.46, "grad_norm": 3.069615602493286, "learning_rate": 0.0002, "loss": 1.4873, "step": 112890 }, { "epoch": 0.46, "grad_norm": 3.3137054443359375, "learning_rate": 0.0002, "loss": 1.6469, "step": 112900 }, { "epoch": 0.46, "grad_norm": 4.349453926086426, "learning_rate": 0.0002, "loss": 1.6296, "step": 112910 }, { "epoch": 0.46, "grad_norm": 5.3163580894470215, "learning_rate": 0.0002, "loss": 1.5387, "step": 112920 }, { "epoch": 0.46, "grad_norm": 2.8322348594665527, "learning_rate": 0.0002, "loss": 1.6407, "step": 112930 }, { "epoch": 0.46, "grad_norm": 3.2950756549835205, "learning_rate": 0.0002, "loss": 1.5972, "step": 112940 }, { "epoch": 0.46, "grad_norm": 3.49630069732666, "learning_rate": 0.0002, "loss": 1.4853, "step": 112950 }, { "epoch": 0.46, "grad_norm": 1.7410393953323364, "learning_rate": 0.0002, "loss": 1.7898, "step": 112960 }, { "epoch": 0.46, "grad_norm": 2.3222100734710693, "learning_rate": 0.0002, "loss": 1.4008, "step": 112970 }, { "epoch": 0.46, "grad_norm": 2.2755286693573, "learning_rate": 0.0002, "loss": 1.7051, "step": 112980 }, { "epoch": 0.46, "grad_norm": 3.4724369049072266, "learning_rate": 0.0002, "loss": 1.2578, "step": 112990 }, { "epoch": 0.46, "grad_norm": 2.2288706302642822, "learning_rate": 0.0002, "loss": 1.4706, "step": 113000 }, { "epoch": 0.46, "grad_norm": 2.9496965408325195, "learning_rate": 0.0002, "loss": 1.4914, "step": 113010 }, { "epoch": 0.46, "grad_norm": 4.234375476837158, "learning_rate": 0.0002, "loss": 1.6577, "step": 113020 }, { "epoch": 0.46, "grad_norm": 3.2661759853363037, "learning_rate": 0.0002, "loss": 1.6085, "step": 113030 }, { "epoch": 0.46, "grad_norm": 1.6060963869094849, "learning_rate": 0.0002, "loss": 1.6122, "step": 113040 }, { "epoch": 0.46, "grad_norm": 5.934834957122803, "learning_rate": 0.0002, "loss": 1.5835, "step": 113050 }, { "epoch": 0.46, "grad_norm": 3.608685255050659, "learning_rate": 0.0002, "loss": 1.5152, "step": 113060 }, { "epoch": 0.46, "grad_norm": 4.13015079498291, "learning_rate": 0.0002, "loss": 1.5388, "step": 113070 }, { "epoch": 0.46, "grad_norm": 1.8125450611114502, "learning_rate": 0.0002, "loss": 1.5676, "step": 113080 }, { "epoch": 0.46, "grad_norm": 3.5743484497070312, "learning_rate": 0.0002, "loss": 1.5319, "step": 113090 }, { "epoch": 0.46, "grad_norm": 2.195246696472168, "learning_rate": 0.0002, "loss": 1.6397, "step": 113100 }, { "epoch": 0.46, "grad_norm": 2.9025940895080566, "learning_rate": 0.0002, "loss": 1.5759, "step": 113110 }, { "epoch": 0.46, "grad_norm": 2.659517765045166, "learning_rate": 0.0002, "loss": 1.5222, "step": 113120 }, { "epoch": 0.46, "grad_norm": 3.656078577041626, "learning_rate": 0.0002, "loss": 1.33, "step": 113130 }, { "epoch": 0.46, "grad_norm": 2.8583998680114746, "learning_rate": 0.0002, "loss": 1.4875, "step": 113140 }, { "epoch": 0.46, "grad_norm": 4.279513835906982, "learning_rate": 0.0002, "loss": 1.5597, "step": 113150 }, { "epoch": 0.46, "grad_norm": 4.667049407958984, "learning_rate": 0.0002, "loss": 1.591, "step": 113160 }, { "epoch": 0.46, "grad_norm": 3.863499164581299, "learning_rate": 0.0002, "loss": 1.7395, "step": 113170 }, { "epoch": 0.46, "grad_norm": 3.2510592937469482, "learning_rate": 0.0002, "loss": 1.5965, "step": 113180 }, { "epoch": 0.46, "grad_norm": 2.685131549835205, "learning_rate": 0.0002, "loss": 1.6493, "step": 113190 }, { "epoch": 0.46, "grad_norm": 2.9580249786376953, "learning_rate": 0.0002, "loss": 1.5572, "step": 113200 }, { "epoch": 0.46, "grad_norm": 2.2975988388061523, "learning_rate": 0.0002, "loss": 1.6517, "step": 113210 }, { "epoch": 0.46, "grad_norm": 1.543354868888855, "learning_rate": 0.0002, "loss": 1.4404, "step": 113220 }, { "epoch": 0.46, "grad_norm": 3.5790398120880127, "learning_rate": 0.0002, "loss": 1.8726, "step": 113230 }, { "epoch": 0.46, "grad_norm": 2.8675379753112793, "learning_rate": 0.0002, "loss": 1.4003, "step": 113240 }, { "epoch": 0.46, "grad_norm": 3.040432929992676, "learning_rate": 0.0002, "loss": 1.5029, "step": 113250 }, { "epoch": 0.46, "grad_norm": 2.587736129760742, "learning_rate": 0.0002, "loss": 1.3648, "step": 113260 }, { "epoch": 0.46, "grad_norm": 2.1834964752197266, "learning_rate": 0.0002, "loss": 1.4747, "step": 113270 }, { "epoch": 0.46, "grad_norm": 2.630004644393921, "learning_rate": 0.0002, "loss": 1.6789, "step": 113280 }, { "epoch": 0.46, "grad_norm": 3.915684938430786, "learning_rate": 0.0002, "loss": 1.6593, "step": 113290 }, { "epoch": 0.46, "grad_norm": 2.333583116531372, "learning_rate": 0.0002, "loss": 1.2897, "step": 113300 }, { "epoch": 0.46, "grad_norm": 2.547786235809326, "learning_rate": 0.0002, "loss": 1.6409, "step": 113310 }, { "epoch": 0.46, "grad_norm": 6.052501678466797, "learning_rate": 0.0002, "loss": 1.7276, "step": 113320 }, { "epoch": 0.46, "grad_norm": 2.336911916732788, "learning_rate": 0.0002, "loss": 1.598, "step": 113330 }, { "epoch": 0.46, "grad_norm": 2.7011334896087646, "learning_rate": 0.0002, "loss": 1.7275, "step": 113340 }, { "epoch": 0.46, "grad_norm": 5.948685646057129, "learning_rate": 0.0002, "loss": 1.6829, "step": 113350 }, { "epoch": 0.46, "grad_norm": 2.1943776607513428, "learning_rate": 0.0002, "loss": 1.6204, "step": 113360 }, { "epoch": 0.46, "grad_norm": 2.8857789039611816, "learning_rate": 0.0002, "loss": 1.59, "step": 113370 }, { "epoch": 0.46, "grad_norm": 3.018108367919922, "learning_rate": 0.0002, "loss": 1.7698, "step": 113380 }, { "epoch": 0.46, "grad_norm": 2.7372913360595703, "learning_rate": 0.0002, "loss": 1.3932, "step": 113390 }, { "epoch": 0.46, "grad_norm": 3.20307993888855, "learning_rate": 0.0002, "loss": 1.8736, "step": 113400 }, { "epoch": 0.46, "grad_norm": 3.2962517738342285, "learning_rate": 0.0002, "loss": 1.6428, "step": 113410 }, { "epoch": 0.46, "grad_norm": 2.752058267593384, "learning_rate": 0.0002, "loss": 1.748, "step": 113420 }, { "epoch": 0.46, "grad_norm": 3.234339952468872, "learning_rate": 0.0002, "loss": 1.3139, "step": 113430 }, { "epoch": 0.46, "grad_norm": 2.1374614238739014, "learning_rate": 0.0002, "loss": 1.5405, "step": 113440 }, { "epoch": 0.46, "grad_norm": 2.8418898582458496, "learning_rate": 0.0002, "loss": 1.6326, "step": 113450 }, { "epoch": 0.46, "grad_norm": 4.303965091705322, "learning_rate": 0.0002, "loss": 1.5455, "step": 113460 }, { "epoch": 0.46, "grad_norm": 1.7956284284591675, "learning_rate": 0.0002, "loss": 1.6585, "step": 113470 }, { "epoch": 0.46, "grad_norm": 3.1347885131835938, "learning_rate": 0.0002, "loss": 1.7044, "step": 113480 }, { "epoch": 0.46, "grad_norm": 2.067682981491089, "learning_rate": 0.0002, "loss": 1.879, "step": 113490 }, { "epoch": 0.46, "grad_norm": 2.11902117729187, "learning_rate": 0.0002, "loss": 1.4508, "step": 113500 }, { "epoch": 0.46, "grad_norm": 3.3996686935424805, "learning_rate": 0.0002, "loss": 1.4059, "step": 113510 }, { "epoch": 0.46, "grad_norm": 3.8123056888580322, "learning_rate": 0.0002, "loss": 1.639, "step": 113520 }, { "epoch": 0.46, "grad_norm": 3.0643608570098877, "learning_rate": 0.0002, "loss": 1.625, "step": 113530 }, { "epoch": 0.46, "grad_norm": 2.463270902633667, "learning_rate": 0.0002, "loss": 1.7586, "step": 113540 }, { "epoch": 0.46, "grad_norm": 2.7235677242279053, "learning_rate": 0.0002, "loss": 1.7169, "step": 113550 }, { "epoch": 0.46, "grad_norm": 3.0153703689575195, "learning_rate": 0.0002, "loss": 1.8214, "step": 113560 }, { "epoch": 0.46, "grad_norm": 2.862393379211426, "learning_rate": 0.0002, "loss": 1.9798, "step": 113570 }, { "epoch": 0.46, "grad_norm": 4.554527282714844, "learning_rate": 0.0002, "loss": 1.8029, "step": 113580 }, { "epoch": 0.46, "grad_norm": 3.889270544052124, "learning_rate": 0.0002, "loss": 1.7706, "step": 113590 }, { "epoch": 0.46, "grad_norm": 2.4647819995880127, "learning_rate": 0.0002, "loss": 1.4896, "step": 113600 }, { "epoch": 0.46, "grad_norm": 3.6571433544158936, "learning_rate": 0.0002, "loss": 1.907, "step": 113610 }, { "epoch": 0.46, "grad_norm": 3.4873337745666504, "learning_rate": 0.0002, "loss": 1.4427, "step": 113620 }, { "epoch": 0.46, "grad_norm": 2.326772451400757, "learning_rate": 0.0002, "loss": 1.7425, "step": 113630 }, { "epoch": 0.46, "grad_norm": 3.5502703189849854, "learning_rate": 0.0002, "loss": 1.4244, "step": 113640 }, { "epoch": 0.46, "grad_norm": 3.70206618309021, "learning_rate": 0.0002, "loss": 1.4779, "step": 113650 }, { "epoch": 0.46, "grad_norm": 3.435748338699341, "learning_rate": 0.0002, "loss": 1.595, "step": 113660 }, { "epoch": 0.46, "grad_norm": 2.983980655670166, "learning_rate": 0.0002, "loss": 1.687, "step": 113670 }, { "epoch": 0.46, "grad_norm": 3.56598162651062, "learning_rate": 0.0002, "loss": 1.5427, "step": 113680 }, { "epoch": 0.46, "grad_norm": 3.860102891921997, "learning_rate": 0.0002, "loss": 1.4581, "step": 113690 }, { "epoch": 0.46, "grad_norm": 4.7472310066223145, "learning_rate": 0.0002, "loss": 1.4938, "step": 113700 }, { "epoch": 0.46, "grad_norm": 1.6396188735961914, "learning_rate": 0.0002, "loss": 1.2708, "step": 113710 }, { "epoch": 0.46, "grad_norm": 4.986116886138916, "learning_rate": 0.0002, "loss": 1.6524, "step": 113720 }, { "epoch": 0.46, "grad_norm": 3.2169177532196045, "learning_rate": 0.0002, "loss": 1.2987, "step": 113730 }, { "epoch": 0.46, "grad_norm": 1.9142390489578247, "learning_rate": 0.0002, "loss": 1.7067, "step": 113740 }, { "epoch": 0.46, "grad_norm": 2.8474884033203125, "learning_rate": 0.0002, "loss": 1.5572, "step": 113750 }, { "epoch": 0.46, "grad_norm": 2.3420519828796387, "learning_rate": 0.0002, "loss": 1.6358, "step": 113760 }, { "epoch": 0.46, "grad_norm": 2.8136634826660156, "learning_rate": 0.0002, "loss": 1.4718, "step": 113770 }, { "epoch": 0.46, "grad_norm": 2.9306135177612305, "learning_rate": 0.0002, "loss": 1.5157, "step": 113780 }, { "epoch": 0.46, "grad_norm": 3.316065549850464, "learning_rate": 0.0002, "loss": 1.4717, "step": 113790 }, { "epoch": 0.46, "grad_norm": 2.2672014236450195, "learning_rate": 0.0002, "loss": 1.467, "step": 113800 }, { "epoch": 0.46, "grad_norm": 3.5034985542297363, "learning_rate": 0.0002, "loss": 1.6555, "step": 113810 }, { "epoch": 0.46, "grad_norm": 2.0646660327911377, "learning_rate": 0.0002, "loss": 1.8224, "step": 113820 }, { "epoch": 0.46, "grad_norm": 4.23916482925415, "learning_rate": 0.0002, "loss": 1.5523, "step": 113830 }, { "epoch": 0.46, "grad_norm": 2.6806528568267822, "learning_rate": 0.0002, "loss": 1.533, "step": 113840 }, { "epoch": 0.46, "grad_norm": 4.031210422515869, "learning_rate": 0.0002, "loss": 1.4867, "step": 113850 }, { "epoch": 0.46, "grad_norm": 2.3625171184539795, "learning_rate": 0.0002, "loss": 1.8407, "step": 113860 }, { "epoch": 0.46, "grad_norm": 2.7672648429870605, "learning_rate": 0.0002, "loss": 1.8113, "step": 113870 }, { "epoch": 0.46, "grad_norm": 10.417435646057129, "learning_rate": 0.0002, "loss": 1.6564, "step": 113880 }, { "epoch": 0.46, "grad_norm": 2.600672483444214, "learning_rate": 0.0002, "loss": 1.4087, "step": 113890 }, { "epoch": 0.46, "grad_norm": 3.306469202041626, "learning_rate": 0.0002, "loss": 1.4616, "step": 113900 }, { "epoch": 0.46, "grad_norm": 2.3637733459472656, "learning_rate": 0.0002, "loss": 1.6486, "step": 113910 }, { "epoch": 0.46, "grad_norm": 3.3479087352752686, "learning_rate": 0.0002, "loss": 1.5936, "step": 113920 }, { "epoch": 0.46, "grad_norm": 3.055769205093384, "learning_rate": 0.0002, "loss": 1.427, "step": 113930 }, { "epoch": 0.46, "grad_norm": 2.929730176925659, "learning_rate": 0.0002, "loss": 1.65, "step": 113940 }, { "epoch": 0.46, "grad_norm": 2.372840404510498, "learning_rate": 0.0002, "loss": 1.3019, "step": 113950 }, { "epoch": 0.46, "grad_norm": 4.804747104644775, "learning_rate": 0.0002, "loss": 1.4779, "step": 113960 }, { "epoch": 0.46, "grad_norm": 2.6514933109283447, "learning_rate": 0.0002, "loss": 1.7597, "step": 113970 }, { "epoch": 0.46, "grad_norm": 2.7486536502838135, "learning_rate": 0.0002, "loss": 1.5354, "step": 113980 }, { "epoch": 0.46, "grad_norm": 4.125380516052246, "learning_rate": 0.0002, "loss": 1.4854, "step": 113990 }, { "epoch": 0.46, "grad_norm": 4.175863742828369, "learning_rate": 0.0002, "loss": 1.6441, "step": 114000 }, { "epoch": 0.46, "grad_norm": 3.9797024726867676, "learning_rate": 0.0002, "loss": 1.4112, "step": 114010 }, { "epoch": 0.46, "grad_norm": 3.184730291366577, "learning_rate": 0.0002, "loss": 1.4514, "step": 114020 }, { "epoch": 0.46, "grad_norm": 3.117617130279541, "learning_rate": 0.0002, "loss": 1.6018, "step": 114030 }, { "epoch": 0.46, "grad_norm": 1.5932273864746094, "learning_rate": 0.0002, "loss": 1.4502, "step": 114040 }, { "epoch": 0.46, "grad_norm": 3.039105176925659, "learning_rate": 0.0002, "loss": 1.5074, "step": 114050 }, { "epoch": 0.46, "grad_norm": 2.456817626953125, "learning_rate": 0.0002, "loss": 1.5405, "step": 114060 }, { "epoch": 0.46, "grad_norm": 2.855529308319092, "learning_rate": 0.0002, "loss": 1.6499, "step": 114070 }, { "epoch": 0.46, "grad_norm": 5.678494453430176, "learning_rate": 0.0002, "loss": 1.4844, "step": 114080 }, { "epoch": 0.46, "grad_norm": 5.087318420410156, "learning_rate": 0.0002, "loss": 1.4898, "step": 114090 }, { "epoch": 0.46, "grad_norm": 4.914507865905762, "learning_rate": 0.0002, "loss": 1.5081, "step": 114100 }, { "epoch": 0.46, "grad_norm": 2.428802251815796, "learning_rate": 0.0002, "loss": 1.5649, "step": 114110 }, { "epoch": 0.46, "grad_norm": 3.8836214542388916, "learning_rate": 0.0002, "loss": 1.4755, "step": 114120 }, { "epoch": 0.46, "grad_norm": 2.2213518619537354, "learning_rate": 0.0002, "loss": 1.346, "step": 114130 }, { "epoch": 0.46, "grad_norm": 4.292737007141113, "learning_rate": 0.0002, "loss": 1.3984, "step": 114140 }, { "epoch": 0.46, "grad_norm": 3.32677960395813, "learning_rate": 0.0002, "loss": 1.7144, "step": 114150 }, { "epoch": 0.46, "grad_norm": 2.3498058319091797, "learning_rate": 0.0002, "loss": 1.7231, "step": 114160 }, { "epoch": 0.46, "grad_norm": 3.4537384510040283, "learning_rate": 0.0002, "loss": 1.7626, "step": 114170 }, { "epoch": 0.46, "grad_norm": 3.0158891677856445, "learning_rate": 0.0002, "loss": 1.3409, "step": 114180 }, { "epoch": 0.46, "grad_norm": 2.8104279041290283, "learning_rate": 0.0002, "loss": 1.6814, "step": 114190 }, { "epoch": 0.46, "grad_norm": 4.8956708908081055, "learning_rate": 0.0002, "loss": 1.6055, "step": 114200 }, { "epoch": 0.46, "grad_norm": 4.63470458984375, "learning_rate": 0.0002, "loss": 1.4629, "step": 114210 }, { "epoch": 0.46, "grad_norm": 2.880999803543091, "learning_rate": 0.0002, "loss": 1.7755, "step": 114220 }, { "epoch": 0.47, "grad_norm": 4.007060527801514, "learning_rate": 0.0002, "loss": 1.6635, "step": 114230 }, { "epoch": 0.47, "grad_norm": 2.689194679260254, "learning_rate": 0.0002, "loss": 1.5112, "step": 114240 }, { "epoch": 0.47, "grad_norm": 6.587192535400391, "learning_rate": 0.0002, "loss": 1.5378, "step": 114250 }, { "epoch": 0.47, "grad_norm": 7.143150329589844, "learning_rate": 0.0002, "loss": 1.7985, "step": 114260 }, { "epoch": 0.47, "grad_norm": 4.354735374450684, "learning_rate": 0.0002, "loss": 1.6502, "step": 114270 }, { "epoch": 0.47, "grad_norm": 1.8839340209960938, "learning_rate": 0.0002, "loss": 1.5295, "step": 114280 }, { "epoch": 0.47, "grad_norm": 2.721686363220215, "learning_rate": 0.0002, "loss": 1.4323, "step": 114290 }, { "epoch": 0.47, "grad_norm": 3.492727279663086, "learning_rate": 0.0002, "loss": 1.5693, "step": 114300 }, { "epoch": 0.47, "grad_norm": 3.005798578262329, "learning_rate": 0.0002, "loss": 1.5871, "step": 114310 }, { "epoch": 0.47, "grad_norm": 1.6349283456802368, "learning_rate": 0.0002, "loss": 1.655, "step": 114320 }, { "epoch": 0.47, "grad_norm": 6.226407051086426, "learning_rate": 0.0002, "loss": 1.4913, "step": 114330 }, { "epoch": 0.47, "grad_norm": 2.423556327819824, "learning_rate": 0.0002, "loss": 1.5024, "step": 114340 }, { "epoch": 0.47, "grad_norm": 1.3732260465621948, "learning_rate": 0.0002, "loss": 1.5403, "step": 114350 }, { "epoch": 0.47, "grad_norm": 3.769944429397583, "learning_rate": 0.0002, "loss": 1.3368, "step": 114360 }, { "epoch": 0.47, "grad_norm": 1.4145233631134033, "learning_rate": 0.0002, "loss": 1.6261, "step": 114370 }, { "epoch": 0.47, "grad_norm": 2.553208112716675, "learning_rate": 0.0002, "loss": 1.7307, "step": 114380 }, { "epoch": 0.47, "grad_norm": 2.3380792140960693, "learning_rate": 0.0002, "loss": 1.5229, "step": 114390 }, { "epoch": 0.47, "grad_norm": 2.24971866607666, "learning_rate": 0.0002, "loss": 1.4681, "step": 114400 }, { "epoch": 0.47, "grad_norm": 2.8350114822387695, "learning_rate": 0.0002, "loss": 1.7167, "step": 114410 }, { "epoch": 0.47, "grad_norm": 3.0202934741973877, "learning_rate": 0.0002, "loss": 1.679, "step": 114420 }, { "epoch": 0.47, "grad_norm": 5.838747024536133, "learning_rate": 0.0002, "loss": 1.8052, "step": 114430 }, { "epoch": 0.47, "grad_norm": 2.943549156188965, "learning_rate": 0.0002, "loss": 1.6794, "step": 114440 }, { "epoch": 0.47, "grad_norm": 4.565519332885742, "learning_rate": 0.0002, "loss": 1.6553, "step": 114450 }, { "epoch": 0.47, "grad_norm": 2.459090232849121, "learning_rate": 0.0002, "loss": 1.7563, "step": 114460 }, { "epoch": 0.47, "grad_norm": 2.9027349948883057, "learning_rate": 0.0002, "loss": 1.6636, "step": 114470 }, { "epoch": 0.47, "grad_norm": 9.577798843383789, "learning_rate": 0.0002, "loss": 1.7553, "step": 114480 }, { "epoch": 0.47, "grad_norm": 2.737409830093384, "learning_rate": 0.0002, "loss": 1.6054, "step": 114490 }, { "epoch": 0.47, "grad_norm": 2.019680976867676, "learning_rate": 0.0002, "loss": 1.5252, "step": 114500 }, { "epoch": 0.47, "grad_norm": 1.8253923654556274, "learning_rate": 0.0002, "loss": 1.604, "step": 114510 }, { "epoch": 0.47, "grad_norm": 3.027494430541992, "learning_rate": 0.0002, "loss": 1.4837, "step": 114520 }, { "epoch": 0.47, "grad_norm": 4.283937931060791, "learning_rate": 0.0002, "loss": 1.7743, "step": 114530 }, { "epoch": 0.47, "grad_norm": 3.0825161933898926, "learning_rate": 0.0002, "loss": 1.75, "step": 114540 }, { "epoch": 0.47, "grad_norm": 4.858303070068359, "learning_rate": 0.0002, "loss": 1.612, "step": 114550 }, { "epoch": 0.47, "grad_norm": 2.101262331008911, "learning_rate": 0.0002, "loss": 1.5267, "step": 114560 }, { "epoch": 0.47, "grad_norm": 1.3645271062850952, "learning_rate": 0.0002, "loss": 1.3503, "step": 114570 }, { "epoch": 0.47, "grad_norm": 3.8186264038085938, "learning_rate": 0.0002, "loss": 1.6509, "step": 114580 }, { "epoch": 0.47, "grad_norm": 2.345827579498291, "learning_rate": 0.0002, "loss": 1.3427, "step": 114590 }, { "epoch": 0.47, "grad_norm": 3.09334397315979, "learning_rate": 0.0002, "loss": 1.6599, "step": 114600 }, { "epoch": 0.47, "grad_norm": 1.8950648307800293, "learning_rate": 0.0002, "loss": 1.3773, "step": 114610 }, { "epoch": 0.47, "grad_norm": 4.20930290222168, "learning_rate": 0.0002, "loss": 1.58, "step": 114620 }, { "epoch": 0.47, "grad_norm": 4.931499481201172, "learning_rate": 0.0002, "loss": 1.6583, "step": 114630 }, { "epoch": 0.47, "grad_norm": 2.734375238418579, "learning_rate": 0.0002, "loss": 1.7204, "step": 114640 }, { "epoch": 0.47, "grad_norm": 3.1549746990203857, "learning_rate": 0.0002, "loss": 1.7694, "step": 114650 }, { "epoch": 0.47, "grad_norm": 4.544471263885498, "learning_rate": 0.0002, "loss": 1.2633, "step": 114660 }, { "epoch": 0.47, "grad_norm": 4.979643821716309, "learning_rate": 0.0002, "loss": 1.6152, "step": 114670 }, { "epoch": 0.47, "grad_norm": 1.8745648860931396, "learning_rate": 0.0002, "loss": 1.3119, "step": 114680 }, { "epoch": 0.47, "grad_norm": 2.744213819503784, "learning_rate": 0.0002, "loss": 1.6496, "step": 114690 }, { "epoch": 0.47, "grad_norm": 2.4342708587646484, "learning_rate": 0.0002, "loss": 1.527, "step": 114700 }, { "epoch": 0.47, "grad_norm": 2.8680810928344727, "learning_rate": 0.0002, "loss": 1.4725, "step": 114710 }, { "epoch": 0.47, "grad_norm": 1.9740314483642578, "learning_rate": 0.0002, "loss": 1.6332, "step": 114720 }, { "epoch": 0.47, "grad_norm": 3.7925989627838135, "learning_rate": 0.0002, "loss": 1.572, "step": 114730 }, { "epoch": 0.47, "grad_norm": 2.7915120124816895, "learning_rate": 0.0002, "loss": 1.5359, "step": 114740 }, { "epoch": 0.47, "grad_norm": 2.352694511413574, "learning_rate": 0.0002, "loss": 1.5101, "step": 114750 }, { "epoch": 0.47, "grad_norm": 4.650732040405273, "learning_rate": 0.0002, "loss": 1.5826, "step": 114760 }, { "epoch": 0.47, "grad_norm": 3.1089866161346436, "learning_rate": 0.0002, "loss": 1.5764, "step": 114770 }, { "epoch": 0.47, "grad_norm": 3.570929527282715, "learning_rate": 0.0002, "loss": 1.5374, "step": 114780 }, { "epoch": 0.47, "grad_norm": 3.3677830696105957, "learning_rate": 0.0002, "loss": 1.5988, "step": 114790 }, { "epoch": 0.47, "grad_norm": 4.3784403800964355, "learning_rate": 0.0002, "loss": 1.6295, "step": 114800 }, { "epoch": 0.47, "grad_norm": 6.382531642913818, "learning_rate": 0.0002, "loss": 1.5443, "step": 114810 }, { "epoch": 0.47, "grad_norm": 2.2042226791381836, "learning_rate": 0.0002, "loss": 1.6598, "step": 114820 }, { "epoch": 0.47, "grad_norm": 2.3817105293273926, "learning_rate": 0.0002, "loss": 1.5691, "step": 114830 }, { "epoch": 0.47, "grad_norm": 3.0748932361602783, "learning_rate": 0.0002, "loss": 1.7632, "step": 114840 }, { "epoch": 0.47, "grad_norm": 2.6049795150756836, "learning_rate": 0.0002, "loss": 1.4544, "step": 114850 }, { "epoch": 0.47, "grad_norm": 3.4134881496429443, "learning_rate": 0.0002, "loss": 1.7331, "step": 114860 }, { "epoch": 0.47, "grad_norm": 4.059773921966553, "learning_rate": 0.0002, "loss": 1.6997, "step": 114870 }, { "epoch": 0.47, "grad_norm": 4.137296676635742, "learning_rate": 0.0002, "loss": 1.631, "step": 114880 }, { "epoch": 0.47, "grad_norm": 4.749115467071533, "learning_rate": 0.0002, "loss": 1.5432, "step": 114890 }, { "epoch": 0.47, "grad_norm": 4.372751712799072, "learning_rate": 0.0002, "loss": 1.4366, "step": 114900 }, { "epoch": 0.47, "grad_norm": 4.507827281951904, "learning_rate": 0.0002, "loss": 1.7121, "step": 114910 }, { "epoch": 0.47, "grad_norm": 3.274923086166382, "learning_rate": 0.0002, "loss": 1.5137, "step": 114920 }, { "epoch": 0.47, "grad_norm": 4.783367156982422, "learning_rate": 0.0002, "loss": 1.3784, "step": 114930 }, { "epoch": 0.47, "grad_norm": 5.228901386260986, "learning_rate": 0.0002, "loss": 1.7119, "step": 114940 }, { "epoch": 0.47, "grad_norm": 4.676131725311279, "learning_rate": 0.0002, "loss": 1.5383, "step": 114950 }, { "epoch": 0.47, "grad_norm": 2.68990421295166, "learning_rate": 0.0002, "loss": 1.6703, "step": 114960 }, { "epoch": 0.47, "grad_norm": 3.4972522258758545, "learning_rate": 0.0002, "loss": 1.4786, "step": 114970 }, { "epoch": 0.47, "grad_norm": 3.7408065795898438, "learning_rate": 0.0002, "loss": 1.7422, "step": 114980 }, { "epoch": 0.47, "grad_norm": 3.356881618499756, "learning_rate": 0.0002, "loss": 1.456, "step": 114990 }, { "epoch": 0.47, "grad_norm": 3.732815742492676, "learning_rate": 0.0002, "loss": 1.5839, "step": 115000 }, { "epoch": 0.47, "grad_norm": 4.283742904663086, "learning_rate": 0.0002, "loss": 1.5078, "step": 115010 }, { "epoch": 0.47, "grad_norm": 4.400417327880859, "learning_rate": 0.0002, "loss": 1.6897, "step": 115020 }, { "epoch": 0.47, "grad_norm": 2.155660390853882, "learning_rate": 0.0002, "loss": 1.4877, "step": 115030 }, { "epoch": 0.47, "grad_norm": 3.698516607284546, "learning_rate": 0.0002, "loss": 1.628, "step": 115040 }, { "epoch": 0.47, "grad_norm": 2.6682121753692627, "learning_rate": 0.0002, "loss": 1.485, "step": 115050 }, { "epoch": 0.47, "grad_norm": 2.3749802112579346, "learning_rate": 0.0002, "loss": 1.5332, "step": 115060 }, { "epoch": 0.47, "grad_norm": 1.4530069828033447, "learning_rate": 0.0002, "loss": 1.5213, "step": 115070 }, { "epoch": 0.47, "grad_norm": 3.2166056632995605, "learning_rate": 0.0002, "loss": 1.674, "step": 115080 }, { "epoch": 0.47, "grad_norm": 3.231471538543701, "learning_rate": 0.0002, "loss": 1.6855, "step": 115090 }, { "epoch": 0.47, "grad_norm": 3.6615352630615234, "learning_rate": 0.0002, "loss": 1.6786, "step": 115100 }, { "epoch": 0.47, "grad_norm": 2.4585721492767334, "learning_rate": 0.0002, "loss": 1.2622, "step": 115110 }, { "epoch": 0.47, "grad_norm": 4.1636061668396, "learning_rate": 0.0002, "loss": 1.5575, "step": 115120 }, { "epoch": 0.47, "grad_norm": 3.3869192600250244, "learning_rate": 0.0002, "loss": 1.5377, "step": 115130 }, { "epoch": 0.47, "grad_norm": 2.41921329498291, "learning_rate": 0.0002, "loss": 1.5285, "step": 115140 }, { "epoch": 0.47, "grad_norm": 3.8423354625701904, "learning_rate": 0.0002, "loss": 1.6598, "step": 115150 }, { "epoch": 0.47, "grad_norm": 2.1254560947418213, "learning_rate": 0.0002, "loss": 1.5185, "step": 115160 }, { "epoch": 0.47, "grad_norm": 2.838029146194458, "learning_rate": 0.0002, "loss": 1.7934, "step": 115170 }, { "epoch": 0.47, "grad_norm": 2.989879608154297, "learning_rate": 0.0002, "loss": 1.3373, "step": 115180 }, { "epoch": 0.47, "grad_norm": 2.954988479614258, "learning_rate": 0.0002, "loss": 1.7881, "step": 115190 }, { "epoch": 0.47, "grad_norm": 2.598830223083496, "learning_rate": 0.0002, "loss": 1.6885, "step": 115200 }, { "epoch": 0.47, "grad_norm": 3.1407694816589355, "learning_rate": 0.0002, "loss": 1.5782, "step": 115210 }, { "epoch": 0.47, "grad_norm": 3.757826089859009, "learning_rate": 0.0002, "loss": 1.7261, "step": 115220 }, { "epoch": 0.47, "grad_norm": 2.1110599040985107, "learning_rate": 0.0002, "loss": 1.6802, "step": 115230 }, { "epoch": 0.47, "grad_norm": 2.588001012802124, "learning_rate": 0.0002, "loss": 1.3707, "step": 115240 }, { "epoch": 0.47, "grad_norm": 2.0197744369506836, "learning_rate": 0.0002, "loss": 1.7582, "step": 115250 }, { "epoch": 0.47, "grad_norm": 2.419032096862793, "learning_rate": 0.0002, "loss": 1.7623, "step": 115260 }, { "epoch": 0.47, "grad_norm": 4.647573471069336, "learning_rate": 0.0002, "loss": 1.4186, "step": 115270 }, { "epoch": 0.47, "grad_norm": 2.9474825859069824, "learning_rate": 0.0002, "loss": 1.3492, "step": 115280 }, { "epoch": 0.47, "grad_norm": 2.169013023376465, "learning_rate": 0.0002, "loss": 1.5746, "step": 115290 }, { "epoch": 0.47, "grad_norm": 2.925441026687622, "learning_rate": 0.0002, "loss": 1.6052, "step": 115300 }, { "epoch": 0.47, "grad_norm": 2.00148606300354, "learning_rate": 0.0002, "loss": 1.7087, "step": 115310 }, { "epoch": 0.47, "grad_norm": 2.5172600746154785, "learning_rate": 0.0002, "loss": 1.3997, "step": 115320 }, { "epoch": 0.47, "grad_norm": 2.7331950664520264, "learning_rate": 0.0002, "loss": 1.3373, "step": 115330 }, { "epoch": 0.47, "grad_norm": 2.9812371730804443, "learning_rate": 0.0002, "loss": 1.5493, "step": 115340 }, { "epoch": 0.47, "grad_norm": 3.421963930130005, "learning_rate": 0.0002, "loss": 1.7046, "step": 115350 }, { "epoch": 0.47, "grad_norm": 6.098638534545898, "learning_rate": 0.0002, "loss": 1.5376, "step": 115360 }, { "epoch": 0.47, "grad_norm": 4.0070037841796875, "learning_rate": 0.0002, "loss": 1.3881, "step": 115370 }, { "epoch": 0.47, "grad_norm": 3.2037363052368164, "learning_rate": 0.0002, "loss": 1.7678, "step": 115380 }, { "epoch": 0.47, "grad_norm": 2.336660385131836, "learning_rate": 0.0002, "loss": 1.7693, "step": 115390 }, { "epoch": 0.47, "grad_norm": 2.7944726943969727, "learning_rate": 0.0002, "loss": 1.3465, "step": 115400 }, { "epoch": 0.47, "grad_norm": 2.9570422172546387, "learning_rate": 0.0002, "loss": 1.6174, "step": 115410 }, { "epoch": 0.47, "grad_norm": 3.726806402206421, "learning_rate": 0.0002, "loss": 1.646, "step": 115420 }, { "epoch": 0.47, "grad_norm": 3.7368102073669434, "learning_rate": 0.0002, "loss": 1.615, "step": 115430 }, { "epoch": 0.47, "grad_norm": 3.131866455078125, "learning_rate": 0.0002, "loss": 1.6604, "step": 115440 }, { "epoch": 0.47, "grad_norm": 3.9721879959106445, "learning_rate": 0.0002, "loss": 1.89, "step": 115450 }, { "epoch": 0.47, "grad_norm": 4.3887434005737305, "learning_rate": 0.0002, "loss": 1.6124, "step": 115460 }, { "epoch": 0.47, "grad_norm": 2.1784920692443848, "learning_rate": 0.0002, "loss": 2.0201, "step": 115470 }, { "epoch": 0.47, "grad_norm": 2.455606460571289, "learning_rate": 0.0002, "loss": 1.5268, "step": 115480 }, { "epoch": 0.47, "grad_norm": 3.5852062702178955, "learning_rate": 0.0002, "loss": 1.5666, "step": 115490 }, { "epoch": 0.47, "grad_norm": 3.345123052597046, "learning_rate": 0.0002, "loss": 1.6604, "step": 115500 }, { "epoch": 0.47, "grad_norm": 2.7867770195007324, "learning_rate": 0.0002, "loss": 1.4267, "step": 115510 }, { "epoch": 0.47, "grad_norm": 2.3327016830444336, "learning_rate": 0.0002, "loss": 1.7398, "step": 115520 }, { "epoch": 0.47, "grad_norm": 3.0613224506378174, "learning_rate": 0.0002, "loss": 1.9347, "step": 115530 }, { "epoch": 0.47, "grad_norm": 2.7647061347961426, "learning_rate": 0.0002, "loss": 1.3725, "step": 115540 }, { "epoch": 0.47, "grad_norm": 2.7265636920928955, "learning_rate": 0.0002, "loss": 1.3091, "step": 115550 }, { "epoch": 0.47, "grad_norm": 5.264529705047607, "learning_rate": 0.0002, "loss": 1.5012, "step": 115560 }, { "epoch": 0.47, "grad_norm": 3.2617461681365967, "learning_rate": 0.0002, "loss": 1.6048, "step": 115570 }, { "epoch": 0.47, "grad_norm": 2.3449108600616455, "learning_rate": 0.0002, "loss": 1.9388, "step": 115580 }, { "epoch": 0.47, "grad_norm": 3.1444597244262695, "learning_rate": 0.0002, "loss": 1.5915, "step": 115590 }, { "epoch": 0.47, "grad_norm": 3.0704352855682373, "learning_rate": 0.0002, "loss": 1.6846, "step": 115600 }, { "epoch": 0.47, "grad_norm": 2.8878750801086426, "learning_rate": 0.0002, "loss": 1.5754, "step": 115610 }, { "epoch": 0.47, "grad_norm": 2.693885326385498, "learning_rate": 0.0002, "loss": 1.8071, "step": 115620 }, { "epoch": 0.47, "grad_norm": 2.5961315631866455, "learning_rate": 0.0002, "loss": 1.4185, "step": 115630 }, { "epoch": 0.47, "grad_norm": 4.408453464508057, "learning_rate": 0.0002, "loss": 1.7171, "step": 115640 }, { "epoch": 0.47, "grad_norm": 2.935579299926758, "learning_rate": 0.0002, "loss": 1.6269, "step": 115650 }, { "epoch": 0.47, "grad_norm": 3.190086603164673, "learning_rate": 0.0002, "loss": 1.5419, "step": 115660 }, { "epoch": 0.47, "grad_norm": 1.7704787254333496, "learning_rate": 0.0002, "loss": 1.5533, "step": 115670 }, { "epoch": 0.47, "grad_norm": 2.825040340423584, "learning_rate": 0.0002, "loss": 1.4468, "step": 115680 }, { "epoch": 0.47, "grad_norm": 3.003570318222046, "learning_rate": 0.0002, "loss": 1.5958, "step": 115690 }, { "epoch": 0.47, "grad_norm": 4.803170680999756, "learning_rate": 0.0002, "loss": 1.567, "step": 115700 }, { "epoch": 0.47, "grad_norm": 2.1305434703826904, "learning_rate": 0.0002, "loss": 1.8439, "step": 115710 }, { "epoch": 0.47, "grad_norm": 1.8049578666687012, "learning_rate": 0.0002, "loss": 1.553, "step": 115720 }, { "epoch": 0.47, "grad_norm": 4.378310680389404, "learning_rate": 0.0002, "loss": 1.8521, "step": 115730 }, { "epoch": 0.47, "grad_norm": 3.149714469909668, "learning_rate": 0.0002, "loss": 1.4074, "step": 115740 }, { "epoch": 0.47, "grad_norm": 2.2126011848449707, "learning_rate": 0.0002, "loss": 1.4125, "step": 115750 }, { "epoch": 0.47, "grad_norm": 5.539302825927734, "learning_rate": 0.0002, "loss": 1.6251, "step": 115760 }, { "epoch": 0.47, "grad_norm": 3.2441511154174805, "learning_rate": 0.0002, "loss": 1.8152, "step": 115770 }, { "epoch": 0.47, "grad_norm": 2.2323532104492188, "learning_rate": 0.0002, "loss": 1.6581, "step": 115780 }, { "epoch": 0.47, "grad_norm": 3.2865545749664307, "learning_rate": 0.0002, "loss": 1.2816, "step": 115790 }, { "epoch": 0.47, "grad_norm": 2.364279270172119, "learning_rate": 0.0002, "loss": 1.5245, "step": 115800 }, { "epoch": 0.47, "grad_norm": 5.012217044830322, "learning_rate": 0.0002, "loss": 1.6872, "step": 115810 }, { "epoch": 0.47, "grad_norm": 2.4789252281188965, "learning_rate": 0.0002, "loss": 1.7305, "step": 115820 }, { "epoch": 0.47, "grad_norm": 2.7024033069610596, "learning_rate": 0.0002, "loss": 1.4654, "step": 115830 }, { "epoch": 0.47, "grad_norm": 2.9077184200286865, "learning_rate": 0.0002, "loss": 1.4462, "step": 115840 }, { "epoch": 0.47, "grad_norm": 4.508922576904297, "learning_rate": 0.0002, "loss": 1.5592, "step": 115850 }, { "epoch": 0.47, "grad_norm": 2.947850227355957, "learning_rate": 0.0002, "loss": 1.6059, "step": 115860 }, { "epoch": 0.47, "grad_norm": 2.1181297302246094, "learning_rate": 0.0002, "loss": 1.4704, "step": 115870 }, { "epoch": 0.47, "grad_norm": 2.967989444732666, "learning_rate": 0.0002, "loss": 1.6312, "step": 115880 }, { "epoch": 0.47, "grad_norm": 2.6509711742401123, "learning_rate": 0.0002, "loss": 1.7737, "step": 115890 }, { "epoch": 0.47, "grad_norm": 4.454118251800537, "learning_rate": 0.0002, "loss": 1.5762, "step": 115900 }, { "epoch": 0.47, "grad_norm": 4.106707572937012, "learning_rate": 0.0002, "loss": 1.5108, "step": 115910 }, { "epoch": 0.47, "grad_norm": 3.1662955284118652, "learning_rate": 0.0002, "loss": 1.4903, "step": 115920 }, { "epoch": 0.47, "grad_norm": 3.2620880603790283, "learning_rate": 0.0002, "loss": 1.5457, "step": 115930 }, { "epoch": 0.47, "grad_norm": 2.65982723236084, "learning_rate": 0.0002, "loss": 1.6999, "step": 115940 }, { "epoch": 0.47, "grad_norm": 2.0448436737060547, "learning_rate": 0.0002, "loss": 1.6795, "step": 115950 }, { "epoch": 0.47, "grad_norm": 3.7816920280456543, "learning_rate": 0.0002, "loss": 1.59, "step": 115960 }, { "epoch": 0.47, "grad_norm": 2.5263028144836426, "learning_rate": 0.0002, "loss": 1.3896, "step": 115970 }, { "epoch": 0.47, "grad_norm": 3.0997965335845947, "learning_rate": 0.0002, "loss": 1.6862, "step": 115980 }, { "epoch": 0.47, "grad_norm": 2.3878631591796875, "learning_rate": 0.0002, "loss": 1.5109, "step": 115990 }, { "epoch": 0.47, "grad_norm": 4.077291965484619, "learning_rate": 0.0002, "loss": 1.5296, "step": 116000 }, { "epoch": 0.47, "grad_norm": 3.003851890563965, "learning_rate": 0.0002, "loss": 1.4989, "step": 116010 }, { "epoch": 0.47, "grad_norm": 4.444873809814453, "learning_rate": 0.0002, "loss": 1.9003, "step": 116020 }, { "epoch": 0.47, "grad_norm": 1.7429014444351196, "learning_rate": 0.0002, "loss": 1.5843, "step": 116030 }, { "epoch": 0.47, "grad_norm": 18.708133697509766, "learning_rate": 0.0002, "loss": 1.2901, "step": 116040 }, { "epoch": 0.47, "grad_norm": 2.9497716426849365, "learning_rate": 0.0002, "loss": 1.3641, "step": 116050 }, { "epoch": 0.47, "grad_norm": 2.8613994121551514, "learning_rate": 0.0002, "loss": 1.689, "step": 116060 }, { "epoch": 0.47, "grad_norm": 3.0984344482421875, "learning_rate": 0.0002, "loss": 1.6591, "step": 116070 }, { "epoch": 0.47, "grad_norm": 4.22422456741333, "learning_rate": 0.0002, "loss": 1.4821, "step": 116080 }, { "epoch": 0.47, "grad_norm": 3.6524088382720947, "learning_rate": 0.0002, "loss": 1.6053, "step": 116090 }, { "epoch": 0.47, "grad_norm": 4.463890075683594, "learning_rate": 0.0002, "loss": 1.7809, "step": 116100 }, { "epoch": 0.47, "grad_norm": 3.0318496227264404, "learning_rate": 0.0002, "loss": 1.3293, "step": 116110 }, { "epoch": 0.47, "grad_norm": 3.2018141746520996, "learning_rate": 0.0002, "loss": 1.3597, "step": 116120 }, { "epoch": 0.47, "grad_norm": 2.164762258529663, "learning_rate": 0.0002, "loss": 1.558, "step": 116130 }, { "epoch": 0.47, "grad_norm": 3.8207104206085205, "learning_rate": 0.0002, "loss": 1.4649, "step": 116140 }, { "epoch": 0.47, "grad_norm": 2.6452994346618652, "learning_rate": 0.0002, "loss": 1.6076, "step": 116150 }, { "epoch": 0.47, "grad_norm": 3.6514105796813965, "learning_rate": 0.0002, "loss": 1.4748, "step": 116160 }, { "epoch": 0.47, "grad_norm": 2.9698469638824463, "learning_rate": 0.0002, "loss": 1.5835, "step": 116170 }, { "epoch": 0.47, "grad_norm": 4.168884754180908, "learning_rate": 0.0002, "loss": 1.6367, "step": 116180 }, { "epoch": 0.47, "grad_norm": 2.9221878051757812, "learning_rate": 0.0002, "loss": 1.5851, "step": 116190 }, { "epoch": 0.47, "grad_norm": 3.4931516647338867, "learning_rate": 0.0002, "loss": 1.66, "step": 116200 }, { "epoch": 0.47, "grad_norm": 3.0152673721313477, "learning_rate": 0.0002, "loss": 1.6602, "step": 116210 }, { "epoch": 0.47, "grad_norm": 2.0598227977752686, "learning_rate": 0.0002, "loss": 1.4414, "step": 116220 }, { "epoch": 0.47, "grad_norm": 3.113267421722412, "learning_rate": 0.0002, "loss": 1.6375, "step": 116230 }, { "epoch": 0.47, "grad_norm": 2.6170482635498047, "learning_rate": 0.0002, "loss": 1.5045, "step": 116240 }, { "epoch": 0.47, "grad_norm": 2.0375685691833496, "learning_rate": 0.0002, "loss": 1.6877, "step": 116250 }, { "epoch": 0.47, "grad_norm": 2.310441017150879, "learning_rate": 0.0002, "loss": 1.6766, "step": 116260 }, { "epoch": 0.47, "grad_norm": 3.7181267738342285, "learning_rate": 0.0002, "loss": 1.6073, "step": 116270 }, { "epoch": 0.47, "grad_norm": 3.0807409286499023, "learning_rate": 0.0002, "loss": 1.796, "step": 116280 }, { "epoch": 0.47, "grad_norm": 2.5974419116973877, "learning_rate": 0.0002, "loss": 1.3417, "step": 116290 }, { "epoch": 0.47, "grad_norm": 4.459047794342041, "learning_rate": 0.0002, "loss": 1.6553, "step": 116300 }, { "epoch": 0.47, "grad_norm": 4.129210948944092, "learning_rate": 0.0002, "loss": 1.6024, "step": 116310 }, { "epoch": 0.47, "grad_norm": 2.6447677612304688, "learning_rate": 0.0002, "loss": 1.3983, "step": 116320 }, { "epoch": 0.47, "grad_norm": 2.5623843669891357, "learning_rate": 0.0002, "loss": 1.4898, "step": 116330 }, { "epoch": 0.47, "grad_norm": 4.27000617980957, "learning_rate": 0.0002, "loss": 1.6769, "step": 116340 }, { "epoch": 0.47, "grad_norm": 2.599585771560669, "learning_rate": 0.0002, "loss": 1.8038, "step": 116350 }, { "epoch": 0.47, "grad_norm": 2.8220958709716797, "learning_rate": 0.0002, "loss": 1.5167, "step": 116360 }, { "epoch": 0.47, "grad_norm": 3.318359851837158, "learning_rate": 0.0002, "loss": 1.8188, "step": 116370 }, { "epoch": 0.47, "grad_norm": 2.8261876106262207, "learning_rate": 0.0002, "loss": 1.4594, "step": 116380 }, { "epoch": 0.47, "grad_norm": 3.5147576332092285, "learning_rate": 0.0002, "loss": 1.4362, "step": 116390 }, { "epoch": 0.47, "grad_norm": 1.8381013870239258, "learning_rate": 0.0002, "loss": 1.4925, "step": 116400 }, { "epoch": 0.47, "grad_norm": 2.025912046432495, "learning_rate": 0.0002, "loss": 1.6845, "step": 116410 }, { "epoch": 0.47, "grad_norm": 1.5114226341247559, "learning_rate": 0.0002, "loss": 1.5564, "step": 116420 }, { "epoch": 0.47, "grad_norm": 6.664604187011719, "learning_rate": 0.0002, "loss": 1.2999, "step": 116430 }, { "epoch": 0.47, "grad_norm": 3.5035831928253174, "learning_rate": 0.0002, "loss": 1.6239, "step": 116440 }, { "epoch": 0.47, "grad_norm": 2.1467442512512207, "learning_rate": 0.0002, "loss": 1.6851, "step": 116450 }, { "epoch": 0.47, "grad_norm": 1.8343676328659058, "learning_rate": 0.0002, "loss": 1.3367, "step": 116460 }, { "epoch": 0.47, "grad_norm": 1.741560459136963, "learning_rate": 0.0002, "loss": 1.6248, "step": 116470 }, { "epoch": 0.47, "grad_norm": 3.2173027992248535, "learning_rate": 0.0002, "loss": 1.6097, "step": 116480 }, { "epoch": 0.47, "grad_norm": 3.0760445594787598, "learning_rate": 0.0002, "loss": 1.5987, "step": 116490 }, { "epoch": 0.47, "grad_norm": 2.388522148132324, "learning_rate": 0.0002, "loss": 1.2938, "step": 116500 }, { "epoch": 0.47, "grad_norm": 4.187451362609863, "learning_rate": 0.0002, "loss": 1.5998, "step": 116510 }, { "epoch": 0.47, "grad_norm": 2.2380568981170654, "learning_rate": 0.0002, "loss": 2.1162, "step": 116520 }, { "epoch": 0.47, "grad_norm": 3.7236759662628174, "learning_rate": 0.0002, "loss": 1.595, "step": 116530 }, { "epoch": 0.47, "grad_norm": 7.752627849578857, "learning_rate": 0.0002, "loss": 1.7492, "step": 116540 }, { "epoch": 0.47, "grad_norm": 3.578723192214966, "learning_rate": 0.0002, "loss": 1.6965, "step": 116550 }, { "epoch": 0.47, "grad_norm": 4.90241003036499, "learning_rate": 0.0002, "loss": 1.6716, "step": 116560 }, { "epoch": 0.47, "grad_norm": 1.9536610841751099, "learning_rate": 0.0002, "loss": 1.5829, "step": 116570 }, { "epoch": 0.47, "grad_norm": 2.819976806640625, "learning_rate": 0.0002, "loss": 1.6158, "step": 116580 }, { "epoch": 0.47, "grad_norm": 4.210825443267822, "learning_rate": 0.0002, "loss": 1.7252, "step": 116590 }, { "epoch": 0.47, "grad_norm": 2.963714838027954, "learning_rate": 0.0002, "loss": 1.4542, "step": 116600 }, { "epoch": 0.47, "grad_norm": 2.7746713161468506, "learning_rate": 0.0002, "loss": 1.6724, "step": 116610 }, { "epoch": 0.47, "grad_norm": 3.952650785446167, "learning_rate": 0.0002, "loss": 1.411, "step": 116620 }, { "epoch": 0.47, "grad_norm": 2.202319622039795, "learning_rate": 0.0002, "loss": 1.449, "step": 116630 }, { "epoch": 0.47, "grad_norm": 2.4552249908447266, "learning_rate": 0.0002, "loss": 1.3609, "step": 116640 }, { "epoch": 0.47, "grad_norm": 3.1479289531707764, "learning_rate": 0.0002, "loss": 1.7397, "step": 116650 }, { "epoch": 0.47, "grad_norm": 2.718430757522583, "learning_rate": 0.0002, "loss": 1.6308, "step": 116660 }, { "epoch": 0.47, "grad_norm": 4.6205668449401855, "learning_rate": 0.0002, "loss": 1.6586, "step": 116670 }, { "epoch": 0.47, "grad_norm": 3.0501487255096436, "learning_rate": 0.0002, "loss": 1.7968, "step": 116680 }, { "epoch": 0.48, "grad_norm": 3.1982576847076416, "learning_rate": 0.0002, "loss": 1.5703, "step": 116690 }, { "epoch": 0.48, "grad_norm": 3.041964530944824, "learning_rate": 0.0002, "loss": 1.7226, "step": 116700 }, { "epoch": 0.48, "grad_norm": 2.1573283672332764, "learning_rate": 0.0002, "loss": 1.6257, "step": 116710 }, { "epoch": 0.48, "grad_norm": 2.7270073890686035, "learning_rate": 0.0002, "loss": 1.6237, "step": 116720 }, { "epoch": 0.48, "grad_norm": 2.434746503829956, "learning_rate": 0.0002, "loss": 1.595, "step": 116730 }, { "epoch": 0.48, "grad_norm": 3.2463583946228027, "learning_rate": 0.0002, "loss": 1.5019, "step": 116740 }, { "epoch": 0.48, "grad_norm": 3.606416940689087, "learning_rate": 0.0002, "loss": 1.6491, "step": 116750 }, { "epoch": 0.48, "grad_norm": 2.6940248012542725, "learning_rate": 0.0002, "loss": 1.4841, "step": 116760 }, { "epoch": 0.48, "grad_norm": 3.2454824447631836, "learning_rate": 0.0002, "loss": 1.5768, "step": 116770 }, { "epoch": 0.48, "grad_norm": 3.0288355350494385, "learning_rate": 0.0002, "loss": 1.8905, "step": 116780 }, { "epoch": 0.48, "grad_norm": 3.465851306915283, "learning_rate": 0.0002, "loss": 1.8406, "step": 116790 }, { "epoch": 0.48, "grad_norm": 3.254258871078491, "learning_rate": 0.0002, "loss": 1.5304, "step": 116800 }, { "epoch": 0.48, "grad_norm": 2.895634412765503, "learning_rate": 0.0002, "loss": 1.7683, "step": 116810 }, { "epoch": 0.48, "grad_norm": 2.6286332607269287, "learning_rate": 0.0002, "loss": 1.6784, "step": 116820 }, { "epoch": 0.48, "grad_norm": 2.0475964546203613, "learning_rate": 0.0002, "loss": 1.5157, "step": 116830 }, { "epoch": 0.48, "grad_norm": 3.200263738632202, "learning_rate": 0.0002, "loss": 1.5503, "step": 116840 }, { "epoch": 0.48, "grad_norm": 3.560582160949707, "learning_rate": 0.0002, "loss": 1.7027, "step": 116850 }, { "epoch": 0.48, "grad_norm": 2.6491892337799072, "learning_rate": 0.0002, "loss": 1.6512, "step": 116860 }, { "epoch": 0.48, "grad_norm": 4.059523582458496, "learning_rate": 0.0002, "loss": 1.708, "step": 116870 }, { "epoch": 0.48, "grad_norm": 3.086789846420288, "learning_rate": 0.0002, "loss": 1.5407, "step": 116880 }, { "epoch": 0.48, "grad_norm": 2.4086685180664062, "learning_rate": 0.0002, "loss": 1.4316, "step": 116890 }, { "epoch": 0.48, "grad_norm": 2.362415075302124, "learning_rate": 0.0002, "loss": 1.7264, "step": 116900 }, { "epoch": 0.48, "grad_norm": 2.711207866668701, "learning_rate": 0.0002, "loss": 1.6791, "step": 116910 }, { "epoch": 0.48, "grad_norm": 4.877712726593018, "learning_rate": 0.0002, "loss": 1.4555, "step": 116920 }, { "epoch": 0.48, "grad_norm": 2.7307939529418945, "learning_rate": 0.0002, "loss": 1.4844, "step": 116930 }, { "epoch": 0.48, "grad_norm": 2.8506994247436523, "learning_rate": 0.0002, "loss": 1.6725, "step": 116940 }, { "epoch": 0.48, "grad_norm": 3.338042736053467, "learning_rate": 0.0002, "loss": 1.6019, "step": 116950 }, { "epoch": 0.48, "grad_norm": 3.065181016921997, "learning_rate": 0.0002, "loss": 1.3957, "step": 116960 }, { "epoch": 0.48, "grad_norm": 1.4818708896636963, "learning_rate": 0.0002, "loss": 1.5259, "step": 116970 }, { "epoch": 0.48, "grad_norm": 3.990403413772583, "learning_rate": 0.0002, "loss": 1.6514, "step": 116980 }, { "epoch": 0.48, "grad_norm": 3.669050931930542, "learning_rate": 0.0002, "loss": 1.4312, "step": 116990 }, { "epoch": 0.48, "grad_norm": 3.028165102005005, "learning_rate": 0.0002, "loss": 1.4813, "step": 117000 }, { "epoch": 0.48, "grad_norm": 3.4420931339263916, "learning_rate": 0.0002, "loss": 1.4101, "step": 117010 }, { "epoch": 0.48, "grad_norm": 3.021353006362915, "learning_rate": 0.0002, "loss": 1.6377, "step": 117020 }, { "epoch": 0.48, "grad_norm": 2.9401230812072754, "learning_rate": 0.0002, "loss": 1.4819, "step": 117030 }, { "epoch": 0.48, "grad_norm": 2.974034547805786, "learning_rate": 0.0002, "loss": 1.8131, "step": 117040 }, { "epoch": 0.48, "grad_norm": 1.8759803771972656, "learning_rate": 0.0002, "loss": 1.3545, "step": 117050 }, { "epoch": 0.48, "grad_norm": 2.203113317489624, "learning_rate": 0.0002, "loss": 1.4033, "step": 117060 }, { "epoch": 0.48, "grad_norm": 5.136491775512695, "learning_rate": 0.0002, "loss": 1.7567, "step": 117070 }, { "epoch": 0.48, "grad_norm": 3.5175364017486572, "learning_rate": 0.0002, "loss": 1.4335, "step": 117080 }, { "epoch": 0.48, "grad_norm": 2.644261598587036, "learning_rate": 0.0002, "loss": 1.4391, "step": 117090 }, { "epoch": 0.48, "grad_norm": 3.4099085330963135, "learning_rate": 0.0002, "loss": 1.7706, "step": 117100 }, { "epoch": 0.48, "grad_norm": 2.461383819580078, "learning_rate": 0.0002, "loss": 1.5183, "step": 117110 }, { "epoch": 0.48, "grad_norm": 2.2864723205566406, "learning_rate": 0.0002, "loss": 1.4967, "step": 117120 }, { "epoch": 0.48, "grad_norm": 2.676107406616211, "learning_rate": 0.0002, "loss": 1.7905, "step": 117130 }, { "epoch": 0.48, "grad_norm": 1.5787221193313599, "learning_rate": 0.0002, "loss": 1.7549, "step": 117140 }, { "epoch": 0.48, "grad_norm": 2.1238207817077637, "learning_rate": 0.0002, "loss": 1.3874, "step": 117150 }, { "epoch": 0.48, "grad_norm": 2.408923864364624, "learning_rate": 0.0002, "loss": 1.761, "step": 117160 }, { "epoch": 0.48, "grad_norm": 3.58373761177063, "learning_rate": 0.0002, "loss": 1.8013, "step": 117170 }, { "epoch": 0.48, "grad_norm": 3.144531488418579, "learning_rate": 0.0002, "loss": 1.9681, "step": 117180 }, { "epoch": 0.48, "grad_norm": 2.673088788986206, "learning_rate": 0.0002, "loss": 1.4001, "step": 117190 }, { "epoch": 0.48, "grad_norm": 2.154764413833618, "learning_rate": 0.0002, "loss": 1.3627, "step": 117200 }, { "epoch": 0.48, "grad_norm": 4.102441310882568, "learning_rate": 0.0002, "loss": 1.619, "step": 117210 }, { "epoch": 0.48, "grad_norm": 6.857385635375977, "learning_rate": 0.0002, "loss": 1.5799, "step": 117220 }, { "epoch": 0.48, "grad_norm": 2.958954334259033, "learning_rate": 0.0002, "loss": 1.7107, "step": 117230 }, { "epoch": 0.48, "grad_norm": 3.116455316543579, "learning_rate": 0.0002, "loss": 1.6522, "step": 117240 }, { "epoch": 0.48, "grad_norm": 2.715008020401001, "learning_rate": 0.0002, "loss": 1.6899, "step": 117250 }, { "epoch": 0.48, "grad_norm": 3.5345654487609863, "learning_rate": 0.0002, "loss": 1.4513, "step": 117260 }, { "epoch": 0.48, "grad_norm": 1.919898271560669, "learning_rate": 0.0002, "loss": 1.5262, "step": 117270 }, { "epoch": 0.48, "grad_norm": 7.319366455078125, "learning_rate": 0.0002, "loss": 1.5329, "step": 117280 }, { "epoch": 0.48, "grad_norm": 3.0215821266174316, "learning_rate": 0.0002, "loss": 1.4574, "step": 117290 }, { "epoch": 0.48, "grad_norm": 3.8974170684814453, "learning_rate": 0.0002, "loss": 1.8411, "step": 117300 }, { "epoch": 0.48, "grad_norm": 2.1641156673431396, "learning_rate": 0.0002, "loss": 1.4829, "step": 117310 }, { "epoch": 0.48, "grad_norm": 3.9209017753601074, "learning_rate": 0.0002, "loss": 1.7909, "step": 117320 }, { "epoch": 0.48, "grad_norm": 1.358330488204956, "learning_rate": 0.0002, "loss": 1.5664, "step": 117330 }, { "epoch": 0.48, "grad_norm": 2.8375296592712402, "learning_rate": 0.0002, "loss": 1.5801, "step": 117340 }, { "epoch": 0.48, "grad_norm": 2.349029779434204, "learning_rate": 0.0002, "loss": 1.6911, "step": 117350 }, { "epoch": 0.48, "grad_norm": 2.326054811477661, "learning_rate": 0.0002, "loss": 1.5049, "step": 117360 }, { "epoch": 0.48, "grad_norm": 3.458080530166626, "learning_rate": 0.0002, "loss": 1.5284, "step": 117370 }, { "epoch": 0.48, "grad_norm": 3.659437894821167, "learning_rate": 0.0002, "loss": 1.712, "step": 117380 }, { "epoch": 0.48, "grad_norm": 2.731377601623535, "learning_rate": 0.0002, "loss": 1.5451, "step": 117390 }, { "epoch": 0.48, "grad_norm": 3.916576862335205, "learning_rate": 0.0002, "loss": 1.6904, "step": 117400 }, { "epoch": 0.48, "grad_norm": 3.2537002563476562, "learning_rate": 0.0002, "loss": 1.7348, "step": 117410 }, { "epoch": 0.48, "grad_norm": 3.7045037746429443, "learning_rate": 0.0002, "loss": 1.4096, "step": 117420 }, { "epoch": 0.48, "grad_norm": 2.988924026489258, "learning_rate": 0.0002, "loss": 1.5511, "step": 117430 }, { "epoch": 0.48, "grad_norm": 3.72084379196167, "learning_rate": 0.0002, "loss": 1.6811, "step": 117440 }, { "epoch": 0.48, "grad_norm": 3.4500620365142822, "learning_rate": 0.0002, "loss": 1.5568, "step": 117450 }, { "epoch": 0.48, "grad_norm": 6.036454677581787, "learning_rate": 0.0002, "loss": 1.476, "step": 117460 }, { "epoch": 0.48, "grad_norm": 3.0196197032928467, "learning_rate": 0.0002, "loss": 1.3628, "step": 117470 }, { "epoch": 0.48, "grad_norm": 3.018017053604126, "learning_rate": 0.0002, "loss": 1.5529, "step": 117480 }, { "epoch": 0.48, "grad_norm": 3.404766321182251, "learning_rate": 0.0002, "loss": 1.6684, "step": 117490 }, { "epoch": 0.48, "grad_norm": 2.409860372543335, "learning_rate": 0.0002, "loss": 1.4666, "step": 117500 }, { "epoch": 0.48, "grad_norm": 2.733236789703369, "learning_rate": 0.0002, "loss": 1.4048, "step": 117510 }, { "epoch": 0.48, "grad_norm": 4.179098129272461, "learning_rate": 0.0002, "loss": 1.4091, "step": 117520 }, { "epoch": 0.48, "grad_norm": 2.3394503593444824, "learning_rate": 0.0002, "loss": 1.8111, "step": 117530 }, { "epoch": 0.48, "grad_norm": 2.861748695373535, "learning_rate": 0.0002, "loss": 1.2316, "step": 117540 }, { "epoch": 0.48, "grad_norm": 2.427039861679077, "learning_rate": 0.0002, "loss": 1.7098, "step": 117550 }, { "epoch": 0.48, "grad_norm": 3.6517183780670166, "learning_rate": 0.0002, "loss": 1.4183, "step": 117560 }, { "epoch": 0.48, "grad_norm": 2.5291800498962402, "learning_rate": 0.0002, "loss": 1.5853, "step": 117570 }, { "epoch": 0.48, "grad_norm": 3.1431190967559814, "learning_rate": 0.0002, "loss": 1.5614, "step": 117580 }, { "epoch": 0.48, "grad_norm": 2.5680153369903564, "learning_rate": 0.0002, "loss": 1.553, "step": 117590 }, { "epoch": 0.48, "grad_norm": 2.098478317260742, "learning_rate": 0.0002, "loss": 1.5822, "step": 117600 }, { "epoch": 0.48, "grad_norm": 4.208208084106445, "learning_rate": 0.0002, "loss": 1.6728, "step": 117610 }, { "epoch": 0.48, "grad_norm": 3.682912588119507, "learning_rate": 0.0002, "loss": 1.6007, "step": 117620 }, { "epoch": 0.48, "grad_norm": 2.0505993366241455, "learning_rate": 0.0002, "loss": 1.5986, "step": 117630 }, { "epoch": 0.48, "grad_norm": 9.384940147399902, "learning_rate": 0.0002, "loss": 1.5139, "step": 117640 }, { "epoch": 0.48, "grad_norm": 3.2459874153137207, "learning_rate": 0.0002, "loss": 1.7335, "step": 117650 }, { "epoch": 0.48, "grad_norm": 3.382091522216797, "learning_rate": 0.0002, "loss": 1.5656, "step": 117660 }, { "epoch": 0.48, "grad_norm": 3.5402286052703857, "learning_rate": 0.0002, "loss": 1.381, "step": 117670 }, { "epoch": 0.48, "grad_norm": 4.986198425292969, "learning_rate": 0.0002, "loss": 1.7157, "step": 117680 }, { "epoch": 0.48, "grad_norm": 2.996173858642578, "learning_rate": 0.0002, "loss": 1.5282, "step": 117690 }, { "epoch": 0.48, "grad_norm": 2.189052104949951, "learning_rate": 0.0002, "loss": 1.5407, "step": 117700 }, { "epoch": 0.48, "grad_norm": 3.3843586444854736, "learning_rate": 0.0002, "loss": 1.5424, "step": 117710 }, { "epoch": 0.48, "grad_norm": 2.471698045730591, "learning_rate": 0.0002, "loss": 1.489, "step": 117720 }, { "epoch": 0.48, "grad_norm": 1.6607789993286133, "learning_rate": 0.0002, "loss": 1.769, "step": 117730 }, { "epoch": 0.48, "grad_norm": 1.7578659057617188, "learning_rate": 0.0002, "loss": 1.622, "step": 117740 }, { "epoch": 0.48, "grad_norm": 1.8257814645767212, "learning_rate": 0.0002, "loss": 1.4704, "step": 117750 }, { "epoch": 0.48, "grad_norm": 3.030423164367676, "learning_rate": 0.0002, "loss": 1.7568, "step": 117760 }, { "epoch": 0.48, "grad_norm": 3.0616238117218018, "learning_rate": 0.0002, "loss": 1.6893, "step": 117770 }, { "epoch": 0.48, "grad_norm": 3.5514438152313232, "learning_rate": 0.0002, "loss": 1.5564, "step": 117780 }, { "epoch": 0.48, "grad_norm": 3.202592611312866, "learning_rate": 0.0002, "loss": 1.5586, "step": 117790 }, { "epoch": 0.48, "grad_norm": 2.6781482696533203, "learning_rate": 0.0002, "loss": 1.6953, "step": 117800 }, { "epoch": 0.48, "grad_norm": 2.568298578262329, "learning_rate": 0.0002, "loss": 1.5131, "step": 117810 }, { "epoch": 0.48, "grad_norm": 1.6019247770309448, "learning_rate": 0.0002, "loss": 1.4932, "step": 117820 }, { "epoch": 0.48, "grad_norm": 2.6026625633239746, "learning_rate": 0.0002, "loss": 1.5875, "step": 117830 }, { "epoch": 0.48, "grad_norm": 3.860933780670166, "learning_rate": 0.0002, "loss": 1.4871, "step": 117840 }, { "epoch": 0.48, "grad_norm": 3.5781147480010986, "learning_rate": 0.0002, "loss": 1.6264, "step": 117850 }, { "epoch": 0.48, "grad_norm": 4.702335357666016, "learning_rate": 0.0002, "loss": 1.4437, "step": 117860 }, { "epoch": 0.48, "grad_norm": 3.68328595161438, "learning_rate": 0.0002, "loss": 1.7449, "step": 117870 }, { "epoch": 0.48, "grad_norm": 3.4832043647766113, "learning_rate": 0.0002, "loss": 1.4938, "step": 117880 }, { "epoch": 0.48, "grad_norm": 3.688972234725952, "learning_rate": 0.0002, "loss": 1.417, "step": 117890 }, { "epoch": 0.48, "grad_norm": 3.7204818725585938, "learning_rate": 0.0002, "loss": 1.6238, "step": 117900 }, { "epoch": 0.48, "grad_norm": 2.762770652770996, "learning_rate": 0.0002, "loss": 1.7735, "step": 117910 }, { "epoch": 0.48, "grad_norm": 5.446463108062744, "learning_rate": 0.0002, "loss": 1.7349, "step": 117920 }, { "epoch": 0.48, "grad_norm": 3.2183163166046143, "learning_rate": 0.0002, "loss": 1.5054, "step": 117930 }, { "epoch": 0.48, "grad_norm": 3.701183557510376, "learning_rate": 0.0002, "loss": 1.5491, "step": 117940 }, { "epoch": 0.48, "grad_norm": 3.149867534637451, "learning_rate": 0.0002, "loss": 1.5545, "step": 117950 }, { "epoch": 0.48, "grad_norm": 2.884960651397705, "learning_rate": 0.0002, "loss": 1.6859, "step": 117960 }, { "epoch": 0.48, "grad_norm": 2.8729727268218994, "learning_rate": 0.0002, "loss": 1.4489, "step": 117970 }, { "epoch": 0.48, "grad_norm": 2.255546808242798, "learning_rate": 0.0002, "loss": 1.6687, "step": 117980 }, { "epoch": 0.48, "grad_norm": 2.580704927444458, "learning_rate": 0.0002, "loss": 1.7344, "step": 117990 }, { "epoch": 0.48, "grad_norm": 3.473675489425659, "learning_rate": 0.0002, "loss": 1.5522, "step": 118000 }, { "epoch": 0.48, "grad_norm": 6.013026714324951, "learning_rate": 0.0002, "loss": 1.507, "step": 118010 }, { "epoch": 0.48, "grad_norm": 3.415437936782837, "learning_rate": 0.0002, "loss": 1.629, "step": 118020 }, { "epoch": 0.48, "grad_norm": 2.2171406745910645, "learning_rate": 0.0002, "loss": 1.6637, "step": 118030 }, { "epoch": 0.48, "grad_norm": 6.222632884979248, "learning_rate": 0.0002, "loss": 1.6945, "step": 118040 }, { "epoch": 0.48, "grad_norm": 2.366457223892212, "learning_rate": 0.0002, "loss": 1.5267, "step": 118050 }, { "epoch": 0.48, "grad_norm": 3.0551304817199707, "learning_rate": 0.0002, "loss": 1.629, "step": 118060 }, { "epoch": 0.48, "grad_norm": 5.850032806396484, "learning_rate": 0.0002, "loss": 1.5895, "step": 118070 }, { "epoch": 0.48, "grad_norm": 3.5120999813079834, "learning_rate": 0.0002, "loss": 1.3983, "step": 118080 }, { "epoch": 0.48, "grad_norm": 2.2184576988220215, "learning_rate": 0.0002, "loss": 1.8708, "step": 118090 }, { "epoch": 0.48, "grad_norm": 2.59451961517334, "learning_rate": 0.0002, "loss": 1.8714, "step": 118100 }, { "epoch": 0.48, "grad_norm": 3.2566394805908203, "learning_rate": 0.0002, "loss": 1.8637, "step": 118110 }, { "epoch": 0.48, "grad_norm": 3.3844010829925537, "learning_rate": 0.0002, "loss": 1.7491, "step": 118120 }, { "epoch": 0.48, "grad_norm": 1.5590957403182983, "learning_rate": 0.0002, "loss": 1.5921, "step": 118130 }, { "epoch": 0.48, "grad_norm": 3.075531005859375, "learning_rate": 0.0002, "loss": 1.3719, "step": 118140 }, { "epoch": 0.48, "grad_norm": 1.7316194772720337, "learning_rate": 0.0002, "loss": 1.4647, "step": 118150 }, { "epoch": 0.48, "grad_norm": 2.448294162750244, "learning_rate": 0.0002, "loss": 1.5541, "step": 118160 }, { "epoch": 0.48, "grad_norm": 2.3710873126983643, "learning_rate": 0.0002, "loss": 1.5465, "step": 118170 }, { "epoch": 0.48, "grad_norm": 3.8569748401641846, "learning_rate": 0.0002, "loss": 1.3459, "step": 118180 }, { "epoch": 0.48, "grad_norm": 2.260587692260742, "learning_rate": 0.0002, "loss": 1.612, "step": 118190 }, { "epoch": 0.48, "grad_norm": 2.610703229904175, "learning_rate": 0.0002, "loss": 1.3223, "step": 118200 }, { "epoch": 0.48, "grad_norm": 3.084911346435547, "learning_rate": 0.0002, "loss": 1.4845, "step": 118210 }, { "epoch": 0.48, "grad_norm": 5.753296852111816, "learning_rate": 0.0002, "loss": 1.6976, "step": 118220 }, { "epoch": 0.48, "grad_norm": 2.2075648307800293, "learning_rate": 0.0002, "loss": 1.6434, "step": 118230 }, { "epoch": 0.48, "grad_norm": 3.2862226963043213, "learning_rate": 0.0002, "loss": 1.4487, "step": 118240 }, { "epoch": 0.48, "grad_norm": 2.616271495819092, "learning_rate": 0.0002, "loss": 1.6261, "step": 118250 }, { "epoch": 0.48, "grad_norm": 1.9038803577423096, "learning_rate": 0.0002, "loss": 1.4355, "step": 118260 }, { "epoch": 0.48, "grad_norm": 5.316282272338867, "learning_rate": 0.0002, "loss": 1.7218, "step": 118270 }, { "epoch": 0.48, "grad_norm": 4.836486339569092, "learning_rate": 0.0002, "loss": 1.4662, "step": 118280 }, { "epoch": 0.48, "grad_norm": 3.7353034019470215, "learning_rate": 0.0002, "loss": 1.6874, "step": 118290 }, { "epoch": 0.48, "grad_norm": 3.684135675430298, "learning_rate": 0.0002, "loss": 1.6743, "step": 118300 }, { "epoch": 0.48, "grad_norm": 3.963026762008667, "learning_rate": 0.0002, "loss": 1.688, "step": 118310 }, { "epoch": 0.48, "grad_norm": 2.6631529331207275, "learning_rate": 0.0002, "loss": 1.8534, "step": 118320 }, { "epoch": 0.48, "grad_norm": 2.877131700515747, "learning_rate": 0.0002, "loss": 1.5402, "step": 118330 }, { "epoch": 0.48, "grad_norm": 2.958076238632202, "learning_rate": 0.0002, "loss": 1.7071, "step": 118340 }, { "epoch": 0.48, "grad_norm": 3.7827272415161133, "learning_rate": 0.0002, "loss": 1.6818, "step": 118350 }, { "epoch": 0.48, "grad_norm": 1.9416780471801758, "learning_rate": 0.0002, "loss": 1.4536, "step": 118360 }, { "epoch": 0.48, "grad_norm": 4.158787727355957, "learning_rate": 0.0002, "loss": 1.365, "step": 118370 }, { "epoch": 0.48, "grad_norm": 3.2106716632843018, "learning_rate": 0.0002, "loss": 1.7095, "step": 118380 }, { "epoch": 0.48, "grad_norm": 3.0646581649780273, "learning_rate": 0.0002, "loss": 1.7742, "step": 118390 }, { "epoch": 0.48, "grad_norm": 2.705742835998535, "learning_rate": 0.0002, "loss": 1.6197, "step": 118400 }, { "epoch": 0.48, "grad_norm": 2.153769016265869, "learning_rate": 0.0002, "loss": 1.7054, "step": 118410 }, { "epoch": 0.48, "grad_norm": 3.858867645263672, "learning_rate": 0.0002, "loss": 1.4152, "step": 118420 }, { "epoch": 0.48, "grad_norm": 4.127315521240234, "learning_rate": 0.0002, "loss": 1.3972, "step": 118430 }, { "epoch": 0.48, "grad_norm": 4.4804558753967285, "learning_rate": 0.0002, "loss": 1.5959, "step": 118440 }, { "epoch": 0.48, "grad_norm": 2.288858652114868, "learning_rate": 0.0002, "loss": 1.8191, "step": 118450 }, { "epoch": 0.48, "grad_norm": 2.852813959121704, "learning_rate": 0.0002, "loss": 1.5123, "step": 118460 }, { "epoch": 0.48, "grad_norm": 2.9153220653533936, "learning_rate": 0.0002, "loss": 1.5514, "step": 118470 }, { "epoch": 0.48, "grad_norm": 2.5389297008514404, "learning_rate": 0.0002, "loss": 1.4552, "step": 118480 }, { "epoch": 0.48, "grad_norm": 4.672131061553955, "learning_rate": 0.0002, "loss": 1.684, "step": 118490 }, { "epoch": 0.48, "grad_norm": 5.12708044052124, "learning_rate": 0.0002, "loss": 1.7764, "step": 118500 }, { "epoch": 0.48, "grad_norm": 2.3615353107452393, "learning_rate": 0.0002, "loss": 1.4532, "step": 118510 }, { "epoch": 0.48, "grad_norm": 1.817632794380188, "learning_rate": 0.0002, "loss": 1.705, "step": 118520 }, { "epoch": 0.48, "grad_norm": 3.1351351737976074, "learning_rate": 0.0002, "loss": 1.5958, "step": 118530 }, { "epoch": 0.48, "grad_norm": 3.739485740661621, "learning_rate": 0.0002, "loss": 1.8093, "step": 118540 }, { "epoch": 0.48, "grad_norm": 3.3028059005737305, "learning_rate": 0.0002, "loss": 1.4741, "step": 118550 }, { "epoch": 0.48, "grad_norm": 2.9958484172821045, "learning_rate": 0.0002, "loss": 1.3075, "step": 118560 }, { "epoch": 0.48, "grad_norm": 3.125154733657837, "learning_rate": 0.0002, "loss": 1.5383, "step": 118570 }, { "epoch": 0.48, "grad_norm": 3.544923782348633, "learning_rate": 0.0002, "loss": 1.5532, "step": 118580 }, { "epoch": 0.48, "grad_norm": 2.897325277328491, "learning_rate": 0.0002, "loss": 1.6935, "step": 118590 }, { "epoch": 0.48, "grad_norm": 2.312487840652466, "learning_rate": 0.0002, "loss": 1.6575, "step": 118600 }, { "epoch": 0.48, "grad_norm": 1.8758301734924316, "learning_rate": 0.0002, "loss": 1.3379, "step": 118610 }, { "epoch": 0.48, "grad_norm": 3.9701340198516846, "learning_rate": 0.0002, "loss": 1.7704, "step": 118620 }, { "epoch": 0.48, "grad_norm": 3.8184947967529297, "learning_rate": 0.0002, "loss": 1.5369, "step": 118630 }, { "epoch": 0.48, "grad_norm": 7.5967206954956055, "learning_rate": 0.0002, "loss": 1.5647, "step": 118640 }, { "epoch": 0.48, "grad_norm": 2.1464312076568604, "learning_rate": 0.0002, "loss": 1.5711, "step": 118650 }, { "epoch": 0.48, "grad_norm": 3.914802312850952, "learning_rate": 0.0002, "loss": 1.6411, "step": 118660 }, { "epoch": 0.48, "grad_norm": 3.757878541946411, "learning_rate": 0.0002, "loss": 1.7499, "step": 118670 }, { "epoch": 0.48, "grad_norm": 3.412665605545044, "learning_rate": 0.0002, "loss": 1.5611, "step": 118680 }, { "epoch": 0.48, "grad_norm": 1.8159689903259277, "learning_rate": 0.0002, "loss": 1.5476, "step": 118690 }, { "epoch": 0.48, "grad_norm": 3.0867745876312256, "learning_rate": 0.0002, "loss": 1.2979, "step": 118700 }, { "epoch": 0.48, "grad_norm": 3.5614707469940186, "learning_rate": 0.0002, "loss": 1.4417, "step": 118710 }, { "epoch": 0.48, "grad_norm": 4.052720546722412, "learning_rate": 0.0002, "loss": 1.7649, "step": 118720 }, { "epoch": 0.48, "grad_norm": 6.168283939361572, "learning_rate": 0.0002, "loss": 1.4628, "step": 118730 }, { "epoch": 0.48, "grad_norm": 3.4565138816833496, "learning_rate": 0.0002, "loss": 1.7751, "step": 118740 }, { "epoch": 0.48, "grad_norm": 3.419633626937866, "learning_rate": 0.0002, "loss": 1.5232, "step": 118750 }, { "epoch": 0.48, "grad_norm": 2.8089213371276855, "learning_rate": 0.0002, "loss": 1.4071, "step": 118760 }, { "epoch": 0.48, "grad_norm": 4.463341236114502, "learning_rate": 0.0002, "loss": 1.5415, "step": 118770 }, { "epoch": 0.48, "grad_norm": 2.40444278717041, "learning_rate": 0.0002, "loss": 1.7096, "step": 118780 }, { "epoch": 0.48, "grad_norm": 1.4954867362976074, "learning_rate": 0.0002, "loss": 1.7091, "step": 118790 }, { "epoch": 0.48, "grad_norm": 1.7730367183685303, "learning_rate": 0.0002, "loss": 1.4866, "step": 118800 }, { "epoch": 0.48, "grad_norm": 2.9004929065704346, "learning_rate": 0.0002, "loss": 1.4999, "step": 118810 }, { "epoch": 0.48, "grad_norm": 5.086145401000977, "learning_rate": 0.0002, "loss": 1.359, "step": 118820 }, { "epoch": 0.48, "grad_norm": 3.567786931991577, "learning_rate": 0.0002, "loss": 1.3678, "step": 118830 }, { "epoch": 0.48, "grad_norm": 3.598891019821167, "learning_rate": 0.0002, "loss": 1.3552, "step": 118840 }, { "epoch": 0.48, "grad_norm": 2.9645609855651855, "learning_rate": 0.0002, "loss": 1.6007, "step": 118850 }, { "epoch": 0.48, "grad_norm": 3.1952762603759766, "learning_rate": 0.0002, "loss": 1.6634, "step": 118860 }, { "epoch": 0.48, "grad_norm": 4.019526958465576, "learning_rate": 0.0002, "loss": 1.7444, "step": 118870 }, { "epoch": 0.48, "grad_norm": 1.8711458444595337, "learning_rate": 0.0002, "loss": 1.8521, "step": 118880 }, { "epoch": 0.48, "grad_norm": 3.1014246940612793, "learning_rate": 0.0002, "loss": 1.687, "step": 118890 }, { "epoch": 0.48, "grad_norm": 2.8122692108154297, "learning_rate": 0.0002, "loss": 1.8083, "step": 118900 }, { "epoch": 0.48, "grad_norm": 1.975103735923767, "learning_rate": 0.0002, "loss": 1.5192, "step": 118910 }, { "epoch": 0.48, "grad_norm": 3.853306293487549, "learning_rate": 0.0002, "loss": 1.7245, "step": 118920 }, { "epoch": 0.48, "grad_norm": 3.434995174407959, "learning_rate": 0.0002, "loss": 1.7124, "step": 118930 }, { "epoch": 0.48, "grad_norm": 3.4081802368164062, "learning_rate": 0.0002, "loss": 1.5848, "step": 118940 }, { "epoch": 0.48, "grad_norm": 3.099104881286621, "learning_rate": 0.0002, "loss": 1.7711, "step": 118950 }, { "epoch": 0.48, "grad_norm": 3.2118654251098633, "learning_rate": 0.0002, "loss": 1.5847, "step": 118960 }, { "epoch": 0.48, "grad_norm": 4.180285930633545, "learning_rate": 0.0002, "loss": 1.6263, "step": 118970 }, { "epoch": 0.48, "grad_norm": 4.014133930206299, "learning_rate": 0.0002, "loss": 1.6005, "step": 118980 }, { "epoch": 0.48, "grad_norm": 2.8255386352539062, "learning_rate": 0.0002, "loss": 1.6147, "step": 118990 }, { "epoch": 0.48, "grad_norm": 5.64237117767334, "learning_rate": 0.0002, "loss": 1.6591, "step": 119000 }, { "epoch": 0.48, "grad_norm": 2.139514684677124, "learning_rate": 0.0002, "loss": 1.5954, "step": 119010 }, { "epoch": 0.48, "grad_norm": 2.96779203414917, "learning_rate": 0.0002, "loss": 1.4662, "step": 119020 }, { "epoch": 0.48, "grad_norm": 3.746760845184326, "learning_rate": 0.0002, "loss": 1.7153, "step": 119030 }, { "epoch": 0.48, "grad_norm": 2.7732458114624023, "learning_rate": 0.0002, "loss": 1.7624, "step": 119040 }, { "epoch": 0.48, "grad_norm": 2.989499092102051, "learning_rate": 0.0002, "loss": 1.5797, "step": 119050 }, { "epoch": 0.48, "grad_norm": 2.7552692890167236, "learning_rate": 0.0002, "loss": 1.5322, "step": 119060 }, { "epoch": 0.48, "grad_norm": 3.4445722103118896, "learning_rate": 0.0002, "loss": 1.5982, "step": 119070 }, { "epoch": 0.48, "grad_norm": 2.055630683898926, "learning_rate": 0.0002, "loss": 1.4663, "step": 119080 }, { "epoch": 0.48, "grad_norm": 2.366382122039795, "learning_rate": 0.0002, "loss": 1.3731, "step": 119090 }, { "epoch": 0.48, "grad_norm": 2.5808937549591064, "learning_rate": 0.0002, "loss": 1.7206, "step": 119100 }, { "epoch": 0.48, "grad_norm": 2.0925865173339844, "learning_rate": 0.0002, "loss": 1.5759, "step": 119110 }, { "epoch": 0.48, "grad_norm": 2.9112911224365234, "learning_rate": 0.0002, "loss": 1.6275, "step": 119120 }, { "epoch": 0.48, "grad_norm": 3.416577100753784, "learning_rate": 0.0002, "loss": 1.5783, "step": 119130 }, { "epoch": 0.49, "grad_norm": 3.665473222732544, "learning_rate": 0.0002, "loss": 1.6426, "step": 119140 }, { "epoch": 0.49, "grad_norm": 2.4427621364593506, "learning_rate": 0.0002, "loss": 1.5997, "step": 119150 }, { "epoch": 0.49, "grad_norm": 2.920607566833496, "learning_rate": 0.0002, "loss": 1.5017, "step": 119160 }, { "epoch": 0.49, "grad_norm": 2.062213897705078, "learning_rate": 0.0002, "loss": 1.5201, "step": 119170 }, { "epoch": 0.49, "grad_norm": 2.7945685386657715, "learning_rate": 0.0002, "loss": 1.5928, "step": 119180 }, { "epoch": 0.49, "grad_norm": 3.3732032775878906, "learning_rate": 0.0002, "loss": 1.7722, "step": 119190 }, { "epoch": 0.49, "grad_norm": 2.9545657634735107, "learning_rate": 0.0002, "loss": 1.383, "step": 119200 }, { "epoch": 0.49, "grad_norm": 3.1149699687957764, "learning_rate": 0.0002, "loss": 1.4069, "step": 119210 }, { "epoch": 0.49, "grad_norm": 4.201129913330078, "learning_rate": 0.0002, "loss": 1.6024, "step": 119220 }, { "epoch": 0.49, "grad_norm": 5.75337028503418, "learning_rate": 0.0002, "loss": 1.5608, "step": 119230 }, { "epoch": 0.49, "grad_norm": 3.476745367050171, "learning_rate": 0.0002, "loss": 1.2966, "step": 119240 }, { "epoch": 0.49, "grad_norm": 3.2639060020446777, "learning_rate": 0.0002, "loss": 1.6465, "step": 119250 }, { "epoch": 0.49, "grad_norm": 4.036921501159668, "learning_rate": 0.0002, "loss": 1.613, "step": 119260 }, { "epoch": 0.49, "grad_norm": 2.219193935394287, "learning_rate": 0.0002, "loss": 1.6133, "step": 119270 }, { "epoch": 0.49, "grad_norm": 3.8355047702789307, "learning_rate": 0.0002, "loss": 1.6352, "step": 119280 }, { "epoch": 0.49, "grad_norm": 3.722839117050171, "learning_rate": 0.0002, "loss": 1.5075, "step": 119290 }, { "epoch": 0.49, "grad_norm": 3.495948314666748, "learning_rate": 0.0002, "loss": 1.5085, "step": 119300 }, { "epoch": 0.49, "grad_norm": 3.6061248779296875, "learning_rate": 0.0002, "loss": 1.4695, "step": 119310 }, { "epoch": 0.49, "grad_norm": 1.522102952003479, "learning_rate": 0.0002, "loss": 1.4009, "step": 119320 }, { "epoch": 0.49, "grad_norm": 3.4489824771881104, "learning_rate": 0.0002, "loss": 1.5241, "step": 119330 }, { "epoch": 0.49, "grad_norm": 1.385486125946045, "learning_rate": 0.0002, "loss": 1.8246, "step": 119340 }, { "epoch": 0.49, "grad_norm": 1.906872272491455, "learning_rate": 0.0002, "loss": 1.5337, "step": 119350 }, { "epoch": 0.49, "grad_norm": 2.988974094390869, "learning_rate": 0.0002, "loss": 1.5307, "step": 119360 }, { "epoch": 0.49, "grad_norm": 4.115760326385498, "learning_rate": 0.0002, "loss": 1.4587, "step": 119370 }, { "epoch": 0.49, "grad_norm": 3.4903485774993896, "learning_rate": 0.0002, "loss": 1.6864, "step": 119380 }, { "epoch": 0.49, "grad_norm": 2.839663505554199, "learning_rate": 0.0002, "loss": 1.4914, "step": 119390 }, { "epoch": 0.49, "grad_norm": 3.219193458557129, "learning_rate": 0.0002, "loss": 1.528, "step": 119400 }, { "epoch": 0.49, "grad_norm": 1.9296238422393799, "learning_rate": 0.0002, "loss": 1.3894, "step": 119410 }, { "epoch": 0.49, "grad_norm": 2.8363912105560303, "learning_rate": 0.0002, "loss": 1.5664, "step": 119420 }, { "epoch": 0.49, "grad_norm": 2.917501449584961, "learning_rate": 0.0002, "loss": 1.5879, "step": 119430 }, { "epoch": 0.49, "grad_norm": 1.933141827583313, "learning_rate": 0.0002, "loss": 1.7077, "step": 119440 }, { "epoch": 0.49, "grad_norm": 2.5293939113616943, "learning_rate": 0.0002, "loss": 1.5401, "step": 119450 }, { "epoch": 0.49, "grad_norm": 1.9582180976867676, "learning_rate": 0.0002, "loss": 1.4728, "step": 119460 }, { "epoch": 0.49, "grad_norm": 3.999876022338867, "learning_rate": 0.0002, "loss": 1.6885, "step": 119470 }, { "epoch": 0.49, "grad_norm": 5.139742851257324, "learning_rate": 0.0002, "loss": 1.2857, "step": 119480 }, { "epoch": 0.49, "grad_norm": 2.517929792404175, "learning_rate": 0.0002, "loss": 1.559, "step": 119490 }, { "epoch": 0.49, "grad_norm": 2.2229888439178467, "learning_rate": 0.0002, "loss": 1.4729, "step": 119500 }, { "epoch": 0.49, "grad_norm": 2.289740800857544, "learning_rate": 0.0002, "loss": 1.586, "step": 119510 }, { "epoch": 0.49, "grad_norm": 3.290914535522461, "learning_rate": 0.0002, "loss": 1.4124, "step": 119520 }, { "epoch": 0.49, "grad_norm": 2.3402717113494873, "learning_rate": 0.0002, "loss": 1.6533, "step": 119530 }, { "epoch": 0.49, "grad_norm": 2.7307021617889404, "learning_rate": 0.0002, "loss": 1.5678, "step": 119540 }, { "epoch": 0.49, "grad_norm": 2.271054267883301, "learning_rate": 0.0002, "loss": 1.6999, "step": 119550 }, { "epoch": 0.49, "grad_norm": 2.022984266281128, "learning_rate": 0.0002, "loss": 1.5224, "step": 119560 }, { "epoch": 0.49, "grad_norm": 3.068164587020874, "learning_rate": 0.0002, "loss": 1.7469, "step": 119570 }, { "epoch": 0.49, "grad_norm": 2.818899393081665, "learning_rate": 0.0002, "loss": 1.9279, "step": 119580 }, { "epoch": 0.49, "grad_norm": 2.8802990913391113, "learning_rate": 0.0002, "loss": 1.5738, "step": 119590 }, { "epoch": 0.49, "grad_norm": 2.8859894275665283, "learning_rate": 0.0002, "loss": 1.4546, "step": 119600 }, { "epoch": 0.49, "grad_norm": 3.402280569076538, "learning_rate": 0.0002, "loss": 1.6449, "step": 119610 }, { "epoch": 0.49, "grad_norm": 3.4391090869903564, "learning_rate": 0.0002, "loss": 1.5986, "step": 119620 }, { "epoch": 0.49, "grad_norm": 3.907130002975464, "learning_rate": 0.0002, "loss": 1.3298, "step": 119630 }, { "epoch": 0.49, "grad_norm": 3.3943116664886475, "learning_rate": 0.0002, "loss": 1.7326, "step": 119640 }, { "epoch": 0.49, "grad_norm": 2.55995774269104, "learning_rate": 0.0002, "loss": 1.59, "step": 119650 }, { "epoch": 0.49, "grad_norm": 1.5077049732208252, "learning_rate": 0.0002, "loss": 1.4286, "step": 119660 }, { "epoch": 0.49, "grad_norm": 2.9168856143951416, "learning_rate": 0.0002, "loss": 1.5154, "step": 119670 }, { "epoch": 0.49, "grad_norm": 3.6036014556884766, "learning_rate": 0.0002, "loss": 1.4932, "step": 119680 }, { "epoch": 0.49, "grad_norm": 2.9232308864593506, "learning_rate": 0.0002, "loss": 1.6463, "step": 119690 }, { "epoch": 0.49, "grad_norm": 3.1672959327697754, "learning_rate": 0.0002, "loss": 1.7412, "step": 119700 }, { "epoch": 0.49, "grad_norm": 2.232957601547241, "learning_rate": 0.0002, "loss": 1.565, "step": 119710 }, { "epoch": 0.49, "grad_norm": 3.840763568878174, "learning_rate": 0.0002, "loss": 1.6087, "step": 119720 }, { "epoch": 0.49, "grad_norm": 1.9595485925674438, "learning_rate": 0.0002, "loss": 1.5321, "step": 119730 }, { "epoch": 0.49, "grad_norm": 5.132970809936523, "learning_rate": 0.0002, "loss": 1.5094, "step": 119740 }, { "epoch": 0.49, "grad_norm": 2.9933531284332275, "learning_rate": 0.0002, "loss": 1.4368, "step": 119750 }, { "epoch": 0.49, "grad_norm": 3.908215284347534, "learning_rate": 0.0002, "loss": 1.6097, "step": 119760 }, { "epoch": 0.49, "grad_norm": 4.395793914794922, "learning_rate": 0.0002, "loss": 1.9211, "step": 119770 }, { "epoch": 0.49, "grad_norm": 2.1224653720855713, "learning_rate": 0.0002, "loss": 1.7627, "step": 119780 }, { "epoch": 0.49, "grad_norm": 2.225083351135254, "learning_rate": 0.0002, "loss": 1.6483, "step": 119790 }, { "epoch": 0.49, "grad_norm": 3.8693196773529053, "learning_rate": 0.0002, "loss": 1.4305, "step": 119800 }, { "epoch": 0.49, "grad_norm": 2.3905398845672607, "learning_rate": 0.0002, "loss": 1.5832, "step": 119810 }, { "epoch": 0.49, "grad_norm": 2.2403578758239746, "learning_rate": 0.0002, "loss": 1.8423, "step": 119820 }, { "epoch": 0.49, "grad_norm": 2.247727870941162, "learning_rate": 0.0002, "loss": 1.8023, "step": 119830 }, { "epoch": 0.49, "grad_norm": 2.2253332138061523, "learning_rate": 0.0002, "loss": 1.5682, "step": 119840 }, { "epoch": 0.49, "grad_norm": 3.105196237564087, "learning_rate": 0.0002, "loss": 1.5169, "step": 119850 }, { "epoch": 0.49, "grad_norm": 2.832688808441162, "learning_rate": 0.0002, "loss": 1.6261, "step": 119860 }, { "epoch": 0.49, "grad_norm": 3.3014495372772217, "learning_rate": 0.0002, "loss": 1.5976, "step": 119870 }, { "epoch": 0.49, "grad_norm": 2.3509976863861084, "learning_rate": 0.0002, "loss": 1.4435, "step": 119880 }, { "epoch": 0.49, "grad_norm": 3.5256495475769043, "learning_rate": 0.0002, "loss": 1.7112, "step": 119890 }, { "epoch": 0.49, "grad_norm": 1.9527875185012817, "learning_rate": 0.0002, "loss": 1.4311, "step": 119900 }, { "epoch": 0.49, "grad_norm": 1.4502133131027222, "learning_rate": 0.0002, "loss": 1.4651, "step": 119910 }, { "epoch": 0.49, "grad_norm": 2.6302177906036377, "learning_rate": 0.0002, "loss": 1.434, "step": 119920 }, { "epoch": 0.49, "grad_norm": 2.66636061668396, "learning_rate": 0.0002, "loss": 1.5795, "step": 119930 }, { "epoch": 0.49, "grad_norm": 3.496614933013916, "learning_rate": 0.0002, "loss": 1.4546, "step": 119940 }, { "epoch": 0.49, "grad_norm": 3.4332973957061768, "learning_rate": 0.0002, "loss": 1.3493, "step": 119950 }, { "epoch": 0.49, "grad_norm": 2.4959444999694824, "learning_rate": 0.0002, "loss": 1.4785, "step": 119960 }, { "epoch": 0.49, "grad_norm": 2.383023977279663, "learning_rate": 0.0002, "loss": 1.6608, "step": 119970 }, { "epoch": 0.49, "grad_norm": 3.3254027366638184, "learning_rate": 0.0002, "loss": 1.4039, "step": 119980 }, { "epoch": 0.49, "grad_norm": 2.2206485271453857, "learning_rate": 0.0002, "loss": 1.7181, "step": 119990 }, { "epoch": 0.49, "grad_norm": 2.3486521244049072, "learning_rate": 0.0002, "loss": 1.7227, "step": 120000 }, { "epoch": 0.49, "grad_norm": 1.9648469686508179, "learning_rate": 0.0002, "loss": 1.6629, "step": 120010 }, { "epoch": 0.49, "grad_norm": 2.61163330078125, "learning_rate": 0.0002, "loss": 1.5347, "step": 120020 }, { "epoch": 0.49, "grad_norm": 14.146818161010742, "learning_rate": 0.0002, "loss": 1.7936, "step": 120030 }, { "epoch": 0.49, "grad_norm": 1.959288239479065, "learning_rate": 0.0002, "loss": 1.5252, "step": 120040 }, { "epoch": 0.49, "grad_norm": 2.177111864089966, "learning_rate": 0.0002, "loss": 1.5591, "step": 120050 }, { "epoch": 0.49, "grad_norm": 3.880201578140259, "learning_rate": 0.0002, "loss": 1.7014, "step": 120060 }, { "epoch": 0.49, "grad_norm": 4.984938144683838, "learning_rate": 0.0002, "loss": 1.6648, "step": 120070 }, { "epoch": 0.49, "grad_norm": 3.658705949783325, "learning_rate": 0.0002, "loss": 1.4636, "step": 120080 }, { "epoch": 0.49, "grad_norm": 2.870987892150879, "learning_rate": 0.0002, "loss": 1.4912, "step": 120090 }, { "epoch": 0.49, "grad_norm": 2.2994847297668457, "learning_rate": 0.0002, "loss": 1.7921, "step": 120100 }, { "epoch": 0.49, "grad_norm": 2.9858388900756836, "learning_rate": 0.0002, "loss": 1.4563, "step": 120110 }, { "epoch": 0.49, "grad_norm": 2.2486953735351562, "learning_rate": 0.0002, "loss": 1.6096, "step": 120120 }, { "epoch": 0.49, "grad_norm": 4.215436935424805, "learning_rate": 0.0002, "loss": 1.6779, "step": 120130 }, { "epoch": 0.49, "grad_norm": 3.095759868621826, "learning_rate": 0.0002, "loss": 1.731, "step": 120140 }, { "epoch": 0.49, "grad_norm": 3.933316469192505, "learning_rate": 0.0002, "loss": 1.5481, "step": 120150 }, { "epoch": 0.49, "grad_norm": 4.008564472198486, "learning_rate": 0.0002, "loss": 1.4376, "step": 120160 }, { "epoch": 0.49, "grad_norm": 4.564406871795654, "learning_rate": 0.0002, "loss": 1.7035, "step": 120170 }, { "epoch": 0.49, "grad_norm": 2.0350539684295654, "learning_rate": 0.0002, "loss": 1.6666, "step": 120180 }, { "epoch": 0.49, "grad_norm": 2.932943820953369, "learning_rate": 0.0002, "loss": 1.7022, "step": 120190 }, { "epoch": 0.49, "grad_norm": 2.1896467208862305, "learning_rate": 0.0002, "loss": 1.3649, "step": 120200 }, { "epoch": 0.49, "grad_norm": 3.2199718952178955, "learning_rate": 0.0002, "loss": 1.6221, "step": 120210 }, { "epoch": 0.49, "grad_norm": 3.5256118774414062, "learning_rate": 0.0002, "loss": 1.5769, "step": 120220 }, { "epoch": 0.49, "grad_norm": 3.2493622303009033, "learning_rate": 0.0002, "loss": 1.7418, "step": 120230 }, { "epoch": 0.49, "grad_norm": 4.6677327156066895, "learning_rate": 0.0002, "loss": 1.3997, "step": 120240 }, { "epoch": 0.49, "grad_norm": 4.213183879852295, "learning_rate": 0.0002, "loss": 1.5888, "step": 120250 }, { "epoch": 0.49, "grad_norm": 2.363910675048828, "learning_rate": 0.0002, "loss": 1.5236, "step": 120260 }, { "epoch": 0.49, "grad_norm": 4.000450134277344, "learning_rate": 0.0002, "loss": 1.5183, "step": 120270 }, { "epoch": 0.49, "grad_norm": 3.2399072647094727, "learning_rate": 0.0002, "loss": 1.5575, "step": 120280 }, { "epoch": 0.49, "grad_norm": 2.467784881591797, "learning_rate": 0.0002, "loss": 1.5178, "step": 120290 }, { "epoch": 0.49, "grad_norm": 2.7409958839416504, "learning_rate": 0.0002, "loss": 1.3401, "step": 120300 }, { "epoch": 0.49, "grad_norm": 2.5147202014923096, "learning_rate": 0.0002, "loss": 1.5225, "step": 120310 }, { "epoch": 0.49, "grad_norm": 3.155263662338257, "learning_rate": 0.0002, "loss": 1.6501, "step": 120320 }, { "epoch": 0.49, "grad_norm": 7.365976810455322, "learning_rate": 0.0002, "loss": 1.6137, "step": 120330 }, { "epoch": 0.49, "grad_norm": 3.3617360591888428, "learning_rate": 0.0002, "loss": 1.4532, "step": 120340 }, { "epoch": 0.49, "grad_norm": 2.324334144592285, "learning_rate": 0.0002, "loss": 1.7275, "step": 120350 }, { "epoch": 0.49, "grad_norm": 2.8253419399261475, "learning_rate": 0.0002, "loss": 1.6979, "step": 120360 }, { "epoch": 0.49, "grad_norm": 2.8078486919403076, "learning_rate": 0.0002, "loss": 1.6129, "step": 120370 }, { "epoch": 0.49, "grad_norm": 4.212010860443115, "learning_rate": 0.0002, "loss": 1.7037, "step": 120380 }, { "epoch": 0.49, "grad_norm": 3.6670539379119873, "learning_rate": 0.0002, "loss": 1.6242, "step": 120390 }, { "epoch": 0.49, "grad_norm": 3.455803632736206, "learning_rate": 0.0002, "loss": 1.6606, "step": 120400 }, { "epoch": 0.49, "grad_norm": 3.2365102767944336, "learning_rate": 0.0002, "loss": 1.2786, "step": 120410 }, { "epoch": 0.49, "grad_norm": 3.9667084217071533, "learning_rate": 0.0002, "loss": 1.695, "step": 120420 }, { "epoch": 0.49, "grad_norm": 4.042140007019043, "learning_rate": 0.0002, "loss": 1.7062, "step": 120430 }, { "epoch": 0.49, "grad_norm": 2.4959716796875, "learning_rate": 0.0002, "loss": 1.6313, "step": 120440 }, { "epoch": 0.49, "grad_norm": 2.873166084289551, "learning_rate": 0.0002, "loss": 1.8967, "step": 120450 }, { "epoch": 0.49, "grad_norm": 2.137345552444458, "learning_rate": 0.0002, "loss": 1.8511, "step": 120460 }, { "epoch": 0.49, "grad_norm": 2.965167999267578, "learning_rate": 0.0002, "loss": 1.4337, "step": 120470 }, { "epoch": 0.49, "grad_norm": 1.6316819190979004, "learning_rate": 0.0002, "loss": 1.7285, "step": 120480 }, { "epoch": 0.49, "grad_norm": 3.0590262413024902, "learning_rate": 0.0002, "loss": 1.4159, "step": 120490 }, { "epoch": 0.49, "grad_norm": 4.255564212799072, "learning_rate": 0.0002, "loss": 1.4744, "step": 120500 }, { "epoch": 0.49, "grad_norm": 1.9289954900741577, "learning_rate": 0.0002, "loss": 1.1994, "step": 120510 }, { "epoch": 0.49, "grad_norm": 3.753007173538208, "learning_rate": 0.0002, "loss": 1.561, "step": 120520 }, { "epoch": 0.49, "grad_norm": 2.834554672241211, "learning_rate": 0.0002, "loss": 1.7037, "step": 120530 }, { "epoch": 0.49, "grad_norm": 3.5188093185424805, "learning_rate": 0.0002, "loss": 1.4662, "step": 120540 }, { "epoch": 0.49, "grad_norm": 3.1773979663848877, "learning_rate": 0.0002, "loss": 1.8682, "step": 120550 }, { "epoch": 0.49, "grad_norm": 1.5965309143066406, "learning_rate": 0.0002, "loss": 1.5291, "step": 120560 }, { "epoch": 0.49, "grad_norm": 4.131364345550537, "learning_rate": 0.0002, "loss": 1.3312, "step": 120570 }, { "epoch": 0.49, "grad_norm": 4.166181564331055, "learning_rate": 0.0002, "loss": 1.4722, "step": 120580 }, { "epoch": 0.49, "grad_norm": 3.0353639125823975, "learning_rate": 0.0002, "loss": 1.4573, "step": 120590 }, { "epoch": 0.49, "grad_norm": 4.222213268280029, "learning_rate": 0.0002, "loss": 1.5068, "step": 120600 }, { "epoch": 0.49, "grad_norm": 2.327475070953369, "learning_rate": 0.0002, "loss": 1.4601, "step": 120610 }, { "epoch": 0.49, "grad_norm": 2.7633066177368164, "learning_rate": 0.0002, "loss": 1.5654, "step": 120620 }, { "epoch": 0.49, "grad_norm": 3.1510567665100098, "learning_rate": 0.0002, "loss": 1.6646, "step": 120630 }, { "epoch": 0.49, "grad_norm": 2.944136619567871, "learning_rate": 0.0002, "loss": 1.6221, "step": 120640 }, { "epoch": 0.49, "grad_norm": 2.2667455673217773, "learning_rate": 0.0002, "loss": 1.6571, "step": 120650 }, { "epoch": 0.49, "grad_norm": 2.660388708114624, "learning_rate": 0.0002, "loss": 1.6332, "step": 120660 }, { "epoch": 0.49, "grad_norm": 10.729362487792969, "learning_rate": 0.0002, "loss": 1.4989, "step": 120670 }, { "epoch": 0.49, "grad_norm": 3.5878236293792725, "learning_rate": 0.0002, "loss": 1.6238, "step": 120680 }, { "epoch": 0.49, "grad_norm": 2.974320411682129, "learning_rate": 0.0002, "loss": 1.5693, "step": 120690 }, { "epoch": 0.49, "grad_norm": 2.355984926223755, "learning_rate": 0.0002, "loss": 1.2936, "step": 120700 }, { "epoch": 0.49, "grad_norm": 2.709458827972412, "learning_rate": 0.0002, "loss": 1.6168, "step": 120710 }, { "epoch": 0.49, "grad_norm": 3.3652737140655518, "learning_rate": 0.0002, "loss": 1.245, "step": 120720 }, { "epoch": 0.49, "grad_norm": 4.239574432373047, "learning_rate": 0.0002, "loss": 1.4693, "step": 120730 }, { "epoch": 0.49, "grad_norm": 2.845834732055664, "learning_rate": 0.0002, "loss": 1.4238, "step": 120740 }, { "epoch": 0.49, "grad_norm": 4.751142501831055, "learning_rate": 0.0002, "loss": 1.658, "step": 120750 }, { "epoch": 0.49, "grad_norm": 2.19722318649292, "learning_rate": 0.0002, "loss": 1.5575, "step": 120760 }, { "epoch": 0.49, "grad_norm": 4.45243501663208, "learning_rate": 0.0002, "loss": 1.7199, "step": 120770 }, { "epoch": 0.49, "grad_norm": 3.8604583740234375, "learning_rate": 0.0002, "loss": 1.6124, "step": 120780 }, { "epoch": 0.49, "grad_norm": 3.2178542613983154, "learning_rate": 0.0002, "loss": 1.6633, "step": 120790 }, { "epoch": 0.49, "grad_norm": 2.625364065170288, "learning_rate": 0.0002, "loss": 1.3856, "step": 120800 }, { "epoch": 0.49, "grad_norm": 2.59474515914917, "learning_rate": 0.0002, "loss": 1.5772, "step": 120810 }, { "epoch": 0.49, "grad_norm": 3.089310884475708, "learning_rate": 0.0002, "loss": 1.7418, "step": 120820 }, { "epoch": 0.49, "grad_norm": 3.272730588912964, "learning_rate": 0.0002, "loss": 1.3955, "step": 120830 }, { "epoch": 0.49, "grad_norm": 1.42284095287323, "learning_rate": 0.0002, "loss": 1.4833, "step": 120840 }, { "epoch": 0.49, "grad_norm": 2.786533832550049, "learning_rate": 0.0002, "loss": 1.4057, "step": 120850 }, { "epoch": 0.49, "grad_norm": 2.397636651992798, "learning_rate": 0.0002, "loss": 1.5431, "step": 120860 }, { "epoch": 0.49, "grad_norm": 1.735064148902893, "learning_rate": 0.0002, "loss": 1.4228, "step": 120870 }, { "epoch": 0.49, "grad_norm": 6.217787265777588, "learning_rate": 0.0002, "loss": 1.5061, "step": 120880 }, { "epoch": 0.49, "grad_norm": 3.3851144313812256, "learning_rate": 0.0002, "loss": 1.5499, "step": 120890 }, { "epoch": 0.49, "grad_norm": 3.1581737995147705, "learning_rate": 0.0002, "loss": 1.6243, "step": 120900 }, { "epoch": 0.49, "grad_norm": 3.633948564529419, "learning_rate": 0.0002, "loss": 1.7527, "step": 120910 }, { "epoch": 0.49, "grad_norm": 1.7682554721832275, "learning_rate": 0.0002, "loss": 1.5681, "step": 120920 }, { "epoch": 0.49, "grad_norm": 2.89640474319458, "learning_rate": 0.0002, "loss": 1.4863, "step": 120930 }, { "epoch": 0.49, "grad_norm": 4.644827842712402, "learning_rate": 0.0002, "loss": 1.7474, "step": 120940 }, { "epoch": 0.49, "grad_norm": 4.44665002822876, "learning_rate": 0.0002, "loss": 1.6694, "step": 120950 }, { "epoch": 0.49, "grad_norm": 2.6006062030792236, "learning_rate": 0.0002, "loss": 1.6965, "step": 120960 }, { "epoch": 0.49, "grad_norm": 2.4611685276031494, "learning_rate": 0.0002, "loss": 1.5531, "step": 120970 }, { "epoch": 0.49, "grad_norm": 3.498422861099243, "learning_rate": 0.0002, "loss": 1.4723, "step": 120980 }, { "epoch": 0.49, "grad_norm": 4.273384094238281, "learning_rate": 0.0002, "loss": 1.5668, "step": 120990 }, { "epoch": 0.49, "grad_norm": 3.6545634269714355, "learning_rate": 0.0002, "loss": 1.5468, "step": 121000 }, { "epoch": 0.49, "grad_norm": 2.7333602905273438, "learning_rate": 0.0002, "loss": 1.6686, "step": 121010 }, { "epoch": 0.49, "grad_norm": 3.2310855388641357, "learning_rate": 0.0002, "loss": 1.6724, "step": 121020 }, { "epoch": 0.49, "grad_norm": 3.7977068424224854, "learning_rate": 0.0002, "loss": 1.6783, "step": 121030 }, { "epoch": 0.49, "grad_norm": 3.2885985374450684, "learning_rate": 0.0002, "loss": 1.9826, "step": 121040 }, { "epoch": 0.49, "grad_norm": 3.242666244506836, "learning_rate": 0.0002, "loss": 1.6492, "step": 121050 }, { "epoch": 0.49, "grad_norm": 1.824147343635559, "learning_rate": 0.0002, "loss": 1.691, "step": 121060 }, { "epoch": 0.49, "grad_norm": 2.7285919189453125, "learning_rate": 0.0002, "loss": 1.3801, "step": 121070 }, { "epoch": 0.49, "grad_norm": 2.1757442951202393, "learning_rate": 0.0002, "loss": 1.6849, "step": 121080 }, { "epoch": 0.49, "grad_norm": 2.194833993911743, "learning_rate": 0.0002, "loss": 1.6496, "step": 121090 }, { "epoch": 0.49, "grad_norm": 6.655188083648682, "learning_rate": 0.0002, "loss": 1.428, "step": 121100 }, { "epoch": 0.49, "grad_norm": 2.3860888481140137, "learning_rate": 0.0002, "loss": 1.5872, "step": 121110 }, { "epoch": 0.49, "grad_norm": 3.518348217010498, "learning_rate": 0.0002, "loss": 1.3847, "step": 121120 }, { "epoch": 0.49, "grad_norm": 5.005557537078857, "learning_rate": 0.0002, "loss": 1.4665, "step": 121130 }, { "epoch": 0.49, "grad_norm": 4.330581188201904, "learning_rate": 0.0002, "loss": 1.665, "step": 121140 }, { "epoch": 0.49, "grad_norm": 3.064239263534546, "learning_rate": 0.0002, "loss": 1.407, "step": 121150 }, { "epoch": 0.49, "grad_norm": 2.2116589546203613, "learning_rate": 0.0002, "loss": 1.5204, "step": 121160 }, { "epoch": 0.49, "grad_norm": 3.3433337211608887, "learning_rate": 0.0002, "loss": 1.5855, "step": 121170 }, { "epoch": 0.49, "grad_norm": 2.8872437477111816, "learning_rate": 0.0002, "loss": 1.7431, "step": 121180 }, { "epoch": 0.49, "grad_norm": 3.25858998298645, "learning_rate": 0.0002, "loss": 1.5025, "step": 121190 }, { "epoch": 0.49, "grad_norm": 2.565455436706543, "learning_rate": 0.0002, "loss": 1.3563, "step": 121200 }, { "epoch": 0.49, "grad_norm": 4.1917948722839355, "learning_rate": 0.0002, "loss": 1.7672, "step": 121210 }, { "epoch": 0.49, "grad_norm": 2.1105024814605713, "learning_rate": 0.0002, "loss": 1.8118, "step": 121220 }, { "epoch": 0.49, "grad_norm": 2.532172918319702, "learning_rate": 0.0002, "loss": 1.5904, "step": 121230 }, { "epoch": 0.49, "grad_norm": 3.317643165588379, "learning_rate": 0.0002, "loss": 1.4209, "step": 121240 }, { "epoch": 0.49, "grad_norm": 3.0469985008239746, "learning_rate": 0.0002, "loss": 1.598, "step": 121250 }, { "epoch": 0.49, "grad_norm": 2.617218494415283, "learning_rate": 0.0002, "loss": 1.6023, "step": 121260 }, { "epoch": 0.49, "grad_norm": 3.1836209297180176, "learning_rate": 0.0002, "loss": 1.6979, "step": 121270 }, { "epoch": 0.49, "grad_norm": 2.3490827083587646, "learning_rate": 0.0002, "loss": 1.7752, "step": 121280 }, { "epoch": 0.49, "grad_norm": 5.110702037811279, "learning_rate": 0.0002, "loss": 1.637, "step": 121290 }, { "epoch": 0.49, "grad_norm": 2.557762861251831, "learning_rate": 0.0002, "loss": 1.8543, "step": 121300 }, { "epoch": 0.49, "grad_norm": 3.797868251800537, "learning_rate": 0.0002, "loss": 1.5309, "step": 121310 }, { "epoch": 0.49, "grad_norm": 2.983707904815674, "learning_rate": 0.0002, "loss": 1.3259, "step": 121320 }, { "epoch": 0.49, "grad_norm": 3.690521717071533, "learning_rate": 0.0002, "loss": 1.5987, "step": 121330 }, { "epoch": 0.49, "grad_norm": 2.806055784225464, "learning_rate": 0.0002, "loss": 1.4961, "step": 121340 }, { "epoch": 0.49, "grad_norm": 2.603100299835205, "learning_rate": 0.0002, "loss": 1.5978, "step": 121350 }, { "epoch": 0.49, "grad_norm": 2.882930040359497, "learning_rate": 0.0002, "loss": 1.4544, "step": 121360 }, { "epoch": 0.49, "grad_norm": 3.526125431060791, "learning_rate": 0.0002, "loss": 1.5192, "step": 121370 }, { "epoch": 0.49, "grad_norm": 3.948195219039917, "learning_rate": 0.0002, "loss": 1.6804, "step": 121380 }, { "epoch": 0.49, "grad_norm": 3.2816197872161865, "learning_rate": 0.0002, "loss": 1.499, "step": 121390 }, { "epoch": 0.49, "grad_norm": 3.0901920795440674, "learning_rate": 0.0002, "loss": 1.5153, "step": 121400 }, { "epoch": 0.49, "grad_norm": 2.9914515018463135, "learning_rate": 0.0002, "loss": 1.5632, "step": 121410 }, { "epoch": 0.49, "grad_norm": 3.2031407356262207, "learning_rate": 0.0002, "loss": 1.542, "step": 121420 }, { "epoch": 0.49, "grad_norm": 2.3115499019622803, "learning_rate": 0.0002, "loss": 1.5423, "step": 121430 }, { "epoch": 0.49, "grad_norm": 2.3254315853118896, "learning_rate": 0.0002, "loss": 1.506, "step": 121440 }, { "epoch": 0.49, "grad_norm": 3.1401946544647217, "learning_rate": 0.0002, "loss": 1.6262, "step": 121450 }, { "epoch": 0.49, "grad_norm": 3.9587583541870117, "learning_rate": 0.0002, "loss": 1.6042, "step": 121460 }, { "epoch": 0.49, "grad_norm": 3.9577717781066895, "learning_rate": 0.0002, "loss": 1.4633, "step": 121470 }, { "epoch": 0.49, "grad_norm": 1.2015217542648315, "learning_rate": 0.0002, "loss": 1.4287, "step": 121480 }, { "epoch": 0.49, "grad_norm": 1.615541934967041, "learning_rate": 0.0002, "loss": 1.534, "step": 121490 }, { "epoch": 0.49, "grad_norm": 4.907099723815918, "learning_rate": 0.0002, "loss": 1.7375, "step": 121500 }, { "epoch": 0.49, "grad_norm": 3.5661425590515137, "learning_rate": 0.0002, "loss": 1.6293, "step": 121510 }, { "epoch": 0.49, "grad_norm": 3.6037611961364746, "learning_rate": 0.0002, "loss": 1.2446, "step": 121520 }, { "epoch": 0.49, "grad_norm": 3.30006742477417, "learning_rate": 0.0002, "loss": 1.3381, "step": 121530 }, { "epoch": 0.49, "grad_norm": 2.1263043880462646, "learning_rate": 0.0002, "loss": 1.581, "step": 121540 }, { "epoch": 0.49, "grad_norm": 3.5856707096099854, "learning_rate": 0.0002, "loss": 1.7838, "step": 121550 }, { "epoch": 0.49, "grad_norm": 2.2019569873809814, "learning_rate": 0.0002, "loss": 1.6685, "step": 121560 }, { "epoch": 0.49, "grad_norm": 2.4850568771362305, "learning_rate": 0.0002, "loss": 1.5385, "step": 121570 }, { "epoch": 0.49, "grad_norm": 6.268239974975586, "learning_rate": 0.0002, "loss": 1.6227, "step": 121580 }, { "epoch": 0.49, "grad_norm": 7.033203601837158, "learning_rate": 0.0002, "loss": 1.622, "step": 121590 }, { "epoch": 0.5, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.4314, "step": 121600 }, { "epoch": 0.5, "grad_norm": 2.692246198654175, "learning_rate": 0.0002, "loss": 1.5518, "step": 121610 }, { "epoch": 0.5, "grad_norm": 12.420270919799805, "learning_rate": 0.0002, "loss": 1.525, "step": 121620 }, { "epoch": 0.5, "grad_norm": 3.285146713256836, "learning_rate": 0.0002, "loss": 1.4996, "step": 121630 }, { "epoch": 0.5, "grad_norm": 2.894090175628662, "learning_rate": 0.0002, "loss": 1.5457, "step": 121640 }, { "epoch": 0.5, "grad_norm": 2.826476573944092, "learning_rate": 0.0002, "loss": 1.5357, "step": 121650 }, { "epoch": 0.5, "grad_norm": 2.92315411567688, "learning_rate": 0.0002, "loss": 1.4521, "step": 121660 }, { "epoch": 0.5, "grad_norm": 3.4559590816497803, "learning_rate": 0.0002, "loss": 1.5898, "step": 121670 }, { "epoch": 0.5, "grad_norm": 3.2870516777038574, "learning_rate": 0.0002, "loss": 1.709, "step": 121680 }, { "epoch": 0.5, "grad_norm": 3.370529890060425, "learning_rate": 0.0002, "loss": 1.4244, "step": 121690 }, { "epoch": 0.5, "grad_norm": 3.305037021636963, "learning_rate": 0.0002, "loss": 1.836, "step": 121700 }, { "epoch": 0.5, "grad_norm": 7.280301570892334, "learning_rate": 0.0002, "loss": 1.6874, "step": 121710 }, { "epoch": 0.5, "grad_norm": 9.475003242492676, "learning_rate": 0.0002, "loss": 1.7038, "step": 121720 }, { "epoch": 0.5, "grad_norm": 4.398587226867676, "learning_rate": 0.0002, "loss": 1.6144, "step": 121730 }, { "epoch": 0.5, "grad_norm": 3.2008168697357178, "learning_rate": 0.0002, "loss": 1.6568, "step": 121740 }, { "epoch": 0.5, "grad_norm": 2.985471487045288, "learning_rate": 0.0002, "loss": 1.6034, "step": 121750 }, { "epoch": 0.5, "grad_norm": 5.37954568862915, "learning_rate": 0.0002, "loss": 1.6054, "step": 121760 }, { "epoch": 0.5, "grad_norm": 1.9602887630462646, "learning_rate": 0.0002, "loss": 1.5572, "step": 121770 }, { "epoch": 0.5, "grad_norm": 2.0010218620300293, "learning_rate": 0.0002, "loss": 1.4855, "step": 121780 }, { "epoch": 0.5, "grad_norm": 3.182529926300049, "learning_rate": 0.0002, "loss": 1.4243, "step": 121790 }, { "epoch": 0.5, "grad_norm": 3.7034196853637695, "learning_rate": 0.0002, "loss": 1.6453, "step": 121800 }, { "epoch": 0.5, "grad_norm": 4.69058895111084, "learning_rate": 0.0002, "loss": 1.5332, "step": 121810 }, { "epoch": 0.5, "grad_norm": 2.662809371948242, "learning_rate": 0.0002, "loss": 1.5607, "step": 121820 }, { "epoch": 0.5, "grad_norm": 1.9586830139160156, "learning_rate": 0.0002, "loss": 1.3375, "step": 121830 }, { "epoch": 0.5, "grad_norm": 3.119293212890625, "learning_rate": 0.0002, "loss": 1.5268, "step": 121840 }, { "epoch": 0.5, "grad_norm": 2.3770740032196045, "learning_rate": 0.0002, "loss": 1.717, "step": 121850 }, { "epoch": 0.5, "grad_norm": 3.334566116333008, "learning_rate": 0.0002, "loss": 1.582, "step": 121860 }, { "epoch": 0.5, "grad_norm": 2.59794282913208, "learning_rate": 0.0002, "loss": 1.5355, "step": 121870 }, { "epoch": 0.5, "grad_norm": 2.3604204654693604, "learning_rate": 0.0002, "loss": 1.6636, "step": 121880 }, { "epoch": 0.5, "grad_norm": 2.6128976345062256, "learning_rate": 0.0002, "loss": 1.7928, "step": 121890 }, { "epoch": 0.5, "grad_norm": 3.4846129417419434, "learning_rate": 0.0002, "loss": 1.5137, "step": 121900 }, { "epoch": 0.5, "grad_norm": 2.8049211502075195, "learning_rate": 0.0002, "loss": 1.6488, "step": 121910 }, { "epoch": 0.5, "grad_norm": 3.0560855865478516, "learning_rate": 0.0002, "loss": 1.5716, "step": 121920 }, { "epoch": 0.5, "grad_norm": 3.270528793334961, "learning_rate": 0.0002, "loss": 1.5262, "step": 121930 }, { "epoch": 0.5, "grad_norm": 4.017024993896484, "learning_rate": 0.0002, "loss": 1.8889, "step": 121940 }, { "epoch": 0.5, "grad_norm": 3.09416127204895, "learning_rate": 0.0002, "loss": 1.6371, "step": 121950 }, { "epoch": 0.5, "grad_norm": 3.7654454708099365, "learning_rate": 0.0002, "loss": 1.7309, "step": 121960 }, { "epoch": 0.5, "grad_norm": 4.375482082366943, "learning_rate": 0.0002, "loss": 1.3415, "step": 121970 }, { "epoch": 0.5, "grad_norm": 4.126420021057129, "learning_rate": 0.0002, "loss": 1.588, "step": 121980 }, { "epoch": 0.5, "grad_norm": 2.564145088195801, "learning_rate": 0.0002, "loss": 1.4237, "step": 121990 }, { "epoch": 0.5, "grad_norm": 6.449422359466553, "learning_rate": 0.0002, "loss": 1.3, "step": 122000 }, { "epoch": 0.5, "grad_norm": 3.0045437812805176, "learning_rate": 0.0002, "loss": 1.4185, "step": 122010 }, { "epoch": 0.5, "grad_norm": 3.0949859619140625, "learning_rate": 0.0002, "loss": 1.4859, "step": 122020 }, { "epoch": 0.5, "grad_norm": 2.796962261199951, "learning_rate": 0.0002, "loss": 1.5314, "step": 122030 }, { "epoch": 0.5, "grad_norm": 2.5485055446624756, "learning_rate": 0.0002, "loss": 1.571, "step": 122040 }, { "epoch": 0.5, "grad_norm": 7.533629417419434, "learning_rate": 0.0002, "loss": 1.5718, "step": 122050 }, { "epoch": 0.5, "grad_norm": 3.5974745750427246, "learning_rate": 0.0002, "loss": 1.6501, "step": 122060 }, { "epoch": 0.5, "grad_norm": 4.019942283630371, "learning_rate": 0.0002, "loss": 1.525, "step": 122070 }, { "epoch": 0.5, "grad_norm": 2.6580488681793213, "learning_rate": 0.0002, "loss": 1.4371, "step": 122080 }, { "epoch": 0.5, "grad_norm": 3.6732821464538574, "learning_rate": 0.0002, "loss": 1.3419, "step": 122090 }, { "epoch": 0.5, "grad_norm": 3.0190672874450684, "learning_rate": 0.0002, "loss": 1.5814, "step": 122100 }, { "epoch": 0.5, "grad_norm": 3.383195400238037, "learning_rate": 0.0002, "loss": 1.6243, "step": 122110 }, { "epoch": 0.5, "grad_norm": 3.9537172317504883, "learning_rate": 0.0002, "loss": 1.7172, "step": 122120 }, { "epoch": 0.5, "grad_norm": 3.6391329765319824, "learning_rate": 0.0002, "loss": 1.3979, "step": 122130 }, { "epoch": 0.5, "grad_norm": 3.787015199661255, "learning_rate": 0.0002, "loss": 1.4834, "step": 122140 }, { "epoch": 0.5, "grad_norm": 5.039968967437744, "learning_rate": 0.0002, "loss": 1.5942, "step": 122150 }, { "epoch": 0.5, "grad_norm": 3.2348685264587402, "learning_rate": 0.0002, "loss": 1.6805, "step": 122160 }, { "epoch": 0.5, "grad_norm": 2.5380682945251465, "learning_rate": 0.0002, "loss": 1.3504, "step": 122170 }, { "epoch": 0.5, "grad_norm": 3.968266725540161, "learning_rate": 0.0002, "loss": 1.5163, "step": 122180 }, { "epoch": 0.5, "grad_norm": 2.181859254837036, "learning_rate": 0.0002, "loss": 1.6987, "step": 122190 }, { "epoch": 0.5, "grad_norm": 4.112468719482422, "learning_rate": 0.0002, "loss": 1.3871, "step": 122200 }, { "epoch": 0.5, "grad_norm": 2.4053828716278076, "learning_rate": 0.0002, "loss": 1.4853, "step": 122210 }, { "epoch": 0.5, "grad_norm": 2.1789472103118896, "learning_rate": 0.0002, "loss": 1.6825, "step": 122220 }, { "epoch": 0.5, "grad_norm": 3.9520113468170166, "learning_rate": 0.0002, "loss": 1.7665, "step": 122230 }, { "epoch": 0.5, "grad_norm": 2.6667094230651855, "learning_rate": 0.0002, "loss": 1.5555, "step": 122240 }, { "epoch": 0.5, "grad_norm": 2.631404161453247, "learning_rate": 0.0002, "loss": 1.6613, "step": 122250 }, { "epoch": 0.5, "grad_norm": 2.9067180156707764, "learning_rate": 0.0002, "loss": 1.4242, "step": 122260 }, { "epoch": 0.5, "grad_norm": 2.189915657043457, "learning_rate": 0.0002, "loss": 1.5157, "step": 122270 }, { "epoch": 0.5, "grad_norm": 4.301669597625732, "learning_rate": 0.0002, "loss": 1.7333, "step": 122280 }, { "epoch": 0.5, "grad_norm": 4.435693264007568, "learning_rate": 0.0002, "loss": 1.6707, "step": 122290 }, { "epoch": 0.5, "grad_norm": 6.275533199310303, "learning_rate": 0.0002, "loss": 1.612, "step": 122300 }, { "epoch": 0.5, "grad_norm": 4.3052592277526855, "learning_rate": 0.0002, "loss": 1.7298, "step": 122310 }, { "epoch": 0.5, "grad_norm": 2.7737905979156494, "learning_rate": 0.0002, "loss": 1.6042, "step": 122320 }, { "epoch": 0.5, "grad_norm": 3.8432235717773438, "learning_rate": 0.0002, "loss": 1.4179, "step": 122330 }, { "epoch": 0.5, "grad_norm": 3.768160343170166, "learning_rate": 0.0002, "loss": 1.6476, "step": 122340 }, { "epoch": 0.5, "grad_norm": 5.095324516296387, "learning_rate": 0.0002, "loss": 1.6118, "step": 122350 }, { "epoch": 0.5, "grad_norm": 2.739320755004883, "learning_rate": 0.0002, "loss": 1.3768, "step": 122360 }, { "epoch": 0.5, "grad_norm": 2.304347276687622, "learning_rate": 0.0002, "loss": 1.3613, "step": 122370 }, { "epoch": 0.5, "grad_norm": 5.6009697914123535, "learning_rate": 0.0002, "loss": 1.4567, "step": 122380 }, { "epoch": 0.5, "grad_norm": 3.9484620094299316, "learning_rate": 0.0002, "loss": 1.5676, "step": 122390 }, { "epoch": 0.5, "grad_norm": 2.346498489379883, "learning_rate": 0.0002, "loss": 1.6998, "step": 122400 }, { "epoch": 0.5, "grad_norm": 3.670240640640259, "learning_rate": 0.0002, "loss": 1.3954, "step": 122410 }, { "epoch": 0.5, "grad_norm": 4.416581153869629, "learning_rate": 0.0002, "loss": 1.5769, "step": 122420 }, { "epoch": 0.5, "grad_norm": 3.0385189056396484, "learning_rate": 0.0002, "loss": 1.6352, "step": 122430 }, { "epoch": 0.5, "grad_norm": 3.6119844913482666, "learning_rate": 0.0002, "loss": 1.497, "step": 122440 }, { "epoch": 0.5, "grad_norm": 4.875529766082764, "learning_rate": 0.0002, "loss": 1.4407, "step": 122450 }, { "epoch": 0.5, "grad_norm": 2.531200647354126, "learning_rate": 0.0002, "loss": 1.5905, "step": 122460 }, { "epoch": 0.5, "grad_norm": 4.892011642456055, "learning_rate": 0.0002, "loss": 1.6165, "step": 122470 }, { "epoch": 0.5, "grad_norm": 3.991157293319702, "learning_rate": 0.0002, "loss": 1.5838, "step": 122480 }, { "epoch": 0.5, "grad_norm": 2.5804946422576904, "learning_rate": 0.0002, "loss": 1.7005, "step": 122490 }, { "epoch": 0.5, "grad_norm": 3.3706724643707275, "learning_rate": 0.0002, "loss": 1.674, "step": 122500 }, { "epoch": 0.5, "grad_norm": 2.5921952724456787, "learning_rate": 0.0002, "loss": 1.5976, "step": 122510 }, { "epoch": 0.5, "grad_norm": 3.6414270401000977, "learning_rate": 0.0002, "loss": 1.4423, "step": 122520 }, { "epoch": 0.5, "grad_norm": 2.790158271789551, "learning_rate": 0.0002, "loss": 1.5673, "step": 122530 }, { "epoch": 0.5, "grad_norm": 2.5495033264160156, "learning_rate": 0.0002, "loss": 1.578, "step": 122540 }, { "epoch": 0.5, "grad_norm": 2.0199732780456543, "learning_rate": 0.0002, "loss": 1.6941, "step": 122550 }, { "epoch": 0.5, "grad_norm": 3.6707398891448975, "learning_rate": 0.0002, "loss": 1.4674, "step": 122560 }, { "epoch": 0.5, "grad_norm": 2.6571731567382812, "learning_rate": 0.0002, "loss": 1.6243, "step": 122570 }, { "epoch": 0.5, "grad_norm": 2.5021121501922607, "learning_rate": 0.0002, "loss": 1.5673, "step": 122580 }, { "epoch": 0.5, "grad_norm": 3.1124088764190674, "learning_rate": 0.0002, "loss": 1.5643, "step": 122590 }, { "epoch": 0.5, "grad_norm": 3.5817832946777344, "learning_rate": 0.0002, "loss": 1.5403, "step": 122600 }, { "epoch": 0.5, "grad_norm": 3.898967981338501, "learning_rate": 0.0002, "loss": 1.5163, "step": 122610 }, { "epoch": 0.5, "grad_norm": 4.6132917404174805, "learning_rate": 0.0002, "loss": 1.5867, "step": 122620 }, { "epoch": 0.5, "grad_norm": 3.672076463699341, "learning_rate": 0.0002, "loss": 1.5482, "step": 122630 }, { "epoch": 0.5, "grad_norm": 3.7890353202819824, "learning_rate": 0.0002, "loss": 1.3336, "step": 122640 }, { "epoch": 0.5, "grad_norm": 2.361182928085327, "learning_rate": 0.0002, "loss": 1.5635, "step": 122650 }, { "epoch": 0.5, "grad_norm": 2.0376691818237305, "learning_rate": 0.0002, "loss": 1.6618, "step": 122660 }, { "epoch": 0.5, "grad_norm": 2.552976369857788, "learning_rate": 0.0002, "loss": 1.4432, "step": 122670 }, { "epoch": 0.5, "grad_norm": 2.291788101196289, "learning_rate": 0.0002, "loss": 1.7382, "step": 122680 }, { "epoch": 0.5, "grad_norm": 3.8554742336273193, "learning_rate": 0.0002, "loss": 1.6376, "step": 122690 }, { "epoch": 0.5, "grad_norm": 10.704619407653809, "learning_rate": 0.0002, "loss": 1.714, "step": 122700 }, { "epoch": 0.5, "grad_norm": 2.6562485694885254, "learning_rate": 0.0002, "loss": 1.8417, "step": 122710 }, { "epoch": 0.5, "grad_norm": 4.164520263671875, "learning_rate": 0.0002, "loss": 1.7749, "step": 122720 }, { "epoch": 0.5, "grad_norm": 1.7222728729248047, "learning_rate": 0.0002, "loss": 1.5592, "step": 122730 }, { "epoch": 0.5, "grad_norm": 2.5862390995025635, "learning_rate": 0.0002, "loss": 1.5948, "step": 122740 }, { "epoch": 0.5, "grad_norm": 3.85971736907959, "learning_rate": 0.0002, "loss": 1.4739, "step": 122750 }, { "epoch": 0.5, "grad_norm": 3.583399772644043, "learning_rate": 0.0002, "loss": 1.7327, "step": 122760 }, { "epoch": 0.5, "grad_norm": 2.25947904586792, "learning_rate": 0.0002, "loss": 1.5123, "step": 122770 }, { "epoch": 0.5, "grad_norm": 2.1287403106689453, "learning_rate": 0.0002, "loss": 1.5313, "step": 122780 }, { "epoch": 0.5, "grad_norm": 3.351562738418579, "learning_rate": 0.0002, "loss": 1.5206, "step": 122790 }, { "epoch": 0.5, "grad_norm": 6.515544414520264, "learning_rate": 0.0002, "loss": 1.5063, "step": 122800 }, { "epoch": 0.5, "grad_norm": 3.9788732528686523, "learning_rate": 0.0002, "loss": 1.6234, "step": 122810 }, { "epoch": 0.5, "grad_norm": 2.0664782524108887, "learning_rate": 0.0002, "loss": 1.4698, "step": 122820 }, { "epoch": 0.5, "grad_norm": 5.698977947235107, "learning_rate": 0.0002, "loss": 1.5639, "step": 122830 }, { "epoch": 0.5, "grad_norm": 2.560607433319092, "learning_rate": 0.0002, "loss": 1.3635, "step": 122840 }, { "epoch": 0.5, "grad_norm": 2.5370616912841797, "learning_rate": 0.0002, "loss": 1.5485, "step": 122850 }, { "epoch": 0.5, "grad_norm": 4.530191421508789, "learning_rate": 0.0002, "loss": 1.5185, "step": 122860 }, { "epoch": 0.5, "grad_norm": 3.9047176837921143, "learning_rate": 0.0002, "loss": 1.4809, "step": 122870 }, { "epoch": 0.5, "grad_norm": 4.00625467300415, "learning_rate": 0.0002, "loss": 1.4559, "step": 122880 }, { "epoch": 0.5, "grad_norm": 3.2034223079681396, "learning_rate": 0.0002, "loss": 1.6803, "step": 122890 }, { "epoch": 0.5, "grad_norm": 2.304354190826416, "learning_rate": 0.0002, "loss": 1.636, "step": 122900 }, { "epoch": 0.5, "grad_norm": 3.358578681945801, "learning_rate": 0.0002, "loss": 1.8315, "step": 122910 }, { "epoch": 0.5, "grad_norm": 2.809911012649536, "learning_rate": 0.0002, "loss": 1.2945, "step": 122920 }, { "epoch": 0.5, "grad_norm": 4.6462626457214355, "learning_rate": 0.0002, "loss": 1.4034, "step": 122930 }, { "epoch": 0.5, "grad_norm": 2.004051685333252, "learning_rate": 0.0002, "loss": 1.4901, "step": 122940 }, { "epoch": 0.5, "grad_norm": 3.6899795532226562, "learning_rate": 0.0002, "loss": 1.4804, "step": 122950 }, { "epoch": 0.5, "grad_norm": 4.042530059814453, "learning_rate": 0.0002, "loss": 1.5721, "step": 122960 }, { "epoch": 0.5, "grad_norm": 4.625904083251953, "learning_rate": 0.0002, "loss": 1.6916, "step": 122970 }, { "epoch": 0.5, "grad_norm": 2.2142393589019775, "learning_rate": 0.0002, "loss": 1.3255, "step": 122980 }, { "epoch": 0.5, "grad_norm": 3.375143527984619, "learning_rate": 0.0002, "loss": 1.4134, "step": 122990 }, { "epoch": 0.5, "grad_norm": 3.4641835689544678, "learning_rate": 0.0002, "loss": 1.2819, "step": 123000 }, { "epoch": 0.5, "grad_norm": 3.456411600112915, "learning_rate": 0.0002, "loss": 1.8127, "step": 123010 }, { "epoch": 0.5, "grad_norm": 3.966442108154297, "learning_rate": 0.0002, "loss": 1.3497, "step": 123020 }, { "epoch": 0.5, "grad_norm": 2.062588691711426, "learning_rate": 0.0002, "loss": 1.4343, "step": 123030 }, { "epoch": 0.5, "grad_norm": 3.566586971282959, "learning_rate": 0.0002, "loss": 1.7578, "step": 123040 }, { "epoch": 0.5, "grad_norm": 5.119847774505615, "learning_rate": 0.0002, "loss": 1.6526, "step": 123050 }, { "epoch": 0.5, "grad_norm": 2.5872292518615723, "learning_rate": 0.0002, "loss": 1.5984, "step": 123060 }, { "epoch": 0.5, "grad_norm": 3.9979310035705566, "learning_rate": 0.0002, "loss": 1.6011, "step": 123070 }, { "epoch": 0.5, "grad_norm": 3.6274991035461426, "learning_rate": 0.0002, "loss": 1.5564, "step": 123080 }, { "epoch": 0.5, "grad_norm": 2.3689746856689453, "learning_rate": 0.0002, "loss": 1.4265, "step": 123090 }, { "epoch": 0.5, "grad_norm": 3.668362617492676, "learning_rate": 0.0002, "loss": 1.4619, "step": 123100 }, { "epoch": 0.5, "grad_norm": 3.1356353759765625, "learning_rate": 0.0002, "loss": 1.6222, "step": 123110 }, { "epoch": 0.5, "grad_norm": 2.452183246612549, "learning_rate": 0.0002, "loss": 1.539, "step": 123120 }, { "epoch": 0.5, "grad_norm": 3.295325517654419, "learning_rate": 0.0002, "loss": 1.433, "step": 123130 }, { "epoch": 0.5, "grad_norm": 3.9633357524871826, "learning_rate": 0.0002, "loss": 1.6644, "step": 123140 }, { "epoch": 0.5, "grad_norm": 2.735116481781006, "learning_rate": 0.0002, "loss": 1.3299, "step": 123150 }, { "epoch": 0.5, "grad_norm": 2.6518568992614746, "learning_rate": 0.0002, "loss": 1.5199, "step": 123160 }, { "epoch": 0.5, "grad_norm": 2.790593385696411, "learning_rate": 0.0002, "loss": 1.5957, "step": 123170 }, { "epoch": 0.5, "grad_norm": 2.527806520462036, "learning_rate": 0.0002, "loss": 1.7222, "step": 123180 }, { "epoch": 0.5, "grad_norm": 5.1033406257629395, "learning_rate": 0.0002, "loss": 1.6307, "step": 123190 }, { "epoch": 0.5, "grad_norm": 3.5991086959838867, "learning_rate": 0.0002, "loss": 1.4661, "step": 123200 }, { "epoch": 0.5, "grad_norm": 2.1095852851867676, "learning_rate": 0.0002, "loss": 1.883, "step": 123210 }, { "epoch": 0.5, "grad_norm": 2.0261292457580566, "learning_rate": 0.0002, "loss": 1.4947, "step": 123220 }, { "epoch": 0.5, "grad_norm": 4.105234146118164, "learning_rate": 0.0002, "loss": 1.573, "step": 123230 }, { "epoch": 0.5, "grad_norm": 3.0583646297454834, "learning_rate": 0.0002, "loss": 1.3178, "step": 123240 }, { "epoch": 0.5, "grad_norm": 3.521066188812256, "learning_rate": 0.0002, "loss": 1.4076, "step": 123250 }, { "epoch": 0.5, "grad_norm": 2.6663739681243896, "learning_rate": 0.0002, "loss": 1.4845, "step": 123260 }, { "epoch": 0.5, "grad_norm": 3.948310136795044, "learning_rate": 0.0002, "loss": 1.8658, "step": 123270 }, { "epoch": 0.5, "grad_norm": 2.8459696769714355, "learning_rate": 0.0002, "loss": 1.5111, "step": 123280 }, { "epoch": 0.5, "grad_norm": 4.1333184242248535, "learning_rate": 0.0002, "loss": 1.7576, "step": 123290 }, { "epoch": 0.5, "grad_norm": 2.43772029876709, "learning_rate": 0.0002, "loss": 1.5782, "step": 123300 }, { "epoch": 0.5, "grad_norm": 3.2505476474761963, "learning_rate": 0.0002, "loss": 1.6917, "step": 123310 }, { "epoch": 0.5, "grad_norm": 4.906725883483887, "learning_rate": 0.0002, "loss": 1.846, "step": 123320 }, { "epoch": 0.5, "grad_norm": 2.5693159103393555, "learning_rate": 0.0002, "loss": 1.6089, "step": 123330 }, { "epoch": 0.5, "grad_norm": 4.17441987991333, "learning_rate": 0.0002, "loss": 1.3429, "step": 123340 }, { "epoch": 0.5, "grad_norm": 3.454390287399292, "learning_rate": 0.0002, "loss": 1.715, "step": 123350 }, { "epoch": 0.5, "grad_norm": 3.468381404876709, "learning_rate": 0.0002, "loss": 1.2689, "step": 123360 }, { "epoch": 0.5, "grad_norm": 2.9387643337249756, "learning_rate": 0.0002, "loss": 1.894, "step": 123370 }, { "epoch": 0.5, "grad_norm": 2.9744086265563965, "learning_rate": 0.0002, "loss": 1.3654, "step": 123380 }, { "epoch": 0.5, "grad_norm": 2.838456869125366, "learning_rate": 0.0002, "loss": 1.8129, "step": 123390 }, { "epoch": 0.5, "grad_norm": 4.319774150848389, "learning_rate": 0.0002, "loss": 1.2773, "step": 123400 }, { "epoch": 0.5, "grad_norm": 2.2558014392852783, "learning_rate": 0.0002, "loss": 1.6727, "step": 123410 }, { "epoch": 0.5, "grad_norm": 1.8242273330688477, "learning_rate": 0.0002, "loss": 1.6619, "step": 123420 }, { "epoch": 0.5, "grad_norm": 3.215623617172241, "learning_rate": 0.0002, "loss": 1.6448, "step": 123430 }, { "epoch": 0.5, "grad_norm": 2.4000637531280518, "learning_rate": 0.0002, "loss": 1.6015, "step": 123440 }, { "epoch": 0.5, "grad_norm": 4.702251434326172, "learning_rate": 0.0002, "loss": 1.5906, "step": 123450 }, { "epoch": 0.5, "grad_norm": 4.737180233001709, "learning_rate": 0.0002, "loss": 1.6002, "step": 123460 }, { "epoch": 0.5, "grad_norm": 2.5231900215148926, "learning_rate": 0.0002, "loss": 1.341, "step": 123470 }, { "epoch": 0.5, "grad_norm": 1.9636650085449219, "learning_rate": 0.0002, "loss": 1.6779, "step": 123480 }, { "epoch": 0.5, "grad_norm": 2.259124279022217, "learning_rate": 0.0002, "loss": 1.7619, "step": 123490 }, { "epoch": 0.5, "grad_norm": 1.6061939001083374, "learning_rate": 0.0002, "loss": 1.4751, "step": 123500 }, { "epoch": 0.5, "grad_norm": 2.157170534133911, "learning_rate": 0.0002, "loss": 1.7154, "step": 123510 }, { "epoch": 0.5, "grad_norm": 2.657288074493408, "learning_rate": 0.0002, "loss": 1.4011, "step": 123520 }, { "epoch": 0.5, "grad_norm": 2.6440589427948, "learning_rate": 0.0002, "loss": 1.5805, "step": 123530 }, { "epoch": 0.5, "grad_norm": 5.330924034118652, "learning_rate": 0.0002, "loss": 1.5873, "step": 123540 }, { "epoch": 0.5, "grad_norm": 3.3479244709014893, "learning_rate": 0.0002, "loss": 1.5769, "step": 123550 }, { "epoch": 0.5, "grad_norm": 3.503380298614502, "learning_rate": 0.0002, "loss": 1.4307, "step": 123560 }, { "epoch": 0.5, "grad_norm": 2.05218243598938, "learning_rate": 0.0002, "loss": 1.5559, "step": 123570 }, { "epoch": 0.5, "grad_norm": 1.7944395542144775, "learning_rate": 0.0002, "loss": 1.6669, "step": 123580 }, { "epoch": 0.5, "grad_norm": 3.0612070560455322, "learning_rate": 0.0002, "loss": 1.4596, "step": 123590 }, { "epoch": 0.5, "grad_norm": 2.4612388610839844, "learning_rate": 0.0002, "loss": 1.7166, "step": 123600 }, { "epoch": 0.5, "grad_norm": 2.8783881664276123, "learning_rate": 0.0002, "loss": 1.2046, "step": 123610 }, { "epoch": 0.5, "grad_norm": 2.994312286376953, "learning_rate": 0.0002, "loss": 1.6524, "step": 123620 }, { "epoch": 0.5, "grad_norm": 2.407273530960083, "learning_rate": 0.0002, "loss": 1.6343, "step": 123630 }, { "epoch": 0.5, "grad_norm": 3.6341750621795654, "learning_rate": 0.0002, "loss": 1.4469, "step": 123640 }, { "epoch": 0.5, "grad_norm": 3.3627660274505615, "learning_rate": 0.0002, "loss": 1.8983, "step": 123650 }, { "epoch": 0.5, "grad_norm": 2.434462070465088, "learning_rate": 0.0002, "loss": 1.7414, "step": 123660 }, { "epoch": 0.5, "grad_norm": 2.110128402709961, "learning_rate": 0.0002, "loss": 1.5454, "step": 123670 }, { "epoch": 0.5, "grad_norm": 2.5603013038635254, "learning_rate": 0.0002, "loss": 1.5625, "step": 123680 }, { "epoch": 0.5, "grad_norm": 2.6318166255950928, "learning_rate": 0.0002, "loss": 1.4945, "step": 123690 }, { "epoch": 0.5, "grad_norm": 2.6954689025878906, "learning_rate": 0.0002, "loss": 1.4663, "step": 123700 }, { "epoch": 0.5, "grad_norm": 3.4379379749298096, "learning_rate": 0.0002, "loss": 1.4732, "step": 123710 }, { "epoch": 0.5, "grad_norm": 1.5642696619033813, "learning_rate": 0.0002, "loss": 1.6438, "step": 123720 }, { "epoch": 0.5, "grad_norm": 3.7074337005615234, "learning_rate": 0.0002, "loss": 1.5545, "step": 123730 }, { "epoch": 0.5, "grad_norm": 3.8149919509887695, "learning_rate": 0.0002, "loss": 1.6104, "step": 123740 }, { "epoch": 0.5, "grad_norm": 2.82254958152771, "learning_rate": 0.0002, "loss": 1.6711, "step": 123750 }, { "epoch": 0.5, "grad_norm": 1.453995943069458, "learning_rate": 0.0002, "loss": 1.5876, "step": 123760 }, { "epoch": 0.5, "grad_norm": 2.6354551315307617, "learning_rate": 0.0002, "loss": 1.4063, "step": 123770 }, { "epoch": 0.5, "grad_norm": 2.640312433242798, "learning_rate": 0.0002, "loss": 1.4758, "step": 123780 }, { "epoch": 0.5, "grad_norm": 3.675248384475708, "learning_rate": 0.0002, "loss": 1.488, "step": 123790 }, { "epoch": 0.5, "grad_norm": 2.3093392848968506, "learning_rate": 0.0002, "loss": 1.6581, "step": 123800 }, { "epoch": 0.5, "grad_norm": 1.9735743999481201, "learning_rate": 0.0002, "loss": 1.7022, "step": 123810 }, { "epoch": 0.5, "grad_norm": 3.7566018104553223, "learning_rate": 0.0002, "loss": 1.5709, "step": 123820 }, { "epoch": 0.5, "grad_norm": 1.5911967754364014, "learning_rate": 0.0002, "loss": 1.7606, "step": 123830 }, { "epoch": 0.5, "grad_norm": 3.696075439453125, "learning_rate": 0.0002, "loss": 1.6034, "step": 123840 }, { "epoch": 0.5, "grad_norm": 3.169877290725708, "learning_rate": 0.0002, "loss": 1.611, "step": 123850 }, { "epoch": 0.5, "grad_norm": 1.3614506721496582, "learning_rate": 0.0002, "loss": 1.693, "step": 123860 }, { "epoch": 0.5, "grad_norm": 4.493702411651611, "learning_rate": 0.0002, "loss": 1.7881, "step": 123870 }, { "epoch": 0.5, "grad_norm": 2.517871141433716, "learning_rate": 0.0002, "loss": 1.7237, "step": 123880 }, { "epoch": 0.5, "grad_norm": 2.79872727394104, "learning_rate": 0.0002, "loss": 1.7469, "step": 123890 }, { "epoch": 0.5, "grad_norm": 3.5671849250793457, "learning_rate": 0.0002, "loss": 1.6576, "step": 123900 }, { "epoch": 0.5, "grad_norm": 3.4462602138519287, "learning_rate": 0.0002, "loss": 1.5356, "step": 123910 }, { "epoch": 0.5, "grad_norm": 4.541136741638184, "learning_rate": 0.0002, "loss": 1.6612, "step": 123920 }, { "epoch": 0.5, "grad_norm": 2.075737953186035, "learning_rate": 0.0002, "loss": 1.6342, "step": 123930 }, { "epoch": 0.5, "grad_norm": 3.7629916667938232, "learning_rate": 0.0002, "loss": 1.558, "step": 123940 }, { "epoch": 0.5, "grad_norm": 4.031324863433838, "learning_rate": 0.0002, "loss": 1.8897, "step": 123950 }, { "epoch": 0.5, "grad_norm": 4.559774398803711, "learning_rate": 0.0002, "loss": 1.4551, "step": 123960 }, { "epoch": 0.5, "grad_norm": 2.8731417655944824, "learning_rate": 0.0002, "loss": 1.5154, "step": 123970 }, { "epoch": 0.5, "grad_norm": 6.697229385375977, "learning_rate": 0.0002, "loss": 1.3347, "step": 123980 }, { "epoch": 0.5, "grad_norm": 4.330357551574707, "learning_rate": 0.0002, "loss": 1.7085, "step": 123990 }, { "epoch": 0.5, "grad_norm": 2.6222586631774902, "learning_rate": 0.0002, "loss": 1.486, "step": 124000 }, { "epoch": 0.5, "grad_norm": 3.648458957672119, "learning_rate": 0.0002, "loss": 1.3116, "step": 124010 }, { "epoch": 0.5, "grad_norm": 1.8932478427886963, "learning_rate": 0.0002, "loss": 1.6163, "step": 124020 }, { "epoch": 0.5, "grad_norm": 4.636195182800293, "learning_rate": 0.0002, "loss": 1.5117, "step": 124030 }, { "epoch": 0.5, "grad_norm": 3.2724175453186035, "learning_rate": 0.0002, "loss": 1.8292, "step": 124040 }, { "epoch": 0.5, "grad_norm": 2.273961305618286, "learning_rate": 0.0002, "loss": 1.6679, "step": 124050 }, { "epoch": 0.51, "grad_norm": 4.6100754737854, "learning_rate": 0.0002, "loss": 1.7396, "step": 124060 }, { "epoch": 0.51, "grad_norm": 4.480730056762695, "learning_rate": 0.0002, "loss": 1.6263, "step": 124070 }, { "epoch": 0.51, "grad_norm": 3.5546140670776367, "learning_rate": 0.0002, "loss": 1.6386, "step": 124080 }, { "epoch": 0.51, "grad_norm": 2.7631499767303467, "learning_rate": 0.0002, "loss": 1.394, "step": 124090 }, { "epoch": 0.51, "grad_norm": 1.9095731973648071, "learning_rate": 0.0002, "loss": 1.6853, "step": 124100 }, { "epoch": 0.51, "grad_norm": 3.070892095565796, "learning_rate": 0.0002, "loss": 1.7403, "step": 124110 }, { "epoch": 0.51, "grad_norm": 2.6989099979400635, "learning_rate": 0.0002, "loss": 1.5741, "step": 124120 }, { "epoch": 0.51, "grad_norm": 2.763979196548462, "learning_rate": 0.0002, "loss": 1.6348, "step": 124130 }, { "epoch": 0.51, "grad_norm": 2.3073699474334717, "learning_rate": 0.0002, "loss": 1.5046, "step": 124140 }, { "epoch": 0.51, "grad_norm": 2.6824727058410645, "learning_rate": 0.0002, "loss": 1.5905, "step": 124150 }, { "epoch": 0.51, "grad_norm": 3.7819714546203613, "learning_rate": 0.0002, "loss": 1.5373, "step": 124160 }, { "epoch": 0.51, "grad_norm": 2.5576131343841553, "learning_rate": 0.0002, "loss": 1.665, "step": 124170 }, { "epoch": 0.51, "grad_norm": 2.3548693656921387, "learning_rate": 0.0002, "loss": 1.529, "step": 124180 }, { "epoch": 0.51, "grad_norm": 2.6969003677368164, "learning_rate": 0.0002, "loss": 1.2866, "step": 124190 }, { "epoch": 0.51, "grad_norm": 2.896185874938965, "learning_rate": 0.0002, "loss": 1.6155, "step": 124200 }, { "epoch": 0.51, "grad_norm": 3.7069664001464844, "learning_rate": 0.0002, "loss": 1.6438, "step": 124210 }, { "epoch": 0.51, "grad_norm": 3.2674100399017334, "learning_rate": 0.0002, "loss": 1.652, "step": 124220 }, { "epoch": 0.51, "grad_norm": 2.150951623916626, "learning_rate": 0.0002, "loss": 1.9214, "step": 124230 }, { "epoch": 0.51, "grad_norm": 2.9915883541107178, "learning_rate": 0.0002, "loss": 1.4218, "step": 124240 }, { "epoch": 0.51, "grad_norm": 2.7955551147460938, "learning_rate": 0.0002, "loss": 1.4603, "step": 124250 }, { "epoch": 0.51, "grad_norm": 2.6934773921966553, "learning_rate": 0.0002, "loss": 1.6242, "step": 124260 }, { "epoch": 0.51, "grad_norm": 2.57025146484375, "learning_rate": 0.0002, "loss": 1.7145, "step": 124270 }, { "epoch": 0.51, "grad_norm": 3.9055306911468506, "learning_rate": 0.0002, "loss": 1.7168, "step": 124280 }, { "epoch": 0.51, "grad_norm": 3.712097406387329, "learning_rate": 0.0002, "loss": 1.7831, "step": 124290 }, { "epoch": 0.51, "grad_norm": 3.629844903945923, "learning_rate": 0.0002, "loss": 1.6499, "step": 124300 }, { "epoch": 0.51, "grad_norm": 3.4569520950317383, "learning_rate": 0.0002, "loss": 1.529, "step": 124310 }, { "epoch": 0.51, "grad_norm": 2.706514835357666, "learning_rate": 0.0002, "loss": 1.4134, "step": 124320 }, { "epoch": 0.51, "grad_norm": 0.9299465417861938, "learning_rate": 0.0002, "loss": 1.4254, "step": 124330 }, { "epoch": 0.51, "grad_norm": 3.235318899154663, "learning_rate": 0.0002, "loss": 1.5808, "step": 124340 }, { "epoch": 0.51, "grad_norm": 4.038754940032959, "learning_rate": 0.0002, "loss": 1.5419, "step": 124350 }, { "epoch": 0.51, "grad_norm": 2.9321048259735107, "learning_rate": 0.0002, "loss": 1.8348, "step": 124360 }, { "epoch": 0.51, "grad_norm": 3.6442461013793945, "learning_rate": 0.0002, "loss": 1.8535, "step": 124370 }, { "epoch": 0.51, "grad_norm": 2.874715566635132, "learning_rate": 0.0002, "loss": 1.7719, "step": 124380 }, { "epoch": 0.51, "grad_norm": 2.9235479831695557, "learning_rate": 0.0002, "loss": 1.6653, "step": 124390 }, { "epoch": 0.51, "grad_norm": 2.6910717487335205, "learning_rate": 0.0002, "loss": 1.5747, "step": 124400 }, { "epoch": 0.51, "grad_norm": 4.6383442878723145, "learning_rate": 0.0002, "loss": 1.4553, "step": 124410 }, { "epoch": 0.51, "grad_norm": 2.3709144592285156, "learning_rate": 0.0002, "loss": 1.3884, "step": 124420 }, { "epoch": 0.51, "grad_norm": 2.1917715072631836, "learning_rate": 0.0002, "loss": 1.3373, "step": 124430 }, { "epoch": 0.51, "grad_norm": 2.1557254791259766, "learning_rate": 0.0002, "loss": 1.4359, "step": 124440 }, { "epoch": 0.51, "grad_norm": 2.7807178497314453, "learning_rate": 0.0002, "loss": 1.6451, "step": 124450 }, { "epoch": 0.51, "grad_norm": 5.324798583984375, "learning_rate": 0.0002, "loss": 1.6643, "step": 124460 }, { "epoch": 0.51, "grad_norm": 2.438308000564575, "learning_rate": 0.0002, "loss": 1.4914, "step": 124470 }, { "epoch": 0.51, "grad_norm": 5.5981574058532715, "learning_rate": 0.0002, "loss": 1.6323, "step": 124480 }, { "epoch": 0.51, "grad_norm": 3.1689205169677734, "learning_rate": 0.0002, "loss": 1.6569, "step": 124490 }, { "epoch": 0.51, "grad_norm": 3.3914313316345215, "learning_rate": 0.0002, "loss": 1.6833, "step": 124500 }, { "epoch": 0.51, "grad_norm": 3.023073434829712, "learning_rate": 0.0002, "loss": 1.3741, "step": 124510 }, { "epoch": 0.51, "grad_norm": 2.831662893295288, "learning_rate": 0.0002, "loss": 1.36, "step": 124520 }, { "epoch": 0.51, "grad_norm": 3.6598336696624756, "learning_rate": 0.0002, "loss": 1.6113, "step": 124530 }, { "epoch": 0.51, "grad_norm": 2.529972553253174, "learning_rate": 0.0002, "loss": 1.5053, "step": 124540 }, { "epoch": 0.51, "grad_norm": 2.927993059158325, "learning_rate": 0.0002, "loss": 1.5322, "step": 124550 }, { "epoch": 0.51, "grad_norm": 3.3708431720733643, "learning_rate": 0.0002, "loss": 1.7784, "step": 124560 }, { "epoch": 0.51, "grad_norm": 3.283825635910034, "learning_rate": 0.0002, "loss": 1.7402, "step": 124570 }, { "epoch": 0.51, "grad_norm": 4.154979705810547, "learning_rate": 0.0002, "loss": 1.5342, "step": 124580 }, { "epoch": 0.51, "grad_norm": 2.1893885135650635, "learning_rate": 0.0002, "loss": 1.6441, "step": 124590 }, { "epoch": 0.51, "grad_norm": 4.132131576538086, "learning_rate": 0.0002, "loss": 1.7273, "step": 124600 }, { "epoch": 0.51, "grad_norm": 1.8860119581222534, "learning_rate": 0.0002, "loss": 1.5627, "step": 124610 }, { "epoch": 0.51, "grad_norm": 4.681739330291748, "learning_rate": 0.0002, "loss": 1.5728, "step": 124620 }, { "epoch": 0.51, "grad_norm": 3.2793691158294678, "learning_rate": 0.0002, "loss": 1.8014, "step": 124630 }, { "epoch": 0.51, "grad_norm": 3.014094352722168, "learning_rate": 0.0002, "loss": 1.6762, "step": 124640 }, { "epoch": 0.51, "grad_norm": 2.769026279449463, "learning_rate": 0.0002, "loss": 1.4448, "step": 124650 }, { "epoch": 0.51, "grad_norm": 1.964195728302002, "learning_rate": 0.0002, "loss": 1.6021, "step": 124660 }, { "epoch": 0.51, "grad_norm": 2.992332935333252, "learning_rate": 0.0002, "loss": 1.6585, "step": 124670 }, { "epoch": 0.51, "grad_norm": 3.1071720123291016, "learning_rate": 0.0002, "loss": 1.4869, "step": 124680 }, { "epoch": 0.51, "grad_norm": 3.3318724632263184, "learning_rate": 0.0002, "loss": 1.6095, "step": 124690 }, { "epoch": 0.51, "grad_norm": 3.19588303565979, "learning_rate": 0.0002, "loss": 1.623, "step": 124700 }, { "epoch": 0.51, "grad_norm": 2.4295895099639893, "learning_rate": 0.0002, "loss": 1.8409, "step": 124710 }, { "epoch": 0.51, "grad_norm": 3.3474817276000977, "learning_rate": 0.0002, "loss": 1.457, "step": 124720 }, { "epoch": 0.51, "grad_norm": 3.04140567779541, "learning_rate": 0.0002, "loss": 1.8006, "step": 124730 }, { "epoch": 0.51, "grad_norm": 2.964224100112915, "learning_rate": 0.0002, "loss": 1.3879, "step": 124740 }, { "epoch": 0.51, "grad_norm": 3.583732843399048, "learning_rate": 0.0002, "loss": 1.7202, "step": 124750 }, { "epoch": 0.51, "grad_norm": 5.001397132873535, "learning_rate": 0.0002, "loss": 1.5164, "step": 124760 }, { "epoch": 0.51, "grad_norm": 3.4701812267303467, "learning_rate": 0.0002, "loss": 1.4928, "step": 124770 }, { "epoch": 0.51, "grad_norm": 3.2268946170806885, "learning_rate": 0.0002, "loss": 1.7789, "step": 124780 }, { "epoch": 0.51, "grad_norm": 2.541255235671997, "learning_rate": 0.0002, "loss": 1.5614, "step": 124790 }, { "epoch": 0.51, "grad_norm": 3.7233543395996094, "learning_rate": 0.0002, "loss": 1.7218, "step": 124800 }, { "epoch": 0.51, "grad_norm": 3.3306586742401123, "learning_rate": 0.0002, "loss": 1.596, "step": 124810 }, { "epoch": 0.51, "grad_norm": 3.0950567722320557, "learning_rate": 0.0002, "loss": 1.5003, "step": 124820 }, { "epoch": 0.51, "grad_norm": 2.2745859622955322, "learning_rate": 0.0002, "loss": 1.6416, "step": 124830 }, { "epoch": 0.51, "grad_norm": 3.779761791229248, "learning_rate": 0.0002, "loss": 1.7574, "step": 124840 }, { "epoch": 0.51, "grad_norm": 2.1043858528137207, "learning_rate": 0.0002, "loss": 1.5744, "step": 124850 }, { "epoch": 0.51, "grad_norm": 3.007685422897339, "learning_rate": 0.0002, "loss": 1.789, "step": 124860 }, { "epoch": 0.51, "grad_norm": 4.182347297668457, "learning_rate": 0.0002, "loss": 1.6892, "step": 124870 }, { "epoch": 0.51, "grad_norm": 4.227908134460449, "learning_rate": 0.0002, "loss": 1.5778, "step": 124880 }, { "epoch": 0.51, "grad_norm": 3.9016880989074707, "learning_rate": 0.0002, "loss": 1.5459, "step": 124890 }, { "epoch": 0.51, "grad_norm": 4.391439437866211, "learning_rate": 0.0002, "loss": 1.6537, "step": 124900 }, { "epoch": 0.51, "grad_norm": 2.773472309112549, "learning_rate": 0.0002, "loss": 1.5795, "step": 124910 }, { "epoch": 0.51, "grad_norm": 1.94021737575531, "learning_rate": 0.0002, "loss": 1.7724, "step": 124920 }, { "epoch": 0.51, "grad_norm": 4.023500442504883, "learning_rate": 0.0002, "loss": 1.4069, "step": 124930 }, { "epoch": 0.51, "grad_norm": 3.593881845474243, "learning_rate": 0.0002, "loss": 1.6777, "step": 124940 }, { "epoch": 0.51, "grad_norm": 5.245331764221191, "learning_rate": 0.0002, "loss": 1.5041, "step": 124950 }, { "epoch": 0.51, "grad_norm": 2.734043598175049, "learning_rate": 0.0002, "loss": 1.5119, "step": 124960 }, { "epoch": 0.51, "grad_norm": 2.08093523979187, "learning_rate": 0.0002, "loss": 1.6115, "step": 124970 }, { "epoch": 0.51, "grad_norm": 2.4451823234558105, "learning_rate": 0.0002, "loss": 1.5386, "step": 124980 }, { "epoch": 0.51, "grad_norm": 1.6753864288330078, "learning_rate": 0.0002, "loss": 1.5243, "step": 124990 }, { "epoch": 0.51, "grad_norm": 2.9730961322784424, "learning_rate": 0.0002, "loss": 1.621, "step": 125000 }, { "epoch": 0.51, "grad_norm": 2.2499074935913086, "learning_rate": 0.0002, "loss": 1.3386, "step": 125010 }, { "epoch": 0.51, "grad_norm": 5.476802825927734, "learning_rate": 0.0002, "loss": 1.6696, "step": 125020 }, { "epoch": 0.51, "grad_norm": 3.9021873474121094, "learning_rate": 0.0002, "loss": 1.5503, "step": 125030 }, { "epoch": 0.51, "grad_norm": 2.9776456356048584, "learning_rate": 0.0002, "loss": 1.4911, "step": 125040 }, { "epoch": 0.51, "grad_norm": 1.5247761011123657, "learning_rate": 0.0002, "loss": 1.6474, "step": 125050 }, { "epoch": 0.51, "grad_norm": 3.6948719024658203, "learning_rate": 0.0002, "loss": 1.7613, "step": 125060 }, { "epoch": 0.51, "grad_norm": 3.4568629264831543, "learning_rate": 0.0002, "loss": 1.6972, "step": 125070 }, { "epoch": 0.51, "grad_norm": 4.440313816070557, "learning_rate": 0.0002, "loss": 1.6965, "step": 125080 }, { "epoch": 0.51, "grad_norm": 5.069302082061768, "learning_rate": 0.0002, "loss": 1.8045, "step": 125090 }, { "epoch": 0.51, "grad_norm": 2.618236541748047, "learning_rate": 0.0002, "loss": 1.6657, "step": 125100 }, { "epoch": 0.51, "grad_norm": 2.2581584453582764, "learning_rate": 0.0002, "loss": 1.6764, "step": 125110 }, { "epoch": 0.51, "grad_norm": 3.2753000259399414, "learning_rate": 0.0002, "loss": 1.432, "step": 125120 }, { "epoch": 0.51, "grad_norm": 3.828382730484009, "learning_rate": 0.0002, "loss": 1.3728, "step": 125130 }, { "epoch": 0.51, "grad_norm": 7.042856216430664, "learning_rate": 0.0002, "loss": 1.4696, "step": 125140 }, { "epoch": 0.51, "grad_norm": 3.573408603668213, "learning_rate": 0.0002, "loss": 1.5795, "step": 125150 }, { "epoch": 0.51, "grad_norm": 3.3570990562438965, "learning_rate": 0.0002, "loss": 1.7202, "step": 125160 }, { "epoch": 0.51, "grad_norm": 4.987830638885498, "learning_rate": 0.0002, "loss": 1.655, "step": 125170 }, { "epoch": 0.51, "grad_norm": 3.100466012954712, "learning_rate": 0.0002, "loss": 1.7589, "step": 125180 }, { "epoch": 0.51, "grad_norm": 1.9835748672485352, "learning_rate": 0.0002, "loss": 1.3126, "step": 125190 }, { "epoch": 0.51, "grad_norm": 4.266632556915283, "learning_rate": 0.0002, "loss": 1.5058, "step": 125200 }, { "epoch": 0.51, "grad_norm": 1.4443883895874023, "learning_rate": 0.0002, "loss": 1.7733, "step": 125210 }, { "epoch": 0.51, "grad_norm": 2.053493022918701, "learning_rate": 0.0002, "loss": 1.5021, "step": 125220 }, { "epoch": 0.51, "grad_norm": 2.7811880111694336, "learning_rate": 0.0002, "loss": 1.4421, "step": 125230 }, { "epoch": 0.51, "grad_norm": 2.6830549240112305, "learning_rate": 0.0002, "loss": 1.7632, "step": 125240 }, { "epoch": 0.51, "grad_norm": 1.8446563482284546, "learning_rate": 0.0002, "loss": 1.5425, "step": 125250 }, { "epoch": 0.51, "grad_norm": 2.2935006618499756, "learning_rate": 0.0002, "loss": 1.5873, "step": 125260 }, { "epoch": 0.51, "grad_norm": 2.7882370948791504, "learning_rate": 0.0002, "loss": 1.371, "step": 125270 }, { "epoch": 0.51, "grad_norm": 2.473402261734009, "learning_rate": 0.0002, "loss": 1.4859, "step": 125280 }, { "epoch": 0.51, "grad_norm": 2.4089367389678955, "learning_rate": 0.0002, "loss": 1.7644, "step": 125290 }, { "epoch": 0.51, "grad_norm": 6.396365165710449, "learning_rate": 0.0002, "loss": 1.6754, "step": 125300 }, { "epoch": 0.51, "grad_norm": 3.7541775703430176, "learning_rate": 0.0002, "loss": 1.8125, "step": 125310 }, { "epoch": 0.51, "grad_norm": 4.796175479888916, "learning_rate": 0.0002, "loss": 1.6955, "step": 125320 }, { "epoch": 0.51, "grad_norm": 3.109795570373535, "learning_rate": 0.0002, "loss": 1.7842, "step": 125330 }, { "epoch": 0.51, "grad_norm": 7.858023166656494, "learning_rate": 0.0002, "loss": 1.2954, "step": 125340 }, { "epoch": 0.51, "grad_norm": 2.1416847705841064, "learning_rate": 0.0002, "loss": 1.6551, "step": 125350 }, { "epoch": 0.51, "grad_norm": 2.688765287399292, "learning_rate": 0.0002, "loss": 1.5713, "step": 125360 }, { "epoch": 0.51, "grad_norm": 2.985180377960205, "learning_rate": 0.0002, "loss": 1.474, "step": 125370 }, { "epoch": 0.51, "grad_norm": 3.242718458175659, "learning_rate": 0.0002, "loss": 1.8124, "step": 125380 }, { "epoch": 0.51, "grad_norm": 3.731525421142578, "learning_rate": 0.0002, "loss": 1.5525, "step": 125390 }, { "epoch": 0.51, "grad_norm": 3.509251594543457, "learning_rate": 0.0002, "loss": 1.7327, "step": 125400 }, { "epoch": 0.51, "grad_norm": 2.665675163269043, "learning_rate": 0.0002, "loss": 1.4074, "step": 125410 }, { "epoch": 0.51, "grad_norm": 4.6248908042907715, "learning_rate": 0.0002, "loss": 1.7311, "step": 125420 }, { "epoch": 0.51, "grad_norm": 3.003476142883301, "learning_rate": 0.0002, "loss": 1.5949, "step": 125430 }, { "epoch": 0.51, "grad_norm": 2.4787800312042236, "learning_rate": 0.0002, "loss": 1.8281, "step": 125440 }, { "epoch": 0.51, "grad_norm": 3.1444907188415527, "learning_rate": 0.0002, "loss": 1.7267, "step": 125450 }, { "epoch": 0.51, "grad_norm": 4.092931747436523, "learning_rate": 0.0002, "loss": 1.8405, "step": 125460 }, { "epoch": 0.51, "grad_norm": 3.6205530166625977, "learning_rate": 0.0002, "loss": 1.6356, "step": 125470 }, { "epoch": 0.51, "grad_norm": 3.6066813468933105, "learning_rate": 0.0002, "loss": 1.4588, "step": 125480 }, { "epoch": 0.51, "grad_norm": 2.491377115249634, "learning_rate": 0.0002, "loss": 1.5551, "step": 125490 }, { "epoch": 0.51, "grad_norm": 3.9056177139282227, "learning_rate": 0.0002, "loss": 1.2874, "step": 125500 }, { "epoch": 0.51, "grad_norm": 4.107309818267822, "learning_rate": 0.0002, "loss": 1.6659, "step": 125510 }, { "epoch": 0.51, "grad_norm": 3.1696062088012695, "learning_rate": 0.0002, "loss": 1.7994, "step": 125520 }, { "epoch": 0.51, "grad_norm": 2.3216354846954346, "learning_rate": 0.0002, "loss": 1.5986, "step": 125530 }, { "epoch": 0.51, "grad_norm": 2.0263583660125732, "learning_rate": 0.0002, "loss": 1.3993, "step": 125540 }, { "epoch": 0.51, "grad_norm": 4.224869251251221, "learning_rate": 0.0002, "loss": 1.6897, "step": 125550 }, { "epoch": 0.51, "grad_norm": 2.593473434448242, "learning_rate": 0.0002, "loss": 1.7014, "step": 125560 }, { "epoch": 0.51, "grad_norm": 4.005175590515137, "learning_rate": 0.0002, "loss": 1.7239, "step": 125570 }, { "epoch": 0.51, "grad_norm": 4.747397422790527, "learning_rate": 0.0002, "loss": 1.4615, "step": 125580 }, { "epoch": 0.51, "grad_norm": 3.407895565032959, "learning_rate": 0.0002, "loss": 1.2501, "step": 125590 }, { "epoch": 0.51, "grad_norm": 3.526125192642212, "learning_rate": 0.0002, "loss": 1.7049, "step": 125600 }, { "epoch": 0.51, "grad_norm": 2.3937766551971436, "learning_rate": 0.0002, "loss": 1.3302, "step": 125610 }, { "epoch": 0.51, "grad_norm": 3.3430371284484863, "learning_rate": 0.0002, "loss": 1.7537, "step": 125620 }, { "epoch": 0.51, "grad_norm": 3.7516753673553467, "learning_rate": 0.0002, "loss": 1.7307, "step": 125630 }, { "epoch": 0.51, "grad_norm": 2.593876361846924, "learning_rate": 0.0002, "loss": 1.4553, "step": 125640 }, { "epoch": 0.51, "grad_norm": 3.8387844562530518, "learning_rate": 0.0002, "loss": 1.5597, "step": 125650 }, { "epoch": 0.51, "grad_norm": 3.365919351577759, "learning_rate": 0.0002, "loss": 1.6884, "step": 125660 }, { "epoch": 0.51, "grad_norm": 3.011416435241699, "learning_rate": 0.0002, "loss": 1.4423, "step": 125670 }, { "epoch": 0.51, "grad_norm": 2.132458209991455, "learning_rate": 0.0002, "loss": 1.5976, "step": 125680 }, { "epoch": 0.51, "grad_norm": 3.605757236480713, "learning_rate": 0.0002, "loss": 1.6364, "step": 125690 }, { "epoch": 0.51, "grad_norm": 1.9404555559158325, "learning_rate": 0.0002, "loss": 1.6327, "step": 125700 }, { "epoch": 0.51, "grad_norm": 2.7636709213256836, "learning_rate": 0.0002, "loss": 1.4382, "step": 125710 }, { "epoch": 0.51, "grad_norm": 4.227602481842041, "learning_rate": 0.0002, "loss": 1.6136, "step": 125720 }, { "epoch": 0.51, "grad_norm": 3.396871328353882, "learning_rate": 0.0002, "loss": 1.5123, "step": 125730 }, { "epoch": 0.51, "grad_norm": 1.9455103874206543, "learning_rate": 0.0002, "loss": 1.5681, "step": 125740 }, { "epoch": 0.51, "grad_norm": 3.828730344772339, "learning_rate": 0.0002, "loss": 1.5585, "step": 125750 }, { "epoch": 0.51, "grad_norm": 2.02414608001709, "learning_rate": 0.0002, "loss": 1.3497, "step": 125760 }, { "epoch": 0.51, "grad_norm": 5.61517858505249, "learning_rate": 0.0002, "loss": 1.5495, "step": 125770 }, { "epoch": 0.51, "grad_norm": 3.1259005069732666, "learning_rate": 0.0002, "loss": 1.3687, "step": 125780 }, { "epoch": 0.51, "grad_norm": 2.2519454956054688, "learning_rate": 0.0002, "loss": 1.43, "step": 125790 }, { "epoch": 0.51, "grad_norm": 1.9488486051559448, "learning_rate": 0.0002, "loss": 1.5873, "step": 125800 }, { "epoch": 0.51, "grad_norm": 3.7222185134887695, "learning_rate": 0.0002, "loss": 1.5066, "step": 125810 }, { "epoch": 0.51, "grad_norm": 3.696897506713867, "learning_rate": 0.0002, "loss": 1.5893, "step": 125820 }, { "epoch": 0.51, "grad_norm": 2.244948148727417, "learning_rate": 0.0002, "loss": 1.7774, "step": 125830 }, { "epoch": 0.51, "grad_norm": 3.8714771270751953, "learning_rate": 0.0002, "loss": 1.65, "step": 125840 }, { "epoch": 0.51, "grad_norm": 1.8700348138809204, "learning_rate": 0.0002, "loss": 1.6259, "step": 125850 }, { "epoch": 0.51, "grad_norm": 3.4579389095306396, "learning_rate": 0.0002, "loss": 1.6991, "step": 125860 }, { "epoch": 0.51, "grad_norm": 7.533376693725586, "learning_rate": 0.0002, "loss": 1.5859, "step": 125870 }, { "epoch": 0.51, "grad_norm": 2.6547529697418213, "learning_rate": 0.0002, "loss": 1.8159, "step": 125880 }, { "epoch": 0.51, "grad_norm": 3.115558385848999, "learning_rate": 0.0002, "loss": 1.4508, "step": 125890 }, { "epoch": 0.51, "grad_norm": 2.92610239982605, "learning_rate": 0.0002, "loss": 1.2485, "step": 125900 }, { "epoch": 0.51, "grad_norm": 3.06453537940979, "learning_rate": 0.0002, "loss": 1.6625, "step": 125910 }, { "epoch": 0.51, "grad_norm": 3.950723648071289, "learning_rate": 0.0002, "loss": 1.7337, "step": 125920 }, { "epoch": 0.51, "grad_norm": 2.7305705547332764, "learning_rate": 0.0002, "loss": 1.5025, "step": 125930 }, { "epoch": 0.51, "grad_norm": 6.837577819824219, "learning_rate": 0.0002, "loss": 1.5632, "step": 125940 }, { "epoch": 0.51, "grad_norm": 2.9324097633361816, "learning_rate": 0.0002, "loss": 1.9382, "step": 125950 }, { "epoch": 0.51, "grad_norm": 2.0047247409820557, "learning_rate": 0.0002, "loss": 1.6485, "step": 125960 }, { "epoch": 0.51, "grad_norm": 3.0772645473480225, "learning_rate": 0.0002, "loss": 1.5955, "step": 125970 }, { "epoch": 0.51, "grad_norm": 2.347072124481201, "learning_rate": 0.0002, "loss": 1.8612, "step": 125980 }, { "epoch": 0.51, "grad_norm": 2.3245372772216797, "learning_rate": 0.0002, "loss": 1.4644, "step": 125990 }, { "epoch": 0.51, "grad_norm": 3.81463360786438, "learning_rate": 0.0002, "loss": 1.6574, "step": 126000 }, { "epoch": 0.51, "grad_norm": 2.2315444946289062, "learning_rate": 0.0002, "loss": 1.691, "step": 126010 }, { "epoch": 0.51, "grad_norm": 3.98052716255188, "learning_rate": 0.0002, "loss": 1.6755, "step": 126020 }, { "epoch": 0.51, "grad_norm": 2.8202927112579346, "learning_rate": 0.0002, "loss": 1.3305, "step": 126030 }, { "epoch": 0.51, "grad_norm": 2.9501819610595703, "learning_rate": 0.0002, "loss": 1.8252, "step": 126040 }, { "epoch": 0.51, "grad_norm": 3.699249505996704, "learning_rate": 0.0002, "loss": 1.697, "step": 126050 }, { "epoch": 0.51, "grad_norm": 4.100847244262695, "learning_rate": 0.0002, "loss": 1.4648, "step": 126060 }, { "epoch": 0.51, "grad_norm": 4.203997611999512, "learning_rate": 0.0002, "loss": 1.6399, "step": 126070 }, { "epoch": 0.51, "grad_norm": 18.38734245300293, "learning_rate": 0.0002, "loss": 1.5283, "step": 126080 }, { "epoch": 0.51, "grad_norm": 2.056993246078491, "learning_rate": 0.0002, "loss": 1.8376, "step": 126090 }, { "epoch": 0.51, "grad_norm": 3.485010862350464, "learning_rate": 0.0002, "loss": 1.5288, "step": 126100 }, { "epoch": 0.51, "grad_norm": 2.8654897212982178, "learning_rate": 0.0002, "loss": 1.3463, "step": 126110 }, { "epoch": 0.51, "grad_norm": 2.2401721477508545, "learning_rate": 0.0002, "loss": 1.5162, "step": 126120 }, { "epoch": 0.51, "grad_norm": 3.0305263996124268, "learning_rate": 0.0002, "loss": 1.5275, "step": 126130 }, { "epoch": 0.51, "grad_norm": 3.763519525527954, "learning_rate": 0.0002, "loss": 1.6084, "step": 126140 }, { "epoch": 0.51, "grad_norm": 2.615682601928711, "learning_rate": 0.0002, "loss": 1.7723, "step": 126150 }, { "epoch": 0.51, "grad_norm": 1.6368907690048218, "learning_rate": 0.0002, "loss": 1.4778, "step": 126160 }, { "epoch": 0.51, "grad_norm": 3.4299514293670654, "learning_rate": 0.0002, "loss": 1.5721, "step": 126170 }, { "epoch": 0.51, "grad_norm": 2.762206792831421, "learning_rate": 0.0002, "loss": 1.5289, "step": 126180 }, { "epoch": 0.51, "grad_norm": 2.4111568927764893, "learning_rate": 0.0002, "loss": 1.8022, "step": 126190 }, { "epoch": 0.51, "grad_norm": 2.126889705657959, "learning_rate": 0.0002, "loss": 1.6831, "step": 126200 }, { "epoch": 0.51, "grad_norm": 2.914680004119873, "learning_rate": 0.0002, "loss": 1.5039, "step": 126210 }, { "epoch": 0.51, "grad_norm": 4.046311378479004, "learning_rate": 0.0002, "loss": 1.4151, "step": 126220 }, { "epoch": 0.51, "grad_norm": 3.834653854370117, "learning_rate": 0.0002, "loss": 1.7155, "step": 126230 }, { "epoch": 0.51, "grad_norm": 3.477987289428711, "learning_rate": 0.0002, "loss": 1.5881, "step": 126240 }, { "epoch": 0.51, "grad_norm": 2.126985549926758, "learning_rate": 0.0002, "loss": 1.3797, "step": 126250 }, { "epoch": 0.51, "grad_norm": 1.9676177501678467, "learning_rate": 0.0002, "loss": 1.5207, "step": 126260 }, { "epoch": 0.51, "grad_norm": 3.1062419414520264, "learning_rate": 0.0002, "loss": 1.6423, "step": 126270 }, { "epoch": 0.51, "grad_norm": 2.033508539199829, "learning_rate": 0.0002, "loss": 1.5351, "step": 126280 }, { "epoch": 0.51, "grad_norm": 3.938234806060791, "learning_rate": 0.0002, "loss": 1.6819, "step": 126290 }, { "epoch": 0.51, "grad_norm": 2.497244119644165, "learning_rate": 0.0002, "loss": 1.8482, "step": 126300 }, { "epoch": 0.51, "grad_norm": 4.773099422454834, "learning_rate": 0.0002, "loss": 1.7991, "step": 126310 }, { "epoch": 0.51, "grad_norm": 2.5827367305755615, "learning_rate": 0.0002, "loss": 1.6233, "step": 126320 }, { "epoch": 0.51, "grad_norm": 2.383765459060669, "learning_rate": 0.0002, "loss": 1.7945, "step": 126330 }, { "epoch": 0.51, "grad_norm": 3.0684754848480225, "learning_rate": 0.0002, "loss": 1.4453, "step": 126340 }, { "epoch": 0.51, "grad_norm": 1.6380369663238525, "learning_rate": 0.0002, "loss": 1.5246, "step": 126350 }, { "epoch": 0.51, "grad_norm": 2.6643781661987305, "learning_rate": 0.0002, "loss": 1.5027, "step": 126360 }, { "epoch": 0.51, "grad_norm": 2.479997396469116, "learning_rate": 0.0002, "loss": 1.4656, "step": 126370 }, { "epoch": 0.51, "grad_norm": 2.1145894527435303, "learning_rate": 0.0002, "loss": 1.3764, "step": 126380 }, { "epoch": 0.51, "grad_norm": 7.340428352355957, "learning_rate": 0.0002, "loss": 1.6055, "step": 126390 }, { "epoch": 0.51, "grad_norm": 4.225484371185303, "learning_rate": 0.0002, "loss": 1.6083, "step": 126400 }, { "epoch": 0.51, "grad_norm": 2.350900888442993, "learning_rate": 0.0002, "loss": 1.6107, "step": 126410 }, { "epoch": 0.51, "grad_norm": 1.8891125917434692, "learning_rate": 0.0002, "loss": 1.8364, "step": 126420 }, { "epoch": 0.51, "grad_norm": 3.7506887912750244, "learning_rate": 0.0002, "loss": 1.3515, "step": 126430 }, { "epoch": 0.51, "grad_norm": 1.3291480541229248, "learning_rate": 0.0002, "loss": 1.8324, "step": 126440 }, { "epoch": 0.51, "grad_norm": 3.5342767238616943, "learning_rate": 0.0002, "loss": 1.5628, "step": 126450 }, { "epoch": 0.51, "grad_norm": 2.8916397094726562, "learning_rate": 0.0002, "loss": 1.5482, "step": 126460 }, { "epoch": 0.51, "grad_norm": 2.0692639350891113, "learning_rate": 0.0002, "loss": 1.8077, "step": 126470 }, { "epoch": 0.51, "grad_norm": 3.309532403945923, "learning_rate": 0.0002, "loss": 1.537, "step": 126480 }, { "epoch": 0.51, "grad_norm": 2.8493714332580566, "learning_rate": 0.0002, "loss": 1.3685, "step": 126490 }, { "epoch": 0.51, "grad_norm": 5.357522010803223, "learning_rate": 0.0002, "loss": 1.5968, "step": 126500 }, { "epoch": 0.52, "grad_norm": 2.131704092025757, "learning_rate": 0.0002, "loss": 1.7331, "step": 126510 }, { "epoch": 0.52, "grad_norm": 3.831702709197998, "learning_rate": 0.0002, "loss": 1.578, "step": 126520 }, { "epoch": 0.52, "grad_norm": 3.3661611080169678, "learning_rate": 0.0002, "loss": 1.5072, "step": 126530 }, { "epoch": 0.52, "grad_norm": 3.109192371368408, "learning_rate": 0.0002, "loss": 1.5562, "step": 126540 }, { "epoch": 0.52, "grad_norm": 3.262145519256592, "learning_rate": 0.0002, "loss": 1.686, "step": 126550 }, { "epoch": 0.52, "grad_norm": 3.4791576862335205, "learning_rate": 0.0002, "loss": 1.4911, "step": 126560 }, { "epoch": 0.52, "grad_norm": 4.365330696105957, "learning_rate": 0.0002, "loss": 1.406, "step": 126570 }, { "epoch": 0.52, "grad_norm": 2.244560480117798, "learning_rate": 0.0002, "loss": 1.656, "step": 126580 }, { "epoch": 0.52, "grad_norm": 3.37253999710083, "learning_rate": 0.0002, "loss": 1.5068, "step": 126590 }, { "epoch": 0.52, "grad_norm": 3.083158016204834, "learning_rate": 0.0002, "loss": 1.3275, "step": 126600 }, { "epoch": 0.52, "grad_norm": 2.555108070373535, "learning_rate": 0.0002, "loss": 1.6242, "step": 126610 }, { "epoch": 0.52, "grad_norm": 1.4322572946548462, "learning_rate": 0.0002, "loss": 1.3491, "step": 126620 }, { "epoch": 0.52, "grad_norm": 2.8758702278137207, "learning_rate": 0.0002, "loss": 1.5998, "step": 126630 }, { "epoch": 0.52, "grad_norm": 3.7288756370544434, "learning_rate": 0.0002, "loss": 1.6885, "step": 126640 }, { "epoch": 0.52, "grad_norm": 2.9757120609283447, "learning_rate": 0.0002, "loss": 1.514, "step": 126650 }, { "epoch": 0.52, "grad_norm": 3.0700061321258545, "learning_rate": 0.0002, "loss": 1.6919, "step": 126660 }, { "epoch": 0.52, "grad_norm": 2.599972724914551, "learning_rate": 0.0002, "loss": 1.5165, "step": 126670 }, { "epoch": 0.52, "grad_norm": 2.80673885345459, "learning_rate": 0.0002, "loss": 1.5592, "step": 126680 }, { "epoch": 0.52, "grad_norm": 2.655069351196289, "learning_rate": 0.0002, "loss": 1.6493, "step": 126690 }, { "epoch": 0.52, "grad_norm": 3.281548500061035, "learning_rate": 0.0002, "loss": 1.6789, "step": 126700 }, { "epoch": 0.52, "grad_norm": 2.4705419540405273, "learning_rate": 0.0002, "loss": 1.5437, "step": 126710 }, { "epoch": 0.52, "grad_norm": 3.735233783721924, "learning_rate": 0.0002, "loss": 1.7035, "step": 126720 }, { "epoch": 0.52, "grad_norm": 2.348398447036743, "learning_rate": 0.0002, "loss": 1.4876, "step": 126730 }, { "epoch": 0.52, "grad_norm": 3.2707788944244385, "learning_rate": 0.0002, "loss": 1.7027, "step": 126740 }, { "epoch": 0.52, "grad_norm": 2.137918472290039, "learning_rate": 0.0002, "loss": 1.5481, "step": 126750 }, { "epoch": 0.52, "grad_norm": 3.112480640411377, "learning_rate": 0.0002, "loss": 1.559, "step": 126760 }, { "epoch": 0.52, "grad_norm": 2.701446771621704, "learning_rate": 0.0002, "loss": 1.5979, "step": 126770 }, { "epoch": 0.52, "grad_norm": 2.3036396503448486, "learning_rate": 0.0002, "loss": 1.8623, "step": 126780 }, { "epoch": 0.52, "grad_norm": 3.4389562606811523, "learning_rate": 0.0002, "loss": 1.6793, "step": 126790 }, { "epoch": 0.52, "grad_norm": 3.38600754737854, "learning_rate": 0.0002, "loss": 1.3335, "step": 126800 }, { "epoch": 0.52, "grad_norm": 3.0847415924072266, "learning_rate": 0.0002, "loss": 1.6388, "step": 126810 }, { "epoch": 0.52, "grad_norm": 2.5199379920959473, "learning_rate": 0.0002, "loss": 1.5291, "step": 126820 }, { "epoch": 0.52, "grad_norm": 3.354538679122925, "learning_rate": 0.0002, "loss": 1.4515, "step": 126830 }, { "epoch": 0.52, "grad_norm": 2.8435468673706055, "learning_rate": 0.0002, "loss": 1.8934, "step": 126840 }, { "epoch": 0.52, "grad_norm": 2.6814489364624023, "learning_rate": 0.0002, "loss": 1.4504, "step": 126850 }, { "epoch": 0.52, "grad_norm": 2.908816337585449, "learning_rate": 0.0002, "loss": 1.2976, "step": 126860 }, { "epoch": 0.52, "grad_norm": 3.752561092376709, "learning_rate": 0.0002, "loss": 1.5181, "step": 126870 }, { "epoch": 0.52, "grad_norm": 3.4769961833953857, "learning_rate": 0.0002, "loss": 1.5844, "step": 126880 }, { "epoch": 0.52, "grad_norm": 5.0559492111206055, "learning_rate": 0.0002, "loss": 1.5386, "step": 126890 }, { "epoch": 0.52, "grad_norm": 1.8790360689163208, "learning_rate": 0.0002, "loss": 1.7519, "step": 126900 }, { "epoch": 0.52, "grad_norm": 3.3082542419433594, "learning_rate": 0.0002, "loss": 1.4203, "step": 126910 }, { "epoch": 0.52, "grad_norm": 5.0208892822265625, "learning_rate": 0.0002, "loss": 1.4324, "step": 126920 }, { "epoch": 0.52, "grad_norm": 3.201648235321045, "learning_rate": 0.0002, "loss": 1.7016, "step": 126930 }, { "epoch": 0.52, "grad_norm": 3.798781394958496, "learning_rate": 0.0002, "loss": 1.577, "step": 126940 }, { "epoch": 0.52, "grad_norm": 4.180607318878174, "learning_rate": 0.0002, "loss": 1.4593, "step": 126950 }, { "epoch": 0.52, "grad_norm": 2.8337697982788086, "learning_rate": 0.0002, "loss": 1.4706, "step": 126960 }, { "epoch": 0.52, "grad_norm": 3.3199617862701416, "learning_rate": 0.0002, "loss": 1.4246, "step": 126970 }, { "epoch": 0.52, "grad_norm": 1.815769910812378, "learning_rate": 0.0002, "loss": 1.5688, "step": 126980 }, { "epoch": 0.52, "grad_norm": 4.516260623931885, "learning_rate": 0.0002, "loss": 1.4474, "step": 126990 }, { "epoch": 0.52, "grad_norm": 3.118136167526245, "learning_rate": 0.0002, "loss": 1.6563, "step": 127000 }, { "epoch": 0.52, "grad_norm": 2.644902467727661, "learning_rate": 0.0002, "loss": 1.5482, "step": 127010 }, { "epoch": 0.52, "grad_norm": 3.2940943241119385, "learning_rate": 0.0002, "loss": 1.3723, "step": 127020 }, { "epoch": 0.52, "grad_norm": 3.475515365600586, "learning_rate": 0.0002, "loss": 1.3199, "step": 127030 }, { "epoch": 0.52, "grad_norm": 2.256382465362549, "learning_rate": 0.0002, "loss": 1.4977, "step": 127040 }, { "epoch": 0.52, "grad_norm": 4.038950443267822, "learning_rate": 0.0002, "loss": 1.5002, "step": 127050 }, { "epoch": 0.52, "grad_norm": 3.4674532413482666, "learning_rate": 0.0002, "loss": 1.6828, "step": 127060 }, { "epoch": 0.52, "grad_norm": 3.8942551612854004, "learning_rate": 0.0002, "loss": 1.5321, "step": 127070 }, { "epoch": 0.52, "grad_norm": 2.476564645767212, "learning_rate": 0.0002, "loss": 1.6912, "step": 127080 }, { "epoch": 0.52, "grad_norm": 3.2603113651275635, "learning_rate": 0.0002, "loss": 1.6304, "step": 127090 }, { "epoch": 0.52, "grad_norm": 4.040179252624512, "learning_rate": 0.0002, "loss": 1.5748, "step": 127100 }, { "epoch": 0.52, "grad_norm": 2.579993724822998, "learning_rate": 0.0002, "loss": 1.5609, "step": 127110 }, { "epoch": 0.52, "grad_norm": 2.8359827995300293, "learning_rate": 0.0002, "loss": 1.6759, "step": 127120 }, { "epoch": 0.52, "grad_norm": 4.660862445831299, "learning_rate": 0.0002, "loss": 1.4284, "step": 127130 }, { "epoch": 0.52, "grad_norm": 1.7333358526229858, "learning_rate": 0.0002, "loss": 1.5096, "step": 127140 }, { "epoch": 0.52, "grad_norm": 3.6797196865081787, "learning_rate": 0.0002, "loss": 1.688, "step": 127150 }, { "epoch": 0.52, "grad_norm": 3.113804340362549, "learning_rate": 0.0002, "loss": 1.6356, "step": 127160 }, { "epoch": 0.52, "grad_norm": 4.013028144836426, "learning_rate": 0.0002, "loss": 1.6799, "step": 127170 }, { "epoch": 0.52, "grad_norm": 3.6647024154663086, "learning_rate": 0.0002, "loss": 1.5989, "step": 127180 }, { "epoch": 0.52, "grad_norm": 5.274631023406982, "learning_rate": 0.0002, "loss": 1.5736, "step": 127190 }, { "epoch": 0.52, "grad_norm": 2.830091953277588, "learning_rate": 0.0002, "loss": 1.6062, "step": 127200 }, { "epoch": 0.52, "grad_norm": 4.152407169342041, "learning_rate": 0.0002, "loss": 1.5514, "step": 127210 }, { "epoch": 0.52, "grad_norm": 3.9301719665527344, "learning_rate": 0.0002, "loss": 1.6302, "step": 127220 }, { "epoch": 0.52, "grad_norm": 3.624728202819824, "learning_rate": 0.0002, "loss": 1.7147, "step": 127230 }, { "epoch": 0.52, "grad_norm": 2.771036386489868, "learning_rate": 0.0002, "loss": 1.597, "step": 127240 }, { "epoch": 0.52, "grad_norm": 2.596163272857666, "learning_rate": 0.0002, "loss": 1.5233, "step": 127250 }, { "epoch": 0.52, "grad_norm": 3.363786220550537, "learning_rate": 0.0002, "loss": 1.4086, "step": 127260 }, { "epoch": 0.52, "grad_norm": 1.868021845817566, "learning_rate": 0.0002, "loss": 1.7239, "step": 127270 }, { "epoch": 0.52, "grad_norm": 3.9029898643493652, "learning_rate": 0.0002, "loss": 1.6971, "step": 127280 }, { "epoch": 0.52, "grad_norm": 3.333867073059082, "learning_rate": 0.0002, "loss": 1.5657, "step": 127290 }, { "epoch": 0.52, "grad_norm": 2.63322114944458, "learning_rate": 0.0002, "loss": 1.6168, "step": 127300 }, { "epoch": 0.52, "grad_norm": 2.788245916366577, "learning_rate": 0.0002, "loss": 1.6017, "step": 127310 }, { "epoch": 0.52, "grad_norm": 3.554814100265503, "learning_rate": 0.0002, "loss": 1.4464, "step": 127320 }, { "epoch": 0.52, "grad_norm": 1.8973639011383057, "learning_rate": 0.0002, "loss": 1.6038, "step": 127330 }, { "epoch": 0.52, "grad_norm": 2.831366539001465, "learning_rate": 0.0002, "loss": 1.3254, "step": 127340 }, { "epoch": 0.52, "grad_norm": 2.58257794380188, "learning_rate": 0.0002, "loss": 1.5019, "step": 127350 }, { "epoch": 0.52, "grad_norm": 2.8412225246429443, "learning_rate": 0.0002, "loss": 1.5621, "step": 127360 }, { "epoch": 0.52, "grad_norm": 2.5354549884796143, "learning_rate": 0.0002, "loss": 1.5108, "step": 127370 }, { "epoch": 0.52, "grad_norm": 2.589282274246216, "learning_rate": 0.0002, "loss": 1.5029, "step": 127380 }, { "epoch": 0.52, "grad_norm": 3.486161231994629, "learning_rate": 0.0002, "loss": 1.7432, "step": 127390 }, { "epoch": 0.52, "grad_norm": 2.5985524654388428, "learning_rate": 0.0002, "loss": 1.7131, "step": 127400 }, { "epoch": 0.52, "grad_norm": 2.6399006843566895, "learning_rate": 0.0002, "loss": 1.7897, "step": 127410 }, { "epoch": 0.52, "grad_norm": 4.408020496368408, "learning_rate": 0.0002, "loss": 1.6343, "step": 127420 }, { "epoch": 0.52, "grad_norm": 2.3647541999816895, "learning_rate": 0.0002, "loss": 1.8762, "step": 127430 }, { "epoch": 0.52, "grad_norm": 2.0825424194335938, "learning_rate": 0.0002, "loss": 1.5991, "step": 127440 }, { "epoch": 0.52, "grad_norm": 4.364232540130615, "learning_rate": 0.0002, "loss": 1.5617, "step": 127450 }, { "epoch": 0.52, "grad_norm": 2.5341527462005615, "learning_rate": 0.0002, "loss": 1.5794, "step": 127460 }, { "epoch": 0.52, "grad_norm": 2.164905071258545, "learning_rate": 0.0002, "loss": 1.3636, "step": 127470 }, { "epoch": 0.52, "grad_norm": 3.2417643070220947, "learning_rate": 0.0002, "loss": 1.8809, "step": 127480 }, { "epoch": 0.52, "grad_norm": 2.0020229816436768, "learning_rate": 0.0002, "loss": 1.6236, "step": 127490 }, { "epoch": 0.52, "grad_norm": 2.77595591545105, "learning_rate": 0.0002, "loss": 1.7425, "step": 127500 }, { "epoch": 0.52, "grad_norm": 3.893120527267456, "learning_rate": 0.0002, "loss": 1.6692, "step": 127510 }, { "epoch": 0.52, "grad_norm": 2.501835346221924, "learning_rate": 0.0002, "loss": 1.5082, "step": 127520 }, { "epoch": 0.52, "grad_norm": 3.713682174682617, "learning_rate": 0.0002, "loss": 1.4031, "step": 127530 }, { "epoch": 0.52, "grad_norm": 3.083582639694214, "learning_rate": 0.0002, "loss": 1.6198, "step": 127540 }, { "epoch": 0.52, "grad_norm": 9.118507385253906, "learning_rate": 0.0002, "loss": 1.4289, "step": 127550 }, { "epoch": 0.52, "grad_norm": 4.052621841430664, "learning_rate": 0.0002, "loss": 1.7091, "step": 127560 }, { "epoch": 0.52, "grad_norm": 3.443741798400879, "learning_rate": 0.0002, "loss": 1.7054, "step": 127570 }, { "epoch": 0.52, "grad_norm": 3.8590409755706787, "learning_rate": 0.0002, "loss": 1.8137, "step": 127580 }, { "epoch": 0.52, "grad_norm": 6.505629062652588, "learning_rate": 0.0002, "loss": 1.5101, "step": 127590 }, { "epoch": 0.52, "grad_norm": 2.8186562061309814, "learning_rate": 0.0002, "loss": 1.6356, "step": 127600 }, { "epoch": 0.52, "grad_norm": 2.2624473571777344, "learning_rate": 0.0002, "loss": 1.7531, "step": 127610 }, { "epoch": 0.52, "grad_norm": 5.245075702667236, "learning_rate": 0.0002, "loss": 1.81, "step": 127620 }, { "epoch": 0.52, "grad_norm": 5.126948356628418, "learning_rate": 0.0002, "loss": 1.7018, "step": 127630 }, { "epoch": 0.52, "grad_norm": 4.006347179412842, "learning_rate": 0.0002, "loss": 1.8533, "step": 127640 }, { "epoch": 0.52, "grad_norm": 3.7684671878814697, "learning_rate": 0.0002, "loss": 1.6145, "step": 127650 }, { "epoch": 0.52, "grad_norm": 4.685242176055908, "learning_rate": 0.0002, "loss": 1.588, "step": 127660 }, { "epoch": 0.52, "grad_norm": 4.019723415374756, "learning_rate": 0.0002, "loss": 1.6148, "step": 127670 }, { "epoch": 0.52, "grad_norm": 2.8608765602111816, "learning_rate": 0.0002, "loss": 1.307, "step": 127680 }, { "epoch": 0.52, "grad_norm": 1.9129081964492798, "learning_rate": 0.0002, "loss": 1.6666, "step": 127690 }, { "epoch": 0.52, "grad_norm": 2.0839672088623047, "learning_rate": 0.0002, "loss": 1.4504, "step": 127700 }, { "epoch": 0.52, "grad_norm": 2.4015605449676514, "learning_rate": 0.0002, "loss": 1.4928, "step": 127710 }, { "epoch": 0.52, "grad_norm": 2.572761297225952, "learning_rate": 0.0002, "loss": 1.5072, "step": 127720 }, { "epoch": 0.52, "grad_norm": 2.443500518798828, "learning_rate": 0.0002, "loss": 1.5749, "step": 127730 }, { "epoch": 0.52, "grad_norm": 3.11234188079834, "learning_rate": 0.0002, "loss": 1.7101, "step": 127740 }, { "epoch": 0.52, "grad_norm": 5.259283542633057, "learning_rate": 0.0002, "loss": 1.5776, "step": 127750 }, { "epoch": 0.52, "grad_norm": 4.057747840881348, "learning_rate": 0.0002, "loss": 1.6022, "step": 127760 }, { "epoch": 0.52, "grad_norm": 2.6860506534576416, "learning_rate": 0.0002, "loss": 1.6261, "step": 127770 }, { "epoch": 0.52, "grad_norm": 3.630350112915039, "learning_rate": 0.0002, "loss": 1.4342, "step": 127780 }, { "epoch": 0.52, "grad_norm": 2.3368070125579834, "learning_rate": 0.0002, "loss": 1.8148, "step": 127790 }, { "epoch": 0.52, "grad_norm": 2.413421630859375, "learning_rate": 0.0002, "loss": 1.3629, "step": 127800 }, { "epoch": 0.52, "grad_norm": 2.7611563205718994, "learning_rate": 0.0002, "loss": 1.6633, "step": 127810 }, { "epoch": 0.52, "grad_norm": 2.1918766498565674, "learning_rate": 0.0002, "loss": 1.4442, "step": 127820 }, { "epoch": 0.52, "grad_norm": 3.648228883743286, "learning_rate": 0.0002, "loss": 1.7529, "step": 127830 }, { "epoch": 0.52, "grad_norm": 3.118396282196045, "learning_rate": 0.0002, "loss": 1.6196, "step": 127840 }, { "epoch": 0.52, "grad_norm": 3.3340840339660645, "learning_rate": 0.0002, "loss": 1.8445, "step": 127850 }, { "epoch": 0.52, "grad_norm": 2.6225574016571045, "learning_rate": 0.0002, "loss": 1.6669, "step": 127860 }, { "epoch": 0.52, "grad_norm": 2.9031171798706055, "learning_rate": 0.0002, "loss": 1.6225, "step": 127870 }, { "epoch": 0.52, "grad_norm": 1.576824426651001, "learning_rate": 0.0002, "loss": 1.555, "step": 127880 }, { "epoch": 0.52, "grad_norm": 3.198138952255249, "learning_rate": 0.0002, "loss": 1.4445, "step": 127890 }, { "epoch": 0.52, "grad_norm": 7.240084171295166, "learning_rate": 0.0002, "loss": 1.4495, "step": 127900 }, { "epoch": 0.52, "grad_norm": 2.3046422004699707, "learning_rate": 0.0002, "loss": 1.7641, "step": 127910 }, { "epoch": 0.52, "grad_norm": 3.952319860458374, "learning_rate": 0.0002, "loss": 1.6025, "step": 127920 }, { "epoch": 0.52, "grad_norm": 3.335106611251831, "learning_rate": 0.0002, "loss": 1.5362, "step": 127930 }, { "epoch": 0.52, "grad_norm": 3.0886411666870117, "learning_rate": 0.0002, "loss": 1.448, "step": 127940 }, { "epoch": 0.52, "grad_norm": 5.558086395263672, "learning_rate": 0.0002, "loss": 1.6293, "step": 127950 }, { "epoch": 0.52, "grad_norm": 2.4532716274261475, "learning_rate": 0.0002, "loss": 1.7476, "step": 127960 }, { "epoch": 0.52, "grad_norm": 1.817489743232727, "learning_rate": 0.0002, "loss": 1.5157, "step": 127970 }, { "epoch": 0.52, "grad_norm": 3.4085307121276855, "learning_rate": 0.0002, "loss": 1.6469, "step": 127980 }, { "epoch": 0.52, "grad_norm": 2.690288543701172, "learning_rate": 0.0002, "loss": 1.7246, "step": 127990 }, { "epoch": 0.52, "grad_norm": 2.456765651702881, "learning_rate": 0.0002, "loss": 1.5057, "step": 128000 }, { "epoch": 0.52, "grad_norm": 1.5455267429351807, "learning_rate": 0.0002, "loss": 1.2647, "step": 128010 }, { "epoch": 0.52, "grad_norm": 3.659348249435425, "learning_rate": 0.0002, "loss": 1.5512, "step": 128020 }, { "epoch": 0.52, "grad_norm": 3.3294496536254883, "learning_rate": 0.0002, "loss": 1.838, "step": 128030 }, { "epoch": 0.52, "grad_norm": 3.1847853660583496, "learning_rate": 0.0002, "loss": 1.6045, "step": 128040 }, { "epoch": 0.52, "grad_norm": 3.248284101486206, "learning_rate": 0.0002, "loss": 1.725, "step": 128050 }, { "epoch": 0.52, "grad_norm": 1.454670786857605, "learning_rate": 0.0002, "loss": 1.8206, "step": 128060 }, { "epoch": 0.52, "grad_norm": 1.730428695678711, "learning_rate": 0.0002, "loss": 1.7727, "step": 128070 }, { "epoch": 0.52, "grad_norm": 3.2317893505096436, "learning_rate": 0.0002, "loss": 1.4969, "step": 128080 }, { "epoch": 0.52, "grad_norm": 2.570978879928589, "learning_rate": 0.0002, "loss": 1.6188, "step": 128090 }, { "epoch": 0.52, "grad_norm": 2.1394691467285156, "learning_rate": 0.0002, "loss": 1.7284, "step": 128100 }, { "epoch": 0.52, "grad_norm": 3.5464909076690674, "learning_rate": 0.0002, "loss": 1.8518, "step": 128110 }, { "epoch": 0.52, "grad_norm": 4.737668514251709, "learning_rate": 0.0002, "loss": 1.5603, "step": 128120 }, { "epoch": 0.52, "grad_norm": 4.028559684753418, "learning_rate": 0.0002, "loss": 1.6985, "step": 128130 }, { "epoch": 0.52, "grad_norm": 3.9220218658447266, "learning_rate": 0.0002, "loss": 1.6886, "step": 128140 }, { "epoch": 0.52, "grad_norm": 2.853527069091797, "learning_rate": 0.0002, "loss": 1.395, "step": 128150 }, { "epoch": 0.52, "grad_norm": 3.288790225982666, "learning_rate": 0.0002, "loss": 1.4612, "step": 128160 }, { "epoch": 0.52, "grad_norm": 3.888462543487549, "learning_rate": 0.0002, "loss": 1.6766, "step": 128170 }, { "epoch": 0.52, "grad_norm": 4.215358734130859, "learning_rate": 0.0002, "loss": 1.4203, "step": 128180 }, { "epoch": 0.52, "grad_norm": 2.7899656295776367, "learning_rate": 0.0002, "loss": 1.5245, "step": 128190 }, { "epoch": 0.52, "grad_norm": 3.285799741744995, "learning_rate": 0.0002, "loss": 1.7875, "step": 128200 }, { "epoch": 0.52, "grad_norm": 1.5645413398742676, "learning_rate": 0.0002, "loss": 1.3452, "step": 128210 }, { "epoch": 0.52, "grad_norm": 3.9584999084472656, "learning_rate": 0.0002, "loss": 1.48, "step": 128220 }, { "epoch": 0.52, "grad_norm": 2.6112327575683594, "learning_rate": 0.0002, "loss": 1.7385, "step": 128230 }, { "epoch": 0.52, "grad_norm": 2.1090545654296875, "learning_rate": 0.0002, "loss": 1.4369, "step": 128240 }, { "epoch": 0.52, "grad_norm": 3.0259084701538086, "learning_rate": 0.0002, "loss": 1.5452, "step": 128250 }, { "epoch": 0.52, "grad_norm": 2.562852382659912, "learning_rate": 0.0002, "loss": 1.7465, "step": 128260 }, { "epoch": 0.52, "grad_norm": 1.6166654825210571, "learning_rate": 0.0002, "loss": 1.3717, "step": 128270 }, { "epoch": 0.52, "grad_norm": 3.1060853004455566, "learning_rate": 0.0002, "loss": 1.7064, "step": 128280 }, { "epoch": 0.52, "grad_norm": 3.989327907562256, "learning_rate": 0.0002, "loss": 1.6383, "step": 128290 }, { "epoch": 0.52, "grad_norm": 1.6662341356277466, "learning_rate": 0.0002, "loss": 1.4812, "step": 128300 }, { "epoch": 0.52, "grad_norm": 3.146874189376831, "learning_rate": 0.0002, "loss": 1.4113, "step": 128310 }, { "epoch": 0.52, "grad_norm": 5.615079879760742, "learning_rate": 0.0002, "loss": 1.5186, "step": 128320 }, { "epoch": 0.52, "grad_norm": 1.5587272644042969, "learning_rate": 0.0002, "loss": 1.3158, "step": 128330 }, { "epoch": 0.52, "grad_norm": 3.2912371158599854, "learning_rate": 0.0002, "loss": 1.6769, "step": 128340 }, { "epoch": 0.52, "grad_norm": 3.0342016220092773, "learning_rate": 0.0002, "loss": 1.599, "step": 128350 }, { "epoch": 0.52, "grad_norm": 3.905336380004883, "learning_rate": 0.0002, "loss": 1.5692, "step": 128360 }, { "epoch": 0.52, "grad_norm": 2.0809166431427, "learning_rate": 0.0002, "loss": 1.7623, "step": 128370 }, { "epoch": 0.52, "grad_norm": 4.8737688064575195, "learning_rate": 0.0002, "loss": 1.4432, "step": 128380 }, { "epoch": 0.52, "grad_norm": 2.524855613708496, "learning_rate": 0.0002, "loss": 1.726, "step": 128390 }, { "epoch": 0.52, "grad_norm": 3.92855167388916, "learning_rate": 0.0002, "loss": 1.5324, "step": 128400 }, { "epoch": 0.52, "grad_norm": 1.8986217975616455, "learning_rate": 0.0002, "loss": 1.5778, "step": 128410 }, { "epoch": 0.52, "grad_norm": 2.9196131229400635, "learning_rate": 0.0002, "loss": 1.7268, "step": 128420 }, { "epoch": 0.52, "grad_norm": 2.659867286682129, "learning_rate": 0.0002, "loss": 1.5789, "step": 128430 }, { "epoch": 0.52, "grad_norm": 5.874001502990723, "learning_rate": 0.0002, "loss": 1.7193, "step": 128440 }, { "epoch": 0.52, "grad_norm": 2.752034902572632, "learning_rate": 0.0002, "loss": 1.962, "step": 128450 }, { "epoch": 0.52, "grad_norm": 2.02774977684021, "learning_rate": 0.0002, "loss": 1.5744, "step": 128460 }, { "epoch": 0.52, "grad_norm": 2.566779136657715, "learning_rate": 0.0002, "loss": 1.456, "step": 128470 }, { "epoch": 0.52, "grad_norm": 3.400505542755127, "learning_rate": 0.0002, "loss": 1.5546, "step": 128480 }, { "epoch": 0.52, "grad_norm": 3.575314998626709, "learning_rate": 0.0002, "loss": 1.6233, "step": 128490 }, { "epoch": 0.52, "grad_norm": 5.954761981964111, "learning_rate": 0.0002, "loss": 1.5325, "step": 128500 }, { "epoch": 0.52, "grad_norm": 4.312578201293945, "learning_rate": 0.0002, "loss": 1.8055, "step": 128510 }, { "epoch": 0.52, "grad_norm": 2.193706750869751, "learning_rate": 0.0002, "loss": 1.3548, "step": 128520 }, { "epoch": 0.52, "grad_norm": 2.424985885620117, "learning_rate": 0.0002, "loss": 1.4955, "step": 128530 }, { "epoch": 0.52, "grad_norm": 1.473246693611145, "learning_rate": 0.0002, "loss": 1.5179, "step": 128540 }, { "epoch": 0.52, "grad_norm": 3.5090198516845703, "learning_rate": 0.0002, "loss": 1.4995, "step": 128550 }, { "epoch": 0.52, "grad_norm": 2.709055185317993, "learning_rate": 0.0002, "loss": 1.6004, "step": 128560 }, { "epoch": 0.52, "grad_norm": 4.309040069580078, "learning_rate": 0.0002, "loss": 1.74, "step": 128570 }, { "epoch": 0.52, "grad_norm": 3.480172634124756, "learning_rate": 0.0002, "loss": 1.7144, "step": 128580 }, { "epoch": 0.52, "grad_norm": 2.8361988067626953, "learning_rate": 0.0002, "loss": 1.7783, "step": 128590 }, { "epoch": 0.52, "grad_norm": 3.6981701850891113, "learning_rate": 0.0002, "loss": 1.7457, "step": 128600 }, { "epoch": 0.52, "grad_norm": 5.535738945007324, "learning_rate": 0.0002, "loss": 1.4918, "step": 128610 }, { "epoch": 0.52, "grad_norm": 3.602754592895508, "learning_rate": 0.0002, "loss": 1.6586, "step": 128620 }, { "epoch": 0.52, "grad_norm": 2.5415432453155518, "learning_rate": 0.0002, "loss": 1.6663, "step": 128630 }, { "epoch": 0.52, "grad_norm": 4.226226329803467, "learning_rate": 0.0002, "loss": 1.7644, "step": 128640 }, { "epoch": 0.52, "grad_norm": 2.2796883583068848, "learning_rate": 0.0002, "loss": 1.8003, "step": 128650 }, { "epoch": 0.52, "grad_norm": 2.768158435821533, "learning_rate": 0.0002, "loss": 1.7431, "step": 128660 }, { "epoch": 0.52, "grad_norm": 1.8013502359390259, "learning_rate": 0.0002, "loss": 1.7747, "step": 128670 }, { "epoch": 0.52, "grad_norm": 2.4319965839385986, "learning_rate": 0.0002, "loss": 1.458, "step": 128680 }, { "epoch": 0.52, "grad_norm": 5.179172515869141, "learning_rate": 0.0002, "loss": 1.6841, "step": 128690 }, { "epoch": 0.52, "grad_norm": 4.299679756164551, "learning_rate": 0.0002, "loss": 1.847, "step": 128700 }, { "epoch": 0.52, "grad_norm": 5.97996187210083, "learning_rate": 0.0002, "loss": 1.5919, "step": 128710 }, { "epoch": 0.52, "grad_norm": 1.6847810745239258, "learning_rate": 0.0002, "loss": 1.3017, "step": 128720 }, { "epoch": 0.52, "grad_norm": 5.755970478057861, "learning_rate": 0.0002, "loss": 1.7862, "step": 128730 }, { "epoch": 0.52, "grad_norm": 2.138876438140869, "learning_rate": 0.0002, "loss": 1.736, "step": 128740 }, { "epoch": 0.52, "grad_norm": 2.503028154373169, "learning_rate": 0.0002, "loss": 1.8564, "step": 128750 }, { "epoch": 0.52, "grad_norm": 3.390367031097412, "learning_rate": 0.0002, "loss": 1.4631, "step": 128760 }, { "epoch": 0.52, "grad_norm": 2.7218825817108154, "learning_rate": 0.0002, "loss": 1.6944, "step": 128770 }, { "epoch": 0.52, "grad_norm": 2.3380112648010254, "learning_rate": 0.0002, "loss": 1.7599, "step": 128780 }, { "epoch": 0.52, "grad_norm": 2.827436923980713, "learning_rate": 0.0002, "loss": 1.4389, "step": 128790 }, { "epoch": 0.52, "grad_norm": 3.7061777114868164, "learning_rate": 0.0002, "loss": 1.8941, "step": 128800 }, { "epoch": 0.52, "grad_norm": 2.635153293609619, "learning_rate": 0.0002, "loss": 1.691, "step": 128810 }, { "epoch": 0.52, "grad_norm": 3.0291359424591064, "learning_rate": 0.0002, "loss": 1.5476, "step": 128820 }, { "epoch": 0.52, "grad_norm": 4.147696018218994, "learning_rate": 0.0002, "loss": 1.5429, "step": 128830 }, { "epoch": 0.52, "grad_norm": 4.327091693878174, "learning_rate": 0.0002, "loss": 1.4238, "step": 128840 }, { "epoch": 0.52, "grad_norm": 2.1795711517333984, "learning_rate": 0.0002, "loss": 1.5732, "step": 128850 }, { "epoch": 0.52, "grad_norm": 2.391371250152588, "learning_rate": 0.0002, "loss": 1.4507, "step": 128860 }, { "epoch": 0.52, "grad_norm": 2.1303443908691406, "learning_rate": 0.0002, "loss": 1.5907, "step": 128870 }, { "epoch": 0.52, "grad_norm": 2.3813862800598145, "learning_rate": 0.0002, "loss": 1.7307, "step": 128880 }, { "epoch": 0.52, "grad_norm": 3.385979413986206, "learning_rate": 0.0002, "loss": 1.2665, "step": 128890 }, { "epoch": 0.52, "grad_norm": 3.294480323791504, "learning_rate": 0.0002, "loss": 1.4023, "step": 128900 }, { "epoch": 0.52, "grad_norm": 2.765554189682007, "learning_rate": 0.0002, "loss": 1.703, "step": 128910 }, { "epoch": 0.52, "grad_norm": 2.2601261138916016, "learning_rate": 0.0002, "loss": 1.7495, "step": 128920 }, { "epoch": 0.52, "grad_norm": 2.943683385848999, "learning_rate": 0.0002, "loss": 1.5289, "step": 128930 }, { "epoch": 0.52, "grad_norm": 2.7508723735809326, "learning_rate": 0.0002, "loss": 1.5407, "step": 128940 }, { "epoch": 0.52, "grad_norm": 1.9837532043457031, "learning_rate": 0.0002, "loss": 1.3912, "step": 128950 }, { "epoch": 0.52, "grad_norm": 2.171959400177002, "learning_rate": 0.0002, "loss": 1.6531, "step": 128960 }, { "epoch": 0.53, "grad_norm": 3.3464515209198, "learning_rate": 0.0002, "loss": 1.8274, "step": 128970 }, { "epoch": 0.53, "grad_norm": 3.8251919746398926, "learning_rate": 0.0002, "loss": 1.4692, "step": 128980 }, { "epoch": 0.53, "grad_norm": 2.7303121089935303, "learning_rate": 0.0002, "loss": 1.6233, "step": 128990 }, { "epoch": 0.53, "grad_norm": 3.242525815963745, "learning_rate": 0.0002, "loss": 1.7945, "step": 129000 }, { "epoch": 0.53, "grad_norm": 3.9863178730010986, "learning_rate": 0.0002, "loss": 1.7173, "step": 129010 }, { "epoch": 0.53, "grad_norm": 3.1788461208343506, "learning_rate": 0.0002, "loss": 1.4766, "step": 129020 }, { "epoch": 0.53, "grad_norm": 2.4267003536224365, "learning_rate": 0.0002, "loss": 1.5593, "step": 129030 }, { "epoch": 0.53, "grad_norm": 2.7345633506774902, "learning_rate": 0.0002, "loss": 1.5149, "step": 129040 }, { "epoch": 0.53, "grad_norm": 2.214456796646118, "learning_rate": 0.0002, "loss": 1.8034, "step": 129050 }, { "epoch": 0.53, "grad_norm": 5.748208999633789, "learning_rate": 0.0002, "loss": 1.6028, "step": 129060 }, { "epoch": 0.53, "grad_norm": 3.5603411197662354, "learning_rate": 0.0002, "loss": 1.5672, "step": 129070 }, { "epoch": 0.53, "grad_norm": 2.7250354290008545, "learning_rate": 0.0002, "loss": 1.2395, "step": 129080 }, { "epoch": 0.53, "grad_norm": 2.199655055999756, "learning_rate": 0.0002, "loss": 1.2591, "step": 129090 }, { "epoch": 0.53, "grad_norm": 2.65690279006958, "learning_rate": 0.0002, "loss": 1.7864, "step": 129100 }, { "epoch": 0.53, "grad_norm": 3.8126604557037354, "learning_rate": 0.0002, "loss": 1.5739, "step": 129110 }, { "epoch": 0.53, "grad_norm": 3.19891619682312, "learning_rate": 0.0002, "loss": 1.4485, "step": 129120 }, { "epoch": 0.53, "grad_norm": 2.912972927093506, "learning_rate": 0.0002, "loss": 1.5079, "step": 129130 }, { "epoch": 0.53, "grad_norm": 2.6568562984466553, "learning_rate": 0.0002, "loss": 1.6422, "step": 129140 }, { "epoch": 0.53, "grad_norm": 3.2195262908935547, "learning_rate": 0.0002, "loss": 1.4834, "step": 129150 }, { "epoch": 0.53, "grad_norm": 2.4102399349212646, "learning_rate": 0.0002, "loss": 1.7384, "step": 129160 }, { "epoch": 0.53, "grad_norm": 2.56459641456604, "learning_rate": 0.0002, "loss": 1.5839, "step": 129170 }, { "epoch": 0.53, "grad_norm": 2.957679033279419, "learning_rate": 0.0002, "loss": 1.6588, "step": 129180 }, { "epoch": 0.53, "grad_norm": 3.4226694107055664, "learning_rate": 0.0002, "loss": 1.5359, "step": 129190 }, { "epoch": 0.53, "grad_norm": 3.3734476566314697, "learning_rate": 0.0002, "loss": 1.2746, "step": 129200 }, { "epoch": 0.53, "grad_norm": 2.608966112136841, "learning_rate": 0.0002, "loss": 1.5956, "step": 129210 }, { "epoch": 0.53, "grad_norm": 4.226099491119385, "learning_rate": 0.0002, "loss": 1.5086, "step": 129220 }, { "epoch": 0.53, "grad_norm": 2.7655720710754395, "learning_rate": 0.0002, "loss": 1.7803, "step": 129230 }, { "epoch": 0.53, "grad_norm": 2.445387363433838, "learning_rate": 0.0002, "loss": 1.4315, "step": 129240 }, { "epoch": 0.53, "grad_norm": 2.040102481842041, "learning_rate": 0.0002, "loss": 1.5472, "step": 129250 }, { "epoch": 0.53, "grad_norm": 2.0809988975524902, "learning_rate": 0.0002, "loss": 1.579, "step": 129260 }, { "epoch": 0.53, "grad_norm": 2.827796459197998, "learning_rate": 0.0002, "loss": 1.5517, "step": 129270 }, { "epoch": 0.53, "grad_norm": 2.24454402923584, "learning_rate": 0.0002, "loss": 1.5024, "step": 129280 }, { "epoch": 0.53, "grad_norm": 3.195650100708008, "learning_rate": 0.0002, "loss": 1.6669, "step": 129290 }, { "epoch": 0.53, "grad_norm": 2.9293222427368164, "learning_rate": 0.0002, "loss": 1.5752, "step": 129300 }, { "epoch": 0.53, "grad_norm": 3.373939275741577, "learning_rate": 0.0002, "loss": 1.5447, "step": 129310 }, { "epoch": 0.53, "grad_norm": 2.374638319015503, "learning_rate": 0.0002, "loss": 1.682, "step": 129320 }, { "epoch": 0.53, "grad_norm": 2.0584700107574463, "learning_rate": 0.0002, "loss": 1.5966, "step": 129330 }, { "epoch": 0.53, "grad_norm": 3.59549880027771, "learning_rate": 0.0002, "loss": 1.5692, "step": 129340 }, { "epoch": 0.53, "grad_norm": 3.1461598873138428, "learning_rate": 0.0002, "loss": 1.7018, "step": 129350 }, { "epoch": 0.53, "grad_norm": 3.5847742557525635, "learning_rate": 0.0002, "loss": 1.6436, "step": 129360 }, { "epoch": 0.53, "grad_norm": 1.2535661458969116, "learning_rate": 0.0002, "loss": 1.5391, "step": 129370 }, { "epoch": 0.53, "grad_norm": 3.4147958755493164, "learning_rate": 0.0002, "loss": 1.5301, "step": 129380 }, { "epoch": 0.53, "grad_norm": 2.4221744537353516, "learning_rate": 0.0002, "loss": 1.5151, "step": 129390 }, { "epoch": 0.53, "grad_norm": 2.996875524520874, "learning_rate": 0.0002, "loss": 1.6238, "step": 129400 }, { "epoch": 0.53, "grad_norm": 2.455221176147461, "learning_rate": 0.0002, "loss": 1.7021, "step": 129410 }, { "epoch": 0.53, "grad_norm": 3.0213730335235596, "learning_rate": 0.0002, "loss": 1.3774, "step": 129420 }, { "epoch": 0.53, "grad_norm": 3.884416103363037, "learning_rate": 0.0002, "loss": 1.6642, "step": 129430 }, { "epoch": 0.53, "grad_norm": 4.02409553527832, "learning_rate": 0.0002, "loss": 1.5474, "step": 129440 }, { "epoch": 0.53, "grad_norm": 4.545559406280518, "learning_rate": 0.0002, "loss": 1.5637, "step": 129450 }, { "epoch": 0.53, "grad_norm": 2.246676206588745, "learning_rate": 0.0002, "loss": 1.6187, "step": 129460 }, { "epoch": 0.53, "grad_norm": 4.849856853485107, "learning_rate": 0.0002, "loss": 1.6612, "step": 129470 }, { "epoch": 0.53, "grad_norm": 2.555891990661621, "learning_rate": 0.0002, "loss": 1.5203, "step": 129480 }, { "epoch": 0.53, "grad_norm": 2.8771049976348877, "learning_rate": 0.0002, "loss": 1.7171, "step": 129490 }, { "epoch": 0.53, "grad_norm": 2.6851396560668945, "learning_rate": 0.0002, "loss": 1.6663, "step": 129500 }, { "epoch": 0.53, "grad_norm": 3.2147233486175537, "learning_rate": 0.0002, "loss": 1.7286, "step": 129510 }, { "epoch": 0.53, "grad_norm": 3.4164440631866455, "learning_rate": 0.0002, "loss": 1.5627, "step": 129520 }, { "epoch": 0.53, "grad_norm": 3.3176841735839844, "learning_rate": 0.0002, "loss": 1.3928, "step": 129530 }, { "epoch": 0.53, "grad_norm": 3.4508886337280273, "learning_rate": 0.0002, "loss": 1.506, "step": 129540 }, { "epoch": 0.53, "grad_norm": 2.8153951168060303, "learning_rate": 0.0002, "loss": 1.7349, "step": 129550 }, { "epoch": 0.53, "grad_norm": 2.2324752807617188, "learning_rate": 0.0002, "loss": 1.6261, "step": 129560 }, { "epoch": 0.53, "grad_norm": 4.463400363922119, "learning_rate": 0.0002, "loss": 1.4919, "step": 129570 }, { "epoch": 0.53, "grad_norm": 4.525806427001953, "learning_rate": 0.0002, "loss": 1.5554, "step": 129580 }, { "epoch": 0.53, "grad_norm": 3.0179615020751953, "learning_rate": 0.0002, "loss": 1.5244, "step": 129590 }, { "epoch": 0.53, "grad_norm": 4.7121453285217285, "learning_rate": 0.0002, "loss": 1.4768, "step": 129600 }, { "epoch": 0.53, "grad_norm": 3.0457546710968018, "learning_rate": 0.0002, "loss": 1.7796, "step": 129610 }, { "epoch": 0.53, "grad_norm": 3.253326416015625, "learning_rate": 0.0002, "loss": 1.4446, "step": 129620 }, { "epoch": 0.53, "grad_norm": 3.962520122528076, "learning_rate": 0.0002, "loss": 1.4588, "step": 129630 }, { "epoch": 0.53, "grad_norm": 4.649148941040039, "learning_rate": 0.0002, "loss": 1.5537, "step": 129640 }, { "epoch": 0.53, "grad_norm": 2.630894660949707, "learning_rate": 0.0002, "loss": 1.5672, "step": 129650 }, { "epoch": 0.53, "grad_norm": 4.2659010887146, "learning_rate": 0.0002, "loss": 1.3586, "step": 129660 }, { "epoch": 0.53, "grad_norm": 4.608733177185059, "learning_rate": 0.0002, "loss": 1.464, "step": 129670 }, { "epoch": 0.53, "grad_norm": 3.393876791000366, "learning_rate": 0.0002, "loss": 1.2765, "step": 129680 }, { "epoch": 0.53, "grad_norm": 2.725135564804077, "learning_rate": 0.0002, "loss": 1.6277, "step": 129690 }, { "epoch": 0.53, "grad_norm": 3.6050384044647217, "learning_rate": 0.0002, "loss": 1.4196, "step": 129700 }, { "epoch": 0.53, "grad_norm": 2.3480422496795654, "learning_rate": 0.0002, "loss": 1.7373, "step": 129710 }, { "epoch": 0.53, "grad_norm": 4.005782127380371, "learning_rate": 0.0002, "loss": 1.6101, "step": 129720 }, { "epoch": 0.53, "grad_norm": 3.1755664348602295, "learning_rate": 0.0002, "loss": 1.2538, "step": 129730 }, { "epoch": 0.53, "grad_norm": 2.7262914180755615, "learning_rate": 0.0002, "loss": 1.7123, "step": 129740 }, { "epoch": 0.53, "grad_norm": 4.617104530334473, "learning_rate": 0.0002, "loss": 1.6792, "step": 129750 }, { "epoch": 0.53, "grad_norm": 2.604771614074707, "learning_rate": 0.0002, "loss": 1.7264, "step": 129760 }, { "epoch": 0.53, "grad_norm": 5.153261661529541, "learning_rate": 0.0002, "loss": 1.6599, "step": 129770 }, { "epoch": 0.53, "grad_norm": 3.682360887527466, "learning_rate": 0.0002, "loss": 1.534, "step": 129780 }, { "epoch": 0.53, "grad_norm": 5.0973734855651855, "learning_rate": 0.0002, "loss": 1.4886, "step": 129790 }, { "epoch": 0.53, "grad_norm": 8.209601402282715, "learning_rate": 0.0002, "loss": 1.7238, "step": 129800 }, { "epoch": 0.53, "grad_norm": 4.030789375305176, "learning_rate": 0.0002, "loss": 1.5765, "step": 129810 }, { "epoch": 0.53, "grad_norm": 3.709228992462158, "learning_rate": 0.0002, "loss": 1.5573, "step": 129820 }, { "epoch": 0.53, "grad_norm": 3.0425009727478027, "learning_rate": 0.0002, "loss": 1.6569, "step": 129830 }, { "epoch": 0.53, "grad_norm": 1.3278748989105225, "learning_rate": 0.0002, "loss": 1.2961, "step": 129840 }, { "epoch": 0.53, "grad_norm": 1.7825461626052856, "learning_rate": 0.0002, "loss": 1.6955, "step": 129850 }, { "epoch": 0.53, "grad_norm": 2.3952279090881348, "learning_rate": 0.0002, "loss": 1.5063, "step": 129860 }, { "epoch": 0.53, "grad_norm": 3.869774103164673, "learning_rate": 0.0002, "loss": 1.4051, "step": 129870 }, { "epoch": 0.53, "grad_norm": 3.530520439147949, "learning_rate": 0.0002, "loss": 1.9113, "step": 129880 }, { "epoch": 0.53, "grad_norm": 2.5672695636749268, "learning_rate": 0.0002, "loss": 1.6802, "step": 129890 }, { "epoch": 0.53, "grad_norm": 4.2737956047058105, "learning_rate": 0.0002, "loss": 1.2843, "step": 129900 }, { "epoch": 0.53, "grad_norm": 3.4251174926757812, "learning_rate": 0.0002, "loss": 1.4604, "step": 129910 }, { "epoch": 0.53, "grad_norm": 2.7020890712738037, "learning_rate": 0.0002, "loss": 1.5626, "step": 129920 }, { "epoch": 0.53, "grad_norm": 2.5233170986175537, "learning_rate": 0.0002, "loss": 1.6274, "step": 129930 }, { "epoch": 0.53, "grad_norm": 2.939091682434082, "learning_rate": 0.0002, "loss": 1.3754, "step": 129940 }, { "epoch": 0.53, "grad_norm": 3.2428512573242188, "learning_rate": 0.0002, "loss": 1.5913, "step": 129950 }, { "epoch": 0.53, "grad_norm": 2.1719727516174316, "learning_rate": 0.0002, "loss": 1.4897, "step": 129960 }, { "epoch": 0.53, "grad_norm": 3.991567611694336, "learning_rate": 0.0002, "loss": 1.5047, "step": 129970 }, { "epoch": 0.53, "grad_norm": 3.4458882808685303, "learning_rate": 0.0002, "loss": 1.3772, "step": 129980 }, { "epoch": 0.53, "grad_norm": 2.3650503158569336, "learning_rate": 0.0002, "loss": 1.2166, "step": 129990 }, { "epoch": 0.53, "grad_norm": 4.379490852355957, "learning_rate": 0.0002, "loss": 1.8043, "step": 130000 }, { "epoch": 0.53, "grad_norm": 2.405287981033325, "learning_rate": 0.0002, "loss": 1.5, "step": 130010 }, { "epoch": 0.53, "grad_norm": 2.8312389850616455, "learning_rate": 0.0002, "loss": 1.6439, "step": 130020 }, { "epoch": 0.53, "grad_norm": 3.241450548171997, "learning_rate": 0.0002, "loss": 1.4208, "step": 130030 }, { "epoch": 0.53, "grad_norm": 2.765388011932373, "learning_rate": 0.0002, "loss": 1.436, "step": 130040 }, { "epoch": 0.53, "grad_norm": 2.6630961894989014, "learning_rate": 0.0002, "loss": 1.635, "step": 130050 }, { "epoch": 0.53, "grad_norm": 3.248556613922119, "learning_rate": 0.0002, "loss": 1.412, "step": 130060 }, { "epoch": 0.53, "grad_norm": 2.5887484550476074, "learning_rate": 0.0002, "loss": 1.449, "step": 130070 }, { "epoch": 0.53, "grad_norm": 8.854004859924316, "learning_rate": 0.0002, "loss": 1.69, "step": 130080 }, { "epoch": 0.53, "grad_norm": 2.7254478931427, "learning_rate": 0.0002, "loss": 1.7196, "step": 130090 }, { "epoch": 0.53, "grad_norm": 1.626544713973999, "learning_rate": 0.0002, "loss": 1.3908, "step": 130100 }, { "epoch": 0.53, "grad_norm": 3.0050466060638428, "learning_rate": 0.0002, "loss": 1.855, "step": 130110 }, { "epoch": 0.53, "grad_norm": 3.110379695892334, "learning_rate": 0.0002, "loss": 1.5053, "step": 130120 }, { "epoch": 0.53, "grad_norm": 3.179594039916992, "learning_rate": 0.0002, "loss": 1.4954, "step": 130130 }, { "epoch": 0.53, "grad_norm": 5.572818756103516, "learning_rate": 0.0002, "loss": 1.7539, "step": 130140 }, { "epoch": 0.53, "grad_norm": 4.732480525970459, "learning_rate": 0.0002, "loss": 1.5748, "step": 130150 }, { "epoch": 0.53, "grad_norm": 2.8016812801361084, "learning_rate": 0.0002, "loss": 1.6795, "step": 130160 }, { "epoch": 0.53, "grad_norm": 4.32248067855835, "learning_rate": 0.0002, "loss": 1.5696, "step": 130170 }, { "epoch": 0.53, "grad_norm": 1.911375641822815, "learning_rate": 0.0002, "loss": 1.6194, "step": 130180 }, { "epoch": 0.53, "grad_norm": 3.380319595336914, "learning_rate": 0.0002, "loss": 1.633, "step": 130190 }, { "epoch": 0.53, "grad_norm": 2.023132562637329, "learning_rate": 0.0002, "loss": 1.4491, "step": 130200 }, { "epoch": 0.53, "grad_norm": 2.6962172985076904, "learning_rate": 0.0002, "loss": 1.2962, "step": 130210 }, { "epoch": 0.53, "grad_norm": 2.7629599571228027, "learning_rate": 0.0002, "loss": 1.5284, "step": 130220 }, { "epoch": 0.53, "grad_norm": 2.130284070968628, "learning_rate": 0.0002, "loss": 1.5154, "step": 130230 }, { "epoch": 0.53, "grad_norm": 5.065508842468262, "learning_rate": 0.0002, "loss": 1.5156, "step": 130240 }, { "epoch": 0.53, "grad_norm": 2.210693359375, "learning_rate": 0.0002, "loss": 1.7012, "step": 130250 }, { "epoch": 0.53, "grad_norm": 3.410264730453491, "learning_rate": 0.0002, "loss": 1.5513, "step": 130260 }, { "epoch": 0.53, "grad_norm": 2.9100704193115234, "learning_rate": 0.0002, "loss": 1.5853, "step": 130270 }, { "epoch": 0.53, "grad_norm": 3.510887861251831, "learning_rate": 0.0002, "loss": 1.6425, "step": 130280 }, { "epoch": 0.53, "grad_norm": 6.059093952178955, "learning_rate": 0.0002, "loss": 1.5503, "step": 130290 }, { "epoch": 0.53, "grad_norm": 2.5290987491607666, "learning_rate": 0.0002, "loss": 1.682, "step": 130300 }, { "epoch": 0.53, "grad_norm": 3.4867708683013916, "learning_rate": 0.0002, "loss": 1.5765, "step": 130310 }, { "epoch": 0.53, "grad_norm": 2.7366127967834473, "learning_rate": 0.0002, "loss": 1.6657, "step": 130320 }, { "epoch": 0.53, "grad_norm": 2.6878609657287598, "learning_rate": 0.0002, "loss": 1.715, "step": 130330 }, { "epoch": 0.53, "grad_norm": 2.898540496826172, "learning_rate": 0.0002, "loss": 1.5984, "step": 130340 }, { "epoch": 0.53, "grad_norm": 2.9471306800842285, "learning_rate": 0.0002, "loss": 1.6658, "step": 130350 }, { "epoch": 0.53, "grad_norm": 2.8784501552581787, "learning_rate": 0.0002, "loss": 1.5707, "step": 130360 }, { "epoch": 0.53, "grad_norm": 3.851210355758667, "learning_rate": 0.0002, "loss": 1.5031, "step": 130370 }, { "epoch": 0.53, "grad_norm": 2.5535593032836914, "learning_rate": 0.0002, "loss": 1.4479, "step": 130380 }, { "epoch": 0.53, "grad_norm": 2.7994706630706787, "learning_rate": 0.0002, "loss": 1.6329, "step": 130390 }, { "epoch": 0.53, "grad_norm": 2.2132744789123535, "learning_rate": 0.0002, "loss": 1.8054, "step": 130400 }, { "epoch": 0.53, "grad_norm": 3.132005453109741, "learning_rate": 0.0002, "loss": 1.5218, "step": 130410 }, { "epoch": 0.53, "grad_norm": 3.773594379425049, "learning_rate": 0.0002, "loss": 1.434, "step": 130420 }, { "epoch": 0.53, "grad_norm": 2.5810258388519287, "learning_rate": 0.0002, "loss": 1.8292, "step": 130430 }, { "epoch": 0.53, "grad_norm": 2.3905296325683594, "learning_rate": 0.0002, "loss": 1.4736, "step": 130440 }, { "epoch": 0.53, "grad_norm": 3.9128403663635254, "learning_rate": 0.0002, "loss": 1.4268, "step": 130450 }, { "epoch": 0.53, "grad_norm": 1.7402865886688232, "learning_rate": 0.0002, "loss": 1.4919, "step": 130460 }, { "epoch": 0.53, "grad_norm": 5.1317033767700195, "learning_rate": 0.0002, "loss": 1.5903, "step": 130470 }, { "epoch": 0.53, "grad_norm": 3.9028146266937256, "learning_rate": 0.0002, "loss": 1.7559, "step": 130480 }, { "epoch": 0.53, "grad_norm": 1.904508113861084, "learning_rate": 0.0002, "loss": 1.4574, "step": 130490 }, { "epoch": 0.53, "grad_norm": 3.9400508403778076, "learning_rate": 0.0002, "loss": 1.7776, "step": 130500 }, { "epoch": 0.53, "grad_norm": 3.9302775859832764, "learning_rate": 0.0002, "loss": 1.8047, "step": 130510 }, { "epoch": 0.53, "grad_norm": 3.029982089996338, "learning_rate": 0.0002, "loss": 1.3364, "step": 130520 }, { "epoch": 0.53, "grad_norm": 2.956627368927002, "learning_rate": 0.0002, "loss": 1.7394, "step": 130530 }, { "epoch": 0.53, "grad_norm": 2.719099760055542, "learning_rate": 0.0002, "loss": 1.6113, "step": 130540 }, { "epoch": 0.53, "grad_norm": 2.5455117225646973, "learning_rate": 0.0002, "loss": 1.7858, "step": 130550 }, { "epoch": 0.53, "grad_norm": 3.749755382537842, "learning_rate": 0.0002, "loss": 1.4052, "step": 130560 }, { "epoch": 0.53, "grad_norm": 3.193474292755127, "learning_rate": 0.0002, "loss": 1.5856, "step": 130570 }, { "epoch": 0.53, "grad_norm": 3.889456272125244, "learning_rate": 0.0002, "loss": 1.483, "step": 130580 }, { "epoch": 0.53, "grad_norm": 3.795858860015869, "learning_rate": 0.0002, "loss": 1.7678, "step": 130590 }, { "epoch": 0.53, "grad_norm": 2.386570692062378, "learning_rate": 0.0002, "loss": 1.5515, "step": 130600 }, { "epoch": 0.53, "grad_norm": 2.679155111312866, "learning_rate": 0.0002, "loss": 1.5203, "step": 130610 }, { "epoch": 0.53, "grad_norm": 2.920773983001709, "learning_rate": 0.0002, "loss": 1.518, "step": 130620 }, { "epoch": 0.53, "grad_norm": 2.727015495300293, "learning_rate": 0.0002, "loss": 1.7124, "step": 130630 }, { "epoch": 0.53, "grad_norm": 2.071632146835327, "learning_rate": 0.0002, "loss": 1.5573, "step": 130640 }, { "epoch": 0.53, "grad_norm": 3.7518417835235596, "learning_rate": 0.0002, "loss": 1.7375, "step": 130650 }, { "epoch": 0.53, "grad_norm": 3.0487546920776367, "learning_rate": 0.0002, "loss": 1.3707, "step": 130660 }, { "epoch": 0.53, "grad_norm": 2.4718663692474365, "learning_rate": 0.0002, "loss": 1.6165, "step": 130670 }, { "epoch": 0.53, "grad_norm": 3.75958251953125, "learning_rate": 0.0002, "loss": 1.4786, "step": 130680 }, { "epoch": 0.53, "grad_norm": 4.577938556671143, "learning_rate": 0.0002, "loss": 1.6324, "step": 130690 }, { "epoch": 0.53, "grad_norm": 2.5162875652313232, "learning_rate": 0.0002, "loss": 1.6009, "step": 130700 }, { "epoch": 0.53, "grad_norm": 2.8430416584014893, "learning_rate": 0.0002, "loss": 1.3978, "step": 130710 }, { "epoch": 0.53, "grad_norm": 3.5931942462921143, "learning_rate": 0.0002, "loss": 1.5672, "step": 130720 }, { "epoch": 0.53, "grad_norm": 2.457136631011963, "learning_rate": 0.0002, "loss": 1.4834, "step": 130730 }, { "epoch": 0.53, "grad_norm": 2.6054227352142334, "learning_rate": 0.0002, "loss": 1.6383, "step": 130740 }, { "epoch": 0.53, "grad_norm": 4.168564319610596, "learning_rate": 0.0002, "loss": 1.7695, "step": 130750 }, { "epoch": 0.53, "grad_norm": 3.397923231124878, "learning_rate": 0.0002, "loss": 1.7849, "step": 130760 }, { "epoch": 0.53, "grad_norm": 2.5405726432800293, "learning_rate": 0.0002, "loss": 1.7705, "step": 130770 }, { "epoch": 0.53, "grad_norm": 2.2195241451263428, "learning_rate": 0.0002, "loss": 1.4825, "step": 130780 }, { "epoch": 0.53, "grad_norm": 2.740023136138916, "learning_rate": 0.0002, "loss": 1.3096, "step": 130790 }, { "epoch": 0.53, "grad_norm": 2.6965017318725586, "learning_rate": 0.0002, "loss": 1.5258, "step": 130800 }, { "epoch": 0.53, "grad_norm": 4.626461982727051, "learning_rate": 0.0002, "loss": 1.4994, "step": 130810 }, { "epoch": 0.53, "grad_norm": 3.7058451175689697, "learning_rate": 0.0002, "loss": 1.7629, "step": 130820 }, { "epoch": 0.53, "grad_norm": 3.370715856552124, "learning_rate": 0.0002, "loss": 1.8236, "step": 130830 }, { "epoch": 0.53, "grad_norm": 2.2491211891174316, "learning_rate": 0.0002, "loss": 1.4738, "step": 130840 }, { "epoch": 0.53, "grad_norm": 3.7860686779022217, "learning_rate": 0.0002, "loss": 1.5774, "step": 130850 }, { "epoch": 0.53, "grad_norm": 2.707275390625, "learning_rate": 0.0002, "loss": 1.6015, "step": 130860 }, { "epoch": 0.53, "grad_norm": 3.2448034286499023, "learning_rate": 0.0002, "loss": 1.4792, "step": 130870 }, { "epoch": 0.53, "grad_norm": 2.9504518508911133, "learning_rate": 0.0002, "loss": 1.5156, "step": 130880 }, { "epoch": 0.53, "grad_norm": 3.3432202339172363, "learning_rate": 0.0002, "loss": 1.5965, "step": 130890 }, { "epoch": 0.53, "grad_norm": 2.7757794857025146, "learning_rate": 0.0002, "loss": 1.6159, "step": 130900 }, { "epoch": 0.53, "grad_norm": 2.401052713394165, "learning_rate": 0.0002, "loss": 1.707, "step": 130910 }, { "epoch": 0.53, "grad_norm": 4.484431266784668, "learning_rate": 0.0002, "loss": 1.4377, "step": 130920 }, { "epoch": 0.53, "grad_norm": 2.8474814891815186, "learning_rate": 0.0002, "loss": 1.4031, "step": 130930 }, { "epoch": 0.53, "grad_norm": 2.9733288288116455, "learning_rate": 0.0002, "loss": 1.6252, "step": 130940 }, { "epoch": 0.53, "grad_norm": 2.2265281677246094, "learning_rate": 0.0002, "loss": 1.5334, "step": 130950 }, { "epoch": 0.53, "grad_norm": 2.9724984169006348, "learning_rate": 0.0002, "loss": 1.6809, "step": 130960 }, { "epoch": 0.53, "grad_norm": 2.129399061203003, "learning_rate": 0.0002, "loss": 1.4989, "step": 130970 }, { "epoch": 0.53, "grad_norm": 2.639418363571167, "learning_rate": 0.0002, "loss": 1.6407, "step": 130980 }, { "epoch": 0.53, "grad_norm": 3.871962070465088, "learning_rate": 0.0002, "loss": 1.3686, "step": 130990 }, { "epoch": 0.53, "grad_norm": 2.700580596923828, "learning_rate": 0.0002, "loss": 1.5418, "step": 131000 }, { "epoch": 0.53, "grad_norm": 3.3605945110321045, "learning_rate": 0.0002, "loss": 1.5766, "step": 131010 }, { "epoch": 0.53, "grad_norm": 3.8233768939971924, "learning_rate": 0.0002, "loss": 1.5823, "step": 131020 }, { "epoch": 0.53, "grad_norm": 1.8012548685073853, "learning_rate": 0.0002, "loss": 1.4118, "step": 131030 }, { "epoch": 0.53, "grad_norm": 3.6876797676086426, "learning_rate": 0.0002, "loss": 1.5805, "step": 131040 }, { "epoch": 0.53, "grad_norm": 3.2523534297943115, "learning_rate": 0.0002, "loss": 1.7618, "step": 131050 }, { "epoch": 0.53, "grad_norm": 2.93222713470459, "learning_rate": 0.0002, "loss": 1.4458, "step": 131060 }, { "epoch": 0.53, "grad_norm": 2.7529733180999756, "learning_rate": 0.0002, "loss": 1.5903, "step": 131070 }, { "epoch": 0.53, "grad_norm": 2.874821186065674, "learning_rate": 0.0002, "loss": 1.5776, "step": 131080 }, { "epoch": 0.53, "grad_norm": 4.413094997406006, "learning_rate": 0.0002, "loss": 1.5662, "step": 131090 }, { "epoch": 0.53, "grad_norm": 3.459042549133301, "learning_rate": 0.0002, "loss": 1.8441, "step": 131100 }, { "epoch": 0.53, "grad_norm": 3.997426748275757, "learning_rate": 0.0002, "loss": 1.6071, "step": 131110 }, { "epoch": 0.53, "grad_norm": 2.2149832248687744, "learning_rate": 0.0002, "loss": 1.7393, "step": 131120 }, { "epoch": 0.53, "grad_norm": 5.146780967712402, "learning_rate": 0.0002, "loss": 1.4009, "step": 131130 }, { "epoch": 0.53, "grad_norm": 3.112931966781616, "learning_rate": 0.0002, "loss": 1.5491, "step": 131140 }, { "epoch": 0.53, "grad_norm": 3.4655325412750244, "learning_rate": 0.0002, "loss": 1.5776, "step": 131150 }, { "epoch": 0.53, "grad_norm": 2.813380479812622, "learning_rate": 0.0002, "loss": 1.579, "step": 131160 }, { "epoch": 0.53, "grad_norm": 2.3805763721466064, "learning_rate": 0.0002, "loss": 1.4722, "step": 131170 }, { "epoch": 0.53, "grad_norm": 3.6863105297088623, "learning_rate": 0.0002, "loss": 1.3812, "step": 131180 }, { "epoch": 0.53, "grad_norm": 2.775825023651123, "learning_rate": 0.0002, "loss": 1.7965, "step": 131190 }, { "epoch": 0.53, "grad_norm": 2.4807488918304443, "learning_rate": 0.0002, "loss": 1.2353, "step": 131200 }, { "epoch": 0.53, "grad_norm": 2.0275821685791016, "learning_rate": 0.0002, "loss": 1.379, "step": 131210 }, { "epoch": 0.53, "grad_norm": 2.869605302810669, "learning_rate": 0.0002, "loss": 1.517, "step": 131220 }, { "epoch": 0.53, "grad_norm": 2.776466131210327, "learning_rate": 0.0002, "loss": 1.5441, "step": 131230 }, { "epoch": 0.53, "grad_norm": 3.030341625213623, "learning_rate": 0.0002, "loss": 1.6653, "step": 131240 }, { "epoch": 0.53, "grad_norm": 2.2533750534057617, "learning_rate": 0.0002, "loss": 1.4938, "step": 131250 }, { "epoch": 0.53, "grad_norm": 3.286184787750244, "learning_rate": 0.0002, "loss": 1.3494, "step": 131260 }, { "epoch": 0.53, "grad_norm": 2.274592638015747, "learning_rate": 0.0002, "loss": 1.281, "step": 131270 }, { "epoch": 0.53, "grad_norm": 2.614516019821167, "learning_rate": 0.0002, "loss": 1.7669, "step": 131280 }, { "epoch": 0.53, "grad_norm": 3.4262375831604004, "learning_rate": 0.0002, "loss": 1.5895, "step": 131290 }, { "epoch": 0.53, "grad_norm": 1.55055570602417, "learning_rate": 0.0002, "loss": 1.4685, "step": 131300 }, { "epoch": 0.53, "grad_norm": 9.914274215698242, "learning_rate": 0.0002, "loss": 1.4602, "step": 131310 }, { "epoch": 0.53, "grad_norm": 3.347014904022217, "learning_rate": 0.0002, "loss": 1.5892, "step": 131320 }, { "epoch": 0.53, "grad_norm": 2.3982913494110107, "learning_rate": 0.0002, "loss": 1.5595, "step": 131330 }, { "epoch": 0.53, "grad_norm": 2.7507688999176025, "learning_rate": 0.0002, "loss": 1.4692, "step": 131340 }, { "epoch": 0.53, "grad_norm": 4.567809104919434, "learning_rate": 0.0002, "loss": 1.5868, "step": 131350 }, { "epoch": 0.53, "grad_norm": 4.641285419464111, "learning_rate": 0.0002, "loss": 1.5595, "step": 131360 }, { "epoch": 0.53, "grad_norm": 5.223957538604736, "learning_rate": 0.0002, "loss": 1.6296, "step": 131370 }, { "epoch": 0.53, "grad_norm": 2.761788845062256, "learning_rate": 0.0002, "loss": 1.4838, "step": 131380 }, { "epoch": 0.53, "grad_norm": 3.7959370613098145, "learning_rate": 0.0002, "loss": 1.639, "step": 131390 }, { "epoch": 0.53, "grad_norm": 2.830455780029297, "learning_rate": 0.0002, "loss": 1.5129, "step": 131400 }, { "epoch": 0.53, "grad_norm": 3.361612319946289, "learning_rate": 0.0002, "loss": 1.626, "step": 131410 }, { "epoch": 0.54, "grad_norm": 1.5677472352981567, "learning_rate": 0.0002, "loss": 1.5794, "step": 131420 }, { "epoch": 0.54, "grad_norm": 2.237645387649536, "learning_rate": 0.0002, "loss": 1.7141, "step": 131430 }, { "epoch": 0.54, "grad_norm": 2.9256560802459717, "learning_rate": 0.0002, "loss": 1.4832, "step": 131440 }, { "epoch": 0.54, "grad_norm": 2.199146032333374, "learning_rate": 0.0002, "loss": 1.4692, "step": 131450 }, { "epoch": 0.54, "grad_norm": 4.1052680015563965, "learning_rate": 0.0002, "loss": 1.3135, "step": 131460 }, { "epoch": 0.54, "grad_norm": 2.207386016845703, "learning_rate": 0.0002, "loss": 1.7715, "step": 131470 }, { "epoch": 0.54, "grad_norm": 2.4332263469696045, "learning_rate": 0.0002, "loss": 1.6491, "step": 131480 }, { "epoch": 0.54, "grad_norm": 3.0444788932800293, "learning_rate": 0.0002, "loss": 1.6697, "step": 131490 }, { "epoch": 0.54, "grad_norm": 2.5228917598724365, "learning_rate": 0.0002, "loss": 1.7716, "step": 131500 }, { "epoch": 0.54, "grad_norm": 2.906594753265381, "learning_rate": 0.0002, "loss": 1.4465, "step": 131510 }, { "epoch": 0.54, "grad_norm": 2.7781243324279785, "learning_rate": 0.0002, "loss": 1.5655, "step": 131520 }, { "epoch": 0.54, "grad_norm": 3.1369378566741943, "learning_rate": 0.0002, "loss": 1.5794, "step": 131530 }, { "epoch": 0.54, "grad_norm": 3.3064053058624268, "learning_rate": 0.0002, "loss": 1.4744, "step": 131540 }, { "epoch": 0.54, "grad_norm": 2.543771743774414, "learning_rate": 0.0002, "loss": 1.4937, "step": 131550 }, { "epoch": 0.54, "grad_norm": 4.531980991363525, "learning_rate": 0.0002, "loss": 1.6081, "step": 131560 }, { "epoch": 0.54, "grad_norm": 2.8663876056671143, "learning_rate": 0.0002, "loss": 1.6294, "step": 131570 }, { "epoch": 0.54, "grad_norm": 3.0475645065307617, "learning_rate": 0.0002, "loss": 1.5647, "step": 131580 }, { "epoch": 0.54, "grad_norm": 2.888148307800293, "learning_rate": 0.0002, "loss": 1.7615, "step": 131590 }, { "epoch": 0.54, "grad_norm": 1.394897222518921, "learning_rate": 0.0002, "loss": 1.6633, "step": 131600 }, { "epoch": 0.54, "grad_norm": 1.7803376913070679, "learning_rate": 0.0002, "loss": 1.6611, "step": 131610 }, { "epoch": 0.54, "grad_norm": 3.1522743701934814, "learning_rate": 0.0002, "loss": 1.6608, "step": 131620 }, { "epoch": 0.54, "grad_norm": 2.31880259513855, "learning_rate": 0.0002, "loss": 1.5916, "step": 131630 }, { "epoch": 0.54, "grad_norm": 4.490846633911133, "learning_rate": 0.0002, "loss": 1.617, "step": 131640 }, { "epoch": 0.54, "grad_norm": 3.5121989250183105, "learning_rate": 0.0002, "loss": 1.6768, "step": 131650 }, { "epoch": 0.54, "grad_norm": 3.1378629207611084, "learning_rate": 0.0002, "loss": 1.4702, "step": 131660 }, { "epoch": 0.54, "grad_norm": 3.377420425415039, "learning_rate": 0.0002, "loss": 1.7314, "step": 131670 }, { "epoch": 0.54, "grad_norm": 3.3254377841949463, "learning_rate": 0.0002, "loss": 1.5167, "step": 131680 }, { "epoch": 0.54, "grad_norm": 2.161102294921875, "learning_rate": 0.0002, "loss": 1.5736, "step": 131690 }, { "epoch": 0.54, "grad_norm": 3.8568527698516846, "learning_rate": 0.0002, "loss": 1.752, "step": 131700 }, { "epoch": 0.54, "grad_norm": 2.08888578414917, "learning_rate": 0.0002, "loss": 1.3047, "step": 131710 }, { "epoch": 0.54, "grad_norm": 3.574237108230591, "learning_rate": 0.0002, "loss": 1.8035, "step": 131720 }, { "epoch": 0.54, "grad_norm": 3.484001874923706, "learning_rate": 0.0002, "loss": 1.579, "step": 131730 }, { "epoch": 0.54, "grad_norm": 2.7724180221557617, "learning_rate": 0.0002, "loss": 1.5641, "step": 131740 }, { "epoch": 0.54, "grad_norm": 2.2374796867370605, "learning_rate": 0.0002, "loss": 1.7439, "step": 131750 }, { "epoch": 0.54, "grad_norm": 2.595284938812256, "learning_rate": 0.0002, "loss": 1.449, "step": 131760 }, { "epoch": 0.54, "grad_norm": 3.204705238342285, "learning_rate": 0.0002, "loss": 1.6955, "step": 131770 }, { "epoch": 0.54, "grad_norm": 2.982863426208496, "learning_rate": 0.0002, "loss": 1.4103, "step": 131780 }, { "epoch": 0.54, "grad_norm": 4.3539605140686035, "learning_rate": 0.0002, "loss": 1.5876, "step": 131790 }, { "epoch": 0.54, "grad_norm": 5.92063570022583, "learning_rate": 0.0002, "loss": 1.3844, "step": 131800 }, { "epoch": 0.54, "grad_norm": 4.247678756713867, "learning_rate": 0.0002, "loss": 1.5447, "step": 131810 }, { "epoch": 0.54, "grad_norm": 2.734834909439087, "learning_rate": 0.0002, "loss": 1.8332, "step": 131820 }, { "epoch": 0.54, "grad_norm": 2.6390323638916016, "learning_rate": 0.0002, "loss": 1.679, "step": 131830 }, { "epoch": 0.54, "grad_norm": 2.7626659870147705, "learning_rate": 0.0002, "loss": 1.4245, "step": 131840 }, { "epoch": 0.54, "grad_norm": 3.8453330993652344, "learning_rate": 0.0002, "loss": 1.4986, "step": 131850 }, { "epoch": 0.54, "grad_norm": 3.154412269592285, "learning_rate": 0.0002, "loss": 1.5169, "step": 131860 }, { "epoch": 0.54, "grad_norm": 1.6781079769134521, "learning_rate": 0.0002, "loss": 1.5745, "step": 131870 }, { "epoch": 0.54, "grad_norm": 2.792634963989258, "learning_rate": 0.0002, "loss": 1.6579, "step": 131880 }, { "epoch": 0.54, "grad_norm": 2.7993762493133545, "learning_rate": 0.0002, "loss": 1.6019, "step": 131890 }, { "epoch": 0.54, "grad_norm": 3.412060260772705, "learning_rate": 0.0002, "loss": 1.5324, "step": 131900 }, { "epoch": 0.54, "grad_norm": 1.9841375350952148, "learning_rate": 0.0002, "loss": 1.3395, "step": 131910 }, { "epoch": 0.54, "grad_norm": 1.2134573459625244, "learning_rate": 0.0002, "loss": 1.5496, "step": 131920 }, { "epoch": 0.54, "grad_norm": 2.505976438522339, "learning_rate": 0.0002, "loss": 1.4717, "step": 131930 }, { "epoch": 0.54, "grad_norm": 2.600486993789673, "learning_rate": 0.0002, "loss": 1.399, "step": 131940 }, { "epoch": 0.54, "grad_norm": 3.206144332885742, "learning_rate": 0.0002, "loss": 1.4275, "step": 131950 }, { "epoch": 0.54, "grad_norm": 2.5084896087646484, "learning_rate": 0.0002, "loss": 1.7809, "step": 131960 }, { "epoch": 0.54, "grad_norm": 3.434417963027954, "learning_rate": 0.0002, "loss": 1.5944, "step": 131970 }, { "epoch": 0.54, "grad_norm": 4.977936744689941, "learning_rate": 0.0002, "loss": 1.469, "step": 131980 }, { "epoch": 0.54, "grad_norm": 2.6316404342651367, "learning_rate": 0.0002, "loss": 1.6884, "step": 131990 }, { "epoch": 0.54, "grad_norm": 3.145285129547119, "learning_rate": 0.0002, "loss": 1.5382, "step": 132000 }, { "epoch": 0.54, "grad_norm": 3.7842190265655518, "learning_rate": 0.0002, "loss": 1.6781, "step": 132010 }, { "epoch": 0.54, "grad_norm": 4.113983631134033, "learning_rate": 0.0002, "loss": 1.4006, "step": 132020 }, { "epoch": 0.54, "grad_norm": 1.9648752212524414, "learning_rate": 0.0002, "loss": 1.6064, "step": 132030 }, { "epoch": 0.54, "grad_norm": 3.291506767272949, "learning_rate": 0.0002, "loss": 1.4571, "step": 132040 }, { "epoch": 0.54, "grad_norm": 2.5977182388305664, "learning_rate": 0.0002, "loss": 1.5841, "step": 132050 }, { "epoch": 0.54, "grad_norm": 3.704068183898926, "learning_rate": 0.0002, "loss": 1.6479, "step": 132060 }, { "epoch": 0.54, "grad_norm": 2.643828868865967, "learning_rate": 0.0002, "loss": 1.6216, "step": 132070 }, { "epoch": 0.54, "grad_norm": 1.7097487449645996, "learning_rate": 0.0002, "loss": 1.4603, "step": 132080 }, { "epoch": 0.54, "grad_norm": 3.157879590988159, "learning_rate": 0.0002, "loss": 1.4345, "step": 132090 }, { "epoch": 0.54, "grad_norm": 2.192409038543701, "learning_rate": 0.0002, "loss": 1.578, "step": 132100 }, { "epoch": 0.54, "grad_norm": 4.872453212738037, "learning_rate": 0.0002, "loss": 1.3868, "step": 132110 }, { "epoch": 0.54, "grad_norm": 3.5016863346099854, "learning_rate": 0.0002, "loss": 1.9118, "step": 132120 }, { "epoch": 0.54, "grad_norm": 2.4095089435577393, "learning_rate": 0.0002, "loss": 1.5862, "step": 132130 }, { "epoch": 0.54, "grad_norm": 1.8711403608322144, "learning_rate": 0.0002, "loss": 1.5799, "step": 132140 }, { "epoch": 0.54, "grad_norm": 4.109976291656494, "learning_rate": 0.0002, "loss": 1.6672, "step": 132150 }, { "epoch": 0.54, "grad_norm": 3.0997674465179443, "learning_rate": 0.0002, "loss": 1.6678, "step": 132160 }, { "epoch": 0.54, "grad_norm": 3.0429506301879883, "learning_rate": 0.0002, "loss": 1.5121, "step": 132170 }, { "epoch": 0.54, "grad_norm": 2.5766971111297607, "learning_rate": 0.0002, "loss": 1.2282, "step": 132180 }, { "epoch": 0.54, "grad_norm": 2.6018624305725098, "learning_rate": 0.0002, "loss": 1.4772, "step": 132190 }, { "epoch": 0.54, "grad_norm": 4.245744228363037, "learning_rate": 0.0002, "loss": 1.7531, "step": 132200 }, { "epoch": 0.54, "grad_norm": 2.549635648727417, "learning_rate": 0.0002, "loss": 1.7212, "step": 132210 }, { "epoch": 0.54, "grad_norm": 3.2792344093322754, "learning_rate": 0.0002, "loss": 1.5308, "step": 132220 }, { "epoch": 0.54, "grad_norm": 2.79823637008667, "learning_rate": 0.0002, "loss": 1.5692, "step": 132230 }, { "epoch": 0.54, "grad_norm": 1.344806432723999, "learning_rate": 0.0002, "loss": 1.601, "step": 132240 }, { "epoch": 0.54, "grad_norm": 1.7829890251159668, "learning_rate": 0.0002, "loss": 1.4078, "step": 132250 }, { "epoch": 0.54, "grad_norm": 2.7840209007263184, "learning_rate": 0.0002, "loss": 1.6717, "step": 132260 }, { "epoch": 0.54, "grad_norm": 2.8453354835510254, "learning_rate": 0.0002, "loss": 1.5374, "step": 132270 }, { "epoch": 0.54, "grad_norm": 3.6537466049194336, "learning_rate": 0.0002, "loss": 1.546, "step": 132280 }, { "epoch": 0.54, "grad_norm": 2.605482339859009, "learning_rate": 0.0002, "loss": 1.5362, "step": 132290 }, { "epoch": 0.54, "grad_norm": 5.024974822998047, "learning_rate": 0.0002, "loss": 2.1373, "step": 132300 }, { "epoch": 0.54, "grad_norm": 2.3742828369140625, "learning_rate": 0.0002, "loss": 1.4939, "step": 132310 }, { "epoch": 0.54, "grad_norm": 4.326141357421875, "learning_rate": 0.0002, "loss": 1.5522, "step": 132320 }, { "epoch": 0.54, "grad_norm": 3.2528398036956787, "learning_rate": 0.0002, "loss": 1.6492, "step": 132330 }, { "epoch": 0.54, "grad_norm": 3.8196933269500732, "learning_rate": 0.0002, "loss": 1.9742, "step": 132340 }, { "epoch": 0.54, "grad_norm": 3.7667653560638428, "learning_rate": 0.0002, "loss": 1.4488, "step": 132350 }, { "epoch": 0.54, "grad_norm": 2.6024086475372314, "learning_rate": 0.0002, "loss": 1.681, "step": 132360 }, { "epoch": 0.54, "grad_norm": 3.3713393211364746, "learning_rate": 0.0002, "loss": 1.5521, "step": 132370 }, { "epoch": 0.54, "grad_norm": 2.3963935375213623, "learning_rate": 0.0002, "loss": 1.6119, "step": 132380 }, { "epoch": 0.54, "grad_norm": 3.171396493911743, "learning_rate": 0.0002, "loss": 1.5577, "step": 132390 }, { "epoch": 0.54, "grad_norm": 1.6242009401321411, "learning_rate": 0.0002, "loss": 1.6901, "step": 132400 }, { "epoch": 0.54, "grad_norm": 2.10360050201416, "learning_rate": 0.0002, "loss": 1.8875, "step": 132410 }, { "epoch": 0.54, "grad_norm": 3.569132089614868, "learning_rate": 0.0002, "loss": 1.5929, "step": 132420 }, { "epoch": 0.54, "grad_norm": 3.929246664047241, "learning_rate": 0.0002, "loss": 1.4611, "step": 132430 }, { "epoch": 0.54, "grad_norm": 2.1225128173828125, "learning_rate": 0.0002, "loss": 1.6755, "step": 132440 }, { "epoch": 0.54, "grad_norm": 5.58320951461792, "learning_rate": 0.0002, "loss": 1.5869, "step": 132450 }, { "epoch": 0.54, "grad_norm": 5.162261009216309, "learning_rate": 0.0002, "loss": 1.5791, "step": 132460 }, { "epoch": 0.54, "grad_norm": 2.7746288776397705, "learning_rate": 0.0002, "loss": 1.5289, "step": 132470 }, { "epoch": 0.54, "grad_norm": 6.334280014038086, "learning_rate": 0.0002, "loss": 1.5126, "step": 132480 }, { "epoch": 0.54, "grad_norm": 4.830169200897217, "learning_rate": 0.0002, "loss": 1.6988, "step": 132490 }, { "epoch": 0.54, "grad_norm": 2.061959743499756, "learning_rate": 0.0002, "loss": 1.4439, "step": 132500 }, { "epoch": 0.54, "grad_norm": 1.7259973287582397, "learning_rate": 0.0002, "loss": 1.5877, "step": 132510 }, { "epoch": 0.54, "grad_norm": 3.118682384490967, "learning_rate": 0.0002, "loss": 1.5666, "step": 132520 }, { "epoch": 0.54, "grad_norm": 1.7339863777160645, "learning_rate": 0.0002, "loss": 1.3931, "step": 132530 }, { "epoch": 0.54, "grad_norm": 3.4311611652374268, "learning_rate": 0.0002, "loss": 1.5549, "step": 132540 }, { "epoch": 0.54, "grad_norm": 3.8642497062683105, "learning_rate": 0.0002, "loss": 1.3876, "step": 132550 }, { "epoch": 0.54, "grad_norm": 3.330388307571411, "learning_rate": 0.0002, "loss": 1.7575, "step": 132560 }, { "epoch": 0.54, "grad_norm": 2.938250780105591, "learning_rate": 0.0002, "loss": 1.7469, "step": 132570 }, { "epoch": 0.54, "grad_norm": 4.97470760345459, "learning_rate": 0.0002, "loss": 1.4424, "step": 132580 }, { "epoch": 0.54, "grad_norm": 3.807746171951294, "learning_rate": 0.0002, "loss": 1.5445, "step": 132590 }, { "epoch": 0.54, "grad_norm": 3.7677175998687744, "learning_rate": 0.0002, "loss": 1.5483, "step": 132600 }, { "epoch": 0.54, "grad_norm": 3.7495739459991455, "learning_rate": 0.0002, "loss": 1.5695, "step": 132610 }, { "epoch": 0.54, "grad_norm": 2.6652097702026367, "learning_rate": 0.0002, "loss": 1.6116, "step": 132620 }, { "epoch": 0.54, "grad_norm": 5.523989677429199, "learning_rate": 0.0002, "loss": 1.5718, "step": 132630 }, { "epoch": 0.54, "grad_norm": 3.2595150470733643, "learning_rate": 0.0002, "loss": 1.5922, "step": 132640 }, { "epoch": 0.54, "grad_norm": 3.0057899951934814, "learning_rate": 0.0002, "loss": 1.2629, "step": 132650 }, { "epoch": 0.54, "grad_norm": 2.5580854415893555, "learning_rate": 0.0002, "loss": 1.3868, "step": 132660 }, { "epoch": 0.54, "grad_norm": 3.130263090133667, "learning_rate": 0.0002, "loss": 1.5864, "step": 132670 }, { "epoch": 0.54, "grad_norm": 2.764816999435425, "learning_rate": 0.0002, "loss": 1.7012, "step": 132680 }, { "epoch": 0.54, "grad_norm": 2.1531550884246826, "learning_rate": 0.0002, "loss": 1.6074, "step": 132690 }, { "epoch": 0.54, "grad_norm": 2.155897617340088, "learning_rate": 0.0002, "loss": 1.4319, "step": 132700 }, { "epoch": 0.54, "grad_norm": 2.1409122943878174, "learning_rate": 0.0002, "loss": 1.5209, "step": 132710 }, { "epoch": 0.54, "grad_norm": 3.071873903274536, "learning_rate": 0.0002, "loss": 1.7712, "step": 132720 }, { "epoch": 0.54, "grad_norm": 3.3182661533355713, "learning_rate": 0.0002, "loss": 1.6221, "step": 132730 }, { "epoch": 0.54, "grad_norm": 3.245633840560913, "learning_rate": 0.0002, "loss": 1.5741, "step": 132740 }, { "epoch": 0.54, "grad_norm": 4.299509525299072, "learning_rate": 0.0002, "loss": 1.6659, "step": 132750 }, { "epoch": 0.54, "grad_norm": 4.489044666290283, "learning_rate": 0.0002, "loss": 1.6818, "step": 132760 }, { "epoch": 0.54, "grad_norm": 2.773878812789917, "learning_rate": 0.0002, "loss": 1.4141, "step": 132770 }, { "epoch": 0.54, "grad_norm": 2.1474392414093018, "learning_rate": 0.0002, "loss": 1.5152, "step": 132780 }, { "epoch": 0.54, "grad_norm": 2.989689826965332, "learning_rate": 0.0002, "loss": 1.5552, "step": 132790 }, { "epoch": 0.54, "grad_norm": 3.5842769145965576, "learning_rate": 0.0002, "loss": 1.3394, "step": 132800 }, { "epoch": 0.54, "grad_norm": 2.2878549098968506, "learning_rate": 0.0002, "loss": 1.8101, "step": 132810 }, { "epoch": 0.54, "grad_norm": 2.930307626724243, "learning_rate": 0.0002, "loss": 1.5372, "step": 132820 }, { "epoch": 0.54, "grad_norm": 3.4479598999023438, "learning_rate": 0.0002, "loss": 1.5879, "step": 132830 }, { "epoch": 0.54, "grad_norm": 3.727058172225952, "learning_rate": 0.0002, "loss": 1.6673, "step": 132840 }, { "epoch": 0.54, "grad_norm": 2.724689245223999, "learning_rate": 0.0002, "loss": 1.5846, "step": 132850 }, { "epoch": 0.54, "grad_norm": 2.3268229961395264, "learning_rate": 0.0002, "loss": 1.5256, "step": 132860 }, { "epoch": 0.54, "grad_norm": 2.458082437515259, "learning_rate": 0.0002, "loss": 1.5124, "step": 132870 }, { "epoch": 0.54, "grad_norm": 2.8612148761749268, "learning_rate": 0.0002, "loss": 1.5237, "step": 132880 }, { "epoch": 0.54, "grad_norm": 2.350132465362549, "learning_rate": 0.0002, "loss": 1.3957, "step": 132890 }, { "epoch": 0.54, "grad_norm": 2.850100040435791, "learning_rate": 0.0002, "loss": 1.7033, "step": 132900 }, { "epoch": 0.54, "grad_norm": 3.239431858062744, "learning_rate": 0.0002, "loss": 1.8571, "step": 132910 }, { "epoch": 0.54, "grad_norm": 3.4066128730773926, "learning_rate": 0.0002, "loss": 1.6673, "step": 132920 }, { "epoch": 0.54, "grad_norm": 4.633105278015137, "learning_rate": 0.0002, "loss": 1.4759, "step": 132930 }, { "epoch": 0.54, "grad_norm": 2.7276086807250977, "learning_rate": 0.0002, "loss": 1.4266, "step": 132940 }, { "epoch": 0.54, "grad_norm": 3.8154234886169434, "learning_rate": 0.0002, "loss": 1.5549, "step": 132950 }, { "epoch": 0.54, "grad_norm": 2.8637819290161133, "learning_rate": 0.0002, "loss": 1.7086, "step": 132960 }, { "epoch": 0.54, "grad_norm": 2.2673182487487793, "learning_rate": 0.0002, "loss": 1.4126, "step": 132970 }, { "epoch": 0.54, "grad_norm": 6.780082702636719, "learning_rate": 0.0002, "loss": 1.4716, "step": 132980 }, { "epoch": 0.54, "grad_norm": 2.195607900619507, "learning_rate": 0.0002, "loss": 1.4637, "step": 132990 }, { "epoch": 0.54, "grad_norm": 2.5986695289611816, "learning_rate": 0.0002, "loss": 1.4573, "step": 133000 }, { "epoch": 0.54, "grad_norm": 2.6903045177459717, "learning_rate": 0.0002, "loss": 1.5248, "step": 133010 }, { "epoch": 0.54, "grad_norm": 3.622177839279175, "learning_rate": 0.0002, "loss": 1.5332, "step": 133020 }, { "epoch": 0.54, "grad_norm": 2.921177864074707, "learning_rate": 0.0002, "loss": 1.4289, "step": 133030 }, { "epoch": 0.54, "grad_norm": 4.501612663269043, "learning_rate": 0.0002, "loss": 1.5728, "step": 133040 }, { "epoch": 0.54, "grad_norm": 5.354920387268066, "learning_rate": 0.0002, "loss": 1.6209, "step": 133050 }, { "epoch": 0.54, "grad_norm": 2.198413610458374, "learning_rate": 0.0002, "loss": 1.3934, "step": 133060 }, { "epoch": 0.54, "grad_norm": 7.35322904586792, "learning_rate": 0.0002, "loss": 1.6902, "step": 133070 }, { "epoch": 0.54, "grad_norm": 4.043615341186523, "learning_rate": 0.0002, "loss": 1.2494, "step": 133080 }, { "epoch": 0.54, "grad_norm": 2.236037492752075, "learning_rate": 0.0002, "loss": 1.5829, "step": 133090 }, { "epoch": 0.54, "grad_norm": 2.6813364028930664, "learning_rate": 0.0002, "loss": 1.67, "step": 133100 }, { "epoch": 0.54, "grad_norm": 5.520621299743652, "learning_rate": 0.0002, "loss": 1.7563, "step": 133110 }, { "epoch": 0.54, "grad_norm": 2.558337450027466, "learning_rate": 0.0002, "loss": 1.4476, "step": 133120 }, { "epoch": 0.54, "grad_norm": 1.5944336652755737, "learning_rate": 0.0002, "loss": 1.5112, "step": 133130 }, { "epoch": 0.54, "grad_norm": 3.678083896636963, "learning_rate": 0.0002, "loss": 1.6608, "step": 133140 }, { "epoch": 0.54, "grad_norm": 4.146426200866699, "learning_rate": 0.0002, "loss": 1.5229, "step": 133150 }, { "epoch": 0.54, "grad_norm": 3.1245391368865967, "learning_rate": 0.0002, "loss": 1.4749, "step": 133160 }, { "epoch": 0.54, "grad_norm": 2.988102674484253, "learning_rate": 0.0002, "loss": 1.6329, "step": 133170 }, { "epoch": 0.54, "grad_norm": 3.7546937465667725, "learning_rate": 0.0002, "loss": 1.6377, "step": 133180 }, { "epoch": 0.54, "grad_norm": 1.8186097145080566, "learning_rate": 0.0002, "loss": 1.7976, "step": 133190 }, { "epoch": 0.54, "grad_norm": 3.3126068115234375, "learning_rate": 0.0002, "loss": 1.4443, "step": 133200 }, { "epoch": 0.54, "grad_norm": 4.3709540367126465, "learning_rate": 0.0002, "loss": 1.6243, "step": 133210 }, { "epoch": 0.54, "grad_norm": 4.408437728881836, "learning_rate": 0.0002, "loss": 1.7285, "step": 133220 }, { "epoch": 0.54, "grad_norm": 3.074345588684082, "learning_rate": 0.0002, "loss": 1.57, "step": 133230 }, { "epoch": 0.54, "grad_norm": 2.705536127090454, "learning_rate": 0.0002, "loss": 1.402, "step": 133240 }, { "epoch": 0.54, "grad_norm": 2.2050185203552246, "learning_rate": 0.0002, "loss": 1.4807, "step": 133250 }, { "epoch": 0.54, "grad_norm": 3.478865623474121, "learning_rate": 0.0002, "loss": 1.5625, "step": 133260 }, { "epoch": 0.54, "grad_norm": 3.6229119300842285, "learning_rate": 0.0002, "loss": 1.9759, "step": 133270 }, { "epoch": 0.54, "grad_norm": 2.3648059368133545, "learning_rate": 0.0002, "loss": 1.6926, "step": 133280 }, { "epoch": 0.54, "grad_norm": 2.3456668853759766, "learning_rate": 0.0002, "loss": 1.6003, "step": 133290 }, { "epoch": 0.54, "grad_norm": 1.3646693229675293, "learning_rate": 0.0002, "loss": 1.5917, "step": 133300 }, { "epoch": 0.54, "grad_norm": 2.041238784790039, "learning_rate": 0.0002, "loss": 1.6198, "step": 133310 }, { "epoch": 0.54, "grad_norm": 3.7132015228271484, "learning_rate": 0.0002, "loss": 1.5834, "step": 133320 }, { "epoch": 0.54, "grad_norm": 3.3170533180236816, "learning_rate": 0.0002, "loss": 1.6611, "step": 133330 }, { "epoch": 0.54, "grad_norm": 2.5727741718292236, "learning_rate": 0.0002, "loss": 1.5224, "step": 133340 }, { "epoch": 0.54, "grad_norm": 1.9771987199783325, "learning_rate": 0.0002, "loss": 1.5867, "step": 133350 }, { "epoch": 0.54, "grad_norm": 3.968938112258911, "learning_rate": 0.0002, "loss": 1.4541, "step": 133360 }, { "epoch": 0.54, "grad_norm": 2.160567283630371, "learning_rate": 0.0002, "loss": 1.6426, "step": 133370 }, { "epoch": 0.54, "grad_norm": 1.8084427118301392, "learning_rate": 0.0002, "loss": 1.614, "step": 133380 }, { "epoch": 0.54, "grad_norm": 3.01977276802063, "learning_rate": 0.0002, "loss": 1.3888, "step": 133390 }, { "epoch": 0.54, "grad_norm": 3.439272165298462, "learning_rate": 0.0002, "loss": 1.7836, "step": 133400 }, { "epoch": 0.54, "grad_norm": 2.4200282096862793, "learning_rate": 0.0002, "loss": 1.4629, "step": 133410 }, { "epoch": 0.54, "grad_norm": 3.3874881267547607, "learning_rate": 0.0002, "loss": 1.823, "step": 133420 }, { "epoch": 0.54, "grad_norm": 2.6143078804016113, "learning_rate": 0.0002, "loss": 1.3954, "step": 133430 }, { "epoch": 0.54, "grad_norm": 2.636605978012085, "learning_rate": 0.0002, "loss": 1.5031, "step": 133440 }, { "epoch": 0.54, "grad_norm": 3.100468397140503, "learning_rate": 0.0002, "loss": 1.4919, "step": 133450 }, { "epoch": 0.54, "grad_norm": 3.1764073371887207, "learning_rate": 0.0002, "loss": 1.6725, "step": 133460 }, { "epoch": 0.54, "grad_norm": 2.9122419357299805, "learning_rate": 0.0002, "loss": 1.6165, "step": 133470 }, { "epoch": 0.54, "grad_norm": 3.3119449615478516, "learning_rate": 0.0002, "loss": 1.6724, "step": 133480 }, { "epoch": 0.54, "grad_norm": 2.5349509716033936, "learning_rate": 0.0002, "loss": 1.6018, "step": 133490 }, { "epoch": 0.54, "grad_norm": 2.712228536605835, "learning_rate": 0.0002, "loss": 1.6047, "step": 133500 }, { "epoch": 0.54, "grad_norm": 3.170473098754883, "learning_rate": 0.0002, "loss": 1.5794, "step": 133510 }, { "epoch": 0.54, "grad_norm": 5.774148464202881, "learning_rate": 0.0002, "loss": 1.5832, "step": 133520 }, { "epoch": 0.54, "grad_norm": 3.0464682579040527, "learning_rate": 0.0002, "loss": 1.5881, "step": 133530 }, { "epoch": 0.54, "grad_norm": 2.9972429275512695, "learning_rate": 0.0002, "loss": 1.5213, "step": 133540 }, { "epoch": 0.54, "grad_norm": 3.7148616313934326, "learning_rate": 0.0002, "loss": 1.7282, "step": 133550 }, { "epoch": 0.54, "grad_norm": 3.2061493396759033, "learning_rate": 0.0002, "loss": 1.487, "step": 133560 }, { "epoch": 0.54, "grad_norm": 3.746885299682617, "learning_rate": 0.0002, "loss": 1.6491, "step": 133570 }, { "epoch": 0.54, "grad_norm": 2.0605380535125732, "learning_rate": 0.0002, "loss": 1.622, "step": 133580 }, { "epoch": 0.54, "grad_norm": 4.105598449707031, "learning_rate": 0.0002, "loss": 1.4605, "step": 133590 }, { "epoch": 0.54, "grad_norm": 2.350541830062866, "learning_rate": 0.0002, "loss": 1.501, "step": 133600 }, { "epoch": 0.54, "grad_norm": 3.5393576622009277, "learning_rate": 0.0002, "loss": 1.5196, "step": 133610 }, { "epoch": 0.54, "grad_norm": 3.3107872009277344, "learning_rate": 0.0002, "loss": 1.7026, "step": 133620 }, { "epoch": 0.54, "grad_norm": 2.274284839630127, "learning_rate": 0.0002, "loss": 1.6493, "step": 133630 }, { "epoch": 0.54, "grad_norm": 4.124180316925049, "learning_rate": 0.0002, "loss": 1.7068, "step": 133640 }, { "epoch": 0.54, "grad_norm": 3.3961832523345947, "learning_rate": 0.0002, "loss": 1.3778, "step": 133650 }, { "epoch": 0.54, "grad_norm": 5.650259971618652, "learning_rate": 0.0002, "loss": 1.4886, "step": 133660 }, { "epoch": 0.54, "grad_norm": 2.9799327850341797, "learning_rate": 0.0002, "loss": 1.6859, "step": 133670 }, { "epoch": 0.54, "grad_norm": 1.8928143978118896, "learning_rate": 0.0002, "loss": 1.6781, "step": 133680 }, { "epoch": 0.54, "grad_norm": 2.2185380458831787, "learning_rate": 0.0002, "loss": 1.6042, "step": 133690 }, { "epoch": 0.54, "grad_norm": 3.4684085845947266, "learning_rate": 0.0002, "loss": 1.8037, "step": 133700 }, { "epoch": 0.54, "grad_norm": 2.3706207275390625, "learning_rate": 0.0002, "loss": 1.4111, "step": 133710 }, { "epoch": 0.54, "grad_norm": 2.0057239532470703, "learning_rate": 0.0002, "loss": 1.4132, "step": 133720 }, { "epoch": 0.54, "grad_norm": 2.871953010559082, "learning_rate": 0.0002, "loss": 1.5505, "step": 133730 }, { "epoch": 0.54, "grad_norm": 3.136117696762085, "learning_rate": 0.0002, "loss": 1.6788, "step": 133740 }, { "epoch": 0.54, "grad_norm": 4.292231559753418, "learning_rate": 0.0002, "loss": 1.4415, "step": 133750 }, { "epoch": 0.54, "grad_norm": 2.781466245651245, "learning_rate": 0.0002, "loss": 1.6404, "step": 133760 }, { "epoch": 0.54, "grad_norm": 4.2471442222595215, "learning_rate": 0.0002, "loss": 1.5953, "step": 133770 }, { "epoch": 0.54, "grad_norm": 2.9673948287963867, "learning_rate": 0.0002, "loss": 1.7837, "step": 133780 }, { "epoch": 0.54, "grad_norm": 2.634690999984741, "learning_rate": 0.0002, "loss": 1.5506, "step": 133790 }, { "epoch": 0.54, "grad_norm": 2.4830007553100586, "learning_rate": 0.0002, "loss": 1.4919, "step": 133800 }, { "epoch": 0.54, "grad_norm": 2.8362679481506348, "learning_rate": 0.0002, "loss": 1.4694, "step": 133810 }, { "epoch": 0.54, "grad_norm": 4.806083679199219, "learning_rate": 0.0002, "loss": 1.7689, "step": 133820 }, { "epoch": 0.54, "grad_norm": 1.6477866172790527, "learning_rate": 0.0002, "loss": 1.4421, "step": 133830 }, { "epoch": 0.54, "grad_norm": 3.7562780380249023, "learning_rate": 0.0002, "loss": 1.4883, "step": 133840 }, { "epoch": 0.54, "grad_norm": 3.505436420440674, "learning_rate": 0.0002, "loss": 1.8373, "step": 133850 }, { "epoch": 0.54, "grad_norm": 3.3383164405822754, "learning_rate": 0.0002, "loss": 1.5476, "step": 133860 }, { "epoch": 0.54, "grad_norm": 7.499658584594727, "learning_rate": 0.0002, "loss": 1.7719, "step": 133870 }, { "epoch": 0.55, "grad_norm": 2.3638763427734375, "learning_rate": 0.0002, "loss": 1.3528, "step": 133880 }, { "epoch": 0.55, "grad_norm": 2.0701260566711426, "learning_rate": 0.0002, "loss": 1.4306, "step": 133890 }, { "epoch": 0.55, "grad_norm": 3.314889669418335, "learning_rate": 0.0002, "loss": 1.6139, "step": 133900 }, { "epoch": 0.55, "grad_norm": 2.5796496868133545, "learning_rate": 0.0002, "loss": 1.4027, "step": 133910 }, { "epoch": 0.55, "grad_norm": 3.5127313137054443, "learning_rate": 0.0002, "loss": 1.7237, "step": 133920 }, { "epoch": 0.55, "grad_norm": 3.506765365600586, "learning_rate": 0.0002, "loss": 1.653, "step": 133930 }, { "epoch": 0.55, "grad_norm": 3.6329421997070312, "learning_rate": 0.0002, "loss": 1.6198, "step": 133940 }, { "epoch": 0.55, "grad_norm": 2.0110855102539062, "learning_rate": 0.0002, "loss": 1.3871, "step": 133950 }, { "epoch": 0.55, "grad_norm": 2.766291379928589, "learning_rate": 0.0002, "loss": 1.568, "step": 133960 }, { "epoch": 0.55, "grad_norm": 2.7808284759521484, "learning_rate": 0.0002, "loss": 1.5314, "step": 133970 }, { "epoch": 0.55, "grad_norm": 3.2014520168304443, "learning_rate": 0.0002, "loss": 1.7767, "step": 133980 }, { "epoch": 0.55, "grad_norm": 1.9286693334579468, "learning_rate": 0.0002, "loss": 1.7025, "step": 133990 }, { "epoch": 0.55, "grad_norm": 4.67029333114624, "learning_rate": 0.0002, "loss": 1.687, "step": 134000 }, { "epoch": 0.55, "grad_norm": 4.030566215515137, "learning_rate": 0.0002, "loss": 1.4746, "step": 134010 }, { "epoch": 0.55, "grad_norm": 1.715440273284912, "learning_rate": 0.0002, "loss": 1.4457, "step": 134020 }, { "epoch": 0.55, "grad_norm": 2.457383394241333, "learning_rate": 0.0002, "loss": 1.7012, "step": 134030 }, { "epoch": 0.55, "grad_norm": 3.195314407348633, "learning_rate": 0.0002, "loss": 1.521, "step": 134040 }, { "epoch": 0.55, "grad_norm": 3.7268741130828857, "learning_rate": 0.0002, "loss": 1.5134, "step": 134050 }, { "epoch": 0.55, "grad_norm": 2.3937249183654785, "learning_rate": 0.0002, "loss": 1.7209, "step": 134060 }, { "epoch": 0.55, "grad_norm": 2.28829026222229, "learning_rate": 0.0002, "loss": 1.543, "step": 134070 }, { "epoch": 0.55, "grad_norm": 3.02135968208313, "learning_rate": 0.0002, "loss": 1.6384, "step": 134080 }, { "epoch": 0.55, "grad_norm": 2.8499982357025146, "learning_rate": 0.0002, "loss": 1.6539, "step": 134090 }, { "epoch": 0.55, "grad_norm": 2.4848742485046387, "learning_rate": 0.0002, "loss": 1.8019, "step": 134100 }, { "epoch": 0.55, "grad_norm": 2.453946828842163, "learning_rate": 0.0002, "loss": 1.407, "step": 134110 }, { "epoch": 0.55, "grad_norm": 3.067357063293457, "learning_rate": 0.0002, "loss": 1.7399, "step": 134120 }, { "epoch": 0.55, "grad_norm": 2.87699556350708, "learning_rate": 0.0002, "loss": 1.51, "step": 134130 }, { "epoch": 0.55, "grad_norm": 11.241921424865723, "learning_rate": 0.0002, "loss": 1.6272, "step": 134140 }, { "epoch": 0.55, "grad_norm": 1.7566609382629395, "learning_rate": 0.0002, "loss": 1.4804, "step": 134150 }, { "epoch": 0.55, "grad_norm": 3.0086710453033447, "learning_rate": 0.0002, "loss": 1.7116, "step": 134160 }, { "epoch": 0.55, "grad_norm": 3.731386423110962, "learning_rate": 0.0002, "loss": 1.5646, "step": 134170 }, { "epoch": 0.55, "grad_norm": 1.5538560152053833, "learning_rate": 0.0002, "loss": 1.5022, "step": 134180 }, { "epoch": 0.55, "grad_norm": 3.2505135536193848, "learning_rate": 0.0002, "loss": 1.6908, "step": 134190 }, { "epoch": 0.55, "grad_norm": 2.9161322116851807, "learning_rate": 0.0002, "loss": 1.6995, "step": 134200 }, { "epoch": 0.55, "grad_norm": 3.344679594039917, "learning_rate": 0.0002, "loss": 1.5846, "step": 134210 }, { "epoch": 0.55, "grad_norm": 2.557171583175659, "learning_rate": 0.0002, "loss": 1.6907, "step": 134220 }, { "epoch": 0.55, "grad_norm": 3.483497381210327, "learning_rate": 0.0002, "loss": 1.5251, "step": 134230 }, { "epoch": 0.55, "grad_norm": 2.302415609359741, "learning_rate": 0.0002, "loss": 1.7365, "step": 134240 }, { "epoch": 0.55, "grad_norm": 2.8372561931610107, "learning_rate": 0.0002, "loss": 1.632, "step": 134250 }, { "epoch": 0.55, "grad_norm": 2.4881632328033447, "learning_rate": 0.0002, "loss": 1.5785, "step": 134260 }, { "epoch": 0.55, "grad_norm": 3.8211302757263184, "learning_rate": 0.0002, "loss": 1.7552, "step": 134270 }, { "epoch": 0.55, "grad_norm": 2.959923028945923, "learning_rate": 0.0002, "loss": 1.5456, "step": 134280 }, { "epoch": 0.55, "grad_norm": 3.74362850189209, "learning_rate": 0.0002, "loss": 1.4702, "step": 134290 }, { "epoch": 0.55, "grad_norm": 3.7576959133148193, "learning_rate": 0.0002, "loss": 1.5019, "step": 134300 }, { "epoch": 0.55, "grad_norm": 4.366917133331299, "learning_rate": 0.0002, "loss": 1.5598, "step": 134310 }, { "epoch": 0.55, "grad_norm": 2.8149495124816895, "learning_rate": 0.0002, "loss": 1.6607, "step": 134320 }, { "epoch": 0.55, "grad_norm": 2.153913736343384, "learning_rate": 0.0002, "loss": 1.6217, "step": 134330 }, { "epoch": 0.55, "grad_norm": 3.6126952171325684, "learning_rate": 0.0002, "loss": 1.4736, "step": 134340 }, { "epoch": 0.55, "grad_norm": 2.0770528316497803, "learning_rate": 0.0002, "loss": 1.6553, "step": 134350 }, { "epoch": 0.55, "grad_norm": 2.9627363681793213, "learning_rate": 0.0002, "loss": 1.5734, "step": 134360 }, { "epoch": 0.55, "grad_norm": 2.4370434284210205, "learning_rate": 0.0002, "loss": 1.6458, "step": 134370 }, { "epoch": 0.55, "grad_norm": 2.2514636516571045, "learning_rate": 0.0002, "loss": 1.6136, "step": 134380 }, { "epoch": 0.55, "grad_norm": 2.0463500022888184, "learning_rate": 0.0002, "loss": 1.6809, "step": 134390 }, { "epoch": 0.55, "grad_norm": 2.6074934005737305, "learning_rate": 0.0002, "loss": 1.6575, "step": 134400 }, { "epoch": 0.55, "grad_norm": 2.4051167964935303, "learning_rate": 0.0002, "loss": 1.6392, "step": 134410 }, { "epoch": 0.55, "grad_norm": 2.759432315826416, "learning_rate": 0.0002, "loss": 1.5335, "step": 134420 }, { "epoch": 0.55, "grad_norm": 3.518120050430298, "learning_rate": 0.0002, "loss": 1.5988, "step": 134430 }, { "epoch": 0.55, "grad_norm": 3.2552876472473145, "learning_rate": 0.0002, "loss": 1.7701, "step": 134440 }, { "epoch": 0.55, "grad_norm": 2.983280658721924, "learning_rate": 0.0002, "loss": 1.8638, "step": 134450 }, { "epoch": 0.55, "grad_norm": 10.920119285583496, "learning_rate": 0.0002, "loss": 1.6392, "step": 134460 }, { "epoch": 0.55, "grad_norm": 2.3163902759552, "learning_rate": 0.0002, "loss": 1.7769, "step": 134470 }, { "epoch": 0.55, "grad_norm": 5.778345108032227, "learning_rate": 0.0002, "loss": 1.6347, "step": 134480 }, { "epoch": 0.55, "grad_norm": 2.598585367202759, "learning_rate": 0.0002, "loss": 1.4549, "step": 134490 }, { "epoch": 0.55, "grad_norm": 2.3543617725372314, "learning_rate": 0.0002, "loss": 1.6204, "step": 134500 }, { "epoch": 0.55, "grad_norm": 2.7247629165649414, "learning_rate": 0.0002, "loss": 1.6007, "step": 134510 }, { "epoch": 0.55, "grad_norm": 2.99178147315979, "learning_rate": 0.0002, "loss": 1.6414, "step": 134520 }, { "epoch": 0.55, "grad_norm": 4.225892543792725, "learning_rate": 0.0002, "loss": 1.7294, "step": 134530 }, { "epoch": 0.55, "grad_norm": 2.5417308807373047, "learning_rate": 0.0002, "loss": 1.4061, "step": 134540 }, { "epoch": 0.55, "grad_norm": 3.034977674484253, "learning_rate": 0.0002, "loss": 1.7835, "step": 134550 }, { "epoch": 0.55, "grad_norm": 3.741173267364502, "learning_rate": 0.0002, "loss": 1.5907, "step": 134560 }, { "epoch": 0.55, "grad_norm": 1.680893898010254, "learning_rate": 0.0002, "loss": 1.6214, "step": 134570 }, { "epoch": 0.55, "grad_norm": 3.0465149879455566, "learning_rate": 0.0002, "loss": 1.4215, "step": 134580 }, { "epoch": 0.55, "grad_norm": 2.5113613605499268, "learning_rate": 0.0002, "loss": 1.3978, "step": 134590 }, { "epoch": 0.55, "grad_norm": 3.2608213424682617, "learning_rate": 0.0002, "loss": 1.6631, "step": 134600 }, { "epoch": 0.55, "grad_norm": 3.0026509761810303, "learning_rate": 0.0002, "loss": 1.6141, "step": 134610 }, { "epoch": 0.55, "grad_norm": 2.520437479019165, "learning_rate": 0.0002, "loss": 1.6494, "step": 134620 }, { "epoch": 0.55, "grad_norm": 2.388371706008911, "learning_rate": 0.0002, "loss": 1.7547, "step": 134630 }, { "epoch": 0.55, "grad_norm": 2.2577738761901855, "learning_rate": 0.0002, "loss": 1.5128, "step": 134640 }, { "epoch": 0.55, "grad_norm": 1.958665132522583, "learning_rate": 0.0002, "loss": 1.4766, "step": 134650 }, { "epoch": 0.55, "grad_norm": 2.333284616470337, "learning_rate": 0.0002, "loss": 1.5134, "step": 134660 }, { "epoch": 0.55, "grad_norm": 3.463763475418091, "learning_rate": 0.0002, "loss": 1.5042, "step": 134670 }, { "epoch": 0.55, "grad_norm": 2.4218008518218994, "learning_rate": 0.0002, "loss": 1.5736, "step": 134680 }, { "epoch": 0.55, "grad_norm": 3.5027718544006348, "learning_rate": 0.0002, "loss": 1.7005, "step": 134690 }, { "epoch": 0.55, "grad_norm": 3.6790544986724854, "learning_rate": 0.0002, "loss": 1.6748, "step": 134700 }, { "epoch": 0.55, "grad_norm": 1.8774431943893433, "learning_rate": 0.0002, "loss": 1.6322, "step": 134710 }, { "epoch": 0.55, "grad_norm": 2.2430412769317627, "learning_rate": 0.0002, "loss": 1.6109, "step": 134720 }, { "epoch": 0.55, "grad_norm": 2.7556893825531006, "learning_rate": 0.0002, "loss": 1.7942, "step": 134730 }, { "epoch": 0.55, "grad_norm": 2.4879417419433594, "learning_rate": 0.0002, "loss": 1.5008, "step": 134740 }, { "epoch": 0.55, "grad_norm": 4.0518903732299805, "learning_rate": 0.0002, "loss": 1.598, "step": 134750 }, { "epoch": 0.55, "grad_norm": 3.4124157428741455, "learning_rate": 0.0002, "loss": 1.2788, "step": 134760 }, { "epoch": 0.55, "grad_norm": 3.2558705806732178, "learning_rate": 0.0002, "loss": 1.7081, "step": 134770 }, { "epoch": 0.55, "grad_norm": 2.5003662109375, "learning_rate": 0.0002, "loss": 1.541, "step": 134780 }, { "epoch": 0.55, "grad_norm": 4.117000579833984, "learning_rate": 0.0002, "loss": 1.8306, "step": 134790 }, { "epoch": 0.55, "grad_norm": 4.823728561401367, "learning_rate": 0.0002, "loss": 1.4503, "step": 134800 }, { "epoch": 0.55, "grad_norm": 3.3945138454437256, "learning_rate": 0.0002, "loss": 1.6749, "step": 134810 }, { "epoch": 0.55, "grad_norm": 2.8981714248657227, "learning_rate": 0.0002, "loss": 1.5807, "step": 134820 }, { "epoch": 0.55, "grad_norm": 2.2007923126220703, "learning_rate": 0.0002, "loss": 1.4885, "step": 134830 }, { "epoch": 0.55, "grad_norm": 2.301938772201538, "learning_rate": 0.0002, "loss": 1.7161, "step": 134840 }, { "epoch": 0.55, "grad_norm": 3.1653547286987305, "learning_rate": 0.0002, "loss": 1.4672, "step": 134850 }, { "epoch": 0.55, "grad_norm": 2.7072913646698, "learning_rate": 0.0002, "loss": 1.5782, "step": 134860 }, { "epoch": 0.55, "grad_norm": 3.0029828548431396, "learning_rate": 0.0002, "loss": 1.6243, "step": 134870 }, { "epoch": 0.55, "grad_norm": 2.496567487716675, "learning_rate": 0.0002, "loss": 1.6321, "step": 134880 }, { "epoch": 0.55, "grad_norm": 3.199547290802002, "learning_rate": 0.0002, "loss": 1.5563, "step": 134890 }, { "epoch": 0.55, "grad_norm": 3.205615997314453, "learning_rate": 0.0002, "loss": 1.5057, "step": 134900 }, { "epoch": 0.55, "grad_norm": 2.4157960414886475, "learning_rate": 0.0002, "loss": 1.2429, "step": 134910 }, { "epoch": 0.55, "grad_norm": 2.2341785430908203, "learning_rate": 0.0002, "loss": 1.4794, "step": 134920 }, { "epoch": 0.55, "grad_norm": 3.761305809020996, "learning_rate": 0.0002, "loss": 1.4668, "step": 134930 }, { "epoch": 0.55, "grad_norm": 2.9767844676971436, "learning_rate": 0.0002, "loss": 1.484, "step": 134940 }, { "epoch": 0.55, "grad_norm": 1.9414817094802856, "learning_rate": 0.0002, "loss": 1.4071, "step": 134950 }, { "epoch": 0.55, "grad_norm": 2.4042470455169678, "learning_rate": 0.0002, "loss": 1.5108, "step": 134960 }, { "epoch": 0.55, "grad_norm": 3.6790599822998047, "learning_rate": 0.0002, "loss": 1.6429, "step": 134970 }, { "epoch": 0.55, "grad_norm": 3.1791257858276367, "learning_rate": 0.0002, "loss": 1.7575, "step": 134980 }, { "epoch": 0.55, "grad_norm": 2.920422315597534, "learning_rate": 0.0002, "loss": 1.5078, "step": 134990 }, { "epoch": 0.55, "grad_norm": 2.185378074645996, "learning_rate": 0.0002, "loss": 1.3832, "step": 135000 }, { "epoch": 0.55, "grad_norm": 2.3109045028686523, "learning_rate": 0.0002, "loss": 1.6895, "step": 135010 }, { "epoch": 0.55, "grad_norm": 3.4338107109069824, "learning_rate": 0.0002, "loss": 1.6385, "step": 135020 }, { "epoch": 0.55, "grad_norm": 2.3523597717285156, "learning_rate": 0.0002, "loss": 1.6033, "step": 135030 }, { "epoch": 0.55, "grad_norm": 3.000495433807373, "learning_rate": 0.0002, "loss": 1.5265, "step": 135040 }, { "epoch": 0.55, "grad_norm": 5.325974464416504, "learning_rate": 0.0002, "loss": 1.8344, "step": 135050 }, { "epoch": 0.55, "grad_norm": 4.217962741851807, "learning_rate": 0.0002, "loss": 1.4421, "step": 135060 }, { "epoch": 0.55, "grad_norm": 3.1162426471710205, "learning_rate": 0.0002, "loss": 1.5558, "step": 135070 }, { "epoch": 0.55, "grad_norm": 6.07834005355835, "learning_rate": 0.0002, "loss": 1.5014, "step": 135080 }, { "epoch": 0.55, "grad_norm": 2.0961060523986816, "learning_rate": 0.0002, "loss": 1.5372, "step": 135090 }, { "epoch": 0.55, "grad_norm": 5.101419448852539, "learning_rate": 0.0002, "loss": 1.8017, "step": 135100 }, { "epoch": 0.55, "grad_norm": 2.165363311767578, "learning_rate": 0.0002, "loss": 1.5105, "step": 135110 }, { "epoch": 0.55, "grad_norm": 2.9019548892974854, "learning_rate": 0.0002, "loss": 1.4691, "step": 135120 }, { "epoch": 0.55, "grad_norm": 3.07326340675354, "learning_rate": 0.0002, "loss": 1.7035, "step": 135130 }, { "epoch": 0.55, "grad_norm": 3.316850423812866, "learning_rate": 0.0002, "loss": 1.7964, "step": 135140 }, { "epoch": 0.55, "grad_norm": 3.1618731021881104, "learning_rate": 0.0002, "loss": 1.7734, "step": 135150 }, { "epoch": 0.55, "grad_norm": 1.6873562335968018, "learning_rate": 0.0002, "loss": 1.6277, "step": 135160 }, { "epoch": 0.55, "grad_norm": 1.924946904182434, "learning_rate": 0.0002, "loss": 1.7597, "step": 135170 }, { "epoch": 0.55, "grad_norm": 2.0226409435272217, "learning_rate": 0.0002, "loss": 1.604, "step": 135180 }, { "epoch": 0.55, "grad_norm": 4.111810684204102, "learning_rate": 0.0002, "loss": 1.667, "step": 135190 }, { "epoch": 0.55, "grad_norm": 2.3858861923217773, "learning_rate": 0.0002, "loss": 1.658, "step": 135200 }, { "epoch": 0.55, "grad_norm": 3.170073986053467, "learning_rate": 0.0002, "loss": 1.521, "step": 135210 }, { "epoch": 0.55, "grad_norm": 4.191363334655762, "learning_rate": 0.0002, "loss": 1.4361, "step": 135220 }, { "epoch": 0.55, "grad_norm": 3.3603861331939697, "learning_rate": 0.0002, "loss": 1.5908, "step": 135230 }, { "epoch": 0.55, "grad_norm": 3.1239333152770996, "learning_rate": 0.0002, "loss": 1.7273, "step": 135240 }, { "epoch": 0.55, "grad_norm": 1.9923129081726074, "learning_rate": 0.0002, "loss": 1.5602, "step": 135250 }, { "epoch": 0.55, "grad_norm": 6.904910564422607, "learning_rate": 0.0002, "loss": 1.7199, "step": 135260 }, { "epoch": 0.55, "grad_norm": 3.5781562328338623, "learning_rate": 0.0002, "loss": 1.689, "step": 135270 }, { "epoch": 0.55, "grad_norm": 2.6828460693359375, "learning_rate": 0.0002, "loss": 1.3951, "step": 135280 }, { "epoch": 0.55, "grad_norm": 3.554471015930176, "learning_rate": 0.0002, "loss": 1.4842, "step": 135290 }, { "epoch": 0.55, "grad_norm": 3.2341816425323486, "learning_rate": 0.0002, "loss": 1.7561, "step": 135300 }, { "epoch": 0.55, "grad_norm": 3.082495927810669, "learning_rate": 0.0002, "loss": 1.4475, "step": 135310 }, { "epoch": 0.55, "grad_norm": 1.394470453262329, "learning_rate": 0.0002, "loss": 1.795, "step": 135320 }, { "epoch": 0.55, "grad_norm": 3.740652084350586, "learning_rate": 0.0002, "loss": 1.6495, "step": 135330 }, { "epoch": 0.55, "grad_norm": 3.8842318058013916, "learning_rate": 0.0002, "loss": 1.403, "step": 135340 }, { "epoch": 0.55, "grad_norm": 2.6687958240509033, "learning_rate": 0.0002, "loss": 1.7065, "step": 135350 }, { "epoch": 0.55, "grad_norm": 4.566591262817383, "learning_rate": 0.0002, "loss": 1.4464, "step": 135360 }, { "epoch": 0.55, "grad_norm": 2.1734869480133057, "learning_rate": 0.0002, "loss": 1.4997, "step": 135370 }, { "epoch": 0.55, "grad_norm": 2.7936460971832275, "learning_rate": 0.0002, "loss": 1.9526, "step": 135380 }, { "epoch": 0.55, "grad_norm": 2.3060414791107178, "learning_rate": 0.0002, "loss": 1.8539, "step": 135390 }, { "epoch": 0.55, "grad_norm": 2.7865121364593506, "learning_rate": 0.0002, "loss": 1.4644, "step": 135400 }, { "epoch": 0.55, "grad_norm": 2.591792106628418, "learning_rate": 0.0002, "loss": 1.7517, "step": 135410 }, { "epoch": 0.55, "grad_norm": 2.7007100582122803, "learning_rate": 0.0002, "loss": 1.4255, "step": 135420 }, { "epoch": 0.55, "grad_norm": 3.7804923057556152, "learning_rate": 0.0002, "loss": 1.6557, "step": 135430 }, { "epoch": 0.55, "grad_norm": 1.9313961267471313, "learning_rate": 0.0002, "loss": 1.5603, "step": 135440 }, { "epoch": 0.55, "grad_norm": 1.9773975610733032, "learning_rate": 0.0002, "loss": 1.7427, "step": 135450 }, { "epoch": 0.55, "grad_norm": 3.339694023132324, "learning_rate": 0.0002, "loss": 1.622, "step": 135460 }, { "epoch": 0.55, "grad_norm": 4.228157043457031, "learning_rate": 0.0002, "loss": 1.6149, "step": 135470 }, { "epoch": 0.55, "grad_norm": 1.928106427192688, "learning_rate": 0.0002, "loss": 1.4985, "step": 135480 }, { "epoch": 0.55, "grad_norm": 2.130225419998169, "learning_rate": 0.0002, "loss": 1.6154, "step": 135490 }, { "epoch": 0.55, "grad_norm": 2.9058475494384766, "learning_rate": 0.0002, "loss": 1.6264, "step": 135500 }, { "epoch": 0.55, "grad_norm": 4.61972713470459, "learning_rate": 0.0002, "loss": 1.3847, "step": 135510 }, { "epoch": 0.55, "grad_norm": 3.2187516689300537, "learning_rate": 0.0002, "loss": 1.5779, "step": 135520 }, { "epoch": 0.55, "grad_norm": 2.6724741458892822, "learning_rate": 0.0002, "loss": 1.4012, "step": 135530 }, { "epoch": 0.55, "grad_norm": 2.5651769638061523, "learning_rate": 0.0002, "loss": 1.8368, "step": 135540 }, { "epoch": 0.55, "grad_norm": 3.422541379928589, "learning_rate": 0.0002, "loss": 1.571, "step": 135550 }, { "epoch": 0.55, "grad_norm": 2.880309581756592, "learning_rate": 0.0002, "loss": 1.7771, "step": 135560 }, { "epoch": 0.55, "grad_norm": 4.256182670593262, "learning_rate": 0.0002, "loss": 1.6355, "step": 135570 }, { "epoch": 0.55, "grad_norm": 1.9663752317428589, "learning_rate": 0.0002, "loss": 1.9617, "step": 135580 }, { "epoch": 0.55, "grad_norm": 2.7870326042175293, "learning_rate": 0.0002, "loss": 1.7573, "step": 135590 }, { "epoch": 0.55, "grad_norm": 3.736049175262451, "learning_rate": 0.0002, "loss": 1.4948, "step": 135600 }, { "epoch": 0.55, "grad_norm": 4.744179725646973, "learning_rate": 0.0002, "loss": 1.6073, "step": 135610 }, { "epoch": 0.55, "grad_norm": 2.2614784240722656, "learning_rate": 0.0002, "loss": 1.6125, "step": 135620 }, { "epoch": 0.55, "grad_norm": 3.61253023147583, "learning_rate": 0.0002, "loss": 1.6225, "step": 135630 }, { "epoch": 0.55, "grad_norm": 1.878764271736145, "learning_rate": 0.0002, "loss": 1.3841, "step": 135640 }, { "epoch": 0.55, "grad_norm": 4.738122940063477, "learning_rate": 0.0002, "loss": 1.5377, "step": 135650 }, { "epoch": 0.55, "grad_norm": 4.436850070953369, "learning_rate": 0.0002, "loss": 1.6028, "step": 135660 }, { "epoch": 0.55, "grad_norm": 11.78845500946045, "learning_rate": 0.0002, "loss": 1.7252, "step": 135670 }, { "epoch": 0.55, "grad_norm": 3.112382411956787, "learning_rate": 0.0002, "loss": 1.5539, "step": 135680 }, { "epoch": 0.55, "grad_norm": 2.512237548828125, "learning_rate": 0.0002, "loss": 1.6859, "step": 135690 }, { "epoch": 0.55, "grad_norm": 2.469907760620117, "learning_rate": 0.0002, "loss": 1.4362, "step": 135700 }, { "epoch": 0.55, "grad_norm": 2.3377015590667725, "learning_rate": 0.0002, "loss": 1.3441, "step": 135710 }, { "epoch": 0.55, "grad_norm": 2.54060435295105, "learning_rate": 0.0002, "loss": 1.7308, "step": 135720 }, { "epoch": 0.55, "grad_norm": 2.301492214202881, "learning_rate": 0.0002, "loss": 1.6066, "step": 135730 }, { "epoch": 0.55, "grad_norm": 1.7158021926879883, "learning_rate": 0.0002, "loss": 1.5841, "step": 135740 }, { "epoch": 0.55, "grad_norm": 1.875072956085205, "learning_rate": 0.0002, "loss": 1.4484, "step": 135750 }, { "epoch": 0.55, "grad_norm": 1.7711933851242065, "learning_rate": 0.0002, "loss": 1.9858, "step": 135760 }, { "epoch": 0.55, "grad_norm": 4.614899635314941, "learning_rate": 0.0002, "loss": 1.6421, "step": 135770 }, { "epoch": 0.55, "grad_norm": 1.7659951448440552, "learning_rate": 0.0002, "loss": 1.4536, "step": 135780 }, { "epoch": 0.55, "grad_norm": 2.5038890838623047, "learning_rate": 0.0002, "loss": 1.5894, "step": 135790 }, { "epoch": 0.55, "grad_norm": 3.319286823272705, "learning_rate": 0.0002, "loss": 1.5341, "step": 135800 }, { "epoch": 0.55, "grad_norm": 4.9495463371276855, "learning_rate": 0.0002, "loss": 1.5213, "step": 135810 }, { "epoch": 0.55, "grad_norm": 4.108973026275635, "learning_rate": 0.0002, "loss": 1.586, "step": 135820 }, { "epoch": 0.55, "grad_norm": 2.8830032348632812, "learning_rate": 0.0002, "loss": 1.5184, "step": 135830 }, { "epoch": 0.55, "grad_norm": 3.144336462020874, "learning_rate": 0.0002, "loss": 1.5001, "step": 135840 }, { "epoch": 0.55, "grad_norm": 2.3981900215148926, "learning_rate": 0.0002, "loss": 1.6215, "step": 135850 }, { "epoch": 0.55, "grad_norm": 3.877784490585327, "learning_rate": 0.0002, "loss": 1.5071, "step": 135860 }, { "epoch": 0.55, "grad_norm": 2.0470941066741943, "learning_rate": 0.0002, "loss": 1.6468, "step": 135870 }, { "epoch": 0.55, "grad_norm": 2.7649383544921875, "learning_rate": 0.0002, "loss": 1.5575, "step": 135880 }, { "epoch": 0.55, "grad_norm": 3.015094041824341, "learning_rate": 0.0002, "loss": 1.4911, "step": 135890 }, { "epoch": 0.55, "grad_norm": 2.5283732414245605, "learning_rate": 0.0002, "loss": 1.4917, "step": 135900 }, { "epoch": 0.55, "grad_norm": 2.770191192626953, "learning_rate": 0.0002, "loss": 1.4382, "step": 135910 }, { "epoch": 0.55, "grad_norm": 3.5564897060394287, "learning_rate": 0.0002, "loss": 1.6622, "step": 135920 }, { "epoch": 0.55, "grad_norm": 2.066343307495117, "learning_rate": 0.0002, "loss": 1.6583, "step": 135930 }, { "epoch": 0.55, "grad_norm": 3.9186365604400635, "learning_rate": 0.0002, "loss": 1.6528, "step": 135940 }, { "epoch": 0.55, "grad_norm": 3.439878225326538, "learning_rate": 0.0002, "loss": 1.366, "step": 135950 }, { "epoch": 0.55, "grad_norm": 2.671093702316284, "learning_rate": 0.0002, "loss": 1.5266, "step": 135960 }, { "epoch": 0.55, "grad_norm": 2.642627477645874, "learning_rate": 0.0002, "loss": 1.8889, "step": 135970 }, { "epoch": 0.55, "grad_norm": 2.9578232765197754, "learning_rate": 0.0002, "loss": 1.4613, "step": 135980 }, { "epoch": 0.55, "grad_norm": 3.6485681533813477, "learning_rate": 0.0002, "loss": 1.5848, "step": 135990 }, { "epoch": 0.55, "grad_norm": 3.4440090656280518, "learning_rate": 0.0002, "loss": 1.642, "step": 136000 }, { "epoch": 0.55, "grad_norm": 1.8865033388137817, "learning_rate": 0.0002, "loss": 1.5326, "step": 136010 }, { "epoch": 0.55, "grad_norm": 2.239804983139038, "learning_rate": 0.0002, "loss": 1.4511, "step": 136020 }, { "epoch": 0.55, "grad_norm": 5.415381908416748, "learning_rate": 0.0002, "loss": 1.6554, "step": 136030 }, { "epoch": 0.55, "grad_norm": 4.304215908050537, "learning_rate": 0.0002, "loss": 1.6424, "step": 136040 }, { "epoch": 0.55, "grad_norm": 3.7108685970306396, "learning_rate": 0.0002, "loss": 1.7411, "step": 136050 }, { "epoch": 0.55, "grad_norm": 1.9612960815429688, "learning_rate": 0.0002, "loss": 1.5879, "step": 136060 }, { "epoch": 0.55, "grad_norm": 4.451406955718994, "learning_rate": 0.0002, "loss": 1.5335, "step": 136070 }, { "epoch": 0.55, "grad_norm": 5.70948600769043, "learning_rate": 0.0002, "loss": 1.4763, "step": 136080 }, { "epoch": 0.55, "grad_norm": 4.535614013671875, "learning_rate": 0.0002, "loss": 1.6126, "step": 136090 }, { "epoch": 0.55, "grad_norm": 3.141885757446289, "learning_rate": 0.0002, "loss": 1.475, "step": 136100 }, { "epoch": 0.55, "grad_norm": 2.0357666015625, "learning_rate": 0.0002, "loss": 1.494, "step": 136110 }, { "epoch": 0.55, "grad_norm": 2.8144400119781494, "learning_rate": 0.0002, "loss": 1.6627, "step": 136120 }, { "epoch": 0.55, "grad_norm": 2.9528491497039795, "learning_rate": 0.0002, "loss": 1.5333, "step": 136130 }, { "epoch": 0.55, "grad_norm": 2.3867883682250977, "learning_rate": 0.0002, "loss": 1.5887, "step": 136140 }, { "epoch": 0.55, "grad_norm": 4.119438171386719, "learning_rate": 0.0002, "loss": 1.6056, "step": 136150 }, { "epoch": 0.55, "grad_norm": 3.9905993938446045, "learning_rate": 0.0002, "loss": 1.6071, "step": 136160 }, { "epoch": 0.55, "grad_norm": 2.7003729343414307, "learning_rate": 0.0002, "loss": 1.5498, "step": 136170 }, { "epoch": 0.55, "grad_norm": 3.2298309803009033, "learning_rate": 0.0002, "loss": 1.7926, "step": 136180 }, { "epoch": 0.55, "grad_norm": 3.727233648300171, "learning_rate": 0.0002, "loss": 1.5634, "step": 136190 }, { "epoch": 0.55, "grad_norm": 3.8767025470733643, "learning_rate": 0.0002, "loss": 1.4146, "step": 136200 }, { "epoch": 0.55, "grad_norm": 2.2067997455596924, "learning_rate": 0.0002, "loss": 1.4006, "step": 136210 }, { "epoch": 0.55, "grad_norm": 1.9609359502792358, "learning_rate": 0.0002, "loss": 1.5129, "step": 136220 }, { "epoch": 0.55, "grad_norm": 3.2537105083465576, "learning_rate": 0.0002, "loss": 1.8275, "step": 136230 }, { "epoch": 0.55, "grad_norm": 2.058582067489624, "learning_rate": 0.0002, "loss": 1.3946, "step": 136240 }, { "epoch": 0.55, "grad_norm": 4.0561981201171875, "learning_rate": 0.0002, "loss": 1.4668, "step": 136250 }, { "epoch": 0.55, "grad_norm": 4.231278419494629, "learning_rate": 0.0002, "loss": 1.5717, "step": 136260 }, { "epoch": 0.55, "grad_norm": 4.499081611633301, "learning_rate": 0.0002, "loss": 1.2872, "step": 136270 }, { "epoch": 0.55, "grad_norm": 4.688112258911133, "learning_rate": 0.0002, "loss": 1.5992, "step": 136280 }, { "epoch": 0.55, "grad_norm": 2.0293829441070557, "learning_rate": 0.0002, "loss": 1.7204, "step": 136290 }, { "epoch": 0.55, "grad_norm": 1.9090046882629395, "learning_rate": 0.0002, "loss": 1.533, "step": 136300 }, { "epoch": 0.55, "grad_norm": 2.7452454566955566, "learning_rate": 0.0002, "loss": 1.5765, "step": 136310 }, { "epoch": 0.55, "grad_norm": 2.286454200744629, "learning_rate": 0.0002, "loss": 1.9676, "step": 136320 }, { "epoch": 0.55, "grad_norm": 2.1124658584594727, "learning_rate": 0.0002, "loss": 1.5641, "step": 136330 }, { "epoch": 0.56, "grad_norm": 2.8129491806030273, "learning_rate": 0.0002, "loss": 1.4705, "step": 136340 }, { "epoch": 0.56, "grad_norm": 2.2985267639160156, "learning_rate": 0.0002, "loss": 1.6677, "step": 136350 }, { "epoch": 0.56, "grad_norm": 2.0560381412506104, "learning_rate": 0.0002, "loss": 1.4321, "step": 136360 }, { "epoch": 0.56, "grad_norm": 1.6793692111968994, "learning_rate": 0.0002, "loss": 1.5875, "step": 136370 }, { "epoch": 0.56, "grad_norm": 3.518232822418213, "learning_rate": 0.0002, "loss": 1.5467, "step": 136380 }, { "epoch": 0.56, "grad_norm": 4.5318603515625, "learning_rate": 0.0002, "loss": 1.5869, "step": 136390 }, { "epoch": 0.56, "grad_norm": 3.7138476371765137, "learning_rate": 0.0002, "loss": 1.5705, "step": 136400 }, { "epoch": 0.56, "grad_norm": 2.8423068523406982, "learning_rate": 0.0002, "loss": 1.683, "step": 136410 }, { "epoch": 0.56, "grad_norm": 2.4675705432891846, "learning_rate": 0.0002, "loss": 1.6426, "step": 136420 }, { "epoch": 0.56, "grad_norm": 3.020720958709717, "learning_rate": 0.0002, "loss": 1.5517, "step": 136430 }, { "epoch": 0.56, "grad_norm": 3.0762763023376465, "learning_rate": 0.0002, "loss": 1.5407, "step": 136440 }, { "epoch": 0.56, "grad_norm": 2.663830280303955, "learning_rate": 0.0002, "loss": 1.5161, "step": 136450 }, { "epoch": 0.56, "grad_norm": 3.3534202575683594, "learning_rate": 0.0002, "loss": 1.6858, "step": 136460 }, { "epoch": 0.56, "grad_norm": 5.973743438720703, "learning_rate": 0.0002, "loss": 1.7272, "step": 136470 }, { "epoch": 0.56, "grad_norm": 2.090454339981079, "learning_rate": 0.0002, "loss": 1.5437, "step": 136480 }, { "epoch": 0.56, "grad_norm": 3.1757588386535645, "learning_rate": 0.0002, "loss": 1.4973, "step": 136490 }, { "epoch": 0.56, "grad_norm": 3.3855650424957275, "learning_rate": 0.0002, "loss": 1.8128, "step": 136500 }, { "epoch": 0.56, "grad_norm": 2.370704412460327, "learning_rate": 0.0002, "loss": 1.4457, "step": 136510 }, { "epoch": 0.56, "grad_norm": 3.1941137313842773, "learning_rate": 0.0002, "loss": 1.6916, "step": 136520 }, { "epoch": 0.56, "grad_norm": 2.8342394828796387, "learning_rate": 0.0002, "loss": 1.582, "step": 136530 }, { "epoch": 0.56, "grad_norm": 2.7999370098114014, "learning_rate": 0.0002, "loss": 1.4761, "step": 136540 }, { "epoch": 0.56, "grad_norm": 2.5353751182556152, "learning_rate": 0.0002, "loss": 1.6352, "step": 136550 }, { "epoch": 0.56, "grad_norm": 2.6249313354492188, "learning_rate": 0.0002, "loss": 1.5047, "step": 136560 }, { "epoch": 0.56, "grad_norm": 8.009292602539062, "learning_rate": 0.0002, "loss": 1.5785, "step": 136570 }, { "epoch": 0.56, "grad_norm": 1.6806386709213257, "learning_rate": 0.0002, "loss": 1.5464, "step": 136580 }, { "epoch": 0.56, "grad_norm": 5.860755920410156, "learning_rate": 0.0002, "loss": 1.373, "step": 136590 }, { "epoch": 0.56, "grad_norm": 2.30057692527771, "learning_rate": 0.0002, "loss": 1.7521, "step": 136600 }, { "epoch": 0.56, "grad_norm": 3.3587498664855957, "learning_rate": 0.0002, "loss": 1.6414, "step": 136610 }, { "epoch": 0.56, "grad_norm": 5.050489902496338, "learning_rate": 0.0002, "loss": 1.8021, "step": 136620 }, { "epoch": 0.56, "grad_norm": 3.7157328128814697, "learning_rate": 0.0002, "loss": 1.5897, "step": 136630 }, { "epoch": 0.56, "grad_norm": 3.0055124759674072, "learning_rate": 0.0002, "loss": 1.5094, "step": 136640 }, { "epoch": 0.56, "grad_norm": 3.659395933151245, "learning_rate": 0.0002, "loss": 1.5079, "step": 136650 }, { "epoch": 0.56, "grad_norm": 2.3542304039001465, "learning_rate": 0.0002, "loss": 1.4896, "step": 136660 }, { "epoch": 0.56, "grad_norm": 3.6483001708984375, "learning_rate": 0.0002, "loss": 2.0081, "step": 136670 }, { "epoch": 0.56, "grad_norm": 1.6343374252319336, "learning_rate": 0.0002, "loss": 1.5478, "step": 136680 }, { "epoch": 0.56, "grad_norm": 2.1466312408447266, "learning_rate": 0.0002, "loss": 1.6079, "step": 136690 }, { "epoch": 0.56, "grad_norm": 3.6406967639923096, "learning_rate": 0.0002, "loss": 1.6462, "step": 136700 }, { "epoch": 0.56, "grad_norm": 3.1208932399749756, "learning_rate": 0.0002, "loss": 1.7348, "step": 136710 }, { "epoch": 0.56, "grad_norm": 3.6379570960998535, "learning_rate": 0.0002, "loss": 1.5507, "step": 136720 }, { "epoch": 0.56, "grad_norm": 3.4739649295806885, "learning_rate": 0.0002, "loss": 1.45, "step": 136730 }, { "epoch": 0.56, "grad_norm": 3.8031723499298096, "learning_rate": 0.0002, "loss": 1.5712, "step": 136740 }, { "epoch": 0.56, "grad_norm": 3.7401485443115234, "learning_rate": 0.0002, "loss": 1.6687, "step": 136750 }, { "epoch": 0.56, "grad_norm": 4.046003818511963, "learning_rate": 0.0002, "loss": 1.5213, "step": 136760 }, { "epoch": 0.56, "grad_norm": 2.0692951679229736, "learning_rate": 0.0002, "loss": 1.7237, "step": 136770 }, { "epoch": 0.56, "grad_norm": 2.5835766792297363, "learning_rate": 0.0002, "loss": 1.3436, "step": 136780 }, { "epoch": 0.56, "grad_norm": 4.005113124847412, "learning_rate": 0.0002, "loss": 1.4744, "step": 136790 }, { "epoch": 0.56, "grad_norm": 3.671555280685425, "learning_rate": 0.0002, "loss": 1.5747, "step": 136800 }, { "epoch": 0.56, "grad_norm": 1.7906808853149414, "learning_rate": 0.0002, "loss": 1.6159, "step": 136810 }, { "epoch": 0.56, "grad_norm": 2.506878137588501, "learning_rate": 0.0002, "loss": 1.7885, "step": 136820 }, { "epoch": 0.56, "grad_norm": 3.1822850704193115, "learning_rate": 0.0002, "loss": 1.4358, "step": 136830 }, { "epoch": 0.56, "grad_norm": 2.9135990142822266, "learning_rate": 0.0002, "loss": 1.4961, "step": 136840 }, { "epoch": 0.56, "grad_norm": 5.354123592376709, "learning_rate": 0.0002, "loss": 1.7062, "step": 136850 }, { "epoch": 0.56, "grad_norm": 3.584784507751465, "learning_rate": 0.0002, "loss": 1.5677, "step": 136860 }, { "epoch": 0.56, "grad_norm": 3.316242218017578, "learning_rate": 0.0002, "loss": 1.8499, "step": 136870 }, { "epoch": 0.56, "grad_norm": 3.386030435562134, "learning_rate": 0.0002, "loss": 1.2667, "step": 136880 }, { "epoch": 0.56, "grad_norm": 12.131012916564941, "learning_rate": 0.0002, "loss": 1.6549, "step": 136890 }, { "epoch": 0.56, "grad_norm": 5.247278213500977, "learning_rate": 0.0002, "loss": 1.3326, "step": 136900 }, { "epoch": 0.56, "grad_norm": 3.4114553928375244, "learning_rate": 0.0002, "loss": 1.489, "step": 136910 }, { "epoch": 0.56, "grad_norm": 2.5192854404449463, "learning_rate": 0.0002, "loss": 1.5556, "step": 136920 }, { "epoch": 0.56, "grad_norm": 5.079383850097656, "learning_rate": 0.0002, "loss": 1.4426, "step": 136930 }, { "epoch": 0.56, "grad_norm": 3.5383706092834473, "learning_rate": 0.0002, "loss": 1.6341, "step": 136940 }, { "epoch": 0.56, "grad_norm": 2.298177719116211, "learning_rate": 0.0002, "loss": 1.4012, "step": 136950 }, { "epoch": 0.56, "grad_norm": 3.404271125793457, "learning_rate": 0.0002, "loss": 1.5429, "step": 136960 }, { "epoch": 0.56, "grad_norm": 2.033308267593384, "learning_rate": 0.0002, "loss": 1.3933, "step": 136970 }, { "epoch": 0.56, "grad_norm": 2.3121232986450195, "learning_rate": 0.0002, "loss": 1.6222, "step": 136980 }, { "epoch": 0.56, "grad_norm": 2.9891552925109863, "learning_rate": 0.0002, "loss": 1.7196, "step": 136990 }, { "epoch": 0.56, "grad_norm": 2.286877393722534, "learning_rate": 0.0002, "loss": 1.4698, "step": 137000 }, { "epoch": 0.56, "grad_norm": 4.3204193115234375, "learning_rate": 0.0002, "loss": 1.42, "step": 137010 }, { "epoch": 0.56, "grad_norm": 2.9095022678375244, "learning_rate": 0.0002, "loss": 1.7, "step": 137020 }, { "epoch": 0.56, "grad_norm": 3.595325231552124, "learning_rate": 0.0002, "loss": 1.6595, "step": 137030 }, { "epoch": 0.56, "grad_norm": 3.502584218978882, "learning_rate": 0.0002, "loss": 1.4979, "step": 137040 }, { "epoch": 0.56, "grad_norm": 5.034910202026367, "learning_rate": 0.0002, "loss": 1.5619, "step": 137050 }, { "epoch": 0.56, "grad_norm": 3.3706068992614746, "learning_rate": 0.0002, "loss": 1.8498, "step": 137060 }, { "epoch": 0.56, "grad_norm": 2.0511341094970703, "learning_rate": 0.0002, "loss": 1.5834, "step": 137070 }, { "epoch": 0.56, "grad_norm": 3.1928610801696777, "learning_rate": 0.0002, "loss": 1.4867, "step": 137080 }, { "epoch": 0.56, "grad_norm": 3.7649645805358887, "learning_rate": 0.0002, "loss": 1.4996, "step": 137090 }, { "epoch": 0.56, "grad_norm": 2.5177881717681885, "learning_rate": 0.0002, "loss": 1.8066, "step": 137100 }, { "epoch": 0.56, "grad_norm": 3.1460487842559814, "learning_rate": 0.0002, "loss": 1.7962, "step": 137110 }, { "epoch": 0.56, "grad_norm": 2.29095458984375, "learning_rate": 0.0002, "loss": 1.676, "step": 137120 }, { "epoch": 0.56, "grad_norm": 3.248255729675293, "learning_rate": 0.0002, "loss": 1.5439, "step": 137130 }, { "epoch": 0.56, "grad_norm": 3.4693009853363037, "learning_rate": 0.0002, "loss": 1.5911, "step": 137140 }, { "epoch": 0.56, "grad_norm": 2.745591163635254, "learning_rate": 0.0002, "loss": 1.504, "step": 137150 }, { "epoch": 0.56, "grad_norm": 2.9331443309783936, "learning_rate": 0.0002, "loss": 1.5143, "step": 137160 }, { "epoch": 0.56, "grad_norm": 3.378073215484619, "learning_rate": 0.0002, "loss": 1.6321, "step": 137170 }, { "epoch": 0.56, "grad_norm": 3.1893980503082275, "learning_rate": 0.0002, "loss": 1.6227, "step": 137180 }, { "epoch": 0.56, "grad_norm": 2.5602550506591797, "learning_rate": 0.0002, "loss": 1.5055, "step": 137190 }, { "epoch": 0.56, "grad_norm": 2.583772897720337, "learning_rate": 0.0002, "loss": 1.6167, "step": 137200 }, { "epoch": 0.56, "grad_norm": 2.0536553859710693, "learning_rate": 0.0002, "loss": 1.516, "step": 137210 }, { "epoch": 0.56, "grad_norm": 2.582268714904785, "learning_rate": 0.0002, "loss": 1.629, "step": 137220 }, { "epoch": 0.56, "grad_norm": 3.0254671573638916, "learning_rate": 0.0002, "loss": 1.5414, "step": 137230 }, { "epoch": 0.56, "grad_norm": 3.3609604835510254, "learning_rate": 0.0002, "loss": 1.3609, "step": 137240 }, { "epoch": 0.56, "grad_norm": 3.117126226425171, "learning_rate": 0.0002, "loss": 1.6771, "step": 137250 }, { "epoch": 0.56, "grad_norm": 2.730037212371826, "learning_rate": 0.0002, "loss": 1.4229, "step": 137260 }, { "epoch": 0.56, "grad_norm": 4.850566864013672, "learning_rate": 0.0002, "loss": 1.5851, "step": 137270 }, { "epoch": 0.56, "grad_norm": 3.2682137489318848, "learning_rate": 0.0002, "loss": 1.6907, "step": 137280 }, { "epoch": 0.56, "grad_norm": 3.20636248588562, "learning_rate": 0.0002, "loss": 1.4408, "step": 137290 }, { "epoch": 0.56, "grad_norm": 2.6486496925354004, "learning_rate": 0.0002, "loss": 1.3772, "step": 137300 }, { "epoch": 0.56, "grad_norm": 4.687081336975098, "learning_rate": 0.0002, "loss": 1.9101, "step": 137310 }, { "epoch": 0.56, "grad_norm": 6.806678295135498, "learning_rate": 0.0002, "loss": 1.7367, "step": 137320 }, { "epoch": 0.56, "grad_norm": 2.7847726345062256, "learning_rate": 0.0002, "loss": 1.6875, "step": 137330 }, { "epoch": 0.56, "grad_norm": 3.322484254837036, "learning_rate": 0.0002, "loss": 1.7495, "step": 137340 }, { "epoch": 0.56, "grad_norm": 2.3768234252929688, "learning_rate": 0.0002, "loss": 1.5023, "step": 137350 }, { "epoch": 0.56, "grad_norm": 3.2627649307250977, "learning_rate": 0.0002, "loss": 1.7747, "step": 137360 }, { "epoch": 0.56, "grad_norm": 2.7372913360595703, "learning_rate": 0.0002, "loss": 1.5813, "step": 137370 }, { "epoch": 0.56, "grad_norm": 5.9672627449035645, "learning_rate": 0.0002, "loss": 1.3902, "step": 137380 }, { "epoch": 0.56, "grad_norm": 2.673356056213379, "learning_rate": 0.0002, "loss": 1.6035, "step": 137390 }, { "epoch": 0.56, "grad_norm": 4.051168918609619, "learning_rate": 0.0002, "loss": 1.7014, "step": 137400 }, { "epoch": 0.56, "grad_norm": 9.761800765991211, "learning_rate": 0.0002, "loss": 1.849, "step": 137410 }, { "epoch": 0.56, "grad_norm": 2.9674341678619385, "learning_rate": 0.0002, "loss": 1.5806, "step": 137420 }, { "epoch": 0.56, "grad_norm": 3.730152130126953, "learning_rate": 0.0002, "loss": 1.6163, "step": 137430 }, { "epoch": 0.56, "grad_norm": 3.6709368228912354, "learning_rate": 0.0002, "loss": 1.6024, "step": 137440 }, { "epoch": 0.56, "grad_norm": 3.540635824203491, "learning_rate": 0.0002, "loss": 1.5418, "step": 137450 }, { "epoch": 0.56, "grad_norm": 2.5993313789367676, "learning_rate": 0.0002, "loss": 1.3855, "step": 137460 }, { "epoch": 0.56, "grad_norm": 2.3547627925872803, "learning_rate": 0.0002, "loss": 1.6316, "step": 137470 }, { "epoch": 0.56, "grad_norm": 3.886723518371582, "learning_rate": 0.0002, "loss": 1.7233, "step": 137480 }, { "epoch": 0.56, "grad_norm": 8.247516632080078, "learning_rate": 0.0002, "loss": 1.8171, "step": 137490 }, { "epoch": 0.56, "grad_norm": 3.668313980102539, "learning_rate": 0.0002, "loss": 1.6767, "step": 137500 }, { "epoch": 0.56, "grad_norm": 2.435676097869873, "learning_rate": 0.0002, "loss": 1.492, "step": 137510 }, { "epoch": 0.56, "grad_norm": 2.944058418273926, "learning_rate": 0.0002, "loss": 1.6664, "step": 137520 }, { "epoch": 0.56, "grad_norm": 2.206692934036255, "learning_rate": 0.0002, "loss": 1.7556, "step": 137530 }, { "epoch": 0.56, "grad_norm": 2.0884737968444824, "learning_rate": 0.0002, "loss": 1.5723, "step": 137540 }, { "epoch": 0.56, "grad_norm": 4.421091556549072, "learning_rate": 0.0002, "loss": 1.6187, "step": 137550 }, { "epoch": 0.56, "grad_norm": 2.829822063446045, "learning_rate": 0.0002, "loss": 1.5824, "step": 137560 }, { "epoch": 0.56, "grad_norm": 1.8010395765304565, "learning_rate": 0.0002, "loss": 1.6454, "step": 137570 }, { "epoch": 0.56, "grad_norm": 2.763831853866577, "learning_rate": 0.0002, "loss": 1.5221, "step": 137580 }, { "epoch": 0.56, "grad_norm": 2.9795897006988525, "learning_rate": 0.0002, "loss": 1.512, "step": 137590 }, { "epoch": 0.56, "grad_norm": 3.9540934562683105, "learning_rate": 0.0002, "loss": 1.6249, "step": 137600 }, { "epoch": 0.56, "grad_norm": 1.6957610845565796, "learning_rate": 0.0002, "loss": 1.7222, "step": 137610 }, { "epoch": 0.56, "grad_norm": 2.6676313877105713, "learning_rate": 0.0002, "loss": 1.5356, "step": 137620 }, { "epoch": 0.56, "grad_norm": 2.536015510559082, "learning_rate": 0.0002, "loss": 1.5899, "step": 137630 }, { "epoch": 0.56, "grad_norm": 5.00136137008667, "learning_rate": 0.0002, "loss": 1.7257, "step": 137640 }, { "epoch": 0.56, "grad_norm": 12.88910961151123, "learning_rate": 0.0002, "loss": 1.422, "step": 137650 }, { "epoch": 0.56, "grad_norm": 3.223344326019287, "learning_rate": 0.0002, "loss": 1.6407, "step": 137660 }, { "epoch": 0.56, "grad_norm": 2.865152359008789, "learning_rate": 0.0002, "loss": 1.4855, "step": 137670 }, { "epoch": 0.56, "grad_norm": 4.427142143249512, "learning_rate": 0.0002, "loss": 1.5262, "step": 137680 }, { "epoch": 0.56, "grad_norm": 3.152101516723633, "learning_rate": 0.0002, "loss": 1.6404, "step": 137690 }, { "epoch": 0.56, "grad_norm": 2.9639155864715576, "learning_rate": 0.0002, "loss": 1.8172, "step": 137700 }, { "epoch": 0.56, "grad_norm": 2.599909543991089, "learning_rate": 0.0002, "loss": 1.5241, "step": 137710 }, { "epoch": 0.56, "grad_norm": 1.8760790824890137, "learning_rate": 0.0002, "loss": 1.5092, "step": 137720 }, { "epoch": 0.56, "grad_norm": 3.4771409034729004, "learning_rate": 0.0002, "loss": 1.6485, "step": 137730 }, { "epoch": 0.56, "grad_norm": 5.411869525909424, "learning_rate": 0.0002, "loss": 1.5707, "step": 137740 }, { "epoch": 0.56, "grad_norm": 1.9741816520690918, "learning_rate": 0.0002, "loss": 1.3137, "step": 137750 }, { "epoch": 0.56, "grad_norm": 3.0060770511627197, "learning_rate": 0.0002, "loss": 1.2522, "step": 137760 }, { "epoch": 0.56, "grad_norm": 3.8905303478240967, "learning_rate": 0.0002, "loss": 1.9131, "step": 137770 }, { "epoch": 0.56, "grad_norm": 3.01460862159729, "learning_rate": 0.0002, "loss": 1.4757, "step": 137780 }, { "epoch": 0.56, "grad_norm": 2.9932055473327637, "learning_rate": 0.0002, "loss": 1.407, "step": 137790 }, { "epoch": 0.56, "grad_norm": 4.2024760246276855, "learning_rate": 0.0002, "loss": 1.5023, "step": 137800 }, { "epoch": 0.56, "grad_norm": 2.9203646183013916, "learning_rate": 0.0002, "loss": 1.5824, "step": 137810 }, { "epoch": 0.56, "grad_norm": 2.587073802947998, "learning_rate": 0.0002, "loss": 1.4659, "step": 137820 }, { "epoch": 0.56, "grad_norm": 2.836735725402832, "learning_rate": 0.0002, "loss": 1.5765, "step": 137830 }, { "epoch": 0.56, "grad_norm": 3.490060567855835, "learning_rate": 0.0002, "loss": 1.4535, "step": 137840 }, { "epoch": 0.56, "grad_norm": 2.1752004623413086, "learning_rate": 0.0002, "loss": 1.6962, "step": 137850 }, { "epoch": 0.56, "grad_norm": 2.2746949195861816, "learning_rate": 0.0002, "loss": 1.5328, "step": 137860 }, { "epoch": 0.56, "grad_norm": 2.2619168758392334, "learning_rate": 0.0002, "loss": 1.6549, "step": 137870 }, { "epoch": 0.56, "grad_norm": 1.9368926286697388, "learning_rate": 0.0002, "loss": 1.6303, "step": 137880 }, { "epoch": 0.56, "grad_norm": 2.431442975997925, "learning_rate": 0.0002, "loss": 1.6791, "step": 137890 }, { "epoch": 0.56, "grad_norm": 4.937935829162598, "learning_rate": 0.0002, "loss": 1.5906, "step": 137900 }, { "epoch": 0.56, "grad_norm": 3.1228630542755127, "learning_rate": 0.0002, "loss": 1.8148, "step": 137910 }, { "epoch": 0.56, "grad_norm": 2.4329380989074707, "learning_rate": 0.0002, "loss": 1.4702, "step": 137920 }, { "epoch": 0.56, "grad_norm": 2.4843690395355225, "learning_rate": 0.0002, "loss": 1.7215, "step": 137930 }, { "epoch": 0.56, "grad_norm": 2.2212018966674805, "learning_rate": 0.0002, "loss": 1.5898, "step": 137940 }, { "epoch": 0.56, "grad_norm": 4.161941051483154, "learning_rate": 0.0002, "loss": 1.8246, "step": 137950 }, { "epoch": 0.56, "grad_norm": 4.6026411056518555, "learning_rate": 0.0002, "loss": 1.644, "step": 137960 }, { "epoch": 0.56, "grad_norm": 3.087999105453491, "learning_rate": 0.0002, "loss": 1.5228, "step": 137970 }, { "epoch": 0.56, "grad_norm": 3.814368486404419, "learning_rate": 0.0002, "loss": 1.7841, "step": 137980 }, { "epoch": 0.56, "grad_norm": 2.9276812076568604, "learning_rate": 0.0002, "loss": 1.7043, "step": 137990 }, { "epoch": 0.56, "grad_norm": 2.583850622177124, "learning_rate": 0.0002, "loss": 1.634, "step": 138000 }, { "epoch": 0.56, "grad_norm": 3.7903058528900146, "learning_rate": 0.0002, "loss": 1.5272, "step": 138010 }, { "epoch": 0.56, "grad_norm": 4.529233932495117, "learning_rate": 0.0002, "loss": 1.7578, "step": 138020 }, { "epoch": 0.56, "grad_norm": 2.142080307006836, "learning_rate": 0.0002, "loss": 1.2435, "step": 138030 }, { "epoch": 0.56, "grad_norm": 2.39219069480896, "learning_rate": 0.0002, "loss": 1.8486, "step": 138040 }, { "epoch": 0.56, "grad_norm": 4.989705562591553, "learning_rate": 0.0002, "loss": 1.583, "step": 138050 }, { "epoch": 0.56, "grad_norm": 1.7219408750534058, "learning_rate": 0.0002, "loss": 1.7135, "step": 138060 }, { "epoch": 0.56, "grad_norm": 2.2580652236938477, "learning_rate": 0.0002, "loss": 1.4826, "step": 138070 }, { "epoch": 0.56, "grad_norm": 3.412687063217163, "learning_rate": 0.0002, "loss": 1.6093, "step": 138080 }, { "epoch": 0.56, "grad_norm": 2.4888052940368652, "learning_rate": 0.0002, "loss": 1.3798, "step": 138090 }, { "epoch": 0.56, "grad_norm": 2.6860835552215576, "learning_rate": 0.0002, "loss": 1.6729, "step": 138100 }, { "epoch": 0.56, "grad_norm": 5.182403087615967, "learning_rate": 0.0002, "loss": 1.4188, "step": 138110 }, { "epoch": 0.56, "grad_norm": 3.556070327758789, "learning_rate": 0.0002, "loss": 1.4805, "step": 138120 }, { "epoch": 0.56, "grad_norm": 1.6766715049743652, "learning_rate": 0.0002, "loss": 1.2473, "step": 138130 }, { "epoch": 0.56, "grad_norm": 3.721715211868286, "learning_rate": 0.0002, "loss": 1.5319, "step": 138140 }, { "epoch": 0.56, "grad_norm": 2.9527933597564697, "learning_rate": 0.0002, "loss": 1.6555, "step": 138150 }, { "epoch": 0.56, "grad_norm": 5.710496425628662, "learning_rate": 0.0002, "loss": 1.4946, "step": 138160 }, { "epoch": 0.56, "grad_norm": 2.674293279647827, "learning_rate": 0.0002, "loss": 1.4684, "step": 138170 }, { "epoch": 0.56, "grad_norm": 2.7763607501983643, "learning_rate": 0.0002, "loss": 1.4718, "step": 138180 }, { "epoch": 0.56, "grad_norm": 2.066664934158325, "learning_rate": 0.0002, "loss": 1.4441, "step": 138190 }, { "epoch": 0.56, "grad_norm": 2.256613254547119, "learning_rate": 0.0002, "loss": 1.7462, "step": 138200 }, { "epoch": 0.56, "grad_norm": 2.135835647583008, "learning_rate": 0.0002, "loss": 1.5347, "step": 138210 }, { "epoch": 0.56, "grad_norm": 2.728731870651245, "learning_rate": 0.0002, "loss": 1.6763, "step": 138220 }, { "epoch": 0.56, "grad_norm": 1.713837742805481, "learning_rate": 0.0002, "loss": 1.6561, "step": 138230 }, { "epoch": 0.56, "grad_norm": 2.9750263690948486, "learning_rate": 0.0002, "loss": 1.4517, "step": 138240 }, { "epoch": 0.56, "grad_norm": 2.893742084503174, "learning_rate": 0.0002, "loss": 1.5446, "step": 138250 }, { "epoch": 0.56, "grad_norm": 2.6159894466400146, "learning_rate": 0.0002, "loss": 1.5424, "step": 138260 }, { "epoch": 0.56, "grad_norm": 3.010035276412964, "learning_rate": 0.0002, "loss": 1.5943, "step": 138270 }, { "epoch": 0.56, "grad_norm": 4.40212345123291, "learning_rate": 0.0002, "loss": 1.6636, "step": 138280 }, { "epoch": 0.56, "grad_norm": 3.4623656272888184, "learning_rate": 0.0002, "loss": 1.4869, "step": 138290 }, { "epoch": 0.56, "grad_norm": 2.75014328956604, "learning_rate": 0.0002, "loss": 1.4362, "step": 138300 }, { "epoch": 0.56, "grad_norm": 3.7236008644104004, "learning_rate": 0.0002, "loss": 1.7024, "step": 138310 }, { "epoch": 0.56, "grad_norm": 3.4705705642700195, "learning_rate": 0.0002, "loss": 1.3396, "step": 138320 }, { "epoch": 0.56, "grad_norm": 2.686873197555542, "learning_rate": 0.0002, "loss": 1.3865, "step": 138330 }, { "epoch": 0.56, "grad_norm": 3.1812522411346436, "learning_rate": 0.0002, "loss": 1.6829, "step": 138340 }, { "epoch": 0.56, "grad_norm": 3.8559532165527344, "learning_rate": 0.0002, "loss": 1.7379, "step": 138350 }, { "epoch": 0.56, "grad_norm": 3.4663538932800293, "learning_rate": 0.0002, "loss": 1.5857, "step": 138360 }, { "epoch": 0.56, "grad_norm": 3.644221305847168, "learning_rate": 0.0002, "loss": 1.1336, "step": 138370 }, { "epoch": 0.56, "grad_norm": 2.017469644546509, "learning_rate": 0.0002, "loss": 1.5796, "step": 138380 }, { "epoch": 0.56, "grad_norm": 2.964728832244873, "learning_rate": 0.0002, "loss": 1.7317, "step": 138390 }, { "epoch": 0.56, "grad_norm": 8.020936965942383, "learning_rate": 0.0002, "loss": 1.7836, "step": 138400 }, { "epoch": 0.56, "grad_norm": 3.158015489578247, "learning_rate": 0.0002, "loss": 1.5652, "step": 138410 }, { "epoch": 0.56, "grad_norm": 3.9578421115875244, "learning_rate": 0.0002, "loss": 1.7283, "step": 138420 }, { "epoch": 0.56, "grad_norm": 3.11797833442688, "learning_rate": 0.0002, "loss": 1.4965, "step": 138430 }, { "epoch": 0.56, "grad_norm": 2.7689027786254883, "learning_rate": 0.0002, "loss": 1.6214, "step": 138440 }, { "epoch": 0.56, "grad_norm": 3.4827539920806885, "learning_rate": 0.0002, "loss": 1.7418, "step": 138450 }, { "epoch": 0.56, "grad_norm": 1.7987430095672607, "learning_rate": 0.0002, "loss": 1.4561, "step": 138460 }, { "epoch": 0.56, "grad_norm": 7.2307305335998535, "learning_rate": 0.0002, "loss": 1.5367, "step": 138470 }, { "epoch": 0.56, "grad_norm": 4.078079700469971, "learning_rate": 0.0002, "loss": 1.591, "step": 138480 }, { "epoch": 0.56, "grad_norm": 2.416623115539551, "learning_rate": 0.0002, "loss": 1.3993, "step": 138490 }, { "epoch": 0.56, "grad_norm": 6.009616374969482, "learning_rate": 0.0002, "loss": 1.6985, "step": 138500 }, { "epoch": 0.56, "grad_norm": 2.8217639923095703, "learning_rate": 0.0002, "loss": 1.4701, "step": 138510 }, { "epoch": 0.56, "grad_norm": 1.9690228700637817, "learning_rate": 0.0002, "loss": 1.6648, "step": 138520 }, { "epoch": 0.56, "grad_norm": 3.9942240715026855, "learning_rate": 0.0002, "loss": 1.5624, "step": 138530 }, { "epoch": 0.56, "grad_norm": 3.5165884494781494, "learning_rate": 0.0002, "loss": 1.6103, "step": 138540 }, { "epoch": 0.56, "grad_norm": 4.355506420135498, "learning_rate": 0.0002, "loss": 1.5364, "step": 138550 }, { "epoch": 0.56, "grad_norm": 3.4484148025512695, "learning_rate": 0.0002, "loss": 1.5298, "step": 138560 }, { "epoch": 0.56, "grad_norm": 3.5398967266082764, "learning_rate": 0.0002, "loss": 1.6186, "step": 138570 }, { "epoch": 0.56, "grad_norm": 3.332721471786499, "learning_rate": 0.0002, "loss": 1.5472, "step": 138580 }, { "epoch": 0.56, "grad_norm": 3.3097496032714844, "learning_rate": 0.0002, "loss": 1.6952, "step": 138590 }, { "epoch": 0.56, "grad_norm": 1.9036710262298584, "learning_rate": 0.0002, "loss": 1.6195, "step": 138600 }, { "epoch": 0.56, "grad_norm": 3.341055393218994, "learning_rate": 0.0002, "loss": 1.5421, "step": 138610 }, { "epoch": 0.56, "grad_norm": 3.764688491821289, "learning_rate": 0.0002, "loss": 1.605, "step": 138620 }, { "epoch": 0.56, "grad_norm": 3.5762717723846436, "learning_rate": 0.0002, "loss": 1.6414, "step": 138630 }, { "epoch": 0.56, "grad_norm": 2.8380632400512695, "learning_rate": 0.0002, "loss": 1.7197, "step": 138640 }, { "epoch": 0.56, "grad_norm": 2.946845531463623, "learning_rate": 0.0002, "loss": 1.5925, "step": 138650 }, { "epoch": 0.56, "grad_norm": 5.1430511474609375, "learning_rate": 0.0002, "loss": 1.7263, "step": 138660 }, { "epoch": 0.56, "grad_norm": 3.4866464138031006, "learning_rate": 0.0002, "loss": 1.6821, "step": 138670 }, { "epoch": 0.56, "grad_norm": 2.278388738632202, "learning_rate": 0.0002, "loss": 1.4708, "step": 138680 }, { "epoch": 0.56, "grad_norm": 2.171603202819824, "learning_rate": 0.0002, "loss": 1.5879, "step": 138690 }, { "epoch": 0.56, "grad_norm": 3.476797103881836, "learning_rate": 0.0002, "loss": 1.6656, "step": 138700 }, { "epoch": 0.56, "grad_norm": 4.124690532684326, "learning_rate": 0.0002, "loss": 1.4977, "step": 138710 }, { "epoch": 0.56, "grad_norm": 2.6025781631469727, "learning_rate": 0.0002, "loss": 1.674, "step": 138720 }, { "epoch": 0.56, "grad_norm": 3.470146417617798, "learning_rate": 0.0002, "loss": 1.4883, "step": 138730 }, { "epoch": 0.56, "grad_norm": 2.2465972900390625, "learning_rate": 0.0002, "loss": 1.5951, "step": 138740 }, { "epoch": 0.56, "grad_norm": 3.0606136322021484, "learning_rate": 0.0002, "loss": 1.3981, "step": 138750 }, { "epoch": 0.56, "grad_norm": 5.941366672515869, "learning_rate": 0.0002, "loss": 1.5335, "step": 138760 }, { "epoch": 0.56, "grad_norm": 2.900073528289795, "learning_rate": 0.0002, "loss": 1.6525, "step": 138770 }, { "epoch": 0.56, "grad_norm": 3.687565565109253, "learning_rate": 0.0002, "loss": 1.3479, "step": 138780 }, { "epoch": 0.57, "grad_norm": 2.995321035385132, "learning_rate": 0.0002, "loss": 1.6874, "step": 138790 }, { "epoch": 0.57, "grad_norm": 2.6990268230438232, "learning_rate": 0.0002, "loss": 1.6671, "step": 138800 }, { "epoch": 0.57, "grad_norm": 3.1641595363616943, "learning_rate": 0.0002, "loss": 1.8472, "step": 138810 }, { "epoch": 0.57, "grad_norm": 3.542811393737793, "learning_rate": 0.0002, "loss": 1.5647, "step": 138820 }, { "epoch": 0.57, "grad_norm": 3.2602922916412354, "learning_rate": 0.0002, "loss": 1.8777, "step": 138830 }, { "epoch": 0.57, "grad_norm": 4.548471927642822, "learning_rate": 0.0002, "loss": 1.5008, "step": 138840 }, { "epoch": 0.57, "grad_norm": 2.8731026649475098, "learning_rate": 0.0002, "loss": 1.7419, "step": 138850 }, { "epoch": 0.57, "grad_norm": 4.254958629608154, "learning_rate": 0.0002, "loss": 1.4236, "step": 138860 }, { "epoch": 0.57, "grad_norm": 1.9869500398635864, "learning_rate": 0.0002, "loss": 1.5161, "step": 138870 }, { "epoch": 0.57, "grad_norm": 6.323207855224609, "learning_rate": 0.0002, "loss": 1.5754, "step": 138880 }, { "epoch": 0.57, "grad_norm": 2.1867263317108154, "learning_rate": 0.0002, "loss": 1.6202, "step": 138890 }, { "epoch": 0.57, "grad_norm": 1.8406568765640259, "learning_rate": 0.0002, "loss": 1.4967, "step": 138900 }, { "epoch": 0.57, "grad_norm": 3.3045122623443604, "learning_rate": 0.0002, "loss": 1.8178, "step": 138910 }, { "epoch": 0.57, "grad_norm": 3.0721852779388428, "learning_rate": 0.0002, "loss": 1.7402, "step": 138920 }, { "epoch": 0.57, "grad_norm": 2.2229113578796387, "learning_rate": 0.0002, "loss": 1.346, "step": 138930 }, { "epoch": 0.57, "grad_norm": 2.825225830078125, "learning_rate": 0.0002, "loss": 1.532, "step": 138940 }, { "epoch": 0.57, "grad_norm": 2.640918254852295, "learning_rate": 0.0002, "loss": 1.5365, "step": 138950 }, { "epoch": 0.57, "grad_norm": 8.076580047607422, "learning_rate": 0.0002, "loss": 1.5209, "step": 138960 }, { "epoch": 0.57, "grad_norm": 3.28052020072937, "learning_rate": 0.0002, "loss": 1.5552, "step": 138970 }, { "epoch": 0.57, "grad_norm": 2.9198527336120605, "learning_rate": 0.0002, "loss": 1.7444, "step": 138980 }, { "epoch": 0.57, "grad_norm": 0.9539849758148193, "learning_rate": 0.0002, "loss": 1.7341, "step": 138990 }, { "epoch": 0.57, "grad_norm": 4.730000019073486, "learning_rate": 0.0002, "loss": 1.8308, "step": 139000 }, { "epoch": 0.57, "grad_norm": 2.074807643890381, "learning_rate": 0.0002, "loss": 1.7565, "step": 139010 }, { "epoch": 0.57, "grad_norm": 3.3299641609191895, "learning_rate": 0.0002, "loss": 1.4269, "step": 139020 }, { "epoch": 0.57, "grad_norm": 3.3996520042419434, "learning_rate": 0.0002, "loss": 1.4967, "step": 139030 }, { "epoch": 0.57, "grad_norm": 2.5820157527923584, "learning_rate": 0.0002, "loss": 1.2281, "step": 139040 }, { "epoch": 0.57, "grad_norm": 4.081928730010986, "learning_rate": 0.0002, "loss": 1.6397, "step": 139050 }, { "epoch": 0.57, "grad_norm": 2.1544342041015625, "learning_rate": 0.0002, "loss": 1.4821, "step": 139060 }, { "epoch": 0.57, "grad_norm": 2.813751220703125, "learning_rate": 0.0002, "loss": 1.6121, "step": 139070 }, { "epoch": 0.57, "grad_norm": 1.7572758197784424, "learning_rate": 0.0002, "loss": 1.585, "step": 139080 }, { "epoch": 0.57, "grad_norm": 3.9942967891693115, "learning_rate": 0.0002, "loss": 1.5836, "step": 139090 }, { "epoch": 0.57, "grad_norm": 3.3641464710235596, "learning_rate": 0.0002, "loss": 1.2879, "step": 139100 }, { "epoch": 0.57, "grad_norm": 3.5819859504699707, "learning_rate": 0.0002, "loss": 1.4643, "step": 139110 }, { "epoch": 0.57, "grad_norm": 2.125323534011841, "learning_rate": 0.0002, "loss": 1.6, "step": 139120 }, { "epoch": 0.57, "grad_norm": 3.91314959526062, "learning_rate": 0.0002, "loss": 1.3689, "step": 139130 }, { "epoch": 0.57, "grad_norm": 2.201617956161499, "learning_rate": 0.0002, "loss": 1.4697, "step": 139140 }, { "epoch": 0.57, "grad_norm": 3.397116184234619, "learning_rate": 0.0002, "loss": 1.2623, "step": 139150 }, { "epoch": 0.57, "grad_norm": 6.2521562576293945, "learning_rate": 0.0002, "loss": 1.3067, "step": 139160 }, { "epoch": 0.57, "grad_norm": 4.206436634063721, "learning_rate": 0.0002, "loss": 1.6386, "step": 139170 }, { "epoch": 0.57, "grad_norm": 3.139616012573242, "learning_rate": 0.0002, "loss": 1.582, "step": 139180 }, { "epoch": 0.57, "grad_norm": 3.494555711746216, "learning_rate": 0.0002, "loss": 1.7305, "step": 139190 }, { "epoch": 0.57, "grad_norm": 1.3291722536087036, "learning_rate": 0.0002, "loss": 1.7879, "step": 139200 }, { "epoch": 0.57, "grad_norm": 3.0945911407470703, "learning_rate": 0.0002, "loss": 1.565, "step": 139210 }, { "epoch": 0.57, "grad_norm": 2.8438878059387207, "learning_rate": 0.0002, "loss": 1.6526, "step": 139220 }, { "epoch": 0.57, "grad_norm": 2.7184791564941406, "learning_rate": 0.0002, "loss": 1.661, "step": 139230 }, { "epoch": 0.57, "grad_norm": 3.583345890045166, "learning_rate": 0.0002, "loss": 1.5372, "step": 139240 }, { "epoch": 0.57, "grad_norm": 3.7206437587738037, "learning_rate": 0.0002, "loss": 1.4408, "step": 139250 }, { "epoch": 0.57, "grad_norm": 1.994494080543518, "learning_rate": 0.0002, "loss": 1.3443, "step": 139260 }, { "epoch": 0.57, "grad_norm": 2.7345612049102783, "learning_rate": 0.0002, "loss": 1.5233, "step": 139270 }, { "epoch": 0.57, "grad_norm": 3.551441192626953, "learning_rate": 0.0002, "loss": 1.4825, "step": 139280 }, { "epoch": 0.57, "grad_norm": 2.5982778072357178, "learning_rate": 0.0002, "loss": 1.846, "step": 139290 }, { "epoch": 0.57, "grad_norm": 2.3127729892730713, "learning_rate": 0.0002, "loss": 1.741, "step": 139300 }, { "epoch": 0.57, "grad_norm": 3.0806379318237305, "learning_rate": 0.0002, "loss": 1.5135, "step": 139310 }, { "epoch": 0.57, "grad_norm": 2.708298921585083, "learning_rate": 0.0002, "loss": 1.4733, "step": 139320 }, { "epoch": 0.57, "grad_norm": 3.4777534008026123, "learning_rate": 0.0002, "loss": 1.8084, "step": 139330 }, { "epoch": 0.57, "grad_norm": 2.8042900562286377, "learning_rate": 0.0002, "loss": 1.5667, "step": 139340 }, { "epoch": 0.57, "grad_norm": 2.1897716522216797, "learning_rate": 0.0002, "loss": 1.6999, "step": 139350 }, { "epoch": 0.57, "grad_norm": 6.042189598083496, "learning_rate": 0.0002, "loss": 1.5646, "step": 139360 }, { "epoch": 0.57, "grad_norm": 2.9724645614624023, "learning_rate": 0.0002, "loss": 1.7233, "step": 139370 }, { "epoch": 0.57, "grad_norm": 2.445265054702759, "learning_rate": 0.0002, "loss": 1.6718, "step": 139380 }, { "epoch": 0.57, "grad_norm": 3.5147030353546143, "learning_rate": 0.0002, "loss": 1.7277, "step": 139390 }, { "epoch": 0.57, "grad_norm": 3.359889030456543, "learning_rate": 0.0002, "loss": 1.7488, "step": 139400 }, { "epoch": 0.57, "grad_norm": 5.144214153289795, "learning_rate": 0.0002, "loss": 1.5693, "step": 139410 }, { "epoch": 0.57, "grad_norm": 2.161346197128296, "learning_rate": 0.0002, "loss": 1.7677, "step": 139420 }, { "epoch": 0.57, "grad_norm": 2.722015619277954, "learning_rate": 0.0002, "loss": 1.4115, "step": 139430 }, { "epoch": 0.57, "grad_norm": 4.0465989112854, "learning_rate": 0.0002, "loss": 1.3204, "step": 139440 }, { "epoch": 0.57, "grad_norm": 2.42264723777771, "learning_rate": 0.0002, "loss": 1.499, "step": 139450 }, { "epoch": 0.57, "grad_norm": 4.6920576095581055, "learning_rate": 0.0002, "loss": 1.5574, "step": 139460 }, { "epoch": 0.57, "grad_norm": 4.284289836883545, "learning_rate": 0.0002, "loss": 1.5142, "step": 139470 }, { "epoch": 0.57, "grad_norm": 2.3635456562042236, "learning_rate": 0.0002, "loss": 1.5365, "step": 139480 }, { "epoch": 0.57, "grad_norm": 2.4568698406219482, "learning_rate": 0.0002, "loss": 1.6549, "step": 139490 }, { "epoch": 0.57, "grad_norm": 2.8066627979278564, "learning_rate": 0.0002, "loss": 1.5196, "step": 139500 }, { "epoch": 0.57, "grad_norm": 2.1560750007629395, "learning_rate": 0.0002, "loss": 1.525, "step": 139510 }, { "epoch": 0.57, "grad_norm": 3.4509072303771973, "learning_rate": 0.0002, "loss": 1.7085, "step": 139520 }, { "epoch": 0.57, "grad_norm": 2.8384413719177246, "learning_rate": 0.0002, "loss": 1.7493, "step": 139530 }, { "epoch": 0.57, "grad_norm": 2.520986318588257, "learning_rate": 0.0002, "loss": 1.4965, "step": 139540 }, { "epoch": 0.57, "grad_norm": 1.6324201822280884, "learning_rate": 0.0002, "loss": 1.4772, "step": 139550 }, { "epoch": 0.57, "grad_norm": 2.737727165222168, "learning_rate": 0.0002, "loss": 1.6821, "step": 139560 }, { "epoch": 0.57, "grad_norm": 1.8619319200515747, "learning_rate": 0.0002, "loss": 1.5714, "step": 139570 }, { "epoch": 0.57, "grad_norm": 1.7889189720153809, "learning_rate": 0.0002, "loss": 1.5047, "step": 139580 }, { "epoch": 0.57, "grad_norm": 2.629943370819092, "learning_rate": 0.0002, "loss": 1.4614, "step": 139590 }, { "epoch": 0.57, "grad_norm": 3.341149091720581, "learning_rate": 0.0002, "loss": 1.8016, "step": 139600 }, { "epoch": 0.57, "grad_norm": 3.048725128173828, "learning_rate": 0.0002, "loss": 1.6742, "step": 139610 }, { "epoch": 0.57, "grad_norm": 3.2943105697631836, "learning_rate": 0.0002, "loss": 1.5543, "step": 139620 }, { "epoch": 0.57, "grad_norm": 3.1250486373901367, "learning_rate": 0.0002, "loss": 1.6839, "step": 139630 }, { "epoch": 0.57, "grad_norm": 3.8572397232055664, "learning_rate": 0.0002, "loss": 1.3841, "step": 139640 }, { "epoch": 0.57, "grad_norm": 3.3916125297546387, "learning_rate": 0.0002, "loss": 1.5169, "step": 139650 }, { "epoch": 0.57, "grad_norm": 1.984208583831787, "learning_rate": 0.0002, "loss": 1.415, "step": 139660 }, { "epoch": 0.57, "grad_norm": 2.4830167293548584, "learning_rate": 0.0002, "loss": 1.4632, "step": 139670 }, { "epoch": 0.57, "grad_norm": 3.4615559577941895, "learning_rate": 0.0002, "loss": 1.4967, "step": 139680 }, { "epoch": 0.57, "grad_norm": 3.7891523838043213, "learning_rate": 0.0002, "loss": 1.2365, "step": 139690 }, { "epoch": 0.57, "grad_norm": 3.096796989440918, "learning_rate": 0.0002, "loss": 1.4375, "step": 139700 }, { "epoch": 0.57, "grad_norm": 2.534261465072632, "learning_rate": 0.0002, "loss": 1.6119, "step": 139710 }, { "epoch": 0.57, "grad_norm": 4.720998764038086, "learning_rate": 0.0002, "loss": 1.4777, "step": 139720 }, { "epoch": 0.57, "grad_norm": 2.4684131145477295, "learning_rate": 0.0002, "loss": 1.2605, "step": 139730 }, { "epoch": 0.57, "grad_norm": 5.593508720397949, "learning_rate": 0.0002, "loss": 1.5788, "step": 139740 }, { "epoch": 0.57, "grad_norm": 1.5080897808074951, "learning_rate": 0.0002, "loss": 1.5627, "step": 139750 }, { "epoch": 0.57, "grad_norm": 2.5287299156188965, "learning_rate": 0.0002, "loss": 2.048, "step": 139760 }, { "epoch": 0.57, "grad_norm": 2.0877881050109863, "learning_rate": 0.0002, "loss": 1.4316, "step": 139770 }, { "epoch": 0.57, "grad_norm": 4.637547969818115, "learning_rate": 0.0002, "loss": 1.4872, "step": 139780 }, { "epoch": 0.57, "grad_norm": 4.0005340576171875, "learning_rate": 0.0002, "loss": 1.6003, "step": 139790 }, { "epoch": 0.57, "grad_norm": 2.753878355026245, "learning_rate": 0.0002, "loss": 1.5052, "step": 139800 }, { "epoch": 0.57, "grad_norm": 4.051019668579102, "learning_rate": 0.0002, "loss": 1.425, "step": 139810 }, { "epoch": 0.57, "grad_norm": 3.043130874633789, "learning_rate": 0.0002, "loss": 1.5873, "step": 139820 }, { "epoch": 0.57, "grad_norm": 3.6316018104553223, "learning_rate": 0.0002, "loss": 1.6471, "step": 139830 }, { "epoch": 0.57, "grad_norm": 3.396012306213379, "learning_rate": 0.0002, "loss": 1.5695, "step": 139840 }, { "epoch": 0.57, "grad_norm": 2.8557794094085693, "learning_rate": 0.0002, "loss": 1.6889, "step": 139850 }, { "epoch": 0.57, "grad_norm": 2.4657950401306152, "learning_rate": 0.0002, "loss": 1.7354, "step": 139860 }, { "epoch": 0.57, "grad_norm": 3.779924154281616, "learning_rate": 0.0002, "loss": 1.6799, "step": 139870 }, { "epoch": 0.57, "grad_norm": 3.2794690132141113, "learning_rate": 0.0002, "loss": 1.9154, "step": 139880 }, { "epoch": 0.57, "grad_norm": 4.237423419952393, "learning_rate": 0.0002, "loss": 1.5194, "step": 139890 }, { "epoch": 0.57, "grad_norm": 3.312739372253418, "learning_rate": 0.0002, "loss": 1.5625, "step": 139900 }, { "epoch": 0.57, "grad_norm": 1.6333576440811157, "learning_rate": 0.0002, "loss": 1.2659, "step": 139910 }, { "epoch": 0.57, "grad_norm": 3.8926963806152344, "learning_rate": 0.0002, "loss": 1.4426, "step": 139920 }, { "epoch": 0.57, "grad_norm": 3.0492494106292725, "learning_rate": 0.0002, "loss": 1.6772, "step": 139930 }, { "epoch": 0.57, "grad_norm": 3.978175163269043, "learning_rate": 0.0002, "loss": 1.6002, "step": 139940 }, { "epoch": 0.57, "grad_norm": 4.006534099578857, "learning_rate": 0.0002, "loss": 1.6988, "step": 139950 }, { "epoch": 0.57, "grad_norm": 4.267167091369629, "learning_rate": 0.0002, "loss": 1.7022, "step": 139960 }, { "epoch": 0.57, "grad_norm": 5.05123233795166, "learning_rate": 0.0002, "loss": 1.682, "step": 139970 }, { "epoch": 0.57, "grad_norm": 3.0096325874328613, "learning_rate": 0.0002, "loss": 1.7167, "step": 139980 }, { "epoch": 0.57, "grad_norm": 4.132102012634277, "learning_rate": 0.0002, "loss": 1.4401, "step": 139990 }, { "epoch": 0.57, "grad_norm": 3.1489434242248535, "learning_rate": 0.0002, "loss": 1.4475, "step": 140000 }, { "epoch": 0.57, "grad_norm": 2.632918357849121, "learning_rate": 0.0002, "loss": 1.5031, "step": 140010 }, { "epoch": 0.57, "grad_norm": 4.9941229820251465, "learning_rate": 0.0002, "loss": 1.7291, "step": 140020 }, { "epoch": 0.57, "grad_norm": 3.0151870250701904, "learning_rate": 0.0002, "loss": 1.53, "step": 140030 }, { "epoch": 0.57, "grad_norm": 2.2969019412994385, "learning_rate": 0.0002, "loss": 1.6098, "step": 140040 }, { "epoch": 0.57, "grad_norm": 3.095771312713623, "learning_rate": 0.0002, "loss": 1.5627, "step": 140050 }, { "epoch": 0.57, "grad_norm": 2.9386496543884277, "learning_rate": 0.0002, "loss": 1.6954, "step": 140060 }, { "epoch": 0.57, "grad_norm": 3.210348129272461, "learning_rate": 0.0002, "loss": 1.7661, "step": 140070 }, { "epoch": 0.57, "grad_norm": 4.135376930236816, "learning_rate": 0.0002, "loss": 1.8477, "step": 140080 }, { "epoch": 0.57, "grad_norm": 3.6185104846954346, "learning_rate": 0.0002, "loss": 1.4637, "step": 140090 }, { "epoch": 0.57, "grad_norm": 3.3456063270568848, "learning_rate": 0.0002, "loss": 1.4794, "step": 140100 }, { "epoch": 0.57, "grad_norm": 5.525566101074219, "learning_rate": 0.0002, "loss": 1.9281, "step": 140110 }, { "epoch": 0.57, "grad_norm": 2.9298274517059326, "learning_rate": 0.0002, "loss": 1.4896, "step": 140120 }, { "epoch": 0.57, "grad_norm": 2.3349907398223877, "learning_rate": 0.0002, "loss": 1.6328, "step": 140130 }, { "epoch": 0.57, "grad_norm": 2.4572596549987793, "learning_rate": 0.0002, "loss": 1.5566, "step": 140140 }, { "epoch": 0.57, "grad_norm": 2.6220669746398926, "learning_rate": 0.0002, "loss": 1.6333, "step": 140150 }, { "epoch": 0.57, "grad_norm": 3.7275781631469727, "learning_rate": 0.0002, "loss": 1.4686, "step": 140160 }, { "epoch": 0.57, "grad_norm": 1.6875160932540894, "learning_rate": 0.0002, "loss": 1.7127, "step": 140170 }, { "epoch": 0.57, "grad_norm": 3.4457151889801025, "learning_rate": 0.0002, "loss": 1.5058, "step": 140180 }, { "epoch": 0.57, "grad_norm": 3.0337183475494385, "learning_rate": 0.0002, "loss": 1.4797, "step": 140190 }, { "epoch": 0.57, "grad_norm": 3.1474363803863525, "learning_rate": 0.0002, "loss": 1.5845, "step": 140200 }, { "epoch": 0.57, "grad_norm": 3.2478439807891846, "learning_rate": 0.0002, "loss": 1.6941, "step": 140210 }, { "epoch": 0.57, "grad_norm": 1.5011534690856934, "learning_rate": 0.0002, "loss": 1.4939, "step": 140220 }, { "epoch": 0.57, "grad_norm": 2.285877227783203, "learning_rate": 0.0002, "loss": 1.8069, "step": 140230 }, { "epoch": 0.57, "grad_norm": 2.825035572052002, "learning_rate": 0.0002, "loss": 1.5122, "step": 140240 }, { "epoch": 0.57, "grad_norm": 3.7534608840942383, "learning_rate": 0.0002, "loss": 1.5369, "step": 140250 }, { "epoch": 0.57, "grad_norm": 4.372366428375244, "learning_rate": 0.0002, "loss": 1.5602, "step": 140260 }, { "epoch": 0.57, "grad_norm": 1.9935660362243652, "learning_rate": 0.0002, "loss": 1.4876, "step": 140270 }, { "epoch": 0.57, "grad_norm": 3.9394054412841797, "learning_rate": 0.0002, "loss": 1.5861, "step": 140280 }, { "epoch": 0.57, "grad_norm": 2.3710057735443115, "learning_rate": 0.0002, "loss": 1.5575, "step": 140290 }, { "epoch": 0.57, "grad_norm": 3.5089938640594482, "learning_rate": 0.0002, "loss": 1.7149, "step": 140300 }, { "epoch": 0.57, "grad_norm": 2.4479591846466064, "learning_rate": 0.0002, "loss": 1.4587, "step": 140310 }, { "epoch": 0.57, "grad_norm": 4.619992733001709, "learning_rate": 0.0002, "loss": 1.7148, "step": 140320 }, { "epoch": 0.57, "grad_norm": 2.8190715312957764, "learning_rate": 0.0002, "loss": 1.6865, "step": 140330 }, { "epoch": 0.57, "grad_norm": 2.950946807861328, "learning_rate": 0.0002, "loss": 1.5134, "step": 140340 }, { "epoch": 0.57, "grad_norm": 2.7356808185577393, "learning_rate": 0.0002, "loss": 1.5265, "step": 140350 }, { "epoch": 0.57, "grad_norm": 5.506762981414795, "learning_rate": 0.0002, "loss": 1.739, "step": 140360 }, { "epoch": 0.57, "grad_norm": 3.7402806282043457, "learning_rate": 0.0002, "loss": 1.3739, "step": 140370 }, { "epoch": 0.57, "grad_norm": 3.0095250606536865, "learning_rate": 0.0002, "loss": 1.3934, "step": 140380 }, { "epoch": 0.57, "grad_norm": 6.151127338409424, "learning_rate": 0.0002, "loss": 1.8597, "step": 140390 }, { "epoch": 0.57, "grad_norm": 2.377588987350464, "learning_rate": 0.0002, "loss": 1.6309, "step": 140400 }, { "epoch": 0.57, "grad_norm": 4.452584266662598, "learning_rate": 0.0002, "loss": 1.3215, "step": 140410 }, { "epoch": 0.57, "grad_norm": 2.2170562744140625, "learning_rate": 0.0002, "loss": 1.6832, "step": 140420 }, { "epoch": 0.57, "grad_norm": 3.387582778930664, "learning_rate": 0.0002, "loss": 1.5184, "step": 140430 }, { "epoch": 0.57, "grad_norm": 3.1608569622039795, "learning_rate": 0.0002, "loss": 1.7525, "step": 140440 }, { "epoch": 0.57, "grad_norm": 3.5107622146606445, "learning_rate": 0.0002, "loss": 1.3644, "step": 140450 }, { "epoch": 0.57, "grad_norm": 3.8717503547668457, "learning_rate": 0.0002, "loss": 1.8537, "step": 140460 }, { "epoch": 0.57, "grad_norm": 3.829118490219116, "learning_rate": 0.0002, "loss": 1.5673, "step": 140470 }, { "epoch": 0.57, "grad_norm": 2.1562445163726807, "learning_rate": 0.0002, "loss": 1.3497, "step": 140480 }, { "epoch": 0.57, "grad_norm": 3.5410633087158203, "learning_rate": 0.0002, "loss": 1.5826, "step": 140490 }, { "epoch": 0.57, "grad_norm": 3.643791675567627, "learning_rate": 0.0002, "loss": 1.3428, "step": 140500 }, { "epoch": 0.57, "grad_norm": 2.65110182762146, "learning_rate": 0.0002, "loss": 1.5765, "step": 140510 }, { "epoch": 0.57, "grad_norm": 3.8643486499786377, "learning_rate": 0.0002, "loss": 1.3459, "step": 140520 }, { "epoch": 0.57, "grad_norm": 2.572633743286133, "learning_rate": 0.0002, "loss": 1.3505, "step": 140530 }, { "epoch": 0.57, "grad_norm": 3.353057622909546, "learning_rate": 0.0002, "loss": 1.9085, "step": 140540 }, { "epoch": 0.57, "grad_norm": 2.5897939205169678, "learning_rate": 0.0002, "loss": 1.4449, "step": 140550 }, { "epoch": 0.57, "grad_norm": 2.649487018585205, "learning_rate": 0.0002, "loss": 1.5698, "step": 140560 }, { "epoch": 0.57, "grad_norm": 2.673335075378418, "learning_rate": 0.0002, "loss": 1.7509, "step": 140570 }, { "epoch": 0.57, "grad_norm": 1.6079968214035034, "learning_rate": 0.0002, "loss": 1.6163, "step": 140580 }, { "epoch": 0.57, "grad_norm": 3.4504122734069824, "learning_rate": 0.0002, "loss": 1.7807, "step": 140590 }, { "epoch": 0.57, "grad_norm": 3.9043900966644287, "learning_rate": 0.0002, "loss": 1.8242, "step": 140600 }, { "epoch": 0.57, "grad_norm": 2.143385171890259, "learning_rate": 0.0002, "loss": 1.8239, "step": 140610 }, { "epoch": 0.57, "grad_norm": 2.7344489097595215, "learning_rate": 0.0002, "loss": 1.6618, "step": 140620 }, { "epoch": 0.57, "grad_norm": 2.593059539794922, "learning_rate": 0.0002, "loss": 1.5663, "step": 140630 }, { "epoch": 0.57, "grad_norm": 2.929033041000366, "learning_rate": 0.0002, "loss": 1.7618, "step": 140640 }, { "epoch": 0.57, "grad_norm": 4.0003228187561035, "learning_rate": 0.0002, "loss": 1.5126, "step": 140650 }, { "epoch": 0.57, "grad_norm": 3.065298318862915, "learning_rate": 0.0002, "loss": 1.6286, "step": 140660 }, { "epoch": 0.57, "grad_norm": 2.095655918121338, "learning_rate": 0.0002, "loss": 1.479, "step": 140670 }, { "epoch": 0.57, "grad_norm": 2.0782203674316406, "learning_rate": 0.0002, "loss": 1.5078, "step": 140680 }, { "epoch": 0.57, "grad_norm": 2.6464710235595703, "learning_rate": 0.0002, "loss": 1.5032, "step": 140690 }, { "epoch": 0.57, "grad_norm": 2.3832738399505615, "learning_rate": 0.0002, "loss": 1.8359, "step": 140700 }, { "epoch": 0.57, "grad_norm": 4.928684234619141, "learning_rate": 0.0002, "loss": 1.7948, "step": 140710 }, { "epoch": 0.57, "grad_norm": 2.7072439193725586, "learning_rate": 0.0002, "loss": 1.3624, "step": 140720 }, { "epoch": 0.57, "grad_norm": 1.9016873836517334, "learning_rate": 0.0002, "loss": 1.4395, "step": 140730 }, { "epoch": 0.57, "grad_norm": 3.3115170001983643, "learning_rate": 0.0002, "loss": 1.7125, "step": 140740 }, { "epoch": 0.57, "grad_norm": 3.38554310798645, "learning_rate": 0.0002, "loss": 1.7805, "step": 140750 }, { "epoch": 0.57, "grad_norm": 2.387336492538452, "learning_rate": 0.0002, "loss": 1.6462, "step": 140760 }, { "epoch": 0.57, "grad_norm": 1.7481378316879272, "learning_rate": 0.0002, "loss": 1.4675, "step": 140770 }, { "epoch": 0.57, "grad_norm": 4.516280651092529, "learning_rate": 0.0002, "loss": 1.6676, "step": 140780 }, { "epoch": 0.57, "grad_norm": 4.4433722496032715, "learning_rate": 0.0002, "loss": 1.5786, "step": 140790 }, { "epoch": 0.57, "grad_norm": 4.201609134674072, "learning_rate": 0.0002, "loss": 1.8012, "step": 140800 }, { "epoch": 0.57, "grad_norm": 2.1270501613616943, "learning_rate": 0.0002, "loss": 1.6167, "step": 140810 }, { "epoch": 0.57, "grad_norm": 2.7318105697631836, "learning_rate": 0.0002, "loss": 1.4755, "step": 140820 }, { "epoch": 0.57, "grad_norm": 6.263514041900635, "learning_rate": 0.0002, "loss": 1.6905, "step": 140830 }, { "epoch": 0.57, "grad_norm": 4.658751964569092, "learning_rate": 0.0002, "loss": 1.7942, "step": 140840 }, { "epoch": 0.57, "grad_norm": 2.7918028831481934, "learning_rate": 0.0002, "loss": 1.4329, "step": 140850 }, { "epoch": 0.57, "grad_norm": 2.3200957775115967, "learning_rate": 0.0002, "loss": 1.6996, "step": 140860 }, { "epoch": 0.57, "grad_norm": 3.993065118789673, "learning_rate": 0.0002, "loss": 1.5829, "step": 140870 }, { "epoch": 0.57, "grad_norm": 3.9435715675354004, "learning_rate": 0.0002, "loss": 1.5965, "step": 140880 }, { "epoch": 0.57, "grad_norm": 2.2138965129852295, "learning_rate": 0.0002, "loss": 1.5768, "step": 140890 }, { "epoch": 0.57, "grad_norm": 2.024533748626709, "learning_rate": 0.0002, "loss": 1.5809, "step": 140900 }, { "epoch": 0.57, "grad_norm": 3.0105857849121094, "learning_rate": 0.0002, "loss": 1.5184, "step": 140910 }, { "epoch": 0.57, "grad_norm": 3.7798659801483154, "learning_rate": 0.0002, "loss": 1.4606, "step": 140920 }, { "epoch": 0.57, "grad_norm": 6.261932849884033, "learning_rate": 0.0002, "loss": 1.3125, "step": 140930 }, { "epoch": 0.57, "grad_norm": 2.5229153633117676, "learning_rate": 0.0002, "loss": 1.4283, "step": 140940 }, { "epoch": 0.57, "grad_norm": 3.7091381549835205, "learning_rate": 0.0002, "loss": 1.7193, "step": 140950 }, { "epoch": 0.57, "grad_norm": 2.0763895511627197, "learning_rate": 0.0002, "loss": 1.6784, "step": 140960 }, { "epoch": 0.57, "grad_norm": 2.0505361557006836, "learning_rate": 0.0002, "loss": 1.6331, "step": 140970 }, { "epoch": 0.57, "grad_norm": 4.577404022216797, "learning_rate": 0.0002, "loss": 1.6486, "step": 140980 }, { "epoch": 0.57, "grad_norm": 2.2828032970428467, "learning_rate": 0.0002, "loss": 1.56, "step": 140990 }, { "epoch": 0.57, "grad_norm": 2.708909273147583, "learning_rate": 0.0002, "loss": 1.7649, "step": 141000 }, { "epoch": 0.57, "grad_norm": 3.2846405506134033, "learning_rate": 0.0002, "loss": 1.5926, "step": 141010 }, { "epoch": 0.57, "grad_norm": 8.080002784729004, "learning_rate": 0.0002, "loss": 1.7838, "step": 141020 }, { "epoch": 0.57, "grad_norm": 2.667479991912842, "learning_rate": 0.0002, "loss": 1.5704, "step": 141030 }, { "epoch": 0.57, "grad_norm": 2.7973451614379883, "learning_rate": 0.0002, "loss": 1.585, "step": 141040 }, { "epoch": 0.57, "grad_norm": 2.8129141330718994, "learning_rate": 0.0002, "loss": 1.4512, "step": 141050 }, { "epoch": 0.57, "grad_norm": 2.488234281539917, "learning_rate": 0.0002, "loss": 1.4152, "step": 141060 }, { "epoch": 0.57, "grad_norm": 3.5464675426483154, "learning_rate": 0.0002, "loss": 1.4126, "step": 141070 }, { "epoch": 0.57, "grad_norm": 2.8269429206848145, "learning_rate": 0.0002, "loss": 1.7529, "step": 141080 }, { "epoch": 0.57, "grad_norm": 1.5051604509353638, "learning_rate": 0.0002, "loss": 1.5703, "step": 141090 }, { "epoch": 0.57, "grad_norm": 2.1909477710723877, "learning_rate": 0.0002, "loss": 1.5166, "step": 141100 }, { "epoch": 0.57, "grad_norm": 3.1389856338500977, "learning_rate": 0.0002, "loss": 1.4891, "step": 141110 }, { "epoch": 0.57, "grad_norm": 4.246254920959473, "learning_rate": 0.0002, "loss": 1.5768, "step": 141120 }, { "epoch": 0.57, "grad_norm": 2.9260482788085938, "learning_rate": 0.0002, "loss": 1.5714, "step": 141130 }, { "epoch": 0.57, "grad_norm": 3.6769323348999023, "learning_rate": 0.0002, "loss": 1.6751, "step": 141140 }, { "epoch": 0.57, "grad_norm": 4.221198558807373, "learning_rate": 0.0002, "loss": 1.3009, "step": 141150 }, { "epoch": 0.57, "grad_norm": 2.3032920360565186, "learning_rate": 0.0002, "loss": 1.6308, "step": 141160 }, { "epoch": 0.57, "grad_norm": 3.220702886581421, "learning_rate": 0.0002, "loss": 1.6685, "step": 141170 }, { "epoch": 0.57, "grad_norm": 3.816387414932251, "learning_rate": 0.0002, "loss": 1.6884, "step": 141180 }, { "epoch": 0.57, "grad_norm": 1.5950871706008911, "learning_rate": 0.0002, "loss": 1.542, "step": 141190 }, { "epoch": 0.57, "grad_norm": 2.494966983795166, "learning_rate": 0.0002, "loss": 1.4547, "step": 141200 }, { "epoch": 0.57, "grad_norm": 2.2292380332946777, "learning_rate": 0.0002, "loss": 1.6609, "step": 141210 }, { "epoch": 0.57, "grad_norm": 2.8878819942474365, "learning_rate": 0.0002, "loss": 1.5399, "step": 141220 }, { "epoch": 0.57, "grad_norm": 3.838923692703247, "learning_rate": 0.0002, "loss": 1.4151, "step": 141230 }, { "epoch": 0.57, "grad_norm": 2.568315267562866, "learning_rate": 0.0002, "loss": 1.6796, "step": 141240 }, { "epoch": 0.58, "grad_norm": 3.033358097076416, "learning_rate": 0.0002, "loss": 1.5023, "step": 141250 }, { "epoch": 0.58, "grad_norm": 2.4023468494415283, "learning_rate": 0.0002, "loss": 1.9109, "step": 141260 }, { "epoch": 0.58, "grad_norm": 3.735752582550049, "learning_rate": 0.0002, "loss": 1.5556, "step": 141270 }, { "epoch": 0.58, "grad_norm": 4.116325855255127, "learning_rate": 0.0002, "loss": 1.6763, "step": 141280 }, { "epoch": 0.58, "grad_norm": 3.0791003704071045, "learning_rate": 0.0002, "loss": 1.4799, "step": 141290 }, { "epoch": 0.58, "grad_norm": 4.81809663772583, "learning_rate": 0.0002, "loss": 1.349, "step": 141300 }, { "epoch": 0.58, "grad_norm": 2.052163600921631, "learning_rate": 0.0002, "loss": 1.434, "step": 141310 }, { "epoch": 0.58, "grad_norm": 3.250016212463379, "learning_rate": 0.0002, "loss": 1.5021, "step": 141320 }, { "epoch": 0.58, "grad_norm": 2.7029738426208496, "learning_rate": 0.0002, "loss": 1.3706, "step": 141330 }, { "epoch": 0.58, "grad_norm": 2.1414618492126465, "learning_rate": 0.0002, "loss": 1.7928, "step": 141340 }, { "epoch": 0.58, "grad_norm": 3.4672136306762695, "learning_rate": 0.0002, "loss": 1.864, "step": 141350 }, { "epoch": 0.58, "grad_norm": 2.6699037551879883, "learning_rate": 0.0002, "loss": 1.4663, "step": 141360 }, { "epoch": 0.58, "grad_norm": 3.2502384185791016, "learning_rate": 0.0002, "loss": 1.6302, "step": 141370 }, { "epoch": 0.58, "grad_norm": 2.0265538692474365, "learning_rate": 0.0002, "loss": 1.6488, "step": 141380 }, { "epoch": 0.58, "grad_norm": 3.0398082733154297, "learning_rate": 0.0002, "loss": 1.6369, "step": 141390 }, { "epoch": 0.58, "grad_norm": 1.607927680015564, "learning_rate": 0.0002, "loss": 1.6114, "step": 141400 }, { "epoch": 0.58, "grad_norm": 2.776710033416748, "learning_rate": 0.0002, "loss": 1.5075, "step": 141410 }, { "epoch": 0.58, "grad_norm": 4.654109001159668, "learning_rate": 0.0002, "loss": 1.5035, "step": 141420 }, { "epoch": 0.58, "grad_norm": 3.840425729751587, "learning_rate": 0.0002, "loss": 1.7492, "step": 141430 }, { "epoch": 0.58, "grad_norm": 2.9730422496795654, "learning_rate": 0.0002, "loss": 1.7046, "step": 141440 }, { "epoch": 0.58, "grad_norm": 4.435123920440674, "learning_rate": 0.0002, "loss": 1.6391, "step": 141450 }, { "epoch": 0.58, "grad_norm": 2.4983925819396973, "learning_rate": 0.0002, "loss": 1.4568, "step": 141460 }, { "epoch": 0.58, "grad_norm": 3.269289493560791, "learning_rate": 0.0002, "loss": 1.6067, "step": 141470 }, { "epoch": 0.58, "grad_norm": 3.4380059242248535, "learning_rate": 0.0002, "loss": 1.595, "step": 141480 }, { "epoch": 0.58, "grad_norm": 2.658263683319092, "learning_rate": 0.0002, "loss": 1.5525, "step": 141490 }, { "epoch": 0.58, "grad_norm": 3.0753815174102783, "learning_rate": 0.0002, "loss": 1.4689, "step": 141500 }, { "epoch": 0.58, "grad_norm": 2.0350592136383057, "learning_rate": 0.0002, "loss": 1.3588, "step": 141510 }, { "epoch": 0.58, "grad_norm": 3.327505350112915, "learning_rate": 0.0002, "loss": 1.4386, "step": 141520 }, { "epoch": 0.58, "grad_norm": 2.537167549133301, "learning_rate": 0.0002, "loss": 1.5862, "step": 141530 }, { "epoch": 0.58, "grad_norm": 2.04443097114563, "learning_rate": 0.0002, "loss": 1.6177, "step": 141540 }, { "epoch": 0.58, "grad_norm": 3.9253177642822266, "learning_rate": 0.0002, "loss": 1.5124, "step": 141550 }, { "epoch": 0.58, "grad_norm": 4.458237171173096, "learning_rate": 0.0002, "loss": 1.5582, "step": 141560 }, { "epoch": 0.58, "grad_norm": 4.706989288330078, "learning_rate": 0.0002, "loss": 1.5409, "step": 141570 }, { "epoch": 0.58, "grad_norm": 3.4262688159942627, "learning_rate": 0.0002, "loss": 1.5091, "step": 141580 }, { "epoch": 0.58, "grad_norm": 5.0802836418151855, "learning_rate": 0.0002, "loss": 1.3742, "step": 141590 }, { "epoch": 0.58, "grad_norm": 3.5642662048339844, "learning_rate": 0.0002, "loss": 1.6513, "step": 141600 }, { "epoch": 0.58, "grad_norm": 2.7942049503326416, "learning_rate": 0.0002, "loss": 1.7434, "step": 141610 }, { "epoch": 0.58, "grad_norm": 2.0036873817443848, "learning_rate": 0.0002, "loss": 1.7443, "step": 141620 }, { "epoch": 0.58, "grad_norm": 3.4718103408813477, "learning_rate": 0.0002, "loss": 1.493, "step": 141630 }, { "epoch": 0.58, "grad_norm": 3.8105685710906982, "learning_rate": 0.0002, "loss": 1.6218, "step": 141640 }, { "epoch": 0.58, "grad_norm": 2.91166090965271, "learning_rate": 0.0002, "loss": 1.4517, "step": 141650 }, { "epoch": 0.58, "grad_norm": 1.4030412435531616, "learning_rate": 0.0002, "loss": 1.4544, "step": 141660 }, { "epoch": 0.58, "grad_norm": 1.8938970565795898, "learning_rate": 0.0002, "loss": 1.9201, "step": 141670 }, { "epoch": 0.58, "grad_norm": 2.782327890396118, "learning_rate": 0.0002, "loss": 1.6022, "step": 141680 }, { "epoch": 0.58, "grad_norm": 4.18053674697876, "learning_rate": 0.0002, "loss": 1.6242, "step": 141690 }, { "epoch": 0.58, "grad_norm": 2.5277929306030273, "learning_rate": 0.0002, "loss": 1.7805, "step": 141700 }, { "epoch": 0.58, "grad_norm": 4.599117755889893, "learning_rate": 0.0002, "loss": 1.5838, "step": 141710 }, { "epoch": 0.58, "grad_norm": 3.3341422080993652, "learning_rate": 0.0002, "loss": 1.5018, "step": 141720 }, { "epoch": 0.58, "grad_norm": 2.169975996017456, "learning_rate": 0.0002, "loss": 1.7066, "step": 141730 }, { "epoch": 0.58, "grad_norm": 3.289247751235962, "learning_rate": 0.0002, "loss": 1.5561, "step": 141740 }, { "epoch": 0.58, "grad_norm": 3.7944087982177734, "learning_rate": 0.0002, "loss": 1.4359, "step": 141750 }, { "epoch": 0.58, "grad_norm": 3.2860515117645264, "learning_rate": 0.0002, "loss": 1.684, "step": 141760 }, { "epoch": 0.58, "grad_norm": 1.8023768663406372, "learning_rate": 0.0002, "loss": 1.5718, "step": 141770 }, { "epoch": 0.58, "grad_norm": 4.127261638641357, "learning_rate": 0.0002, "loss": 1.5101, "step": 141780 }, { "epoch": 0.58, "grad_norm": 2.0740644931793213, "learning_rate": 0.0002, "loss": 1.5011, "step": 141790 }, { "epoch": 0.58, "grad_norm": 2.243868350982666, "learning_rate": 0.0002, "loss": 1.5611, "step": 141800 }, { "epoch": 0.58, "grad_norm": 1.9725067615509033, "learning_rate": 0.0002, "loss": 1.6222, "step": 141810 }, { "epoch": 0.58, "grad_norm": 2.655233144760132, "learning_rate": 0.0002, "loss": 1.4415, "step": 141820 }, { "epoch": 0.58, "grad_norm": 2.050859212875366, "learning_rate": 0.0002, "loss": 1.5313, "step": 141830 }, { "epoch": 0.58, "grad_norm": 21.50899887084961, "learning_rate": 0.0002, "loss": 1.609, "step": 141840 }, { "epoch": 0.58, "grad_norm": 2.8394737243652344, "learning_rate": 0.0002, "loss": 1.5831, "step": 141850 }, { "epoch": 0.58, "grad_norm": 4.300594806671143, "learning_rate": 0.0002, "loss": 1.5571, "step": 141860 }, { "epoch": 0.58, "grad_norm": 3.8762881755828857, "learning_rate": 0.0002, "loss": 1.613, "step": 141870 }, { "epoch": 0.58, "grad_norm": 2.1986286640167236, "learning_rate": 0.0002, "loss": 1.5327, "step": 141880 }, { "epoch": 0.58, "grad_norm": 3.9522504806518555, "learning_rate": 0.0002, "loss": 1.433, "step": 141890 }, { "epoch": 0.58, "grad_norm": 2.515878438949585, "learning_rate": 0.0002, "loss": 1.5832, "step": 141900 }, { "epoch": 0.58, "grad_norm": 2.8676536083221436, "learning_rate": 0.0002, "loss": 1.6389, "step": 141910 }, { "epoch": 0.58, "grad_norm": 3.2528884410858154, "learning_rate": 0.0002, "loss": 1.5232, "step": 141920 }, { "epoch": 0.58, "grad_norm": 3.400700807571411, "learning_rate": 0.0002, "loss": 1.6549, "step": 141930 }, { "epoch": 0.58, "grad_norm": 2.761439323425293, "learning_rate": 0.0002, "loss": 1.5481, "step": 141940 }, { "epoch": 0.58, "grad_norm": 4.477262020111084, "learning_rate": 0.0002, "loss": 1.8074, "step": 141950 }, { "epoch": 0.58, "grad_norm": 4.003121852874756, "learning_rate": 0.0002, "loss": 1.3677, "step": 141960 }, { "epoch": 0.58, "grad_norm": 2.602177858352661, "learning_rate": 0.0002, "loss": 1.623, "step": 141970 }, { "epoch": 0.58, "grad_norm": 2.9385766983032227, "learning_rate": 0.0002, "loss": 1.5726, "step": 141980 }, { "epoch": 0.58, "grad_norm": 3.983858585357666, "learning_rate": 0.0002, "loss": 1.6113, "step": 141990 }, { "epoch": 0.58, "grad_norm": 3.3476030826568604, "learning_rate": 0.0002, "loss": 1.5566, "step": 142000 }, { "epoch": 0.58, "grad_norm": 3.0571413040161133, "learning_rate": 0.0002, "loss": 1.315, "step": 142010 }, { "epoch": 0.58, "grad_norm": 2.9524309635162354, "learning_rate": 0.0002, "loss": 1.8891, "step": 142020 }, { "epoch": 0.58, "grad_norm": 2.5995402336120605, "learning_rate": 0.0002, "loss": 1.4137, "step": 142030 }, { "epoch": 0.58, "grad_norm": 2.905627489089966, "learning_rate": 0.0002, "loss": 1.5104, "step": 142040 }, { "epoch": 0.58, "grad_norm": 2.5206992626190186, "learning_rate": 0.0002, "loss": 1.6511, "step": 142050 }, { "epoch": 0.58, "grad_norm": 2.3643107414245605, "learning_rate": 0.0002, "loss": 1.9797, "step": 142060 }, { "epoch": 0.58, "grad_norm": 2.696366548538208, "learning_rate": 0.0002, "loss": 1.7627, "step": 142070 }, { "epoch": 0.58, "grad_norm": 2.1395134925842285, "learning_rate": 0.0002, "loss": 1.8225, "step": 142080 }, { "epoch": 0.58, "grad_norm": 2.820434808731079, "learning_rate": 0.0002, "loss": 1.7217, "step": 142090 }, { "epoch": 0.58, "grad_norm": 3.9733126163482666, "learning_rate": 0.0002, "loss": 1.5767, "step": 142100 }, { "epoch": 0.58, "grad_norm": 5.615174770355225, "learning_rate": 0.0002, "loss": 1.4781, "step": 142110 }, { "epoch": 0.58, "grad_norm": 3.8244900703430176, "learning_rate": 0.0002, "loss": 1.7156, "step": 142120 }, { "epoch": 0.58, "grad_norm": 2.090205669403076, "learning_rate": 0.0002, "loss": 1.4948, "step": 142130 }, { "epoch": 0.58, "grad_norm": 2.235842227935791, "learning_rate": 0.0002, "loss": 1.751, "step": 142140 }, { "epoch": 0.58, "grad_norm": 3.7971203327178955, "learning_rate": 0.0002, "loss": 1.4967, "step": 142150 }, { "epoch": 0.58, "grad_norm": 2.0839779376983643, "learning_rate": 0.0002, "loss": 1.262, "step": 142160 }, { "epoch": 0.58, "grad_norm": 3.914044141769409, "learning_rate": 0.0002, "loss": 1.4844, "step": 142170 }, { "epoch": 0.58, "grad_norm": 2.729334592819214, "learning_rate": 0.0002, "loss": 1.6307, "step": 142180 }, { "epoch": 0.58, "grad_norm": 3.3596034049987793, "learning_rate": 0.0002, "loss": 1.3328, "step": 142190 }, { "epoch": 0.58, "grad_norm": 1.6891674995422363, "learning_rate": 0.0002, "loss": 1.3691, "step": 142200 }, { "epoch": 0.58, "grad_norm": 3.0084314346313477, "learning_rate": 0.0002, "loss": 1.3759, "step": 142210 }, { "epoch": 0.58, "grad_norm": 2.6521873474121094, "learning_rate": 0.0002, "loss": 1.6783, "step": 142220 }, { "epoch": 0.58, "grad_norm": 2.340575933456421, "learning_rate": 0.0002, "loss": 1.4965, "step": 142230 }, { "epoch": 0.58, "grad_norm": 3.152198553085327, "learning_rate": 0.0002, "loss": 1.5381, "step": 142240 }, { "epoch": 0.58, "grad_norm": 3.821547031402588, "learning_rate": 0.0002, "loss": 1.2805, "step": 142250 }, { "epoch": 0.58, "grad_norm": 2.069657564163208, "learning_rate": 0.0002, "loss": 1.4126, "step": 142260 }, { "epoch": 0.58, "grad_norm": 2.653170585632324, "learning_rate": 0.0002, "loss": 1.6898, "step": 142270 }, { "epoch": 0.58, "grad_norm": 1.8474538326263428, "learning_rate": 0.0002, "loss": 1.6981, "step": 142280 }, { "epoch": 0.58, "grad_norm": 1.4386769533157349, "learning_rate": 0.0002, "loss": 1.5184, "step": 142290 }, { "epoch": 0.58, "grad_norm": 3.840451240539551, "learning_rate": 0.0002, "loss": 1.5134, "step": 142300 }, { "epoch": 0.58, "grad_norm": 2.675757884979248, "learning_rate": 0.0002, "loss": 1.6476, "step": 142310 }, { "epoch": 0.58, "grad_norm": 2.752563714981079, "learning_rate": 0.0002, "loss": 1.6057, "step": 142320 }, { "epoch": 0.58, "grad_norm": 1.9002419710159302, "learning_rate": 0.0002, "loss": 1.7158, "step": 142330 }, { "epoch": 0.58, "grad_norm": 2.357790231704712, "learning_rate": 0.0002, "loss": 1.7687, "step": 142340 }, { "epoch": 0.58, "grad_norm": 2.8719570636749268, "learning_rate": 0.0002, "loss": 1.6336, "step": 142350 }, { "epoch": 0.58, "grad_norm": 4.199347019195557, "learning_rate": 0.0002, "loss": 1.7037, "step": 142360 }, { "epoch": 0.58, "grad_norm": 1.5946388244628906, "learning_rate": 0.0002, "loss": 1.6368, "step": 142370 }, { "epoch": 0.58, "grad_norm": 3.0337321758270264, "learning_rate": 0.0002, "loss": 1.6168, "step": 142380 }, { "epoch": 0.58, "grad_norm": 2.415839672088623, "learning_rate": 0.0002, "loss": 1.5481, "step": 142390 }, { "epoch": 0.58, "grad_norm": 2.976059675216675, "learning_rate": 0.0002, "loss": 1.5152, "step": 142400 }, { "epoch": 0.58, "grad_norm": 2.4388694763183594, "learning_rate": 0.0002, "loss": 1.6651, "step": 142410 }, { "epoch": 0.58, "grad_norm": 3.6856577396392822, "learning_rate": 0.0002, "loss": 1.559, "step": 142420 }, { "epoch": 0.58, "grad_norm": 3.188873052597046, "learning_rate": 0.0002, "loss": 1.8762, "step": 142430 }, { "epoch": 0.58, "grad_norm": 3.6975560188293457, "learning_rate": 0.0002, "loss": 1.7216, "step": 142440 }, { "epoch": 0.58, "grad_norm": 3.115818977355957, "learning_rate": 0.0002, "loss": 1.6902, "step": 142450 }, { "epoch": 0.58, "grad_norm": 3.279632091522217, "learning_rate": 0.0002, "loss": 1.5831, "step": 142460 }, { "epoch": 0.58, "grad_norm": 2.7613558769226074, "learning_rate": 0.0002, "loss": 1.5052, "step": 142470 }, { "epoch": 0.58, "grad_norm": 3.319511651992798, "learning_rate": 0.0002, "loss": 1.7948, "step": 142480 }, { "epoch": 0.58, "grad_norm": 4.151919841766357, "learning_rate": 0.0002, "loss": 1.6776, "step": 142490 }, { "epoch": 0.58, "grad_norm": 3.192800521850586, "learning_rate": 0.0002, "loss": 1.5898, "step": 142500 }, { "epoch": 0.58, "grad_norm": 2.481383800506592, "learning_rate": 0.0002, "loss": 1.6836, "step": 142510 }, { "epoch": 0.58, "grad_norm": 3.5243942737579346, "learning_rate": 0.0002, "loss": 1.7871, "step": 142520 }, { "epoch": 0.58, "grad_norm": 2.417248487472534, "learning_rate": 0.0002, "loss": 1.4777, "step": 142530 }, { "epoch": 0.58, "grad_norm": 2.36443829536438, "learning_rate": 0.0002, "loss": 1.532, "step": 142540 }, { "epoch": 0.58, "grad_norm": 2.7484636306762695, "learning_rate": 0.0002, "loss": 1.4562, "step": 142550 }, { "epoch": 0.58, "grad_norm": 3.266099214553833, "learning_rate": 0.0002, "loss": 1.8285, "step": 142560 }, { "epoch": 0.58, "grad_norm": 3.122119665145874, "learning_rate": 0.0002, "loss": 1.626, "step": 142570 }, { "epoch": 0.58, "grad_norm": 2.469198703765869, "learning_rate": 0.0002, "loss": 1.5837, "step": 142580 }, { "epoch": 0.58, "grad_norm": 2.9393506050109863, "learning_rate": 0.0002, "loss": 1.5212, "step": 142590 }, { "epoch": 0.58, "grad_norm": 3.0580830574035645, "learning_rate": 0.0002, "loss": 1.5189, "step": 142600 }, { "epoch": 0.58, "grad_norm": 4.15604829788208, "learning_rate": 0.0002, "loss": 1.5134, "step": 142610 }, { "epoch": 0.58, "grad_norm": 2.9275732040405273, "learning_rate": 0.0002, "loss": 1.3546, "step": 142620 }, { "epoch": 0.58, "grad_norm": 4.070162773132324, "learning_rate": 0.0002, "loss": 1.4292, "step": 142630 }, { "epoch": 0.58, "grad_norm": 3.76296067237854, "learning_rate": 0.0002, "loss": 1.7095, "step": 142640 }, { "epoch": 0.58, "grad_norm": 3.308225393295288, "learning_rate": 0.0002, "loss": 1.5242, "step": 142650 }, { "epoch": 0.58, "grad_norm": 3.40788197517395, "learning_rate": 0.0002, "loss": 1.4626, "step": 142660 }, { "epoch": 0.58, "grad_norm": 2.482124090194702, "learning_rate": 0.0002, "loss": 1.5009, "step": 142670 }, { "epoch": 0.58, "grad_norm": 2.8421664237976074, "learning_rate": 0.0002, "loss": 1.5524, "step": 142680 }, { "epoch": 0.58, "grad_norm": 3.279782295227051, "learning_rate": 0.0002, "loss": 1.5343, "step": 142690 }, { "epoch": 0.58, "grad_norm": 3.9962801933288574, "learning_rate": 0.0002, "loss": 1.7143, "step": 142700 }, { "epoch": 0.58, "grad_norm": 2.4828662872314453, "learning_rate": 0.0002, "loss": 1.5953, "step": 142710 }, { "epoch": 0.58, "grad_norm": 3.1878883838653564, "learning_rate": 0.0002, "loss": 1.7013, "step": 142720 }, { "epoch": 0.58, "grad_norm": 1.8902636766433716, "learning_rate": 0.0002, "loss": 1.6121, "step": 142730 }, { "epoch": 0.58, "grad_norm": 2.631312847137451, "learning_rate": 0.0002, "loss": 1.5926, "step": 142740 }, { "epoch": 0.58, "grad_norm": 2.8849990367889404, "learning_rate": 0.0002, "loss": 1.4207, "step": 142750 }, { "epoch": 0.58, "grad_norm": 3.2203309535980225, "learning_rate": 0.0002, "loss": 1.6067, "step": 142760 }, { "epoch": 0.58, "grad_norm": 2.972223997116089, "learning_rate": 0.0002, "loss": 1.5188, "step": 142770 }, { "epoch": 0.58, "grad_norm": 2.752657651901245, "learning_rate": 0.0002, "loss": 1.447, "step": 142780 }, { "epoch": 0.58, "grad_norm": 1.9947322607040405, "learning_rate": 0.0002, "loss": 1.591, "step": 142790 }, { "epoch": 0.58, "grad_norm": 1.962688684463501, "learning_rate": 0.0002, "loss": 1.4427, "step": 142800 }, { "epoch": 0.58, "grad_norm": 2.292099952697754, "learning_rate": 0.0002, "loss": 1.41, "step": 142810 }, { "epoch": 0.58, "grad_norm": 2.688511610031128, "learning_rate": 0.0002, "loss": 1.6943, "step": 142820 }, { "epoch": 0.58, "grad_norm": 3.1782584190368652, "learning_rate": 0.0002, "loss": 1.4069, "step": 142830 }, { "epoch": 0.58, "grad_norm": 3.4282639026641846, "learning_rate": 0.0002, "loss": 1.5724, "step": 142840 }, { "epoch": 0.58, "grad_norm": 3.201129913330078, "learning_rate": 0.0002, "loss": 1.5476, "step": 142850 }, { "epoch": 0.58, "grad_norm": 2.409860372543335, "learning_rate": 0.0002, "loss": 1.7558, "step": 142860 }, { "epoch": 0.58, "grad_norm": 1.9465031623840332, "learning_rate": 0.0002, "loss": 1.6518, "step": 142870 }, { "epoch": 0.58, "grad_norm": 3.121652841567993, "learning_rate": 0.0002, "loss": 1.4812, "step": 142880 }, { "epoch": 0.58, "grad_norm": 2.231454372406006, "learning_rate": 0.0002, "loss": 1.5978, "step": 142890 }, { "epoch": 0.58, "grad_norm": 2.2339463233947754, "learning_rate": 0.0002, "loss": 1.5272, "step": 142900 }, { "epoch": 0.58, "grad_norm": 3.331925392150879, "learning_rate": 0.0002, "loss": 1.468, "step": 142910 }, { "epoch": 0.58, "grad_norm": 4.098651885986328, "learning_rate": 0.0002, "loss": 1.8574, "step": 142920 }, { "epoch": 0.58, "grad_norm": 3.221993923187256, "learning_rate": 0.0002, "loss": 1.6743, "step": 142930 }, { "epoch": 0.58, "grad_norm": 2.2360546588897705, "learning_rate": 0.0002, "loss": 1.2468, "step": 142940 }, { "epoch": 0.58, "grad_norm": 2.1818501949310303, "learning_rate": 0.0002, "loss": 1.6867, "step": 142950 }, { "epoch": 0.58, "grad_norm": 2.602975845336914, "learning_rate": 0.0002, "loss": 1.4537, "step": 142960 }, { "epoch": 0.58, "grad_norm": 1.6655524969100952, "learning_rate": 0.0002, "loss": 1.7019, "step": 142970 }, { "epoch": 0.58, "grad_norm": 2.8259897232055664, "learning_rate": 0.0002, "loss": 1.5006, "step": 142980 }, { "epoch": 0.58, "grad_norm": 1.764972448348999, "learning_rate": 0.0002, "loss": 1.401, "step": 142990 }, { "epoch": 0.58, "grad_norm": 2.5265586376190186, "learning_rate": 0.0002, "loss": 1.3495, "step": 143000 }, { "epoch": 0.58, "grad_norm": 3.5920374393463135, "learning_rate": 0.0002, "loss": 1.7465, "step": 143010 }, { "epoch": 0.58, "grad_norm": 3.4734654426574707, "learning_rate": 0.0002, "loss": 1.6125, "step": 143020 }, { "epoch": 0.58, "grad_norm": 6.311175346374512, "learning_rate": 0.0002, "loss": 1.4867, "step": 143030 }, { "epoch": 0.58, "grad_norm": 3.1885862350463867, "learning_rate": 0.0002, "loss": 1.4894, "step": 143040 }, { "epoch": 0.58, "grad_norm": 1.6219385862350464, "learning_rate": 0.0002, "loss": 1.6322, "step": 143050 }, { "epoch": 0.58, "grad_norm": 3.4190220832824707, "learning_rate": 0.0002, "loss": 1.4245, "step": 143060 }, { "epoch": 0.58, "grad_norm": 3.115341901779175, "learning_rate": 0.0002, "loss": 2.0562, "step": 143070 }, { "epoch": 0.58, "grad_norm": 8.129717826843262, "learning_rate": 0.0002, "loss": 1.5397, "step": 143080 }, { "epoch": 0.58, "grad_norm": 2.3084847927093506, "learning_rate": 0.0002, "loss": 1.7232, "step": 143090 }, { "epoch": 0.58, "grad_norm": 3.6561098098754883, "learning_rate": 0.0002, "loss": 1.6198, "step": 143100 }, { "epoch": 0.58, "grad_norm": 2.9334557056427, "learning_rate": 0.0002, "loss": 1.4633, "step": 143110 }, { "epoch": 0.58, "grad_norm": 4.126786708831787, "learning_rate": 0.0002, "loss": 1.3446, "step": 143120 }, { "epoch": 0.58, "grad_norm": 2.178297519683838, "learning_rate": 0.0002, "loss": 1.454, "step": 143130 }, { "epoch": 0.58, "grad_norm": 2.239647150039673, "learning_rate": 0.0002, "loss": 1.7715, "step": 143140 }, { "epoch": 0.58, "grad_norm": 2.5436882972717285, "learning_rate": 0.0002, "loss": 1.4081, "step": 143150 }, { "epoch": 0.58, "grad_norm": 2.935523271560669, "learning_rate": 0.0002, "loss": 1.5446, "step": 143160 }, { "epoch": 0.58, "grad_norm": 5.602851390838623, "learning_rate": 0.0002, "loss": 1.5318, "step": 143170 }, { "epoch": 0.58, "grad_norm": 3.4348769187927246, "learning_rate": 0.0002, "loss": 1.5456, "step": 143180 }, { "epoch": 0.58, "grad_norm": 2.8082997798919678, "learning_rate": 0.0002, "loss": 1.518, "step": 143190 }, { "epoch": 0.58, "grad_norm": 2.6437559127807617, "learning_rate": 0.0002, "loss": 1.4051, "step": 143200 }, { "epoch": 0.58, "grad_norm": 2.359384536743164, "learning_rate": 0.0002, "loss": 1.6423, "step": 143210 }, { "epoch": 0.58, "grad_norm": 3.237206220626831, "learning_rate": 0.0002, "loss": 1.5194, "step": 143220 }, { "epoch": 0.58, "grad_norm": 2.6762266159057617, "learning_rate": 0.0002, "loss": 1.6484, "step": 143230 }, { "epoch": 0.58, "grad_norm": 2.7394983768463135, "learning_rate": 0.0002, "loss": 1.5629, "step": 143240 }, { "epoch": 0.58, "grad_norm": 3.0172247886657715, "learning_rate": 0.0002, "loss": 1.6842, "step": 143250 }, { "epoch": 0.58, "grad_norm": 2.2629759311676025, "learning_rate": 0.0002, "loss": 1.2127, "step": 143260 }, { "epoch": 0.58, "grad_norm": 4.555082321166992, "learning_rate": 0.0002, "loss": 1.5527, "step": 143270 }, { "epoch": 0.58, "grad_norm": 1.9946925640106201, "learning_rate": 0.0002, "loss": 1.4595, "step": 143280 }, { "epoch": 0.58, "grad_norm": 2.3163745403289795, "learning_rate": 0.0002, "loss": 1.6698, "step": 143290 }, { "epoch": 0.58, "grad_norm": 2.8924031257629395, "learning_rate": 0.0002, "loss": 1.7131, "step": 143300 }, { "epoch": 0.58, "grad_norm": 4.053839683532715, "learning_rate": 0.0002, "loss": 1.5473, "step": 143310 }, { "epoch": 0.58, "grad_norm": 7.101120948791504, "learning_rate": 0.0002, "loss": 1.4723, "step": 143320 }, { "epoch": 0.58, "grad_norm": 3.135582685470581, "learning_rate": 0.0002, "loss": 1.3436, "step": 143330 }, { "epoch": 0.58, "grad_norm": 3.2004988193511963, "learning_rate": 0.0002, "loss": 1.6389, "step": 143340 }, { "epoch": 0.58, "grad_norm": 2.662534713745117, "learning_rate": 0.0002, "loss": 1.7247, "step": 143350 }, { "epoch": 0.58, "grad_norm": 3.97407603263855, "learning_rate": 0.0002, "loss": 1.6771, "step": 143360 }, { "epoch": 0.58, "grad_norm": 3.450596809387207, "learning_rate": 0.0002, "loss": 1.4185, "step": 143370 }, { "epoch": 0.58, "grad_norm": 3.223747968673706, "learning_rate": 0.0002, "loss": 1.6959, "step": 143380 }, { "epoch": 0.58, "grad_norm": 3.681818962097168, "learning_rate": 0.0002, "loss": 1.6008, "step": 143390 }, { "epoch": 0.58, "grad_norm": 4.573892116546631, "learning_rate": 0.0002, "loss": 1.6861, "step": 143400 }, { "epoch": 0.58, "grad_norm": 3.2542455196380615, "learning_rate": 0.0002, "loss": 1.6002, "step": 143410 }, { "epoch": 0.58, "grad_norm": 2.4925918579101562, "learning_rate": 0.0002, "loss": 1.6623, "step": 143420 }, { "epoch": 0.58, "grad_norm": 2.617579698562622, "learning_rate": 0.0002, "loss": 1.526, "step": 143430 }, { "epoch": 0.58, "grad_norm": 2.9623122215270996, "learning_rate": 0.0002, "loss": 1.6878, "step": 143440 }, { "epoch": 0.58, "grad_norm": 2.7382709980010986, "learning_rate": 0.0002, "loss": 1.4803, "step": 143450 }, { "epoch": 0.58, "grad_norm": 2.7803287506103516, "learning_rate": 0.0002, "loss": 1.6398, "step": 143460 }, { "epoch": 0.58, "grad_norm": 1.881168246269226, "learning_rate": 0.0002, "loss": 1.2638, "step": 143470 }, { "epoch": 0.58, "grad_norm": 2.8968589305877686, "learning_rate": 0.0002, "loss": 1.3906, "step": 143480 }, { "epoch": 0.58, "grad_norm": 4.074605941772461, "learning_rate": 0.0002, "loss": 1.4426, "step": 143490 }, { "epoch": 0.58, "grad_norm": 2.2265450954437256, "learning_rate": 0.0002, "loss": 1.52, "step": 143500 }, { "epoch": 0.58, "grad_norm": 4.1027655601501465, "learning_rate": 0.0002, "loss": 1.5436, "step": 143510 }, { "epoch": 0.58, "grad_norm": 3.1425068378448486, "learning_rate": 0.0002, "loss": 1.5466, "step": 143520 }, { "epoch": 0.58, "grad_norm": 1.8659765720367432, "learning_rate": 0.0002, "loss": 1.3971, "step": 143530 }, { "epoch": 0.58, "grad_norm": 3.1437625885009766, "learning_rate": 0.0002, "loss": 1.5405, "step": 143540 }, { "epoch": 0.58, "grad_norm": 3.8131675720214844, "learning_rate": 0.0002, "loss": 1.4044, "step": 143550 }, { "epoch": 0.58, "grad_norm": 2.8955166339874268, "learning_rate": 0.0002, "loss": 1.5083, "step": 143560 }, { "epoch": 0.58, "grad_norm": 2.5573084354400635, "learning_rate": 0.0002, "loss": 1.6093, "step": 143570 }, { "epoch": 0.58, "grad_norm": 3.46854829788208, "learning_rate": 0.0002, "loss": 1.6743, "step": 143580 }, { "epoch": 0.58, "grad_norm": 1.662785291671753, "learning_rate": 0.0002, "loss": 1.6608, "step": 143590 }, { "epoch": 0.58, "grad_norm": 4.044504642486572, "learning_rate": 0.0002, "loss": 1.6209, "step": 143600 }, { "epoch": 0.58, "grad_norm": 2.2977123260498047, "learning_rate": 0.0002, "loss": 1.5079, "step": 143610 }, { "epoch": 0.58, "grad_norm": 1.6918624639511108, "learning_rate": 0.0002, "loss": 1.5077, "step": 143620 }, { "epoch": 0.58, "grad_norm": 4.9448323249816895, "learning_rate": 0.0002, "loss": 1.4109, "step": 143630 }, { "epoch": 0.58, "grad_norm": 2.6603732109069824, "learning_rate": 0.0002, "loss": 1.6994, "step": 143640 }, { "epoch": 0.58, "grad_norm": 1.4706792831420898, "learning_rate": 0.0002, "loss": 1.3177, "step": 143650 }, { "epoch": 0.58, "grad_norm": 2.3319931030273438, "learning_rate": 0.0002, "loss": 1.5893, "step": 143660 }, { "epoch": 0.58, "grad_norm": 2.0461912155151367, "learning_rate": 0.0002, "loss": 1.5765, "step": 143670 }, { "epoch": 0.58, "grad_norm": 2.7195775508880615, "learning_rate": 0.0002, "loss": 1.5563, "step": 143680 }, { "epoch": 0.58, "grad_norm": 3.234084129333496, "learning_rate": 0.0002, "loss": 1.5457, "step": 143690 }, { "epoch": 0.58, "grad_norm": 2.2634823322296143, "learning_rate": 0.0002, "loss": 1.6047, "step": 143700 }, { "epoch": 0.59, "grad_norm": 3.979196786880493, "learning_rate": 0.0002, "loss": 1.5925, "step": 143710 }, { "epoch": 0.59, "grad_norm": 4.343077659606934, "learning_rate": 0.0002, "loss": 1.3435, "step": 143720 }, { "epoch": 0.59, "grad_norm": 3.284727096557617, "learning_rate": 0.0002, "loss": 1.8842, "step": 143730 }, { "epoch": 0.59, "grad_norm": 2.885223627090454, "learning_rate": 0.0002, "loss": 1.6711, "step": 143740 }, { "epoch": 0.59, "grad_norm": 2.8821043968200684, "learning_rate": 0.0002, "loss": 1.6333, "step": 143750 }, { "epoch": 0.59, "grad_norm": 3.3197178840637207, "learning_rate": 0.0002, "loss": 1.586, "step": 143760 }, { "epoch": 0.59, "grad_norm": 3.7376174926757812, "learning_rate": 0.0002, "loss": 1.7903, "step": 143770 }, { "epoch": 0.59, "grad_norm": 2.4934303760528564, "learning_rate": 0.0002, "loss": 1.5536, "step": 143780 }, { "epoch": 0.59, "grad_norm": 2.6438755989074707, "learning_rate": 0.0002, "loss": 1.646, "step": 143790 }, { "epoch": 0.59, "grad_norm": 2.7602179050445557, "learning_rate": 0.0002, "loss": 1.5523, "step": 143800 }, { "epoch": 0.59, "grad_norm": 2.9536795616149902, "learning_rate": 0.0002, "loss": 1.657, "step": 143810 }, { "epoch": 0.59, "grad_norm": 3.527021646499634, "learning_rate": 0.0002, "loss": 1.6045, "step": 143820 }, { "epoch": 0.59, "grad_norm": 2.885579824447632, "learning_rate": 0.0002, "loss": 1.3358, "step": 143830 }, { "epoch": 0.59, "grad_norm": 2.9698944091796875, "learning_rate": 0.0002, "loss": 1.4166, "step": 143840 }, { "epoch": 0.59, "grad_norm": 3.1497161388397217, "learning_rate": 0.0002, "loss": 1.6965, "step": 143850 }, { "epoch": 0.59, "grad_norm": 3.705111503601074, "learning_rate": 0.0002, "loss": 1.4927, "step": 143860 }, { "epoch": 0.59, "grad_norm": 2.9959309101104736, "learning_rate": 0.0002, "loss": 1.7694, "step": 143870 }, { "epoch": 0.59, "grad_norm": 3.257948398590088, "learning_rate": 0.0002, "loss": 1.5963, "step": 143880 }, { "epoch": 0.59, "grad_norm": 3.0933878421783447, "learning_rate": 0.0002, "loss": 1.7117, "step": 143890 }, { "epoch": 0.59, "grad_norm": 2.986724376678467, "learning_rate": 0.0002, "loss": 1.3986, "step": 143900 }, { "epoch": 0.59, "grad_norm": 4.605105400085449, "learning_rate": 0.0002, "loss": 1.5474, "step": 143910 }, { "epoch": 0.59, "grad_norm": 2.741154432296753, "learning_rate": 0.0002, "loss": 1.492, "step": 143920 }, { "epoch": 0.59, "grad_norm": 2.493946075439453, "learning_rate": 0.0002, "loss": 1.4488, "step": 143930 }, { "epoch": 0.59, "grad_norm": 2.6530544757843018, "learning_rate": 0.0002, "loss": 1.6871, "step": 143940 }, { "epoch": 0.59, "grad_norm": 4.612924575805664, "learning_rate": 0.0002, "loss": 1.626, "step": 143950 }, { "epoch": 0.59, "grad_norm": 3.487257480621338, "learning_rate": 0.0002, "loss": 1.5244, "step": 143960 }, { "epoch": 0.59, "grad_norm": 2.843160629272461, "learning_rate": 0.0002, "loss": 1.7956, "step": 143970 }, { "epoch": 0.59, "grad_norm": 3.926057815551758, "learning_rate": 0.0002, "loss": 1.5876, "step": 143980 }, { "epoch": 0.59, "grad_norm": 2.9539732933044434, "learning_rate": 0.0002, "loss": 1.7569, "step": 143990 }, { "epoch": 0.59, "grad_norm": 3.2889809608459473, "learning_rate": 0.0002, "loss": 1.5225, "step": 144000 }, { "epoch": 0.59, "grad_norm": 2.657654285430908, "learning_rate": 0.0002, "loss": 1.8013, "step": 144010 }, { "epoch": 0.59, "grad_norm": 1.9489470720291138, "learning_rate": 0.0002, "loss": 1.5745, "step": 144020 }, { "epoch": 0.59, "grad_norm": 1.9192594289779663, "learning_rate": 0.0002, "loss": 1.651, "step": 144030 }, { "epoch": 0.59, "grad_norm": 2.0182793140411377, "learning_rate": 0.0002, "loss": 1.476, "step": 144040 }, { "epoch": 0.59, "grad_norm": 3.2232792377471924, "learning_rate": 0.0002, "loss": 1.5937, "step": 144050 }, { "epoch": 0.59, "grad_norm": 3.291978359222412, "learning_rate": 0.0002, "loss": 1.7015, "step": 144060 }, { "epoch": 0.59, "grad_norm": 4.1027302742004395, "learning_rate": 0.0002, "loss": 1.4737, "step": 144070 }, { "epoch": 0.59, "grad_norm": 2.2058959007263184, "learning_rate": 0.0002, "loss": 1.7084, "step": 144080 }, { "epoch": 0.59, "grad_norm": 4.081703186035156, "learning_rate": 0.0002, "loss": 1.771, "step": 144090 }, { "epoch": 0.59, "grad_norm": 4.3394036293029785, "learning_rate": 0.0002, "loss": 1.5296, "step": 144100 }, { "epoch": 0.59, "grad_norm": 3.4050230979919434, "learning_rate": 0.0002, "loss": 1.9993, "step": 144110 }, { "epoch": 0.59, "grad_norm": 4.534991264343262, "learning_rate": 0.0002, "loss": 1.4182, "step": 144120 }, { "epoch": 0.59, "grad_norm": 2.5728936195373535, "learning_rate": 0.0002, "loss": 1.4696, "step": 144130 }, { "epoch": 0.59, "grad_norm": 3.2453176975250244, "learning_rate": 0.0002, "loss": 1.7543, "step": 144140 }, { "epoch": 0.59, "grad_norm": 4.110244274139404, "learning_rate": 0.0002, "loss": 1.3792, "step": 144150 }, { "epoch": 0.59, "grad_norm": 1.252826452255249, "learning_rate": 0.0002, "loss": 1.5081, "step": 144160 }, { "epoch": 0.59, "grad_norm": 2.359419345855713, "learning_rate": 0.0002, "loss": 1.581, "step": 144170 }, { "epoch": 0.59, "grad_norm": 4.218199253082275, "learning_rate": 0.0002, "loss": 1.4676, "step": 144180 }, { "epoch": 0.59, "grad_norm": 4.116444110870361, "learning_rate": 0.0002, "loss": 1.6582, "step": 144190 }, { "epoch": 0.59, "grad_norm": 3.1138553619384766, "learning_rate": 0.0002, "loss": 1.4505, "step": 144200 }, { "epoch": 0.59, "grad_norm": 3.1885499954223633, "learning_rate": 0.0002, "loss": 1.6593, "step": 144210 }, { "epoch": 0.59, "grad_norm": 2.595618486404419, "learning_rate": 0.0002, "loss": 1.7369, "step": 144220 }, { "epoch": 0.59, "grad_norm": 1.3801383972167969, "learning_rate": 0.0002, "loss": 1.4324, "step": 144230 }, { "epoch": 0.59, "grad_norm": 2.926713228225708, "learning_rate": 0.0002, "loss": 1.5438, "step": 144240 }, { "epoch": 0.59, "grad_norm": 3.0394339561462402, "learning_rate": 0.0002, "loss": 1.4989, "step": 144250 }, { "epoch": 0.59, "grad_norm": 3.1156179904937744, "learning_rate": 0.0002, "loss": 1.4927, "step": 144260 }, { "epoch": 0.59, "grad_norm": 3.51334285736084, "learning_rate": 0.0002, "loss": 1.4419, "step": 144270 }, { "epoch": 0.59, "grad_norm": 2.8580141067504883, "learning_rate": 0.0002, "loss": 1.5562, "step": 144280 }, { "epoch": 0.59, "grad_norm": 3.2039740085601807, "learning_rate": 0.0002, "loss": 1.7029, "step": 144290 }, { "epoch": 0.59, "grad_norm": 4.017179012298584, "learning_rate": 0.0002, "loss": 1.6052, "step": 144300 }, { "epoch": 0.59, "grad_norm": 2.7888729572296143, "learning_rate": 0.0002, "loss": 1.3672, "step": 144310 }, { "epoch": 0.59, "grad_norm": 3.077793836593628, "learning_rate": 0.0002, "loss": 1.4646, "step": 144320 }, { "epoch": 0.59, "grad_norm": 1.6307753324508667, "learning_rate": 0.0002, "loss": 1.5382, "step": 144330 }, { "epoch": 0.59, "grad_norm": 2.3974318504333496, "learning_rate": 0.0002, "loss": 1.7498, "step": 144340 }, { "epoch": 0.59, "grad_norm": 2.949195623397827, "learning_rate": 0.0002, "loss": 1.6696, "step": 144350 }, { "epoch": 0.59, "grad_norm": 2.077579975128174, "learning_rate": 0.0002, "loss": 1.7795, "step": 144360 }, { "epoch": 0.59, "grad_norm": 4.152207374572754, "learning_rate": 0.0002, "loss": 1.4547, "step": 144370 }, { "epoch": 0.59, "grad_norm": 3.7070975303649902, "learning_rate": 0.0002, "loss": 1.5087, "step": 144380 }, { "epoch": 0.59, "grad_norm": 2.9465513229370117, "learning_rate": 0.0002, "loss": 1.5069, "step": 144390 }, { "epoch": 0.59, "grad_norm": 2.8032877445220947, "learning_rate": 0.0002, "loss": 1.4786, "step": 144400 }, { "epoch": 0.59, "grad_norm": 5.062723636627197, "learning_rate": 0.0002, "loss": 1.5927, "step": 144410 }, { "epoch": 0.59, "grad_norm": 2.133430004119873, "learning_rate": 0.0002, "loss": 1.6841, "step": 144420 }, { "epoch": 0.59, "grad_norm": 5.371130466461182, "learning_rate": 0.0002, "loss": 1.5488, "step": 144430 }, { "epoch": 0.59, "grad_norm": 3.890160322189331, "learning_rate": 0.0002, "loss": 1.9141, "step": 144440 }, { "epoch": 0.59, "grad_norm": 2.2469069957733154, "learning_rate": 0.0002, "loss": 1.7537, "step": 144450 }, { "epoch": 0.59, "grad_norm": 2.4697515964508057, "learning_rate": 0.0002, "loss": 1.4674, "step": 144460 }, { "epoch": 0.59, "grad_norm": 2.905510663986206, "learning_rate": 0.0002, "loss": 1.4908, "step": 144470 }, { "epoch": 0.59, "grad_norm": 2.887406587600708, "learning_rate": 0.0002, "loss": 1.6985, "step": 144480 }, { "epoch": 0.59, "grad_norm": 2.911214590072632, "learning_rate": 0.0002, "loss": 1.693, "step": 144490 }, { "epoch": 0.59, "grad_norm": 3.1416544914245605, "learning_rate": 0.0002, "loss": 1.5806, "step": 144500 }, { "epoch": 0.59, "grad_norm": 4.0276780128479, "learning_rate": 0.0002, "loss": 1.7039, "step": 144510 }, { "epoch": 0.59, "grad_norm": 2.509413957595825, "learning_rate": 0.0002, "loss": 1.5715, "step": 144520 }, { "epoch": 0.59, "grad_norm": 1.8346171379089355, "learning_rate": 0.0002, "loss": 1.6411, "step": 144530 }, { "epoch": 0.59, "grad_norm": 4.5079827308654785, "learning_rate": 0.0002, "loss": 1.541, "step": 144540 }, { "epoch": 0.59, "grad_norm": 2.522380828857422, "learning_rate": 0.0002, "loss": 1.8088, "step": 144550 }, { "epoch": 0.59, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.6286, "step": 144560 }, { "epoch": 0.59, "grad_norm": 3.4495015144348145, "learning_rate": 0.0002, "loss": 1.6703, "step": 144570 }, { "epoch": 0.59, "grad_norm": 4.466534614562988, "learning_rate": 0.0002, "loss": 1.5038, "step": 144580 }, { "epoch": 0.59, "grad_norm": 2.739861249923706, "learning_rate": 0.0002, "loss": 1.7868, "step": 144590 }, { "epoch": 0.59, "grad_norm": 3.4605014324188232, "learning_rate": 0.0002, "loss": 1.7029, "step": 144600 }, { "epoch": 0.59, "grad_norm": 4.446744441986084, "learning_rate": 0.0002, "loss": 1.1381, "step": 144610 }, { "epoch": 0.59, "grad_norm": 3.486891269683838, "learning_rate": 0.0002, "loss": 1.7811, "step": 144620 }, { "epoch": 0.59, "grad_norm": 3.0974576473236084, "learning_rate": 0.0002, "loss": 1.475, "step": 144630 }, { "epoch": 0.59, "grad_norm": 3.170431137084961, "learning_rate": 0.0002, "loss": 1.585, "step": 144640 }, { "epoch": 0.59, "grad_norm": 2.388657331466675, "learning_rate": 0.0002, "loss": 1.5438, "step": 144650 }, { "epoch": 0.59, "grad_norm": 3.5943870544433594, "learning_rate": 0.0002, "loss": 1.7784, "step": 144660 }, { "epoch": 0.59, "grad_norm": 2.7008421421051025, "learning_rate": 0.0002, "loss": 1.6652, "step": 144670 }, { "epoch": 0.59, "grad_norm": 4.7659687995910645, "learning_rate": 0.0002, "loss": 1.7386, "step": 144680 }, { "epoch": 0.59, "grad_norm": 3.629417657852173, "learning_rate": 0.0002, "loss": 1.6294, "step": 144690 }, { "epoch": 0.59, "grad_norm": 3.013587474822998, "learning_rate": 0.0002, "loss": 1.7031, "step": 144700 }, { "epoch": 0.59, "grad_norm": 2.4177560806274414, "learning_rate": 0.0002, "loss": 1.6135, "step": 144710 }, { "epoch": 0.59, "grad_norm": 3.417405605316162, "learning_rate": 0.0002, "loss": 1.5998, "step": 144720 }, { "epoch": 0.59, "grad_norm": 1.3461631536483765, "learning_rate": 0.0002, "loss": 1.5711, "step": 144730 }, { "epoch": 0.59, "grad_norm": 5.494606018066406, "learning_rate": 0.0002, "loss": 1.5051, "step": 144740 }, { "epoch": 0.59, "grad_norm": 3.2587437629699707, "learning_rate": 0.0002, "loss": 1.4603, "step": 144750 }, { "epoch": 0.59, "grad_norm": 2.748561143875122, "learning_rate": 0.0002, "loss": 1.8483, "step": 144760 }, { "epoch": 0.59, "grad_norm": 2.8676869869232178, "learning_rate": 0.0002, "loss": 1.5251, "step": 144770 }, { "epoch": 0.59, "grad_norm": 1.6949174404144287, "learning_rate": 0.0002, "loss": 1.6027, "step": 144780 }, { "epoch": 0.59, "grad_norm": 2.775702714920044, "learning_rate": 0.0002, "loss": 1.3493, "step": 144790 }, { "epoch": 0.59, "grad_norm": 3.440169334411621, "learning_rate": 0.0002, "loss": 1.6372, "step": 144800 }, { "epoch": 0.59, "grad_norm": 2.1695234775543213, "learning_rate": 0.0002, "loss": 1.4125, "step": 144810 }, { "epoch": 0.59, "grad_norm": 1.5626126527786255, "learning_rate": 0.0002, "loss": 1.3413, "step": 144820 }, { "epoch": 0.59, "grad_norm": 4.522005081176758, "learning_rate": 0.0002, "loss": 1.4491, "step": 144830 }, { "epoch": 0.59, "grad_norm": 2.5009605884552, "learning_rate": 0.0002, "loss": 1.4465, "step": 144840 }, { "epoch": 0.59, "grad_norm": 3.63848614692688, "learning_rate": 0.0002, "loss": 1.2462, "step": 144850 }, { "epoch": 0.59, "grad_norm": 2.6037003993988037, "learning_rate": 0.0002, "loss": 1.4651, "step": 144860 }, { "epoch": 0.59, "grad_norm": 3.545987129211426, "learning_rate": 0.0002, "loss": 1.7064, "step": 144870 }, { "epoch": 0.59, "grad_norm": 3.0467259883880615, "learning_rate": 0.0002, "loss": 1.8706, "step": 144880 }, { "epoch": 0.59, "grad_norm": 1.8849201202392578, "learning_rate": 0.0002, "loss": 1.6841, "step": 144890 }, { "epoch": 0.59, "grad_norm": 2.452547788619995, "learning_rate": 0.0002, "loss": 1.4361, "step": 144900 }, { "epoch": 0.59, "grad_norm": 3.23097562789917, "learning_rate": 0.0002, "loss": 1.6598, "step": 144910 }, { "epoch": 0.59, "grad_norm": 3.614738941192627, "learning_rate": 0.0002, "loss": 1.6509, "step": 144920 }, { "epoch": 0.59, "grad_norm": 3.4059433937072754, "learning_rate": 0.0002, "loss": 1.3782, "step": 144930 }, { "epoch": 0.59, "grad_norm": 3.6763269901275635, "learning_rate": 0.0002, "loss": 1.5414, "step": 144940 }, { "epoch": 0.59, "grad_norm": 3.365452527999878, "learning_rate": 0.0002, "loss": 1.3485, "step": 144950 }, { "epoch": 0.59, "grad_norm": 3.111544609069824, "learning_rate": 0.0002, "loss": 1.4997, "step": 144960 }, { "epoch": 0.59, "grad_norm": 3.1125378608703613, "learning_rate": 0.0002, "loss": 1.6227, "step": 144970 }, { "epoch": 0.59, "grad_norm": 1.9139569997787476, "learning_rate": 0.0002, "loss": 1.5871, "step": 144980 }, { "epoch": 0.59, "grad_norm": 3.372304677963257, "learning_rate": 0.0002, "loss": 1.5836, "step": 144990 }, { "epoch": 0.59, "grad_norm": 5.488195896148682, "learning_rate": 0.0002, "loss": 1.4787, "step": 145000 }, { "epoch": 0.59, "grad_norm": 4.055669784545898, "learning_rate": 0.0002, "loss": 1.6136, "step": 145010 }, { "epoch": 0.59, "grad_norm": 2.4463958740234375, "learning_rate": 0.0002, "loss": 1.423, "step": 145020 }, { "epoch": 0.59, "grad_norm": 2.4722468852996826, "learning_rate": 0.0002, "loss": 1.729, "step": 145030 }, { "epoch": 0.59, "grad_norm": 1.717251181602478, "learning_rate": 0.0002, "loss": 1.6907, "step": 145040 }, { "epoch": 0.59, "grad_norm": 3.102372169494629, "learning_rate": 0.0002, "loss": 1.4134, "step": 145050 }, { "epoch": 0.59, "grad_norm": 2.0044798851013184, "learning_rate": 0.0002, "loss": 1.5081, "step": 145060 }, { "epoch": 0.59, "grad_norm": 2.8993992805480957, "learning_rate": 0.0002, "loss": 1.4304, "step": 145070 }, { "epoch": 0.59, "grad_norm": 3.928298234939575, "learning_rate": 0.0002, "loss": 1.6419, "step": 145080 }, { "epoch": 0.59, "grad_norm": 3.9666500091552734, "learning_rate": 0.0002, "loss": 1.6741, "step": 145090 }, { "epoch": 0.59, "grad_norm": 4.956921577453613, "learning_rate": 0.0002, "loss": 1.555, "step": 145100 }, { "epoch": 0.59, "grad_norm": 2.1692898273468018, "learning_rate": 0.0002, "loss": 1.4959, "step": 145110 }, { "epoch": 0.59, "grad_norm": 2.5309717655181885, "learning_rate": 0.0002, "loss": 1.5595, "step": 145120 }, { "epoch": 0.59, "grad_norm": 3.265190601348877, "learning_rate": 0.0002, "loss": 1.4051, "step": 145130 }, { "epoch": 0.59, "grad_norm": 3.1143853664398193, "learning_rate": 0.0002, "loss": 1.6178, "step": 145140 }, { "epoch": 0.59, "grad_norm": 2.2368557453155518, "learning_rate": 0.0002, "loss": 1.5628, "step": 145150 }, { "epoch": 0.59, "grad_norm": 3.918394088745117, "learning_rate": 0.0002, "loss": 1.4251, "step": 145160 }, { "epoch": 0.59, "grad_norm": 3.014627456665039, "learning_rate": 0.0002, "loss": 1.5638, "step": 145170 }, { "epoch": 0.59, "grad_norm": 3.2764322757720947, "learning_rate": 0.0002, "loss": 1.6199, "step": 145180 }, { "epoch": 0.59, "grad_norm": 2.1950490474700928, "learning_rate": 0.0002, "loss": 1.7342, "step": 145190 }, { "epoch": 0.59, "grad_norm": 2.515141248703003, "learning_rate": 0.0002, "loss": 1.7112, "step": 145200 }, { "epoch": 0.59, "grad_norm": 2.6158339977264404, "learning_rate": 0.0002, "loss": 1.433, "step": 145210 }, { "epoch": 0.59, "grad_norm": 2.4978439807891846, "learning_rate": 0.0002, "loss": 1.5489, "step": 145220 }, { "epoch": 0.59, "grad_norm": 2.5941619873046875, "learning_rate": 0.0002, "loss": 1.7598, "step": 145230 }, { "epoch": 0.59, "grad_norm": 2.417501211166382, "learning_rate": 0.0002, "loss": 1.6289, "step": 145240 }, { "epoch": 0.59, "grad_norm": 4.211089134216309, "learning_rate": 0.0002, "loss": 1.5991, "step": 145250 }, { "epoch": 0.59, "grad_norm": 3.677835464477539, "learning_rate": 0.0002, "loss": 1.5058, "step": 145260 }, { "epoch": 0.59, "grad_norm": 1.6904250383377075, "learning_rate": 0.0002, "loss": 1.6133, "step": 145270 }, { "epoch": 0.59, "grad_norm": 2.3348443508148193, "learning_rate": 0.0002, "loss": 1.629, "step": 145280 }, { "epoch": 0.59, "grad_norm": 2.473358154296875, "learning_rate": 0.0002, "loss": 1.604, "step": 145290 }, { "epoch": 0.59, "grad_norm": 6.179749965667725, "learning_rate": 0.0002, "loss": 1.8114, "step": 145300 }, { "epoch": 0.59, "grad_norm": 3.8882410526275635, "learning_rate": 0.0002, "loss": 1.4641, "step": 145310 }, { "epoch": 0.59, "grad_norm": 3.057178497314453, "learning_rate": 0.0002, "loss": 1.6463, "step": 145320 }, { "epoch": 0.59, "grad_norm": 3.066197633743286, "learning_rate": 0.0002, "loss": 1.6372, "step": 145330 }, { "epoch": 0.59, "grad_norm": 3.519423246383667, "learning_rate": 0.0002, "loss": 1.6359, "step": 145340 }, { "epoch": 0.59, "grad_norm": 2.357785224914551, "learning_rate": 0.0002, "loss": 1.6447, "step": 145350 }, { "epoch": 0.59, "grad_norm": 3.106191635131836, "learning_rate": 0.0002, "loss": 1.5207, "step": 145360 }, { "epoch": 0.59, "grad_norm": 2.2562572956085205, "learning_rate": 0.0002, "loss": 1.5538, "step": 145370 }, { "epoch": 0.59, "grad_norm": 2.9902496337890625, "learning_rate": 0.0002, "loss": 1.7566, "step": 145380 }, { "epoch": 0.59, "grad_norm": 3.5317602157592773, "learning_rate": 0.0002, "loss": 1.5776, "step": 145390 }, { "epoch": 0.59, "grad_norm": 2.7331490516662598, "learning_rate": 0.0002, "loss": 1.4849, "step": 145400 }, { "epoch": 0.59, "grad_norm": 2.7921533584594727, "learning_rate": 0.0002, "loss": 1.5163, "step": 145410 }, { "epoch": 0.59, "grad_norm": 5.0636491775512695, "learning_rate": 0.0002, "loss": 1.5441, "step": 145420 }, { "epoch": 0.59, "grad_norm": 3.2024221420288086, "learning_rate": 0.0002, "loss": 1.4816, "step": 145430 }, { "epoch": 0.59, "grad_norm": 2.0522947311401367, "learning_rate": 0.0002, "loss": 1.6419, "step": 145440 }, { "epoch": 0.59, "grad_norm": 2.5822770595550537, "learning_rate": 0.0002, "loss": 1.557, "step": 145450 }, { "epoch": 0.59, "grad_norm": 2.655345916748047, "learning_rate": 0.0002, "loss": 1.6607, "step": 145460 }, { "epoch": 0.59, "grad_norm": 4.425290107727051, "learning_rate": 0.0002, "loss": 1.7252, "step": 145470 }, { "epoch": 0.59, "grad_norm": 2.6548452377319336, "learning_rate": 0.0002, "loss": 1.4573, "step": 145480 }, { "epoch": 0.59, "grad_norm": 3.937386989593506, "learning_rate": 0.0002, "loss": 1.5076, "step": 145490 }, { "epoch": 0.59, "grad_norm": 1.871720314025879, "learning_rate": 0.0002, "loss": 1.6438, "step": 145500 }, { "epoch": 0.59, "grad_norm": 3.459623336791992, "learning_rate": 0.0002, "loss": 1.8388, "step": 145510 }, { "epoch": 0.59, "grad_norm": 1.989587664604187, "learning_rate": 0.0002, "loss": 1.3133, "step": 145520 }, { "epoch": 0.59, "grad_norm": 2.6366989612579346, "learning_rate": 0.0002, "loss": 1.4785, "step": 145530 }, { "epoch": 0.59, "grad_norm": 3.230790853500366, "learning_rate": 0.0002, "loss": 1.5157, "step": 145540 }, { "epoch": 0.59, "grad_norm": 2.5266518592834473, "learning_rate": 0.0002, "loss": 1.8826, "step": 145550 }, { "epoch": 0.59, "grad_norm": 2.9062647819519043, "learning_rate": 0.0002, "loss": 1.7145, "step": 145560 }, { "epoch": 0.59, "grad_norm": 4.294717311859131, "learning_rate": 0.0002, "loss": 1.4334, "step": 145570 }, { "epoch": 0.59, "grad_norm": 6.789798736572266, "learning_rate": 0.0002, "loss": 1.7219, "step": 145580 }, { "epoch": 0.59, "grad_norm": 3.4844844341278076, "learning_rate": 0.0002, "loss": 1.6637, "step": 145590 }, { "epoch": 0.59, "grad_norm": 2.789642572402954, "learning_rate": 0.0002, "loss": 1.85, "step": 145600 }, { "epoch": 0.59, "grad_norm": 3.2624285221099854, "learning_rate": 0.0002, "loss": 1.8643, "step": 145610 }, { "epoch": 0.59, "grad_norm": 3.84909987449646, "learning_rate": 0.0002, "loss": 1.44, "step": 145620 }, { "epoch": 0.59, "grad_norm": 1.8715953826904297, "learning_rate": 0.0002, "loss": 1.5475, "step": 145630 }, { "epoch": 0.59, "grad_norm": 2.578062057495117, "learning_rate": 0.0002, "loss": 1.8377, "step": 145640 }, { "epoch": 0.59, "grad_norm": 4.244503021240234, "learning_rate": 0.0002, "loss": 1.8622, "step": 145650 }, { "epoch": 0.59, "grad_norm": 3.5568044185638428, "learning_rate": 0.0002, "loss": 1.6647, "step": 145660 }, { "epoch": 0.59, "grad_norm": 1.6794604063034058, "learning_rate": 0.0002, "loss": 1.5858, "step": 145670 }, { "epoch": 0.59, "grad_norm": 3.312593936920166, "learning_rate": 0.0002, "loss": 1.6437, "step": 145680 }, { "epoch": 0.59, "grad_norm": 2.5114338397979736, "learning_rate": 0.0002, "loss": 1.4024, "step": 145690 }, { "epoch": 0.59, "grad_norm": 2.55092716217041, "learning_rate": 0.0002, "loss": 1.9588, "step": 145700 }, { "epoch": 0.59, "grad_norm": 1.9192079305648804, "learning_rate": 0.0002, "loss": 1.6148, "step": 145710 }, { "epoch": 0.59, "grad_norm": 4.171694278717041, "learning_rate": 0.0002, "loss": 1.5434, "step": 145720 }, { "epoch": 0.59, "grad_norm": 2.3817248344421387, "learning_rate": 0.0002, "loss": 1.5624, "step": 145730 }, { "epoch": 0.59, "grad_norm": 1.123989224433899, "learning_rate": 0.0002, "loss": 1.4708, "step": 145740 }, { "epoch": 0.59, "grad_norm": 3.516249656677246, "learning_rate": 0.0002, "loss": 1.4078, "step": 145750 }, { "epoch": 0.59, "grad_norm": 4.984117031097412, "learning_rate": 0.0002, "loss": 1.7509, "step": 145760 }, { "epoch": 0.59, "grad_norm": 2.7329089641571045, "learning_rate": 0.0002, "loss": 1.6337, "step": 145770 }, { "epoch": 0.59, "grad_norm": 2.914491891860962, "learning_rate": 0.0002, "loss": 1.367, "step": 145780 }, { "epoch": 0.59, "grad_norm": 2.6017441749572754, "learning_rate": 0.0002, "loss": 1.9148, "step": 145790 }, { "epoch": 0.59, "grad_norm": 4.2722859382629395, "learning_rate": 0.0002, "loss": 1.5082, "step": 145800 }, { "epoch": 0.59, "grad_norm": 4.647732257843018, "learning_rate": 0.0002, "loss": 1.597, "step": 145810 }, { "epoch": 0.59, "grad_norm": 3.5356268882751465, "learning_rate": 0.0002, "loss": 1.5585, "step": 145820 }, { "epoch": 0.59, "grad_norm": 6.68808126449585, "learning_rate": 0.0002, "loss": 1.6498, "step": 145830 }, { "epoch": 0.59, "grad_norm": 2.043750762939453, "learning_rate": 0.0002, "loss": 1.6269, "step": 145840 }, { "epoch": 0.59, "grad_norm": 2.9913790225982666, "learning_rate": 0.0002, "loss": 1.6061, "step": 145850 }, { "epoch": 0.59, "grad_norm": 1.9998226165771484, "learning_rate": 0.0002, "loss": 1.5882, "step": 145860 }, { "epoch": 0.59, "grad_norm": 3.7182488441467285, "learning_rate": 0.0002, "loss": 1.6304, "step": 145870 }, { "epoch": 0.59, "grad_norm": 3.365709066390991, "learning_rate": 0.0002, "loss": 1.7173, "step": 145880 }, { "epoch": 0.59, "grad_norm": 2.353074550628662, "learning_rate": 0.0002, "loss": 1.4399, "step": 145890 }, { "epoch": 0.59, "grad_norm": 2.472630023956299, "learning_rate": 0.0002, "loss": 1.4329, "step": 145900 }, { "epoch": 0.59, "grad_norm": 3.380044460296631, "learning_rate": 0.0002, "loss": 1.5922, "step": 145910 }, { "epoch": 0.59, "grad_norm": 2.1396734714508057, "learning_rate": 0.0002, "loss": 1.5839, "step": 145920 }, { "epoch": 0.59, "grad_norm": 3.1256697177886963, "learning_rate": 0.0002, "loss": 1.6698, "step": 145930 }, { "epoch": 0.59, "grad_norm": 1.9501150846481323, "learning_rate": 0.0002, "loss": 1.5705, "step": 145940 }, { "epoch": 0.59, "grad_norm": 2.6813511848449707, "learning_rate": 0.0002, "loss": 1.783, "step": 145950 }, { "epoch": 0.59, "grad_norm": 3.854881763458252, "learning_rate": 0.0002, "loss": 1.7267, "step": 145960 }, { "epoch": 0.59, "grad_norm": 2.8955135345458984, "learning_rate": 0.0002, "loss": 1.438, "step": 145970 }, { "epoch": 0.59, "grad_norm": 2.310006618499756, "learning_rate": 0.0002, "loss": 1.7295, "step": 145980 }, { "epoch": 0.59, "grad_norm": 4.649086952209473, "learning_rate": 0.0002, "loss": 1.3917, "step": 145990 }, { "epoch": 0.59, "grad_norm": 3.293064594268799, "learning_rate": 0.0002, "loss": 1.7718, "step": 146000 }, { "epoch": 0.59, "grad_norm": 3.974902629852295, "learning_rate": 0.0002, "loss": 1.6048, "step": 146010 }, { "epoch": 0.59, "grad_norm": 2.8931596279144287, "learning_rate": 0.0002, "loss": 1.3566, "step": 146020 }, { "epoch": 0.59, "grad_norm": 4.3263092041015625, "learning_rate": 0.0002, "loss": 1.4763, "step": 146030 }, { "epoch": 0.59, "grad_norm": 3.3870580196380615, "learning_rate": 0.0002, "loss": 1.5685, "step": 146040 }, { "epoch": 0.59, "grad_norm": 3.113953113555908, "learning_rate": 0.0002, "loss": 1.5899, "step": 146050 }, { "epoch": 0.59, "grad_norm": 5.083286762237549, "learning_rate": 0.0002, "loss": 1.5545, "step": 146060 }, { "epoch": 0.59, "grad_norm": 2.9632251262664795, "learning_rate": 0.0002, "loss": 1.648, "step": 146070 }, { "epoch": 0.59, "grad_norm": 3.849560022354126, "learning_rate": 0.0002, "loss": 1.6055, "step": 146080 }, { "epoch": 0.59, "grad_norm": 3.7829906940460205, "learning_rate": 0.0002, "loss": 1.5576, "step": 146090 }, { "epoch": 0.59, "grad_norm": 1.9834071397781372, "learning_rate": 0.0002, "loss": 1.6896, "step": 146100 }, { "epoch": 0.59, "grad_norm": 2.9285452365875244, "learning_rate": 0.0002, "loss": 1.7476, "step": 146110 }, { "epoch": 0.59, "grad_norm": 2.0751986503601074, "learning_rate": 0.0002, "loss": 1.3908, "step": 146120 }, { "epoch": 0.59, "grad_norm": 2.096633195877075, "learning_rate": 0.0002, "loss": 1.8057, "step": 146130 }, { "epoch": 0.59, "grad_norm": 2.91513729095459, "learning_rate": 0.0002, "loss": 1.7291, "step": 146140 }, { "epoch": 0.59, "grad_norm": 1.7819880247116089, "learning_rate": 0.0002, "loss": 1.8584, "step": 146150 }, { "epoch": 0.6, "grad_norm": 2.8098456859588623, "learning_rate": 0.0002, "loss": 1.581, "step": 146160 }, { "epoch": 0.6, "grad_norm": 3.0472147464752197, "learning_rate": 0.0002, "loss": 1.5022, "step": 146170 }, { "epoch": 0.6, "grad_norm": 4.24691915512085, "learning_rate": 0.0002, "loss": 1.7407, "step": 146180 }, { "epoch": 0.6, "grad_norm": 2.754225254058838, "learning_rate": 0.0002, "loss": 1.4716, "step": 146190 }, { "epoch": 0.6, "grad_norm": 3.7891523838043213, "learning_rate": 0.0002, "loss": 1.5583, "step": 146200 }, { "epoch": 0.6, "grad_norm": 4.340595245361328, "learning_rate": 0.0002, "loss": 1.3967, "step": 146210 }, { "epoch": 0.6, "grad_norm": 1.8947086334228516, "learning_rate": 0.0002, "loss": 1.5939, "step": 146220 }, { "epoch": 0.6, "grad_norm": 3.029435634613037, "learning_rate": 0.0002, "loss": 1.6169, "step": 146230 }, { "epoch": 0.6, "grad_norm": 3.572531223297119, "learning_rate": 0.0002, "loss": 1.5135, "step": 146240 }, { "epoch": 0.6, "grad_norm": 2.259964942932129, "learning_rate": 0.0002, "loss": 1.6102, "step": 146250 }, { "epoch": 0.6, "grad_norm": 2.4936296939849854, "learning_rate": 0.0002, "loss": 1.7469, "step": 146260 }, { "epoch": 0.6, "grad_norm": 3.293539047241211, "learning_rate": 0.0002, "loss": 1.6235, "step": 146270 }, { "epoch": 0.6, "grad_norm": 3.401224374771118, "learning_rate": 0.0002, "loss": 1.5414, "step": 146280 }, { "epoch": 0.6, "grad_norm": 8.03328800201416, "learning_rate": 0.0002, "loss": 1.7859, "step": 146290 }, { "epoch": 0.6, "grad_norm": 2.735027551651001, "learning_rate": 0.0002, "loss": 1.474, "step": 146300 }, { "epoch": 0.6, "grad_norm": 3.425446033477783, "learning_rate": 0.0002, "loss": 1.6565, "step": 146310 }, { "epoch": 0.6, "grad_norm": 1.5841410160064697, "learning_rate": 0.0002, "loss": 1.6657, "step": 146320 }, { "epoch": 0.6, "grad_norm": 2.9722089767456055, "learning_rate": 0.0002, "loss": 1.5534, "step": 146330 }, { "epoch": 0.6, "grad_norm": 3.5950825214385986, "learning_rate": 0.0002, "loss": 1.665, "step": 146340 }, { "epoch": 0.6, "grad_norm": 2.6911227703094482, "learning_rate": 0.0002, "loss": 1.4484, "step": 146350 }, { "epoch": 0.6, "grad_norm": 3.7503647804260254, "learning_rate": 0.0002, "loss": 1.3742, "step": 146360 }, { "epoch": 0.6, "grad_norm": 4.192305564880371, "learning_rate": 0.0002, "loss": 1.6537, "step": 146370 }, { "epoch": 0.6, "grad_norm": 2.8174991607666016, "learning_rate": 0.0002, "loss": 1.6471, "step": 146380 }, { "epoch": 0.6, "grad_norm": 4.041783332824707, "learning_rate": 0.0002, "loss": 1.512, "step": 146390 }, { "epoch": 0.6, "grad_norm": 3.050405740737915, "learning_rate": 0.0002, "loss": 1.6621, "step": 146400 }, { "epoch": 0.6, "grad_norm": 2.74743390083313, "learning_rate": 0.0002, "loss": 1.348, "step": 146410 }, { "epoch": 0.6, "grad_norm": 3.519915819168091, "learning_rate": 0.0002, "loss": 1.6554, "step": 146420 }, { "epoch": 0.6, "grad_norm": 3.6196742057800293, "learning_rate": 0.0002, "loss": 1.7266, "step": 146430 }, { "epoch": 0.6, "grad_norm": 3.6906991004943848, "learning_rate": 0.0002, "loss": 1.6385, "step": 146440 }, { "epoch": 0.6, "grad_norm": 4.105449199676514, "learning_rate": 0.0002, "loss": 1.6173, "step": 146450 }, { "epoch": 0.6, "grad_norm": 3.3996903896331787, "learning_rate": 0.0002, "loss": 1.4771, "step": 146460 }, { "epoch": 0.6, "grad_norm": 4.081470012664795, "learning_rate": 0.0002, "loss": 1.5286, "step": 146470 }, { "epoch": 0.6, "grad_norm": 3.4344537258148193, "learning_rate": 0.0002, "loss": 1.6244, "step": 146480 }, { "epoch": 0.6, "grad_norm": 4.5093817710876465, "learning_rate": 0.0002, "loss": 1.2066, "step": 146490 }, { "epoch": 0.6, "grad_norm": 2.797816514968872, "learning_rate": 0.0002, "loss": 1.7109, "step": 146500 }, { "epoch": 0.6, "grad_norm": 3.0985324382781982, "learning_rate": 0.0002, "loss": 1.4447, "step": 146510 }, { "epoch": 0.6, "grad_norm": 2.0666024684906006, "learning_rate": 0.0002, "loss": 1.4877, "step": 146520 }, { "epoch": 0.6, "grad_norm": 3.466264009475708, "learning_rate": 0.0002, "loss": 1.6809, "step": 146530 }, { "epoch": 0.6, "grad_norm": 3.2936973571777344, "learning_rate": 0.0002, "loss": 1.6445, "step": 146540 }, { "epoch": 0.6, "grad_norm": 2.6239728927612305, "learning_rate": 0.0002, "loss": 1.7771, "step": 146550 }, { "epoch": 0.6, "grad_norm": 3.728905200958252, "learning_rate": 0.0002, "loss": 1.4651, "step": 146560 }, { "epoch": 0.6, "grad_norm": 2.829514741897583, "learning_rate": 0.0002, "loss": 1.4588, "step": 146570 }, { "epoch": 0.6, "grad_norm": 3.7652597427368164, "learning_rate": 0.0002, "loss": 1.5201, "step": 146580 }, { "epoch": 0.6, "grad_norm": 2.991666078567505, "learning_rate": 0.0002, "loss": 1.5062, "step": 146590 }, { "epoch": 0.6, "grad_norm": 3.3336777687072754, "learning_rate": 0.0002, "loss": 1.6442, "step": 146600 }, { "epoch": 0.6, "grad_norm": 2.901949405670166, "learning_rate": 0.0002, "loss": 1.28, "step": 146610 }, { "epoch": 0.6, "grad_norm": 2.827308177947998, "learning_rate": 0.0002, "loss": 1.7495, "step": 146620 }, { "epoch": 0.6, "grad_norm": 3.963134288787842, "learning_rate": 0.0002, "loss": 1.3088, "step": 146630 }, { "epoch": 0.6, "grad_norm": 4.064605236053467, "learning_rate": 0.0002, "loss": 1.3172, "step": 146640 }, { "epoch": 0.6, "grad_norm": 2.220245599746704, "learning_rate": 0.0002, "loss": 1.7457, "step": 146650 }, { "epoch": 0.6, "grad_norm": 5.024345874786377, "learning_rate": 0.0002, "loss": 1.7206, "step": 146660 }, { "epoch": 0.6, "grad_norm": 4.71925163269043, "learning_rate": 0.0002, "loss": 1.5981, "step": 146670 }, { "epoch": 0.6, "grad_norm": 3.9313907623291016, "learning_rate": 0.0002, "loss": 1.7427, "step": 146680 }, { "epoch": 0.6, "grad_norm": 2.679640293121338, "learning_rate": 0.0002, "loss": 1.5756, "step": 146690 }, { "epoch": 0.6, "grad_norm": 2.3734214305877686, "learning_rate": 0.0002, "loss": 1.6331, "step": 146700 }, { "epoch": 0.6, "grad_norm": 3.23861026763916, "learning_rate": 0.0002, "loss": 1.6346, "step": 146710 }, { "epoch": 0.6, "grad_norm": 2.7212696075439453, "learning_rate": 0.0002, "loss": 1.7091, "step": 146720 }, { "epoch": 0.6, "grad_norm": 2.7208242416381836, "learning_rate": 0.0002, "loss": 1.4189, "step": 146730 }, { "epoch": 0.6, "grad_norm": 2.747687816619873, "learning_rate": 0.0002, "loss": 1.6039, "step": 146740 }, { "epoch": 0.6, "grad_norm": 1.9655705690383911, "learning_rate": 0.0002, "loss": 1.4187, "step": 146750 }, { "epoch": 0.6, "grad_norm": 3.8926491737365723, "learning_rate": 0.0002, "loss": 1.6033, "step": 146760 }, { "epoch": 0.6, "grad_norm": 3.111708641052246, "learning_rate": 0.0002, "loss": 1.3909, "step": 146770 }, { "epoch": 0.6, "grad_norm": 2.2557597160339355, "learning_rate": 0.0002, "loss": 1.4103, "step": 146780 }, { "epoch": 0.6, "grad_norm": 2.823000192642212, "learning_rate": 0.0002, "loss": 1.6198, "step": 146790 }, { "epoch": 0.6, "grad_norm": 1.4003030061721802, "learning_rate": 0.0002, "loss": 1.4205, "step": 146800 }, { "epoch": 0.6, "grad_norm": 4.801102638244629, "learning_rate": 0.0002, "loss": 1.8543, "step": 146810 }, { "epoch": 0.6, "grad_norm": 1.9721224308013916, "learning_rate": 0.0002, "loss": 1.4166, "step": 146820 }, { "epoch": 0.6, "grad_norm": 2.4048118591308594, "learning_rate": 0.0002, "loss": 1.456, "step": 146830 }, { "epoch": 0.6, "grad_norm": 2.6634597778320312, "learning_rate": 0.0002, "loss": 1.4514, "step": 146840 }, { "epoch": 0.6, "grad_norm": 3.142157793045044, "learning_rate": 0.0002, "loss": 1.6206, "step": 146850 }, { "epoch": 0.6, "grad_norm": 2.899359941482544, "learning_rate": 0.0002, "loss": 1.6591, "step": 146860 }, { "epoch": 0.6, "grad_norm": 3.078632354736328, "learning_rate": 0.0002, "loss": 1.5804, "step": 146870 }, { "epoch": 0.6, "grad_norm": 2.8623287677764893, "learning_rate": 0.0002, "loss": 1.3543, "step": 146880 }, { "epoch": 0.6, "grad_norm": 4.997032165527344, "learning_rate": 0.0002, "loss": 1.3235, "step": 146890 }, { "epoch": 0.6, "grad_norm": 4.042985439300537, "learning_rate": 0.0002, "loss": 1.579, "step": 146900 }, { "epoch": 0.6, "grad_norm": 2.181730270385742, "learning_rate": 0.0002, "loss": 1.6238, "step": 146910 }, { "epoch": 0.6, "grad_norm": 3.1973628997802734, "learning_rate": 0.0002, "loss": 1.3999, "step": 146920 }, { "epoch": 0.6, "grad_norm": 2.0369701385498047, "learning_rate": 0.0002, "loss": 1.6117, "step": 146930 }, { "epoch": 0.6, "grad_norm": 3.9555013179779053, "learning_rate": 0.0002, "loss": 1.7333, "step": 146940 }, { "epoch": 0.6, "grad_norm": 2.915513515472412, "learning_rate": 0.0002, "loss": 1.5112, "step": 146950 }, { "epoch": 0.6, "grad_norm": 2.3752002716064453, "learning_rate": 0.0002, "loss": 1.7107, "step": 146960 }, { "epoch": 0.6, "grad_norm": 2.7596499919891357, "learning_rate": 0.0002, "loss": 1.4864, "step": 146970 }, { "epoch": 0.6, "grad_norm": 4.260519504547119, "learning_rate": 0.0002, "loss": 1.6981, "step": 146980 }, { "epoch": 0.6, "grad_norm": 3.5076229572296143, "learning_rate": 0.0002, "loss": 1.6241, "step": 146990 }, { "epoch": 0.6, "grad_norm": 3.2679390907287598, "learning_rate": 0.0002, "loss": 1.6688, "step": 147000 }, { "epoch": 0.6, "grad_norm": 2.445505142211914, "learning_rate": 0.0002, "loss": 1.4787, "step": 147010 }, { "epoch": 0.6, "grad_norm": 2.3970654010772705, "learning_rate": 0.0002, "loss": 1.4488, "step": 147020 }, { "epoch": 0.6, "grad_norm": 2.0552618503570557, "learning_rate": 0.0002, "loss": 1.6805, "step": 147030 }, { "epoch": 0.6, "grad_norm": 3.6992101669311523, "learning_rate": 0.0002, "loss": 1.6287, "step": 147040 }, { "epoch": 0.6, "grad_norm": 4.120721817016602, "learning_rate": 0.0002, "loss": 1.7114, "step": 147050 }, { "epoch": 0.6, "grad_norm": 3.2760117053985596, "learning_rate": 0.0002, "loss": 1.6274, "step": 147060 }, { "epoch": 0.6, "grad_norm": 1.5065183639526367, "learning_rate": 0.0002, "loss": 1.6929, "step": 147070 }, { "epoch": 0.6, "grad_norm": 2.898732900619507, "learning_rate": 0.0002, "loss": 1.5154, "step": 147080 }, { "epoch": 0.6, "grad_norm": 2.4138176441192627, "learning_rate": 0.0002, "loss": 1.6448, "step": 147090 }, { "epoch": 0.6, "grad_norm": 3.0338668823242188, "learning_rate": 0.0002, "loss": 1.554, "step": 147100 }, { "epoch": 0.6, "grad_norm": 1.8915224075317383, "learning_rate": 0.0002, "loss": 1.58, "step": 147110 }, { "epoch": 0.6, "grad_norm": 3.1107337474823, "learning_rate": 0.0002, "loss": 1.6614, "step": 147120 }, { "epoch": 0.6, "grad_norm": 3.4010767936706543, "learning_rate": 0.0002, "loss": 1.6311, "step": 147130 }, { "epoch": 0.6, "grad_norm": 2.890864849090576, "learning_rate": 0.0002, "loss": 1.3633, "step": 147140 }, { "epoch": 0.6, "grad_norm": 3.6366803646087646, "learning_rate": 0.0002, "loss": 1.5403, "step": 147150 }, { "epoch": 0.6, "grad_norm": 2.8983213901519775, "learning_rate": 0.0002, "loss": 1.4688, "step": 147160 }, { "epoch": 0.6, "grad_norm": 3.5203680992126465, "learning_rate": 0.0002, "loss": 1.7425, "step": 147170 }, { "epoch": 0.6, "grad_norm": 2.411478042602539, "learning_rate": 0.0002, "loss": 1.6739, "step": 147180 }, { "epoch": 0.6, "grad_norm": 2.909060478210449, "learning_rate": 0.0002, "loss": 1.4087, "step": 147190 }, { "epoch": 0.6, "grad_norm": 1.7415025234222412, "learning_rate": 0.0002, "loss": 1.5354, "step": 147200 }, { "epoch": 0.6, "grad_norm": 3.153212308883667, "learning_rate": 0.0002, "loss": 1.3155, "step": 147210 }, { "epoch": 0.6, "grad_norm": 2.3095977306365967, "learning_rate": 0.0002, "loss": 1.6687, "step": 147220 }, { "epoch": 0.6, "grad_norm": 1.9207383394241333, "learning_rate": 0.0002, "loss": 1.7107, "step": 147230 }, { "epoch": 0.6, "grad_norm": 5.306963920593262, "learning_rate": 0.0002, "loss": 1.677, "step": 147240 }, { "epoch": 0.6, "grad_norm": 3.0928099155426025, "learning_rate": 0.0002, "loss": 1.7116, "step": 147250 }, { "epoch": 0.6, "grad_norm": 4.48909854888916, "learning_rate": 0.0002, "loss": 1.428, "step": 147260 }, { "epoch": 0.6, "grad_norm": 3.480041027069092, "learning_rate": 0.0002, "loss": 1.4449, "step": 147270 }, { "epoch": 0.6, "grad_norm": 3.429490327835083, "learning_rate": 0.0002, "loss": 1.9551, "step": 147280 }, { "epoch": 0.6, "grad_norm": 3.6702375411987305, "learning_rate": 0.0002, "loss": 1.3528, "step": 147290 }, { "epoch": 0.6, "grad_norm": 5.525444507598877, "learning_rate": 0.0002, "loss": 1.5291, "step": 147300 }, { "epoch": 0.6, "grad_norm": 3.8661398887634277, "learning_rate": 0.0002, "loss": 1.5249, "step": 147310 }, { "epoch": 0.6, "grad_norm": 1.8573194742202759, "learning_rate": 0.0002, "loss": 1.2238, "step": 147320 }, { "epoch": 0.6, "grad_norm": 4.436105251312256, "learning_rate": 0.0002, "loss": 1.349, "step": 147330 }, { "epoch": 0.6, "grad_norm": 1.9840004444122314, "learning_rate": 0.0002, "loss": 1.5705, "step": 147340 }, { "epoch": 0.6, "grad_norm": 3.337970733642578, "learning_rate": 0.0002, "loss": 1.3737, "step": 147350 }, { "epoch": 0.6, "grad_norm": 3.067739248275757, "learning_rate": 0.0002, "loss": 1.6121, "step": 147360 }, { "epoch": 0.6, "grad_norm": 2.2898881435394287, "learning_rate": 0.0002, "loss": 1.6284, "step": 147370 }, { "epoch": 0.6, "grad_norm": 2.4163870811462402, "learning_rate": 0.0002, "loss": 1.8186, "step": 147380 }, { "epoch": 0.6, "grad_norm": 2.8077871799468994, "learning_rate": 0.0002, "loss": 1.7552, "step": 147390 }, { "epoch": 0.6, "grad_norm": 2.347815990447998, "learning_rate": 0.0002, "loss": 1.5629, "step": 147400 }, { "epoch": 0.6, "grad_norm": 2.8330488204956055, "learning_rate": 0.0002, "loss": 1.5426, "step": 147410 }, { "epoch": 0.6, "grad_norm": 1.904661774635315, "learning_rate": 0.0002, "loss": 1.395, "step": 147420 }, { "epoch": 0.6, "grad_norm": 3.8035154342651367, "learning_rate": 0.0002, "loss": 1.8562, "step": 147430 }, { "epoch": 0.6, "grad_norm": 2.3182015419006348, "learning_rate": 0.0002, "loss": 1.522, "step": 147440 }, { "epoch": 0.6, "grad_norm": 3.3988070487976074, "learning_rate": 0.0002, "loss": 1.5559, "step": 147450 }, { "epoch": 0.6, "grad_norm": 1.9814931154251099, "learning_rate": 0.0002, "loss": 1.7318, "step": 147460 }, { "epoch": 0.6, "grad_norm": 4.865689754486084, "learning_rate": 0.0002, "loss": 1.4979, "step": 147470 }, { "epoch": 0.6, "grad_norm": 2.7354896068573, "learning_rate": 0.0002, "loss": 1.885, "step": 147480 }, { "epoch": 0.6, "grad_norm": 1.9704262018203735, "learning_rate": 0.0002, "loss": 1.6896, "step": 147490 }, { "epoch": 0.6, "grad_norm": 1.811026930809021, "learning_rate": 0.0002, "loss": 1.4178, "step": 147500 }, { "epoch": 0.6, "grad_norm": 5.806945323944092, "learning_rate": 0.0002, "loss": 1.4981, "step": 147510 }, { "epoch": 0.6, "grad_norm": 4.4959187507629395, "learning_rate": 0.0002, "loss": 1.5778, "step": 147520 }, { "epoch": 0.6, "grad_norm": 2.032381057739258, "learning_rate": 0.0002, "loss": 1.7004, "step": 147530 }, { "epoch": 0.6, "grad_norm": 2.986616849899292, "learning_rate": 0.0002, "loss": 1.5752, "step": 147540 }, { "epoch": 0.6, "grad_norm": 2.1757946014404297, "learning_rate": 0.0002, "loss": 1.6163, "step": 147550 }, { "epoch": 0.6, "grad_norm": 2.588432788848877, "learning_rate": 0.0002, "loss": 1.5023, "step": 147560 }, { "epoch": 0.6, "grad_norm": 3.2992563247680664, "learning_rate": 0.0002, "loss": 1.5407, "step": 147570 }, { "epoch": 0.6, "grad_norm": 3.5087223052978516, "learning_rate": 0.0002, "loss": 1.7777, "step": 147580 }, { "epoch": 0.6, "grad_norm": 4.851124286651611, "learning_rate": 0.0002, "loss": 1.8311, "step": 147590 }, { "epoch": 0.6, "grad_norm": 4.799617767333984, "learning_rate": 0.0002, "loss": 1.6976, "step": 147600 }, { "epoch": 0.6, "grad_norm": 2.918004035949707, "learning_rate": 0.0002, "loss": 1.3555, "step": 147610 }, { "epoch": 0.6, "grad_norm": 2.6311147212982178, "learning_rate": 0.0002, "loss": 1.7932, "step": 147620 }, { "epoch": 0.6, "grad_norm": 3.642362117767334, "learning_rate": 0.0002, "loss": 1.5871, "step": 147630 }, { "epoch": 0.6, "grad_norm": 2.026038646697998, "learning_rate": 0.0002, "loss": 1.5756, "step": 147640 }, { "epoch": 0.6, "grad_norm": 3.6811764240264893, "learning_rate": 0.0002, "loss": 1.5522, "step": 147650 }, { "epoch": 0.6, "grad_norm": 2.190653085708618, "learning_rate": 0.0002, "loss": 1.5134, "step": 147660 }, { "epoch": 0.6, "grad_norm": 3.2056589126586914, "learning_rate": 0.0002, "loss": 1.6575, "step": 147670 }, { "epoch": 0.6, "grad_norm": 2.846407890319824, "learning_rate": 0.0002, "loss": 1.311, "step": 147680 }, { "epoch": 0.6, "grad_norm": 3.3062524795532227, "learning_rate": 0.0002, "loss": 1.6379, "step": 147690 }, { "epoch": 0.6, "grad_norm": 1.9893313646316528, "learning_rate": 0.0002, "loss": 1.5817, "step": 147700 }, { "epoch": 0.6, "grad_norm": 2.4990344047546387, "learning_rate": 0.0002, "loss": 1.7277, "step": 147710 }, { "epoch": 0.6, "grad_norm": 2.245119571685791, "learning_rate": 0.0002, "loss": 1.5362, "step": 147720 }, { "epoch": 0.6, "grad_norm": 2.172544479370117, "learning_rate": 0.0002, "loss": 1.732, "step": 147730 }, { "epoch": 0.6, "grad_norm": 2.2729971408843994, "learning_rate": 0.0002, "loss": 1.5541, "step": 147740 }, { "epoch": 0.6, "grad_norm": 2.193690538406372, "learning_rate": 0.0002, "loss": 1.4975, "step": 147750 }, { "epoch": 0.6, "grad_norm": 3.5911154747009277, "learning_rate": 0.0002, "loss": 1.5244, "step": 147760 }, { "epoch": 0.6, "grad_norm": 6.981244087219238, "learning_rate": 0.0002, "loss": 1.6013, "step": 147770 }, { "epoch": 0.6, "grad_norm": 3.614409923553467, "learning_rate": 0.0002, "loss": 1.7447, "step": 147780 }, { "epoch": 0.6, "grad_norm": 7.682974338531494, "learning_rate": 0.0002, "loss": 1.5018, "step": 147790 }, { "epoch": 0.6, "grad_norm": 2.6953446865081787, "learning_rate": 0.0002, "loss": 1.4805, "step": 147800 }, { "epoch": 0.6, "grad_norm": 2.6488142013549805, "learning_rate": 0.0002, "loss": 1.5502, "step": 147810 }, { "epoch": 0.6, "grad_norm": 3.627453565597534, "learning_rate": 0.0002, "loss": 1.6339, "step": 147820 }, { "epoch": 0.6, "grad_norm": 1.781600832939148, "learning_rate": 0.0002, "loss": 1.6239, "step": 147830 }, { "epoch": 0.6, "grad_norm": 3.3973031044006348, "learning_rate": 0.0002, "loss": 1.6026, "step": 147840 }, { "epoch": 0.6, "grad_norm": 2.5190138816833496, "learning_rate": 0.0002, "loss": 1.4943, "step": 147850 }, { "epoch": 0.6, "grad_norm": 4.964199542999268, "learning_rate": 0.0002, "loss": 1.5048, "step": 147860 }, { "epoch": 0.6, "grad_norm": 1.0621691942214966, "learning_rate": 0.0002, "loss": 1.6748, "step": 147870 }, { "epoch": 0.6, "grad_norm": 2.0978546142578125, "learning_rate": 0.0002, "loss": 1.7564, "step": 147880 }, { "epoch": 0.6, "grad_norm": 3.8997962474823, "learning_rate": 0.0002, "loss": 1.5265, "step": 147890 }, { "epoch": 0.6, "grad_norm": 4.800848007202148, "learning_rate": 0.0002, "loss": 1.6, "step": 147900 }, { "epoch": 0.6, "grad_norm": 3.247511386871338, "learning_rate": 0.0002, "loss": 1.4858, "step": 147910 }, { "epoch": 0.6, "grad_norm": 5.598001003265381, "learning_rate": 0.0002, "loss": 1.6617, "step": 147920 }, { "epoch": 0.6, "grad_norm": 2.694335699081421, "learning_rate": 0.0002, "loss": 1.5939, "step": 147930 }, { "epoch": 0.6, "grad_norm": 3.012746810913086, "learning_rate": 0.0002, "loss": 1.6548, "step": 147940 }, { "epoch": 0.6, "grad_norm": 2.726720094680786, "learning_rate": 0.0002, "loss": 1.6499, "step": 147950 }, { "epoch": 0.6, "grad_norm": 3.40006947517395, "learning_rate": 0.0002, "loss": 1.4995, "step": 147960 }, { "epoch": 0.6, "grad_norm": 2.992243528366089, "learning_rate": 0.0002, "loss": 1.6206, "step": 147970 }, { "epoch": 0.6, "grad_norm": 2.6778385639190674, "learning_rate": 0.0002, "loss": 1.3938, "step": 147980 }, { "epoch": 0.6, "grad_norm": 2.673527956008911, "learning_rate": 0.0002, "loss": 1.4929, "step": 147990 }, { "epoch": 0.6, "grad_norm": 3.805018663406372, "learning_rate": 0.0002, "loss": 1.3782, "step": 148000 }, { "epoch": 0.6, "grad_norm": 7.032206058502197, "learning_rate": 0.0002, "loss": 1.8036, "step": 148010 }, { "epoch": 0.6, "grad_norm": 3.667884111404419, "learning_rate": 0.0002, "loss": 1.5699, "step": 148020 }, { "epoch": 0.6, "grad_norm": 5.058253288269043, "learning_rate": 0.0002, "loss": 1.4843, "step": 148030 }, { "epoch": 0.6, "grad_norm": 3.4978833198547363, "learning_rate": 0.0002, "loss": 1.4597, "step": 148040 }, { "epoch": 0.6, "grad_norm": 3.7111868858337402, "learning_rate": 0.0002, "loss": 1.4597, "step": 148050 }, { "epoch": 0.6, "grad_norm": 1.895351529121399, "learning_rate": 0.0002, "loss": 1.631, "step": 148060 }, { "epoch": 0.6, "grad_norm": 2.3499701023101807, "learning_rate": 0.0002, "loss": 1.7007, "step": 148070 }, { "epoch": 0.6, "grad_norm": 2.7382795810699463, "learning_rate": 0.0002, "loss": 1.6192, "step": 148080 }, { "epoch": 0.6, "grad_norm": 1.9184656143188477, "learning_rate": 0.0002, "loss": 1.5718, "step": 148090 }, { "epoch": 0.6, "grad_norm": 2.5512804985046387, "learning_rate": 0.0002, "loss": 1.5461, "step": 148100 }, { "epoch": 0.6, "grad_norm": 4.2922587394714355, "learning_rate": 0.0002, "loss": 1.6042, "step": 148110 }, { "epoch": 0.6, "grad_norm": 2.7415623664855957, "learning_rate": 0.0002, "loss": 1.6374, "step": 148120 }, { "epoch": 0.6, "grad_norm": 2.5938923358917236, "learning_rate": 0.0002, "loss": 1.3677, "step": 148130 }, { "epoch": 0.6, "grad_norm": 4.0929951667785645, "learning_rate": 0.0002, "loss": 1.411, "step": 148140 }, { "epoch": 0.6, "grad_norm": 2.9453585147857666, "learning_rate": 0.0002, "loss": 1.8179, "step": 148150 }, { "epoch": 0.6, "grad_norm": 2.2738239765167236, "learning_rate": 0.0002, "loss": 1.6176, "step": 148160 }, { "epoch": 0.6, "grad_norm": 2.8512046337127686, "learning_rate": 0.0002, "loss": 1.5603, "step": 148170 }, { "epoch": 0.6, "grad_norm": 5.584303855895996, "learning_rate": 0.0002, "loss": 1.6196, "step": 148180 }, { "epoch": 0.6, "grad_norm": 2.6012537479400635, "learning_rate": 0.0002, "loss": 1.4916, "step": 148190 }, { "epoch": 0.6, "grad_norm": 2.958332061767578, "learning_rate": 0.0002, "loss": 1.6993, "step": 148200 }, { "epoch": 0.6, "grad_norm": 6.5377421379089355, "learning_rate": 0.0002, "loss": 1.5227, "step": 148210 }, { "epoch": 0.6, "grad_norm": 3.322643995285034, "learning_rate": 0.0002, "loss": 1.5346, "step": 148220 }, { "epoch": 0.6, "grad_norm": 3.060041666030884, "learning_rate": 0.0002, "loss": 1.6778, "step": 148230 }, { "epoch": 0.6, "grad_norm": 3.376011371612549, "learning_rate": 0.0002, "loss": 1.5099, "step": 148240 }, { "epoch": 0.6, "grad_norm": 2.4897572994232178, "learning_rate": 0.0002, "loss": 2.0017, "step": 148250 }, { "epoch": 0.6, "grad_norm": 2.7426626682281494, "learning_rate": 0.0002, "loss": 1.3917, "step": 148260 }, { "epoch": 0.6, "grad_norm": 2.489689588546753, "learning_rate": 0.0002, "loss": 1.5391, "step": 148270 }, { "epoch": 0.6, "grad_norm": 2.368791341781616, "learning_rate": 0.0002, "loss": 1.4302, "step": 148280 }, { "epoch": 0.6, "grad_norm": 8.903152465820312, "learning_rate": 0.0002, "loss": 1.719, "step": 148290 }, { "epoch": 0.6, "grad_norm": 2.5628323554992676, "learning_rate": 0.0002, "loss": 1.722, "step": 148300 }, { "epoch": 0.6, "grad_norm": 3.0599637031555176, "learning_rate": 0.0002, "loss": 1.4718, "step": 148310 }, { "epoch": 0.6, "grad_norm": 3.179892063140869, "learning_rate": 0.0002, "loss": 1.5845, "step": 148320 }, { "epoch": 0.6, "grad_norm": 5.331453800201416, "learning_rate": 0.0002, "loss": 1.6669, "step": 148330 }, { "epoch": 0.6, "grad_norm": 5.264748573303223, "learning_rate": 0.0002, "loss": 1.7947, "step": 148340 }, { "epoch": 0.6, "grad_norm": 4.125826358795166, "learning_rate": 0.0002, "loss": 1.6379, "step": 148350 }, { "epoch": 0.6, "grad_norm": 3.708319902420044, "learning_rate": 0.0002, "loss": 1.3724, "step": 148360 }, { "epoch": 0.6, "grad_norm": 2.8660972118377686, "learning_rate": 0.0002, "loss": 1.4313, "step": 148370 }, { "epoch": 0.6, "grad_norm": 3.531416893005371, "learning_rate": 0.0002, "loss": 1.3969, "step": 148380 }, { "epoch": 0.6, "grad_norm": 2.6227142810821533, "learning_rate": 0.0002, "loss": 1.7198, "step": 148390 }, { "epoch": 0.6, "grad_norm": 2.557032346725464, "learning_rate": 0.0002, "loss": 1.7124, "step": 148400 }, { "epoch": 0.6, "grad_norm": 4.371963977813721, "learning_rate": 0.0002, "loss": 1.5, "step": 148410 }, { "epoch": 0.6, "grad_norm": 2.042198419570923, "learning_rate": 0.0002, "loss": 1.4498, "step": 148420 }, { "epoch": 0.6, "grad_norm": 2.325774908065796, "learning_rate": 0.0002, "loss": 1.335, "step": 148430 }, { "epoch": 0.6, "grad_norm": 2.493802785873413, "learning_rate": 0.0002, "loss": 1.7525, "step": 148440 }, { "epoch": 0.6, "grad_norm": 3.7058310508728027, "learning_rate": 0.0002, "loss": 1.6908, "step": 148450 }, { "epoch": 0.6, "grad_norm": 2.6179089546203613, "learning_rate": 0.0002, "loss": 1.612, "step": 148460 }, { "epoch": 0.6, "grad_norm": 3.187185764312744, "learning_rate": 0.0002, "loss": 1.7666, "step": 148470 }, { "epoch": 0.6, "grad_norm": 2.3222594261169434, "learning_rate": 0.0002, "loss": 1.5897, "step": 148480 }, { "epoch": 0.6, "grad_norm": 3.5392866134643555, "learning_rate": 0.0002, "loss": 1.4135, "step": 148490 }, { "epoch": 0.6, "grad_norm": 2.7639503479003906, "learning_rate": 0.0002, "loss": 1.7983, "step": 148500 }, { "epoch": 0.6, "grad_norm": 2.782470464706421, "learning_rate": 0.0002, "loss": 1.6033, "step": 148510 }, { "epoch": 0.6, "grad_norm": 2.5673751831054688, "learning_rate": 0.0002, "loss": 1.6623, "step": 148520 }, { "epoch": 0.6, "grad_norm": 4.364479064941406, "learning_rate": 0.0002, "loss": 1.6848, "step": 148530 }, { "epoch": 0.6, "grad_norm": 3.5864243507385254, "learning_rate": 0.0002, "loss": 1.5059, "step": 148540 }, { "epoch": 0.6, "grad_norm": 2.827054738998413, "learning_rate": 0.0002, "loss": 1.6425, "step": 148550 }, { "epoch": 0.6, "grad_norm": 2.467421770095825, "learning_rate": 0.0002, "loss": 1.4148, "step": 148560 }, { "epoch": 0.6, "grad_norm": 2.8181467056274414, "learning_rate": 0.0002, "loss": 1.5084, "step": 148570 }, { "epoch": 0.6, "grad_norm": 2.355853796005249, "learning_rate": 0.0002, "loss": 1.3891, "step": 148580 }, { "epoch": 0.6, "grad_norm": 2.824850082397461, "learning_rate": 0.0002, "loss": 1.4781, "step": 148590 }, { "epoch": 0.6, "grad_norm": 2.086804151535034, "learning_rate": 0.0002, "loss": 1.6297, "step": 148600 }, { "epoch": 0.6, "grad_norm": 4.246990203857422, "learning_rate": 0.0002, "loss": 1.4234, "step": 148610 }, { "epoch": 0.61, "grad_norm": 3.3790371417999268, "learning_rate": 0.0002, "loss": 1.6979, "step": 148620 }, { "epoch": 0.61, "grad_norm": 3.1754660606384277, "learning_rate": 0.0002, "loss": 1.6017, "step": 148630 }, { "epoch": 0.61, "grad_norm": 2.90705943107605, "learning_rate": 0.0002, "loss": 1.7274, "step": 148640 }, { "epoch": 0.61, "grad_norm": 4.001359462738037, "learning_rate": 0.0002, "loss": 1.5553, "step": 148650 }, { "epoch": 0.61, "grad_norm": 2.115891695022583, "learning_rate": 0.0002, "loss": 1.7412, "step": 148660 }, { "epoch": 0.61, "grad_norm": 2.193784713745117, "learning_rate": 0.0002, "loss": 1.662, "step": 148670 }, { "epoch": 0.61, "grad_norm": 3.2779767513275146, "learning_rate": 0.0002, "loss": 1.7831, "step": 148680 }, { "epoch": 0.61, "grad_norm": 5.652781963348389, "learning_rate": 0.0002, "loss": 1.4706, "step": 148690 }, { "epoch": 0.61, "grad_norm": 3.158749580383301, "learning_rate": 0.0002, "loss": 1.6077, "step": 148700 }, { "epoch": 0.61, "grad_norm": 3.399092197418213, "learning_rate": 0.0002, "loss": 1.5976, "step": 148710 }, { "epoch": 0.61, "grad_norm": 3.7337613105773926, "learning_rate": 0.0002, "loss": 1.6031, "step": 148720 }, { "epoch": 0.61, "grad_norm": 2.1484198570251465, "learning_rate": 0.0002, "loss": 1.5353, "step": 148730 }, { "epoch": 0.61, "grad_norm": 1.8992919921875, "learning_rate": 0.0002, "loss": 1.4597, "step": 148740 }, { "epoch": 0.61, "grad_norm": 3.9292938709259033, "learning_rate": 0.0002, "loss": 1.5949, "step": 148750 }, { "epoch": 0.61, "grad_norm": 2.2937827110290527, "learning_rate": 0.0002, "loss": 1.8907, "step": 148760 }, { "epoch": 0.61, "grad_norm": 2.7425484657287598, "learning_rate": 0.0002, "loss": 1.7355, "step": 148770 }, { "epoch": 0.61, "grad_norm": 2.6011414527893066, "learning_rate": 0.0002, "loss": 1.7599, "step": 148780 }, { "epoch": 0.61, "grad_norm": 4.168368816375732, "learning_rate": 0.0002, "loss": 1.4676, "step": 148790 }, { "epoch": 0.61, "grad_norm": 2.4102795124053955, "learning_rate": 0.0002, "loss": 1.8439, "step": 148800 }, { "epoch": 0.61, "grad_norm": 3.597698926925659, "learning_rate": 0.0002, "loss": 1.7529, "step": 148810 }, { "epoch": 0.61, "grad_norm": 2.7530627250671387, "learning_rate": 0.0002, "loss": 1.476, "step": 148820 }, { "epoch": 0.61, "grad_norm": 3.1481082439422607, "learning_rate": 0.0002, "loss": 1.6767, "step": 148830 }, { "epoch": 0.61, "grad_norm": 2.9568521976470947, "learning_rate": 0.0002, "loss": 1.7847, "step": 148840 }, { "epoch": 0.61, "grad_norm": 1.5947130918502808, "learning_rate": 0.0002, "loss": 1.6215, "step": 148850 }, { "epoch": 0.61, "grad_norm": 2.718594551086426, "learning_rate": 0.0002, "loss": 1.5632, "step": 148860 }, { "epoch": 0.61, "grad_norm": 2.715569496154785, "learning_rate": 0.0002, "loss": 1.6482, "step": 148870 }, { "epoch": 0.61, "grad_norm": 2.0951273441314697, "learning_rate": 0.0002, "loss": 1.4628, "step": 148880 }, { "epoch": 0.61, "grad_norm": 2.3977551460266113, "learning_rate": 0.0002, "loss": 1.5497, "step": 148890 }, { "epoch": 0.61, "grad_norm": 2.9550418853759766, "learning_rate": 0.0002, "loss": 1.5294, "step": 148900 }, { "epoch": 0.61, "grad_norm": 3.0874643325805664, "learning_rate": 0.0002, "loss": 1.5681, "step": 148910 }, { "epoch": 0.61, "grad_norm": 2.496662139892578, "learning_rate": 0.0002, "loss": 1.8681, "step": 148920 }, { "epoch": 0.61, "grad_norm": 3.335009813308716, "learning_rate": 0.0002, "loss": 1.5683, "step": 148930 }, { "epoch": 0.61, "grad_norm": 3.3757529258728027, "learning_rate": 0.0002, "loss": 1.5702, "step": 148940 }, { "epoch": 0.61, "grad_norm": 2.7741856575012207, "learning_rate": 0.0002, "loss": 1.5441, "step": 148950 }, { "epoch": 0.61, "grad_norm": 2.554340362548828, "learning_rate": 0.0002, "loss": 1.6386, "step": 148960 }, { "epoch": 0.61, "grad_norm": 6.426239013671875, "learning_rate": 0.0002, "loss": 1.4621, "step": 148970 }, { "epoch": 0.61, "grad_norm": 3.331275701522827, "learning_rate": 0.0002, "loss": 1.746, "step": 148980 }, { "epoch": 0.61, "grad_norm": 1.8693276643753052, "learning_rate": 0.0002, "loss": 1.5407, "step": 148990 }, { "epoch": 0.61, "grad_norm": 2.768077850341797, "learning_rate": 0.0002, "loss": 1.6511, "step": 149000 }, { "epoch": 0.61, "grad_norm": 2.9224750995635986, "learning_rate": 0.0002, "loss": 1.3602, "step": 149010 }, { "epoch": 0.61, "grad_norm": 2.7017366886138916, "learning_rate": 0.0002, "loss": 1.5517, "step": 149020 }, { "epoch": 0.61, "grad_norm": 1.7059118747711182, "learning_rate": 0.0002, "loss": 1.6504, "step": 149030 }, { "epoch": 0.61, "grad_norm": 3.458096981048584, "learning_rate": 0.0002, "loss": 1.335, "step": 149040 }, { "epoch": 0.61, "grad_norm": 3.57788348197937, "learning_rate": 0.0002, "loss": 1.6894, "step": 149050 }, { "epoch": 0.61, "grad_norm": 3.8899669647216797, "learning_rate": 0.0002, "loss": 1.5481, "step": 149060 }, { "epoch": 0.61, "grad_norm": 3.645994186401367, "learning_rate": 0.0002, "loss": 1.6651, "step": 149070 }, { "epoch": 0.61, "grad_norm": 2.612273931503296, "learning_rate": 0.0002, "loss": 1.6157, "step": 149080 }, { "epoch": 0.61, "grad_norm": 2.751122236251831, "learning_rate": 0.0002, "loss": 1.611, "step": 149090 }, { "epoch": 0.61, "grad_norm": 4.434129238128662, "learning_rate": 0.0002, "loss": 1.5814, "step": 149100 }, { "epoch": 0.61, "grad_norm": 3.098741054534912, "learning_rate": 0.0002, "loss": 1.4542, "step": 149110 }, { "epoch": 0.61, "grad_norm": 1.7862093448638916, "learning_rate": 0.0002, "loss": 1.456, "step": 149120 }, { "epoch": 0.61, "grad_norm": 4.325265884399414, "learning_rate": 0.0002, "loss": 1.4843, "step": 149130 }, { "epoch": 0.61, "grad_norm": 1.9085536003112793, "learning_rate": 0.0002, "loss": 1.5295, "step": 149140 }, { "epoch": 0.61, "grad_norm": 2.3413138389587402, "learning_rate": 0.0002, "loss": 1.4094, "step": 149150 }, { "epoch": 0.61, "grad_norm": 2.7347686290740967, "learning_rate": 0.0002, "loss": 1.7495, "step": 149160 }, { "epoch": 0.61, "grad_norm": 2.5349767208099365, "learning_rate": 0.0002, "loss": 1.5826, "step": 149170 }, { "epoch": 0.61, "grad_norm": 2.211355686187744, "learning_rate": 0.0002, "loss": 1.49, "step": 149180 }, { "epoch": 0.61, "grad_norm": 4.234583854675293, "learning_rate": 0.0002, "loss": 1.621, "step": 149190 }, { "epoch": 0.61, "grad_norm": 2.507056474685669, "learning_rate": 0.0002, "loss": 1.8121, "step": 149200 }, { "epoch": 0.61, "grad_norm": 3.020939826965332, "learning_rate": 0.0002, "loss": 1.343, "step": 149210 }, { "epoch": 0.61, "grad_norm": 5.974359512329102, "learning_rate": 0.0002, "loss": 1.7353, "step": 149220 }, { "epoch": 0.61, "grad_norm": 3.4904277324676514, "learning_rate": 0.0002, "loss": 1.7791, "step": 149230 }, { "epoch": 0.61, "grad_norm": 2.657010316848755, "learning_rate": 0.0002, "loss": 1.521, "step": 149240 }, { "epoch": 0.61, "grad_norm": 3.588623046875, "learning_rate": 0.0002, "loss": 1.715, "step": 149250 }, { "epoch": 0.61, "grad_norm": 2.3516509532928467, "learning_rate": 0.0002, "loss": 1.5528, "step": 149260 }, { "epoch": 0.61, "grad_norm": 2.879607677459717, "learning_rate": 0.0002, "loss": 1.4956, "step": 149270 }, { "epoch": 0.61, "grad_norm": 4.968492031097412, "learning_rate": 0.0002, "loss": 1.4257, "step": 149280 }, { "epoch": 0.61, "grad_norm": 3.4176318645477295, "learning_rate": 0.0002, "loss": 1.5625, "step": 149290 }, { "epoch": 0.61, "grad_norm": 4.148564338684082, "learning_rate": 0.0002, "loss": 1.3905, "step": 149300 }, { "epoch": 0.61, "grad_norm": 2.043510675430298, "learning_rate": 0.0002, "loss": 1.4741, "step": 149310 }, { "epoch": 0.61, "grad_norm": 2.243596076965332, "learning_rate": 0.0002, "loss": 1.6572, "step": 149320 }, { "epoch": 0.61, "grad_norm": 1.8638192415237427, "learning_rate": 0.0002, "loss": 1.5023, "step": 149330 }, { "epoch": 0.61, "grad_norm": 3.068800210952759, "learning_rate": 0.0002, "loss": 1.8401, "step": 149340 }, { "epoch": 0.61, "grad_norm": 2.7617433071136475, "learning_rate": 0.0002, "loss": 1.529, "step": 149350 }, { "epoch": 0.61, "grad_norm": 2.864145517349243, "learning_rate": 0.0002, "loss": 1.4218, "step": 149360 }, { "epoch": 0.61, "grad_norm": 3.146235942840576, "learning_rate": 0.0002, "loss": 1.4105, "step": 149370 }, { "epoch": 0.61, "grad_norm": 3.937549352645874, "learning_rate": 0.0002, "loss": 1.651, "step": 149380 }, { "epoch": 0.61, "grad_norm": 3.9782488346099854, "learning_rate": 0.0002, "loss": 1.5911, "step": 149390 }, { "epoch": 0.61, "grad_norm": 3.3604588508605957, "learning_rate": 0.0002, "loss": 1.2907, "step": 149400 }, { "epoch": 0.61, "grad_norm": 2.0946590900421143, "learning_rate": 0.0002, "loss": 1.8005, "step": 149410 }, { "epoch": 0.61, "grad_norm": 3.6101579666137695, "learning_rate": 0.0002, "loss": 1.7478, "step": 149420 }, { "epoch": 0.61, "grad_norm": 3.5327084064483643, "learning_rate": 0.0002, "loss": 1.7679, "step": 149430 }, { "epoch": 0.61, "grad_norm": 1.1660417318344116, "learning_rate": 0.0002, "loss": 1.5703, "step": 149440 }, { "epoch": 0.61, "grad_norm": 3.0993692874908447, "learning_rate": 0.0002, "loss": 1.9072, "step": 149450 }, { "epoch": 0.61, "grad_norm": 3.0666491985321045, "learning_rate": 0.0002, "loss": 1.8416, "step": 149460 }, { "epoch": 0.61, "grad_norm": 8.020081520080566, "learning_rate": 0.0002, "loss": 1.5025, "step": 149470 }, { "epoch": 0.61, "grad_norm": 2.0901808738708496, "learning_rate": 0.0002, "loss": 1.2732, "step": 149480 }, { "epoch": 0.61, "grad_norm": 2.4684135913848877, "learning_rate": 0.0002, "loss": 1.6629, "step": 149490 }, { "epoch": 0.61, "grad_norm": 4.367386817932129, "learning_rate": 0.0002, "loss": 1.7061, "step": 149500 }, { "epoch": 0.61, "grad_norm": 2.5990071296691895, "learning_rate": 0.0002, "loss": 1.5845, "step": 149510 }, { "epoch": 0.61, "grad_norm": 4.53460168838501, "learning_rate": 0.0002, "loss": 1.6289, "step": 149520 }, { "epoch": 0.61, "grad_norm": 3.7338311672210693, "learning_rate": 0.0002, "loss": 1.4358, "step": 149530 }, { "epoch": 0.61, "grad_norm": 2.8710267543792725, "learning_rate": 0.0002, "loss": 1.6503, "step": 149540 }, { "epoch": 0.61, "grad_norm": 0.9397190809249878, "learning_rate": 0.0002, "loss": 1.4366, "step": 149550 }, { "epoch": 0.61, "grad_norm": 3.2731263637542725, "learning_rate": 0.0002, "loss": 1.6317, "step": 149560 }, { "epoch": 0.61, "grad_norm": 3.0635769367218018, "learning_rate": 0.0002, "loss": 1.6756, "step": 149570 }, { "epoch": 0.61, "grad_norm": 4.3120927810668945, "learning_rate": 0.0002, "loss": 1.5602, "step": 149580 }, { "epoch": 0.61, "grad_norm": 2.0507330894470215, "learning_rate": 0.0002, "loss": 1.5187, "step": 149590 }, { "epoch": 0.61, "grad_norm": 4.077273368835449, "learning_rate": 0.0002, "loss": 1.6486, "step": 149600 }, { "epoch": 0.61, "grad_norm": 3.575709342956543, "learning_rate": 0.0002, "loss": 1.5973, "step": 149610 }, { "epoch": 0.61, "grad_norm": 3.397780418395996, "learning_rate": 0.0002, "loss": 1.4625, "step": 149620 }, { "epoch": 0.61, "grad_norm": 1.9739112854003906, "learning_rate": 0.0002, "loss": 1.6536, "step": 149630 }, { "epoch": 0.61, "grad_norm": 3.86913800239563, "learning_rate": 0.0002, "loss": 1.6832, "step": 149640 }, { "epoch": 0.61, "grad_norm": 2.9252662658691406, "learning_rate": 0.0002, "loss": 1.6728, "step": 149650 }, { "epoch": 0.61, "grad_norm": 6.41008996963501, "learning_rate": 0.0002, "loss": 1.2755, "step": 149660 }, { "epoch": 0.61, "grad_norm": 3.7796223163604736, "learning_rate": 0.0002, "loss": 1.5626, "step": 149670 }, { "epoch": 0.61, "grad_norm": 4.525079250335693, "learning_rate": 0.0002, "loss": 1.6265, "step": 149680 }, { "epoch": 0.61, "grad_norm": 2.680917978286743, "learning_rate": 0.0002, "loss": 1.5161, "step": 149690 }, { "epoch": 0.61, "grad_norm": 4.149164199829102, "learning_rate": 0.0002, "loss": 1.5353, "step": 149700 }, { "epoch": 0.61, "grad_norm": 2.5873231887817383, "learning_rate": 0.0002, "loss": 1.575, "step": 149710 }, { "epoch": 0.61, "grad_norm": 3.0733582973480225, "learning_rate": 0.0002, "loss": 1.6414, "step": 149720 }, { "epoch": 0.61, "grad_norm": 4.111893177032471, "learning_rate": 0.0002, "loss": 1.5617, "step": 149730 }, { "epoch": 0.61, "grad_norm": 2.6759703159332275, "learning_rate": 0.0002, "loss": 1.539, "step": 149740 }, { "epoch": 0.61, "grad_norm": 1.3753212690353394, "learning_rate": 0.0002, "loss": 1.6497, "step": 149750 }, { "epoch": 0.61, "grad_norm": 2.0639097690582275, "learning_rate": 0.0002, "loss": 1.6137, "step": 149760 }, { "epoch": 0.61, "grad_norm": 1.9603484869003296, "learning_rate": 0.0002, "loss": 1.5825, "step": 149770 }, { "epoch": 0.61, "grad_norm": 3.8446431159973145, "learning_rate": 0.0002, "loss": 1.6086, "step": 149780 }, { "epoch": 0.61, "grad_norm": 2.283515691757202, "learning_rate": 0.0002, "loss": 1.7487, "step": 149790 }, { "epoch": 0.61, "grad_norm": 2.1496424674987793, "learning_rate": 0.0002, "loss": 1.4153, "step": 149800 }, { "epoch": 0.61, "grad_norm": 3.132847547531128, "learning_rate": 0.0002, "loss": 1.5158, "step": 149810 }, { "epoch": 0.61, "grad_norm": 3.8595120906829834, "learning_rate": 0.0002, "loss": 1.4685, "step": 149820 }, { "epoch": 0.61, "grad_norm": 3.027460813522339, "learning_rate": 0.0002, "loss": 1.6219, "step": 149830 }, { "epoch": 0.61, "grad_norm": 4.090783596038818, "learning_rate": 0.0002, "loss": 1.5744, "step": 149840 }, { "epoch": 0.61, "grad_norm": 2.537750720977783, "learning_rate": 0.0002, "loss": 1.481, "step": 149850 }, { "epoch": 0.61, "grad_norm": 2.3137874603271484, "learning_rate": 0.0002, "loss": 1.4192, "step": 149860 }, { "epoch": 0.61, "grad_norm": 2.4531259536743164, "learning_rate": 0.0002, "loss": 1.6484, "step": 149870 }, { "epoch": 0.61, "grad_norm": 2.986381769180298, "learning_rate": 0.0002, "loss": 1.4637, "step": 149880 }, { "epoch": 0.61, "grad_norm": 2.6210262775421143, "learning_rate": 0.0002, "loss": 1.6286, "step": 149890 }, { "epoch": 0.61, "grad_norm": 6.581050395965576, "learning_rate": 0.0002, "loss": 1.7661, "step": 149900 }, { "epoch": 0.61, "grad_norm": 5.998105525970459, "learning_rate": 0.0002, "loss": 1.8503, "step": 149910 }, { "epoch": 0.61, "grad_norm": 4.8061747550964355, "learning_rate": 0.0002, "loss": 1.6434, "step": 149920 }, { "epoch": 0.61, "grad_norm": 2.292074203491211, "learning_rate": 0.0002, "loss": 1.5977, "step": 149930 }, { "epoch": 0.61, "grad_norm": 2.6228039264678955, "learning_rate": 0.0002, "loss": 1.6457, "step": 149940 }, { "epoch": 0.61, "grad_norm": 2.2387208938598633, "learning_rate": 0.0002, "loss": 1.7253, "step": 149950 }, { "epoch": 0.61, "grad_norm": 5.844400882720947, "learning_rate": 0.0002, "loss": 1.7715, "step": 149960 }, { "epoch": 0.61, "grad_norm": 3.2657582759857178, "learning_rate": 0.0002, "loss": 1.3395, "step": 149970 }, { "epoch": 0.61, "grad_norm": 3.040539026260376, "learning_rate": 0.0002, "loss": 1.665, "step": 149980 }, { "epoch": 0.61, "grad_norm": 5.164117813110352, "learning_rate": 0.0002, "loss": 1.7633, "step": 149990 }, { "epoch": 0.61, "grad_norm": 1.9268486499786377, "learning_rate": 0.0002, "loss": 1.5059, "step": 150000 }, { "epoch": 0.61, "grad_norm": 10.628193855285645, "learning_rate": 0.0002, "loss": 1.4238, "step": 150010 }, { "epoch": 0.61, "grad_norm": 3.5487940311431885, "learning_rate": 0.0002, "loss": 1.5021, "step": 150020 }, { "epoch": 0.61, "grad_norm": 2.3978335857391357, "learning_rate": 0.0002, "loss": 1.6253, "step": 150030 }, { "epoch": 0.61, "grad_norm": 2.2799723148345947, "learning_rate": 0.0002, "loss": 1.5726, "step": 150040 }, { "epoch": 0.61, "grad_norm": 3.6964550018310547, "learning_rate": 0.0002, "loss": 1.7096, "step": 150050 }, { "epoch": 0.61, "grad_norm": 2.4746198654174805, "learning_rate": 0.0002, "loss": 1.7244, "step": 150060 }, { "epoch": 0.61, "grad_norm": 4.2961530685424805, "learning_rate": 0.0002, "loss": 1.8582, "step": 150070 }, { "epoch": 0.61, "grad_norm": 3.352879762649536, "learning_rate": 0.0002, "loss": 1.4488, "step": 150080 }, { "epoch": 0.61, "grad_norm": 2.76777720451355, "learning_rate": 0.0002, "loss": 1.7828, "step": 150090 }, { "epoch": 0.61, "grad_norm": 2.9092960357666016, "learning_rate": 0.0002, "loss": 1.6291, "step": 150100 }, { "epoch": 0.61, "grad_norm": 2.272176504135132, "learning_rate": 0.0002, "loss": 1.6383, "step": 150110 }, { "epoch": 0.61, "grad_norm": 2.509827136993408, "learning_rate": 0.0002, "loss": 1.424, "step": 150120 }, { "epoch": 0.61, "grad_norm": 3.7811388969421387, "learning_rate": 0.0002, "loss": 1.526, "step": 150130 }, { "epoch": 0.61, "grad_norm": 1.8116618394851685, "learning_rate": 0.0002, "loss": 1.4252, "step": 150140 }, { "epoch": 0.61, "grad_norm": 2.4770705699920654, "learning_rate": 0.0002, "loss": 1.4061, "step": 150150 }, { "epoch": 0.61, "grad_norm": 2.122627019882202, "learning_rate": 0.0002, "loss": 1.4907, "step": 150160 }, { "epoch": 0.61, "grad_norm": 2.265846014022827, "learning_rate": 0.0002, "loss": 1.4588, "step": 150170 }, { "epoch": 0.61, "grad_norm": 8.551990509033203, "learning_rate": 0.0002, "loss": 1.6169, "step": 150180 }, { "epoch": 0.61, "grad_norm": 3.04268741607666, "learning_rate": 0.0002, "loss": 1.5054, "step": 150190 }, { "epoch": 0.61, "grad_norm": 1.7387661933898926, "learning_rate": 0.0002, "loss": 1.545, "step": 150200 }, { "epoch": 0.61, "grad_norm": 3.8985037803649902, "learning_rate": 0.0002, "loss": 1.5552, "step": 150210 }, { "epoch": 0.61, "grad_norm": 3.714221954345703, "learning_rate": 0.0002, "loss": 1.7062, "step": 150220 }, { "epoch": 0.61, "grad_norm": 2.568638324737549, "learning_rate": 0.0002, "loss": 1.4483, "step": 150230 }, { "epoch": 0.61, "grad_norm": 3.2893197536468506, "learning_rate": 0.0002, "loss": 1.5765, "step": 150240 }, { "epoch": 0.61, "grad_norm": 2.4852890968322754, "learning_rate": 0.0002, "loss": 1.4546, "step": 150250 }, { "epoch": 0.61, "grad_norm": 3.1201727390289307, "learning_rate": 0.0002, "loss": 1.4231, "step": 150260 }, { "epoch": 0.61, "grad_norm": 4.175493240356445, "learning_rate": 0.0002, "loss": 1.5366, "step": 150270 }, { "epoch": 0.61, "grad_norm": 1.9589775800704956, "learning_rate": 0.0002, "loss": 1.3407, "step": 150280 }, { "epoch": 0.61, "grad_norm": 1.641882300376892, "learning_rate": 0.0002, "loss": 1.7584, "step": 150290 }, { "epoch": 0.61, "grad_norm": 1.5379719734191895, "learning_rate": 0.0002, "loss": 1.693, "step": 150300 }, { "epoch": 0.61, "grad_norm": 4.133964538574219, "learning_rate": 0.0002, "loss": 1.5941, "step": 150310 }, { "epoch": 0.61, "grad_norm": 4.860578536987305, "learning_rate": 0.0002, "loss": 1.3136, "step": 150320 }, { "epoch": 0.61, "grad_norm": 2.6749460697174072, "learning_rate": 0.0002, "loss": 1.3742, "step": 150330 }, { "epoch": 0.61, "grad_norm": 3.9361393451690674, "learning_rate": 0.0002, "loss": 1.4791, "step": 150340 }, { "epoch": 0.61, "grad_norm": 3.0124266147613525, "learning_rate": 0.0002, "loss": 1.6311, "step": 150350 }, { "epoch": 0.61, "grad_norm": 2.9594738483428955, "learning_rate": 0.0002, "loss": 1.3632, "step": 150360 }, { "epoch": 0.61, "grad_norm": 3.6046552658081055, "learning_rate": 0.0002, "loss": 1.4805, "step": 150370 }, { "epoch": 0.61, "grad_norm": 2.400784730911255, "learning_rate": 0.0002, "loss": 1.543, "step": 150380 }, { "epoch": 0.61, "grad_norm": 1.9726852178573608, "learning_rate": 0.0002, "loss": 1.5065, "step": 150390 }, { "epoch": 0.61, "grad_norm": 3.748619794845581, "learning_rate": 0.0002, "loss": 1.6612, "step": 150400 }, { "epoch": 0.61, "grad_norm": 3.1821014881134033, "learning_rate": 0.0002, "loss": 1.5924, "step": 150410 }, { "epoch": 0.61, "grad_norm": 2.7934365272521973, "learning_rate": 0.0002, "loss": 1.4806, "step": 150420 }, { "epoch": 0.61, "grad_norm": 3.158599615097046, "learning_rate": 0.0002, "loss": 1.5239, "step": 150430 }, { "epoch": 0.61, "grad_norm": 1.8781108856201172, "learning_rate": 0.0002, "loss": 1.7134, "step": 150440 }, { "epoch": 0.61, "grad_norm": 2.736034393310547, "learning_rate": 0.0002, "loss": 1.5074, "step": 150450 }, { "epoch": 0.61, "grad_norm": 2.869366407394409, "learning_rate": 0.0002, "loss": 1.5792, "step": 150460 }, { "epoch": 0.61, "grad_norm": 3.7072083950042725, "learning_rate": 0.0002, "loss": 1.7086, "step": 150470 }, { "epoch": 0.61, "grad_norm": 3.9491512775421143, "learning_rate": 0.0002, "loss": 1.5246, "step": 150480 }, { "epoch": 0.61, "grad_norm": 3.1207022666931152, "learning_rate": 0.0002, "loss": 1.6963, "step": 150490 }, { "epoch": 0.61, "grad_norm": 2.601240396499634, "learning_rate": 0.0002, "loss": 1.5592, "step": 150500 }, { "epoch": 0.61, "grad_norm": 2.791616439819336, "learning_rate": 0.0002, "loss": 1.4559, "step": 150510 }, { "epoch": 0.61, "grad_norm": 3.7391369342803955, "learning_rate": 0.0002, "loss": 1.404, "step": 150520 }, { "epoch": 0.61, "grad_norm": 2.3161041736602783, "learning_rate": 0.0002, "loss": 1.5025, "step": 150530 }, { "epoch": 0.61, "grad_norm": 2.3510544300079346, "learning_rate": 0.0002, "loss": 1.4568, "step": 150540 }, { "epoch": 0.61, "grad_norm": 2.6449801921844482, "learning_rate": 0.0002, "loss": 1.6428, "step": 150550 }, { "epoch": 0.61, "grad_norm": 4.518970012664795, "learning_rate": 0.0002, "loss": 1.6354, "step": 150560 }, { "epoch": 0.61, "grad_norm": 3.4073081016540527, "learning_rate": 0.0002, "loss": 1.5997, "step": 150570 }, { "epoch": 0.61, "grad_norm": 4.106124401092529, "learning_rate": 0.0002, "loss": 1.7022, "step": 150580 }, { "epoch": 0.61, "grad_norm": 2.913987159729004, "learning_rate": 0.0002, "loss": 1.4896, "step": 150590 }, { "epoch": 0.61, "grad_norm": 2.408024787902832, "learning_rate": 0.0002, "loss": 1.7962, "step": 150600 }, { "epoch": 0.61, "grad_norm": 2.38266658782959, "learning_rate": 0.0002, "loss": 1.5413, "step": 150610 }, { "epoch": 0.61, "grad_norm": 1.7220125198364258, "learning_rate": 0.0002, "loss": 1.5715, "step": 150620 }, { "epoch": 0.61, "grad_norm": 2.384594678878784, "learning_rate": 0.0002, "loss": 1.4575, "step": 150630 }, { "epoch": 0.61, "grad_norm": 2.196831464767456, "learning_rate": 0.0002, "loss": 1.482, "step": 150640 }, { "epoch": 0.61, "grad_norm": 4.136523723602295, "learning_rate": 0.0002, "loss": 1.7036, "step": 150650 }, { "epoch": 0.61, "grad_norm": 2.6067025661468506, "learning_rate": 0.0002, "loss": 1.6415, "step": 150660 }, { "epoch": 0.61, "grad_norm": 7.317099571228027, "learning_rate": 0.0002, "loss": 1.4189, "step": 150670 }, { "epoch": 0.61, "grad_norm": 3.5725958347320557, "learning_rate": 0.0002, "loss": 1.4772, "step": 150680 }, { "epoch": 0.61, "grad_norm": 3.152851104736328, "learning_rate": 0.0002, "loss": 1.4358, "step": 150690 }, { "epoch": 0.61, "grad_norm": 3.819744348526001, "learning_rate": 0.0002, "loss": 1.4582, "step": 150700 }, { "epoch": 0.61, "grad_norm": 2.1322662830352783, "learning_rate": 0.0002, "loss": 1.6898, "step": 150710 }, { "epoch": 0.61, "grad_norm": 1.5130984783172607, "learning_rate": 0.0002, "loss": 1.497, "step": 150720 }, { "epoch": 0.61, "grad_norm": 3.6328938007354736, "learning_rate": 0.0002, "loss": 1.7812, "step": 150730 }, { "epoch": 0.61, "grad_norm": 4.242856025695801, "learning_rate": 0.0002, "loss": 1.9256, "step": 150740 }, { "epoch": 0.61, "grad_norm": 2.9898791313171387, "learning_rate": 0.0002, "loss": 1.3887, "step": 150750 }, { "epoch": 0.61, "grad_norm": 2.5835068225860596, "learning_rate": 0.0002, "loss": 1.3987, "step": 150760 }, { "epoch": 0.61, "grad_norm": 3.4426486492156982, "learning_rate": 0.0002, "loss": 1.4933, "step": 150770 }, { "epoch": 0.61, "grad_norm": 4.024013996124268, "learning_rate": 0.0002, "loss": 1.7975, "step": 150780 }, { "epoch": 0.61, "grad_norm": 2.2726147174835205, "learning_rate": 0.0002, "loss": 1.9974, "step": 150790 }, { "epoch": 0.61, "grad_norm": 2.8637261390686035, "learning_rate": 0.0002, "loss": 1.4486, "step": 150800 }, { "epoch": 0.61, "grad_norm": 3.020493745803833, "learning_rate": 0.0002, "loss": 1.5341, "step": 150810 }, { "epoch": 0.61, "grad_norm": 3.3733150959014893, "learning_rate": 0.0002, "loss": 1.5322, "step": 150820 }, { "epoch": 0.61, "grad_norm": 1.9612127542495728, "learning_rate": 0.0002, "loss": 1.3608, "step": 150830 }, { "epoch": 0.61, "grad_norm": 1.908439040184021, "learning_rate": 0.0002, "loss": 1.5395, "step": 150840 }, { "epoch": 0.61, "grad_norm": 2.8337461948394775, "learning_rate": 0.0002, "loss": 1.3856, "step": 150850 }, { "epoch": 0.61, "grad_norm": 1.7921823263168335, "learning_rate": 0.0002, "loss": 1.1893, "step": 150860 }, { "epoch": 0.61, "grad_norm": 3.2965500354766846, "learning_rate": 0.0002, "loss": 1.792, "step": 150870 }, { "epoch": 0.61, "grad_norm": 2.6730287075042725, "learning_rate": 0.0002, "loss": 1.5449, "step": 150880 }, { "epoch": 0.61, "grad_norm": 3.9185068607330322, "learning_rate": 0.0002, "loss": 1.4371, "step": 150890 }, { "epoch": 0.61, "grad_norm": 6.965991497039795, "learning_rate": 0.0002, "loss": 1.7093, "step": 150900 }, { "epoch": 0.61, "grad_norm": 3.182866096496582, "learning_rate": 0.0002, "loss": 1.7341, "step": 150910 }, { "epoch": 0.61, "grad_norm": 3.433873414993286, "learning_rate": 0.0002, "loss": 1.4191, "step": 150920 }, { "epoch": 0.61, "grad_norm": 3.7191176414489746, "learning_rate": 0.0002, "loss": 1.7812, "step": 150930 }, { "epoch": 0.61, "grad_norm": 3.582331657409668, "learning_rate": 0.0002, "loss": 1.6573, "step": 150940 }, { "epoch": 0.61, "grad_norm": 4.5090484619140625, "learning_rate": 0.0002, "loss": 1.6718, "step": 150950 }, { "epoch": 0.61, "grad_norm": 2.4480984210968018, "learning_rate": 0.0002, "loss": 1.5925, "step": 150960 }, { "epoch": 0.61, "grad_norm": 2.600332260131836, "learning_rate": 0.0002, "loss": 1.5498, "step": 150970 }, { "epoch": 0.61, "grad_norm": 6.3990583419799805, "learning_rate": 0.0002, "loss": 1.533, "step": 150980 }, { "epoch": 0.61, "grad_norm": 3.5303232669830322, "learning_rate": 0.0002, "loss": 1.5203, "step": 150990 }, { "epoch": 0.61, "grad_norm": 3.8601701259613037, "learning_rate": 0.0002, "loss": 1.7347, "step": 151000 }, { "epoch": 0.61, "grad_norm": 2.900522470474243, "learning_rate": 0.0002, "loss": 1.5307, "step": 151010 }, { "epoch": 0.61, "grad_norm": 3.457213878631592, "learning_rate": 0.0002, "loss": 1.5933, "step": 151020 }, { "epoch": 0.61, "grad_norm": 2.2570645809173584, "learning_rate": 0.0002, "loss": 1.8577, "step": 151030 }, { "epoch": 0.61, "grad_norm": 1.8552021980285645, "learning_rate": 0.0002, "loss": 1.3265, "step": 151040 }, { "epoch": 0.61, "grad_norm": 5.620177268981934, "learning_rate": 0.0002, "loss": 1.4859, "step": 151050 }, { "epoch": 0.61, "grad_norm": 1.6206759214401245, "learning_rate": 0.0002, "loss": 1.6838, "step": 151060 }, { "epoch": 0.61, "grad_norm": 3.4312944412231445, "learning_rate": 0.0002, "loss": 1.5237, "step": 151070 }, { "epoch": 0.62, "grad_norm": 3.9786338806152344, "learning_rate": 0.0002, "loss": 1.5421, "step": 151080 }, { "epoch": 0.62, "grad_norm": 3.7300171852111816, "learning_rate": 0.0002, "loss": 1.7436, "step": 151090 }, { "epoch": 0.62, "grad_norm": 2.7780985832214355, "learning_rate": 0.0002, "loss": 1.6206, "step": 151100 }, { "epoch": 0.62, "grad_norm": 2.4561309814453125, "learning_rate": 0.0002, "loss": 1.4741, "step": 151110 }, { "epoch": 0.62, "grad_norm": 2.9076128005981445, "learning_rate": 0.0002, "loss": 1.813, "step": 151120 }, { "epoch": 0.62, "grad_norm": 1.948192834854126, "learning_rate": 0.0002, "loss": 1.5981, "step": 151130 }, { "epoch": 0.62, "grad_norm": 2.6902618408203125, "learning_rate": 0.0002, "loss": 1.5238, "step": 151140 }, { "epoch": 0.62, "grad_norm": 1.6400312185287476, "learning_rate": 0.0002, "loss": 1.6898, "step": 151150 }, { "epoch": 0.62, "grad_norm": 3.6881814002990723, "learning_rate": 0.0002, "loss": 1.5187, "step": 151160 }, { "epoch": 0.62, "grad_norm": 3.1815786361694336, "learning_rate": 0.0002, "loss": 1.4343, "step": 151170 }, { "epoch": 0.62, "grad_norm": 3.007690191268921, "learning_rate": 0.0002, "loss": 1.4547, "step": 151180 }, { "epoch": 0.62, "grad_norm": 6.037755966186523, "learning_rate": 0.0002, "loss": 1.4099, "step": 151190 }, { "epoch": 0.62, "grad_norm": 2.5340042114257812, "learning_rate": 0.0002, "loss": 1.3701, "step": 151200 }, { "epoch": 0.62, "grad_norm": 2.8002233505249023, "learning_rate": 0.0002, "loss": 1.5276, "step": 151210 }, { "epoch": 0.62, "grad_norm": 4.362546920776367, "learning_rate": 0.0002, "loss": 1.6795, "step": 151220 }, { "epoch": 0.62, "grad_norm": 3.375504732131958, "learning_rate": 0.0002, "loss": 1.7021, "step": 151230 }, { "epoch": 0.62, "grad_norm": 2.052281379699707, "learning_rate": 0.0002, "loss": 1.5239, "step": 151240 }, { "epoch": 0.62, "grad_norm": 1.6877838373184204, "learning_rate": 0.0002, "loss": 1.7786, "step": 151250 }, { "epoch": 0.62, "grad_norm": 2.9146387577056885, "learning_rate": 0.0002, "loss": 1.2687, "step": 151260 }, { "epoch": 0.62, "grad_norm": 2.7645368576049805, "learning_rate": 0.0002, "loss": 1.5563, "step": 151270 }, { "epoch": 0.62, "grad_norm": 2.614712953567505, "learning_rate": 0.0002, "loss": 1.5866, "step": 151280 }, { "epoch": 0.62, "grad_norm": 3.4461476802825928, "learning_rate": 0.0002, "loss": 1.6013, "step": 151290 }, { "epoch": 0.62, "grad_norm": 3.073777675628662, "learning_rate": 0.0002, "loss": 1.4731, "step": 151300 }, { "epoch": 0.62, "grad_norm": 4.3654093742370605, "learning_rate": 0.0002, "loss": 1.6013, "step": 151310 }, { "epoch": 0.62, "grad_norm": 3.787230968475342, "learning_rate": 0.0002, "loss": 1.6189, "step": 151320 }, { "epoch": 0.62, "grad_norm": 4.195778846740723, "learning_rate": 0.0002, "loss": 1.436, "step": 151330 }, { "epoch": 0.62, "grad_norm": 2.9258174896240234, "learning_rate": 0.0002, "loss": 1.7156, "step": 151340 }, { "epoch": 0.62, "grad_norm": 2.6029207706451416, "learning_rate": 0.0002, "loss": 1.4962, "step": 151350 }, { "epoch": 0.62, "grad_norm": 2.7506937980651855, "learning_rate": 0.0002, "loss": 1.5714, "step": 151360 }, { "epoch": 0.62, "grad_norm": 3.584973096847534, "learning_rate": 0.0002, "loss": 1.4502, "step": 151370 }, { "epoch": 0.62, "grad_norm": 4.705638885498047, "learning_rate": 0.0002, "loss": 1.6547, "step": 151380 }, { "epoch": 0.62, "grad_norm": 2.3931214809417725, "learning_rate": 0.0002, "loss": 1.7279, "step": 151390 }, { "epoch": 0.62, "grad_norm": 3.000615119934082, "learning_rate": 0.0002, "loss": 1.4037, "step": 151400 }, { "epoch": 0.62, "grad_norm": 2.134019613265991, "learning_rate": 0.0002, "loss": 1.6098, "step": 151410 }, { "epoch": 0.62, "grad_norm": 1.8330943584442139, "learning_rate": 0.0002, "loss": 1.3963, "step": 151420 }, { "epoch": 0.62, "grad_norm": 2.8010289669036865, "learning_rate": 0.0002, "loss": 1.7952, "step": 151430 }, { "epoch": 0.62, "grad_norm": 2.7052130699157715, "learning_rate": 0.0002, "loss": 1.6346, "step": 151440 }, { "epoch": 0.62, "grad_norm": 3.192924976348877, "learning_rate": 0.0002, "loss": 1.5394, "step": 151450 }, { "epoch": 0.62, "grad_norm": 1.9951196908950806, "learning_rate": 0.0002, "loss": 1.6716, "step": 151460 }, { "epoch": 0.62, "grad_norm": 2.9435079097747803, "learning_rate": 0.0002, "loss": 1.5462, "step": 151470 }, { "epoch": 0.62, "grad_norm": 1.5383466482162476, "learning_rate": 0.0002, "loss": 1.4331, "step": 151480 }, { "epoch": 0.62, "grad_norm": 2.6784496307373047, "learning_rate": 0.0002, "loss": 1.5019, "step": 151490 }, { "epoch": 0.62, "grad_norm": 1.8489018678665161, "learning_rate": 0.0002, "loss": 1.6249, "step": 151500 }, { "epoch": 0.62, "grad_norm": 3.4292898178100586, "learning_rate": 0.0002, "loss": 1.5592, "step": 151510 }, { "epoch": 0.62, "grad_norm": 2.5190768241882324, "learning_rate": 0.0002, "loss": 1.6459, "step": 151520 }, { "epoch": 0.62, "grad_norm": 2.468094825744629, "learning_rate": 0.0002, "loss": 1.5568, "step": 151530 }, { "epoch": 0.62, "grad_norm": 1.7401061058044434, "learning_rate": 0.0002, "loss": 1.4447, "step": 151540 }, { "epoch": 0.62, "grad_norm": 2.6899094581604004, "learning_rate": 0.0002, "loss": 1.4946, "step": 151550 }, { "epoch": 0.62, "grad_norm": 1.7832530736923218, "learning_rate": 0.0002, "loss": 1.6262, "step": 151560 }, { "epoch": 0.62, "grad_norm": 2.3483242988586426, "learning_rate": 0.0002, "loss": 1.5921, "step": 151570 }, { "epoch": 0.62, "grad_norm": 2.3284006118774414, "learning_rate": 0.0002, "loss": 1.3693, "step": 151580 }, { "epoch": 0.62, "grad_norm": 2.0383918285369873, "learning_rate": 0.0002, "loss": 1.2409, "step": 151590 }, { "epoch": 0.62, "grad_norm": 2.4010391235351562, "learning_rate": 0.0002, "loss": 1.5464, "step": 151600 }, { "epoch": 0.62, "grad_norm": 3.4366018772125244, "learning_rate": 0.0002, "loss": 1.5913, "step": 151610 }, { "epoch": 0.62, "grad_norm": 2.8050007820129395, "learning_rate": 0.0002, "loss": 1.5817, "step": 151620 }, { "epoch": 0.62, "grad_norm": 2.3732681274414062, "learning_rate": 0.0002, "loss": 1.4718, "step": 151630 }, { "epoch": 0.62, "grad_norm": 3.51308012008667, "learning_rate": 0.0002, "loss": 1.8631, "step": 151640 }, { "epoch": 0.62, "grad_norm": 2.7594571113586426, "learning_rate": 0.0002, "loss": 1.4755, "step": 151650 }, { "epoch": 0.62, "grad_norm": 3.848419666290283, "learning_rate": 0.0002, "loss": 1.6847, "step": 151660 }, { "epoch": 0.62, "grad_norm": 3.84104323387146, "learning_rate": 0.0002, "loss": 1.6773, "step": 151670 }, { "epoch": 0.62, "grad_norm": 2.5781564712524414, "learning_rate": 0.0002, "loss": 1.673, "step": 151680 }, { "epoch": 0.62, "grad_norm": 3.797550678253174, "learning_rate": 0.0002, "loss": 1.7988, "step": 151690 }, { "epoch": 0.62, "grad_norm": 2.3704888820648193, "learning_rate": 0.0002, "loss": 1.5269, "step": 151700 }, { "epoch": 0.62, "grad_norm": 3.4139199256896973, "learning_rate": 0.0002, "loss": 1.788, "step": 151710 }, { "epoch": 0.62, "grad_norm": 3.018209457397461, "learning_rate": 0.0002, "loss": 1.7133, "step": 151720 }, { "epoch": 0.62, "grad_norm": 3.0816845893859863, "learning_rate": 0.0002, "loss": 1.4628, "step": 151730 }, { "epoch": 0.62, "grad_norm": 5.46912145614624, "learning_rate": 0.0002, "loss": 1.5475, "step": 151740 }, { "epoch": 0.62, "grad_norm": 2.110701322555542, "learning_rate": 0.0002, "loss": 1.6654, "step": 151750 }, { "epoch": 0.62, "grad_norm": 3.936873197555542, "learning_rate": 0.0002, "loss": 1.6911, "step": 151760 }, { "epoch": 0.62, "grad_norm": 1.8775988817214966, "learning_rate": 0.0002, "loss": 1.591, "step": 151770 }, { "epoch": 0.62, "grad_norm": 3.18554425239563, "learning_rate": 0.0002, "loss": 1.8925, "step": 151780 }, { "epoch": 0.62, "grad_norm": 2.5233874320983887, "learning_rate": 0.0002, "loss": 1.5967, "step": 151790 }, { "epoch": 0.62, "grad_norm": 2.6685659885406494, "learning_rate": 0.0002, "loss": 1.8014, "step": 151800 }, { "epoch": 0.62, "grad_norm": 2.5910696983337402, "learning_rate": 0.0002, "loss": 1.3629, "step": 151810 }, { "epoch": 0.62, "grad_norm": 3.4659616947174072, "learning_rate": 0.0002, "loss": 1.5886, "step": 151820 }, { "epoch": 0.62, "grad_norm": 3.110905647277832, "learning_rate": 0.0002, "loss": 1.5169, "step": 151830 }, { "epoch": 0.62, "grad_norm": 1.6642214059829712, "learning_rate": 0.0002, "loss": 1.5306, "step": 151840 }, { "epoch": 0.62, "grad_norm": 4.998319149017334, "learning_rate": 0.0002, "loss": 1.4832, "step": 151850 }, { "epoch": 0.62, "grad_norm": 3.0788285732269287, "learning_rate": 0.0002, "loss": 1.6711, "step": 151860 }, { "epoch": 0.62, "grad_norm": 2.514780282974243, "learning_rate": 0.0002, "loss": 1.4596, "step": 151870 }, { "epoch": 0.62, "grad_norm": 2.473844289779663, "learning_rate": 0.0002, "loss": 1.4304, "step": 151880 }, { "epoch": 0.62, "grad_norm": 5.506654739379883, "learning_rate": 0.0002, "loss": 1.4488, "step": 151890 }, { "epoch": 0.62, "grad_norm": 4.019510269165039, "learning_rate": 0.0002, "loss": 1.67, "step": 151900 }, { "epoch": 0.62, "grad_norm": 3.1874942779541016, "learning_rate": 0.0002, "loss": 1.5563, "step": 151910 }, { "epoch": 0.62, "grad_norm": 1.737492322921753, "learning_rate": 0.0002, "loss": 1.7455, "step": 151920 }, { "epoch": 0.62, "grad_norm": 3.442392349243164, "learning_rate": 0.0002, "loss": 1.8122, "step": 151930 }, { "epoch": 0.62, "grad_norm": 1.4264626502990723, "learning_rate": 0.0002, "loss": 1.5399, "step": 151940 }, { "epoch": 0.62, "grad_norm": 2.9204068183898926, "learning_rate": 0.0002, "loss": 1.7234, "step": 151950 }, { "epoch": 0.62, "grad_norm": 2.489906072616577, "learning_rate": 0.0002, "loss": 1.5924, "step": 151960 }, { "epoch": 0.62, "grad_norm": 2.411439895629883, "learning_rate": 0.0002, "loss": 1.6283, "step": 151970 }, { "epoch": 0.62, "grad_norm": 1.8634570837020874, "learning_rate": 0.0002, "loss": 1.5777, "step": 151980 }, { "epoch": 0.62, "grad_norm": 3.9150290489196777, "learning_rate": 0.0002, "loss": 1.8859, "step": 151990 }, { "epoch": 0.62, "grad_norm": 2.743612289428711, "learning_rate": 0.0002, "loss": 1.7968, "step": 152000 }, { "epoch": 0.62, "grad_norm": 2.756216287612915, "learning_rate": 0.0002, "loss": 1.6906, "step": 152010 }, { "epoch": 0.62, "grad_norm": 1.7009882926940918, "learning_rate": 0.0002, "loss": 1.8021, "step": 152020 }, { "epoch": 0.62, "grad_norm": 2.8847200870513916, "learning_rate": 0.0002, "loss": 1.6272, "step": 152030 }, { "epoch": 0.62, "grad_norm": 2.600680351257324, "learning_rate": 0.0002, "loss": 1.6547, "step": 152040 }, { "epoch": 0.62, "grad_norm": 4.191713333129883, "learning_rate": 0.0002, "loss": 1.6261, "step": 152050 }, { "epoch": 0.62, "grad_norm": 3.1131348609924316, "learning_rate": 0.0002, "loss": 1.5542, "step": 152060 }, { "epoch": 0.62, "grad_norm": 2.640681266784668, "learning_rate": 0.0002, "loss": 1.6329, "step": 152070 }, { "epoch": 0.62, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.4677, "step": 152080 }, { "epoch": 0.62, "grad_norm": 3.3192737102508545, "learning_rate": 0.0002, "loss": 1.517, "step": 152090 }, { "epoch": 0.62, "grad_norm": 2.72725772857666, "learning_rate": 0.0002, "loss": 1.5931, "step": 152100 }, { "epoch": 0.62, "grad_norm": 3.851039171218872, "learning_rate": 0.0002, "loss": 1.5583, "step": 152110 }, { "epoch": 0.62, "grad_norm": 2.6490418910980225, "learning_rate": 0.0002, "loss": 1.8236, "step": 152120 }, { "epoch": 0.62, "grad_norm": 3.104623317718506, "learning_rate": 0.0002, "loss": 1.5731, "step": 152130 }, { "epoch": 0.62, "grad_norm": 3.754861831665039, "learning_rate": 0.0002, "loss": 1.5165, "step": 152140 }, { "epoch": 0.62, "grad_norm": 3.6498501300811768, "learning_rate": 0.0002, "loss": 1.4802, "step": 152150 }, { "epoch": 0.62, "grad_norm": 3.7711384296417236, "learning_rate": 0.0002, "loss": 1.6359, "step": 152160 }, { "epoch": 0.62, "grad_norm": 2.9985148906707764, "learning_rate": 0.0002, "loss": 1.6919, "step": 152170 }, { "epoch": 0.62, "grad_norm": 2.796131134033203, "learning_rate": 0.0002, "loss": 1.4926, "step": 152180 }, { "epoch": 0.62, "grad_norm": 2.7127633094787598, "learning_rate": 0.0002, "loss": 1.4846, "step": 152190 }, { "epoch": 0.62, "grad_norm": 3.7297616004943848, "learning_rate": 0.0002, "loss": 1.7277, "step": 152200 }, { "epoch": 0.62, "grad_norm": 2.868964672088623, "learning_rate": 0.0002, "loss": 1.6028, "step": 152210 }, { "epoch": 0.62, "grad_norm": 2.2148959636688232, "learning_rate": 0.0002, "loss": 1.6434, "step": 152220 }, { "epoch": 0.62, "grad_norm": 3.236828088760376, "learning_rate": 0.0002, "loss": 1.5956, "step": 152230 }, { "epoch": 0.62, "grad_norm": 3.096456289291382, "learning_rate": 0.0002, "loss": 1.4899, "step": 152240 }, { "epoch": 0.62, "grad_norm": 3.728788375854492, "learning_rate": 0.0002, "loss": 1.4672, "step": 152250 }, { "epoch": 0.62, "grad_norm": 2.2450177669525146, "learning_rate": 0.0002, "loss": 1.6178, "step": 152260 }, { "epoch": 0.62, "grad_norm": 2.6389334201812744, "learning_rate": 0.0002, "loss": 1.5934, "step": 152270 }, { "epoch": 0.62, "grad_norm": 7.747086524963379, "learning_rate": 0.0002, "loss": 1.5613, "step": 152280 }, { "epoch": 0.62, "grad_norm": 2.3733839988708496, "learning_rate": 0.0002, "loss": 1.6093, "step": 152290 }, { "epoch": 0.62, "grad_norm": 1.6719387769699097, "learning_rate": 0.0002, "loss": 1.3835, "step": 152300 }, { "epoch": 0.62, "grad_norm": 2.834533452987671, "learning_rate": 0.0002, "loss": 1.6498, "step": 152310 }, { "epoch": 0.62, "grad_norm": 2.2661163806915283, "learning_rate": 0.0002, "loss": 1.637, "step": 152320 }, { "epoch": 0.62, "grad_norm": 3.187904119491577, "learning_rate": 0.0002, "loss": 1.7624, "step": 152330 }, { "epoch": 0.62, "grad_norm": 3.658827781677246, "learning_rate": 0.0002, "loss": 1.8206, "step": 152340 }, { "epoch": 0.62, "grad_norm": 3.270704984664917, "learning_rate": 0.0002, "loss": 1.5475, "step": 152350 }, { "epoch": 0.62, "grad_norm": 3.6028411388397217, "learning_rate": 0.0002, "loss": 1.6827, "step": 152360 }, { "epoch": 0.62, "grad_norm": 2.0818188190460205, "learning_rate": 0.0002, "loss": 1.9563, "step": 152370 }, { "epoch": 0.62, "grad_norm": 2.466402053833008, "learning_rate": 0.0002, "loss": 1.6741, "step": 152380 }, { "epoch": 0.62, "grad_norm": 3.859095811843872, "learning_rate": 0.0002, "loss": 1.5578, "step": 152390 }, { "epoch": 0.62, "grad_norm": 3.005462169647217, "learning_rate": 0.0002, "loss": 1.4607, "step": 152400 }, { "epoch": 0.62, "grad_norm": 3.5772032737731934, "learning_rate": 0.0002, "loss": 1.5904, "step": 152410 }, { "epoch": 0.62, "grad_norm": 3.252263307571411, "learning_rate": 0.0002, "loss": 1.5044, "step": 152420 }, { "epoch": 0.62, "grad_norm": 5.011229991912842, "learning_rate": 0.0002, "loss": 1.4913, "step": 152430 }, { "epoch": 0.62, "grad_norm": 5.868353366851807, "learning_rate": 0.0002, "loss": 1.5251, "step": 152440 }, { "epoch": 0.62, "grad_norm": 2.3781237602233887, "learning_rate": 0.0002, "loss": 1.6594, "step": 152450 }, { "epoch": 0.62, "grad_norm": 3.929483652114868, "learning_rate": 0.0002, "loss": 1.603, "step": 152460 }, { "epoch": 0.62, "grad_norm": 2.706346273422241, "learning_rate": 0.0002, "loss": 1.4185, "step": 152470 }, { "epoch": 0.62, "grad_norm": 3.8132822513580322, "learning_rate": 0.0002, "loss": 1.7579, "step": 152480 }, { "epoch": 0.62, "grad_norm": 7.431941032409668, "learning_rate": 0.0002, "loss": 1.5141, "step": 152490 }, { "epoch": 0.62, "grad_norm": 2.7841005325317383, "learning_rate": 0.0002, "loss": 1.663, "step": 152500 }, { "epoch": 0.62, "grad_norm": 3.5575711727142334, "learning_rate": 0.0002, "loss": 1.6963, "step": 152510 }, { "epoch": 0.62, "grad_norm": 2.1663548946380615, "learning_rate": 0.0002, "loss": 1.6997, "step": 152520 }, { "epoch": 0.62, "grad_norm": 2.71342396736145, "learning_rate": 0.0002, "loss": 1.6125, "step": 152530 }, { "epoch": 0.62, "grad_norm": 2.2560079097747803, "learning_rate": 0.0002, "loss": 1.6952, "step": 152540 }, { "epoch": 0.62, "grad_norm": 2.578667402267456, "learning_rate": 0.0002, "loss": 1.7354, "step": 152550 }, { "epoch": 0.62, "grad_norm": 3.452939748764038, "learning_rate": 0.0002, "loss": 1.5324, "step": 152560 }, { "epoch": 0.62, "grad_norm": 2.48458194732666, "learning_rate": 0.0002, "loss": 1.7741, "step": 152570 }, { "epoch": 0.62, "grad_norm": 1.8856934309005737, "learning_rate": 0.0002, "loss": 1.5326, "step": 152580 }, { "epoch": 0.62, "grad_norm": 3.05633807182312, "learning_rate": 0.0002, "loss": 1.5758, "step": 152590 }, { "epoch": 0.62, "grad_norm": 4.08749532699585, "learning_rate": 0.0002, "loss": 1.3475, "step": 152600 }, { "epoch": 0.62, "grad_norm": 2.8559041023254395, "learning_rate": 0.0002, "loss": 1.697, "step": 152610 }, { "epoch": 0.62, "grad_norm": 2.512575387954712, "learning_rate": 0.0002, "loss": 1.7314, "step": 152620 }, { "epoch": 0.62, "grad_norm": 3.805168867111206, "learning_rate": 0.0002, "loss": 1.7381, "step": 152630 }, { "epoch": 0.62, "grad_norm": 2.68558406829834, "learning_rate": 0.0002, "loss": 1.8014, "step": 152640 }, { "epoch": 0.62, "grad_norm": 3.1819539070129395, "learning_rate": 0.0002, "loss": 1.6654, "step": 152650 }, { "epoch": 0.62, "grad_norm": 4.801983833312988, "learning_rate": 0.0002, "loss": 1.3804, "step": 152660 }, { "epoch": 0.62, "grad_norm": 2.81337833404541, "learning_rate": 0.0002, "loss": 1.5407, "step": 152670 }, { "epoch": 0.62, "grad_norm": 1.5718275308609009, "learning_rate": 0.0002, "loss": 1.5192, "step": 152680 }, { "epoch": 0.62, "grad_norm": 3.474133253097534, "learning_rate": 0.0002, "loss": 1.7939, "step": 152690 }, { "epoch": 0.62, "grad_norm": 4.319483757019043, "learning_rate": 0.0002, "loss": 1.6378, "step": 152700 }, { "epoch": 0.62, "grad_norm": 2.3533458709716797, "learning_rate": 0.0002, "loss": 1.5188, "step": 152710 }, { "epoch": 0.62, "grad_norm": 4.034660816192627, "learning_rate": 0.0002, "loss": 1.7925, "step": 152720 }, { "epoch": 0.62, "grad_norm": 1.7585208415985107, "learning_rate": 0.0002, "loss": 1.4129, "step": 152730 }, { "epoch": 0.62, "grad_norm": 3.2727274894714355, "learning_rate": 0.0002, "loss": 1.3548, "step": 152740 }, { "epoch": 0.62, "grad_norm": 3.5840046405792236, "learning_rate": 0.0002, "loss": 1.729, "step": 152750 }, { "epoch": 0.62, "grad_norm": 2.978203773498535, "learning_rate": 0.0002, "loss": 1.5548, "step": 152760 }, { "epoch": 0.62, "grad_norm": 3.2785089015960693, "learning_rate": 0.0002, "loss": 1.8372, "step": 152770 }, { "epoch": 0.62, "grad_norm": 2.7241106033325195, "learning_rate": 0.0002, "loss": 1.6639, "step": 152780 }, { "epoch": 0.62, "grad_norm": 4.813477993011475, "learning_rate": 0.0002, "loss": 1.5875, "step": 152790 }, { "epoch": 0.62, "grad_norm": 2.120144844055176, "learning_rate": 0.0002, "loss": 1.6794, "step": 152800 }, { "epoch": 0.62, "grad_norm": 2.458286762237549, "learning_rate": 0.0002, "loss": 1.6887, "step": 152810 }, { "epoch": 0.62, "grad_norm": 2.1424477100372314, "learning_rate": 0.0002, "loss": 1.3808, "step": 152820 }, { "epoch": 0.62, "grad_norm": 2.553676128387451, "learning_rate": 0.0002, "loss": 1.4984, "step": 152830 }, { "epoch": 0.62, "grad_norm": 3.5468428134918213, "learning_rate": 0.0002, "loss": 1.6563, "step": 152840 }, { "epoch": 0.62, "grad_norm": 2.0722036361694336, "learning_rate": 0.0002, "loss": 1.5651, "step": 152850 }, { "epoch": 0.62, "grad_norm": 2.6853151321411133, "learning_rate": 0.0002, "loss": 1.4865, "step": 152860 }, { "epoch": 0.62, "grad_norm": 2.4830639362335205, "learning_rate": 0.0002, "loss": 1.4854, "step": 152870 }, { "epoch": 0.62, "grad_norm": 2.4437308311462402, "learning_rate": 0.0002, "loss": 1.3882, "step": 152880 }, { "epoch": 0.62, "grad_norm": 1.5625439882278442, "learning_rate": 0.0002, "loss": 1.4747, "step": 152890 }, { "epoch": 0.62, "grad_norm": 2.3407649993896484, "learning_rate": 0.0002, "loss": 1.6403, "step": 152900 }, { "epoch": 0.62, "grad_norm": 2.732253313064575, "learning_rate": 0.0002, "loss": 1.6452, "step": 152910 }, { "epoch": 0.62, "grad_norm": 2.754319429397583, "learning_rate": 0.0002, "loss": 1.4178, "step": 152920 }, { "epoch": 0.62, "grad_norm": 2.8119218349456787, "learning_rate": 0.0002, "loss": 1.6455, "step": 152930 }, { "epoch": 0.62, "grad_norm": 3.7981748580932617, "learning_rate": 0.0002, "loss": 1.7422, "step": 152940 }, { "epoch": 0.62, "grad_norm": 3.4114134311676025, "learning_rate": 0.0002, "loss": 1.6235, "step": 152950 }, { "epoch": 0.62, "grad_norm": 2.41719651222229, "learning_rate": 0.0002, "loss": 1.4908, "step": 152960 }, { "epoch": 0.62, "grad_norm": 2.7783727645874023, "learning_rate": 0.0002, "loss": 1.8252, "step": 152970 }, { "epoch": 0.62, "grad_norm": 3.2723963260650635, "learning_rate": 0.0002, "loss": 1.8451, "step": 152980 }, { "epoch": 0.62, "grad_norm": 2.9276275634765625, "learning_rate": 0.0002, "loss": 1.5212, "step": 152990 }, { "epoch": 0.62, "grad_norm": 4.132941722869873, "learning_rate": 0.0002, "loss": 1.7257, "step": 153000 }, { "epoch": 0.62, "grad_norm": 2.4539012908935547, "learning_rate": 0.0002, "loss": 1.5345, "step": 153010 }, { "epoch": 0.62, "grad_norm": 2.7759382724761963, "learning_rate": 0.0002, "loss": 1.7085, "step": 153020 }, { "epoch": 0.62, "grad_norm": 4.057501316070557, "learning_rate": 0.0002, "loss": 1.68, "step": 153030 }, { "epoch": 0.62, "grad_norm": 3.222024440765381, "learning_rate": 0.0002, "loss": 1.6583, "step": 153040 }, { "epoch": 0.62, "grad_norm": 1.5485868453979492, "learning_rate": 0.0002, "loss": 1.3899, "step": 153050 }, { "epoch": 0.62, "grad_norm": 1.8999429941177368, "learning_rate": 0.0002, "loss": 1.6894, "step": 153060 }, { "epoch": 0.62, "grad_norm": 2.5427558422088623, "learning_rate": 0.0002, "loss": 1.6546, "step": 153070 }, { "epoch": 0.62, "grad_norm": 2.7442140579223633, "learning_rate": 0.0002, "loss": 1.771, "step": 153080 }, { "epoch": 0.62, "grad_norm": 2.745238780975342, "learning_rate": 0.0002, "loss": 1.8404, "step": 153090 }, { "epoch": 0.62, "grad_norm": 2.3400774002075195, "learning_rate": 0.0002, "loss": 1.5677, "step": 153100 }, { "epoch": 0.62, "grad_norm": 2.554227828979492, "learning_rate": 0.0002, "loss": 1.6431, "step": 153110 }, { "epoch": 0.62, "grad_norm": 2.720147132873535, "learning_rate": 0.0002, "loss": 1.6541, "step": 153120 }, { "epoch": 0.62, "grad_norm": 2.774451971054077, "learning_rate": 0.0002, "loss": 1.5944, "step": 153130 }, { "epoch": 0.62, "grad_norm": 1.8124374151229858, "learning_rate": 0.0002, "loss": 1.5288, "step": 153140 }, { "epoch": 0.62, "grad_norm": 3.6599419116973877, "learning_rate": 0.0002, "loss": 1.6546, "step": 153150 }, { "epoch": 0.62, "grad_norm": 4.16335391998291, "learning_rate": 0.0002, "loss": 1.7617, "step": 153160 }, { "epoch": 0.62, "grad_norm": 2.29343581199646, "learning_rate": 0.0002, "loss": 1.5472, "step": 153170 }, { "epoch": 0.62, "grad_norm": 3.678467273712158, "learning_rate": 0.0002, "loss": 1.4516, "step": 153180 }, { "epoch": 0.62, "grad_norm": 4.148049354553223, "learning_rate": 0.0002, "loss": 1.7394, "step": 153190 }, { "epoch": 0.62, "grad_norm": 2.4845962524414062, "learning_rate": 0.0002, "loss": 1.5382, "step": 153200 }, { "epoch": 0.62, "grad_norm": 3.140512466430664, "learning_rate": 0.0002, "loss": 1.7338, "step": 153210 }, { "epoch": 0.62, "grad_norm": 3.1204514503479004, "learning_rate": 0.0002, "loss": 1.6578, "step": 153220 }, { "epoch": 0.62, "grad_norm": 4.64969539642334, "learning_rate": 0.0002, "loss": 1.7308, "step": 153230 }, { "epoch": 0.62, "grad_norm": 3.9503166675567627, "learning_rate": 0.0002, "loss": 1.4741, "step": 153240 }, { "epoch": 0.62, "grad_norm": 3.061447858810425, "learning_rate": 0.0002, "loss": 1.658, "step": 153250 }, { "epoch": 0.62, "grad_norm": 4.895815849304199, "learning_rate": 0.0002, "loss": 1.9198, "step": 153260 }, { "epoch": 0.62, "grad_norm": 2.987799644470215, "learning_rate": 0.0002, "loss": 1.6122, "step": 153270 }, { "epoch": 0.62, "grad_norm": 5.580735206604004, "learning_rate": 0.0002, "loss": 1.534, "step": 153280 }, { "epoch": 0.62, "grad_norm": 2.8076696395874023, "learning_rate": 0.0002, "loss": 1.625, "step": 153290 }, { "epoch": 0.62, "grad_norm": 2.1889235973358154, "learning_rate": 0.0002, "loss": 1.473, "step": 153300 }, { "epoch": 0.62, "grad_norm": 2.9411115646362305, "learning_rate": 0.0002, "loss": 1.5175, "step": 153310 }, { "epoch": 0.62, "grad_norm": 3.214531898498535, "learning_rate": 0.0002, "loss": 1.5252, "step": 153320 }, { "epoch": 0.62, "grad_norm": 3.7730658054351807, "learning_rate": 0.0002, "loss": 1.7181, "step": 153330 }, { "epoch": 0.62, "grad_norm": 3.64884614944458, "learning_rate": 0.0002, "loss": 1.5893, "step": 153340 }, { "epoch": 0.62, "grad_norm": 2.5818777084350586, "learning_rate": 0.0002, "loss": 1.4235, "step": 153350 }, { "epoch": 0.62, "grad_norm": 2.3201615810394287, "learning_rate": 0.0002, "loss": 1.6644, "step": 153360 }, { "epoch": 0.62, "grad_norm": 2.195561408996582, "learning_rate": 0.0002, "loss": 1.3912, "step": 153370 }, { "epoch": 0.62, "grad_norm": 3.332770347595215, "learning_rate": 0.0002, "loss": 1.3633, "step": 153380 }, { "epoch": 0.62, "grad_norm": 3.481747627258301, "learning_rate": 0.0002, "loss": 1.5774, "step": 153390 }, { "epoch": 0.62, "grad_norm": 3.699777841567993, "learning_rate": 0.0002, "loss": 1.4335, "step": 153400 }, { "epoch": 0.62, "grad_norm": 2.525566577911377, "learning_rate": 0.0002, "loss": 1.3503, "step": 153410 }, { "epoch": 0.62, "grad_norm": 2.953886032104492, "learning_rate": 0.0002, "loss": 1.419, "step": 153420 }, { "epoch": 0.62, "grad_norm": 3.43278169631958, "learning_rate": 0.0002, "loss": 1.4476, "step": 153430 }, { "epoch": 0.62, "grad_norm": 2.756106376647949, "learning_rate": 0.0002, "loss": 1.4982, "step": 153440 }, { "epoch": 0.62, "grad_norm": 2.863962411880493, "learning_rate": 0.0002, "loss": 1.8723, "step": 153450 }, { "epoch": 0.62, "grad_norm": 2.334721088409424, "learning_rate": 0.0002, "loss": 1.247, "step": 153460 }, { "epoch": 0.62, "grad_norm": 3.4985086917877197, "learning_rate": 0.0002, "loss": 1.4283, "step": 153470 }, { "epoch": 0.62, "grad_norm": 3.1221377849578857, "learning_rate": 0.0002, "loss": 1.6947, "step": 153480 }, { "epoch": 0.62, "grad_norm": 4.030104160308838, "learning_rate": 0.0002, "loss": 1.7637, "step": 153490 }, { "epoch": 0.62, "grad_norm": 4.088573455810547, "learning_rate": 0.0002, "loss": 1.6207, "step": 153500 }, { "epoch": 0.62, "grad_norm": 2.8474788665771484, "learning_rate": 0.0002, "loss": 1.6657, "step": 153510 }, { "epoch": 0.62, "grad_norm": 2.685830593109131, "learning_rate": 0.0002, "loss": 1.9184, "step": 153520 }, { "epoch": 0.63, "grad_norm": 4.087762355804443, "learning_rate": 0.0002, "loss": 1.6703, "step": 153530 }, { "epoch": 0.63, "grad_norm": 2.968482494354248, "learning_rate": 0.0002, "loss": 1.5733, "step": 153540 }, { "epoch": 0.63, "grad_norm": 3.348748207092285, "learning_rate": 0.0002, "loss": 1.5602, "step": 153550 }, { "epoch": 0.63, "grad_norm": 2.438493490219116, "learning_rate": 0.0002, "loss": 1.5596, "step": 153560 }, { "epoch": 0.63, "grad_norm": 3.792649984359741, "learning_rate": 0.0002, "loss": 1.7701, "step": 153570 }, { "epoch": 0.63, "grad_norm": 2.469846725463867, "learning_rate": 0.0002, "loss": 1.6901, "step": 153580 }, { "epoch": 0.63, "grad_norm": 2.629234552383423, "learning_rate": 0.0002, "loss": 1.4921, "step": 153590 }, { "epoch": 0.63, "grad_norm": 2.9356396198272705, "learning_rate": 0.0002, "loss": 1.8174, "step": 153600 }, { "epoch": 0.63, "grad_norm": 2.381187677383423, "learning_rate": 0.0002, "loss": 1.5271, "step": 153610 }, { "epoch": 0.63, "grad_norm": 2.3264362812042236, "learning_rate": 0.0002, "loss": 1.4427, "step": 153620 }, { "epoch": 0.63, "grad_norm": 3.583508014678955, "learning_rate": 0.0002, "loss": 1.5776, "step": 153630 }, { "epoch": 0.63, "grad_norm": 4.476792335510254, "learning_rate": 0.0002, "loss": 1.7712, "step": 153640 }, { "epoch": 0.63, "grad_norm": 3.4607181549072266, "learning_rate": 0.0002, "loss": 1.5618, "step": 153650 }, { "epoch": 0.63, "grad_norm": 2.8838868141174316, "learning_rate": 0.0002, "loss": 1.6686, "step": 153660 }, { "epoch": 0.63, "grad_norm": 1.8704971075057983, "learning_rate": 0.0002, "loss": 1.4984, "step": 153670 }, { "epoch": 0.63, "grad_norm": 2.0422146320343018, "learning_rate": 0.0002, "loss": 1.7605, "step": 153680 }, { "epoch": 0.63, "grad_norm": 3.392045497894287, "learning_rate": 0.0002, "loss": 1.6532, "step": 153690 }, { "epoch": 0.63, "grad_norm": 2.743309736251831, "learning_rate": 0.0002, "loss": 1.5548, "step": 153700 }, { "epoch": 0.63, "grad_norm": 4.619032859802246, "learning_rate": 0.0002, "loss": 1.7504, "step": 153710 }, { "epoch": 0.63, "grad_norm": 2.0681490898132324, "learning_rate": 0.0002, "loss": 1.6886, "step": 153720 }, { "epoch": 0.63, "grad_norm": 2.5051872730255127, "learning_rate": 0.0002, "loss": 1.422, "step": 153730 }, { "epoch": 0.63, "grad_norm": 5.785161972045898, "learning_rate": 0.0002, "loss": 1.6216, "step": 153740 }, { "epoch": 0.63, "grad_norm": 6.43907356262207, "learning_rate": 0.0002, "loss": 1.5683, "step": 153750 }, { "epoch": 0.63, "grad_norm": 2.1020894050598145, "learning_rate": 0.0002, "loss": 1.6572, "step": 153760 }, { "epoch": 0.63, "grad_norm": 4.431777000427246, "learning_rate": 0.0002, "loss": 1.6757, "step": 153770 }, { "epoch": 0.63, "grad_norm": 3.3627195358276367, "learning_rate": 0.0002, "loss": 1.455, "step": 153780 }, { "epoch": 0.63, "grad_norm": 2.755366086959839, "learning_rate": 0.0002, "loss": 1.5181, "step": 153790 }, { "epoch": 0.63, "grad_norm": 3.3211143016815186, "learning_rate": 0.0002, "loss": 1.4643, "step": 153800 }, { "epoch": 0.63, "grad_norm": 2.7048208713531494, "learning_rate": 0.0002, "loss": 1.3374, "step": 153810 }, { "epoch": 0.63, "grad_norm": 2.0277273654937744, "learning_rate": 0.0002, "loss": 1.7972, "step": 153820 }, { "epoch": 0.63, "grad_norm": 3.456397533416748, "learning_rate": 0.0002, "loss": 1.5719, "step": 153830 }, { "epoch": 0.63, "grad_norm": 2.646512269973755, "learning_rate": 0.0002, "loss": 1.6092, "step": 153840 }, { "epoch": 0.63, "grad_norm": 2.665782928466797, "learning_rate": 0.0002, "loss": 1.5118, "step": 153850 }, { "epoch": 0.63, "grad_norm": 1.7648948431015015, "learning_rate": 0.0002, "loss": 1.6765, "step": 153860 }, { "epoch": 0.63, "grad_norm": 3.0247135162353516, "learning_rate": 0.0002, "loss": 1.5464, "step": 153870 }, { "epoch": 0.63, "grad_norm": 2.8667354583740234, "learning_rate": 0.0002, "loss": 1.3545, "step": 153880 }, { "epoch": 0.63, "grad_norm": 2.6113131046295166, "learning_rate": 0.0002, "loss": 1.711, "step": 153890 }, { "epoch": 0.63, "grad_norm": 2.5426995754241943, "learning_rate": 0.0002, "loss": 1.4618, "step": 153900 }, { "epoch": 0.63, "grad_norm": 6.169172763824463, "learning_rate": 0.0002, "loss": 1.3822, "step": 153910 }, { "epoch": 0.63, "grad_norm": 3.907028913497925, "learning_rate": 0.0002, "loss": 1.3712, "step": 153920 }, { "epoch": 0.63, "grad_norm": 3.3397157192230225, "learning_rate": 0.0002, "loss": 1.3939, "step": 153930 }, { "epoch": 0.63, "grad_norm": 3.0515329837799072, "learning_rate": 0.0002, "loss": 1.7043, "step": 153940 }, { "epoch": 0.63, "grad_norm": 3.8192551136016846, "learning_rate": 0.0002, "loss": 1.5121, "step": 153950 }, { "epoch": 0.63, "grad_norm": 2.7403488159179688, "learning_rate": 0.0002, "loss": 1.5865, "step": 153960 }, { "epoch": 0.63, "grad_norm": 2.6616363525390625, "learning_rate": 0.0002, "loss": 1.5949, "step": 153970 }, { "epoch": 0.63, "grad_norm": 2.4829559326171875, "learning_rate": 0.0002, "loss": 1.5803, "step": 153980 }, { "epoch": 0.63, "grad_norm": 2.9780900478363037, "learning_rate": 0.0002, "loss": 1.4903, "step": 153990 }, { "epoch": 0.63, "grad_norm": 1.8754206895828247, "learning_rate": 0.0002, "loss": 1.7131, "step": 154000 }, { "epoch": 0.63, "grad_norm": 3.885552406311035, "learning_rate": 0.0002, "loss": 1.5992, "step": 154010 }, { "epoch": 0.63, "grad_norm": 3.9904417991638184, "learning_rate": 0.0002, "loss": 1.5835, "step": 154020 }, { "epoch": 0.63, "grad_norm": 3.9688923358917236, "learning_rate": 0.0002, "loss": 1.5564, "step": 154030 }, { "epoch": 0.63, "grad_norm": 2.640726089477539, "learning_rate": 0.0002, "loss": 1.8543, "step": 154040 }, { "epoch": 0.63, "grad_norm": 4.154054641723633, "learning_rate": 0.0002, "loss": 1.8506, "step": 154050 }, { "epoch": 0.63, "grad_norm": 3.807387590408325, "learning_rate": 0.0002, "loss": 1.4508, "step": 154060 }, { "epoch": 0.63, "grad_norm": 3.9934561252593994, "learning_rate": 0.0002, "loss": 1.5052, "step": 154070 }, { "epoch": 0.63, "grad_norm": 2.3627371788024902, "learning_rate": 0.0002, "loss": 1.6646, "step": 154080 }, { "epoch": 0.63, "grad_norm": 4.364947319030762, "learning_rate": 0.0002, "loss": 1.6823, "step": 154090 }, { "epoch": 0.63, "grad_norm": 3.442140579223633, "learning_rate": 0.0002, "loss": 1.3749, "step": 154100 }, { "epoch": 0.63, "grad_norm": 1.9417160749435425, "learning_rate": 0.0002, "loss": 1.7344, "step": 154110 }, { "epoch": 0.63, "grad_norm": 5.18112325668335, "learning_rate": 0.0002, "loss": 1.6279, "step": 154120 }, { "epoch": 0.63, "grad_norm": 3.1893224716186523, "learning_rate": 0.0002, "loss": 1.6988, "step": 154130 }, { "epoch": 0.63, "grad_norm": 3.5879387855529785, "learning_rate": 0.0002, "loss": 1.4472, "step": 154140 }, { "epoch": 0.63, "grad_norm": 2.128283977508545, "learning_rate": 0.0002, "loss": 1.72, "step": 154150 }, { "epoch": 0.63, "grad_norm": 5.1239237785339355, "learning_rate": 0.0002, "loss": 1.6066, "step": 154160 }, { "epoch": 0.63, "grad_norm": 4.262213230133057, "learning_rate": 0.0002, "loss": 1.422, "step": 154170 }, { "epoch": 0.63, "grad_norm": 2.986079692840576, "learning_rate": 0.0002, "loss": 1.7554, "step": 154180 }, { "epoch": 0.63, "grad_norm": 2.8565165996551514, "learning_rate": 0.0002, "loss": 1.5744, "step": 154190 }, { "epoch": 0.63, "grad_norm": 2.6973230838775635, "learning_rate": 0.0002, "loss": 1.4574, "step": 154200 }, { "epoch": 0.63, "grad_norm": 1.8223189115524292, "learning_rate": 0.0002, "loss": 1.566, "step": 154210 }, { "epoch": 0.63, "grad_norm": 2.9421164989471436, "learning_rate": 0.0002, "loss": 1.4123, "step": 154220 }, { "epoch": 0.63, "grad_norm": 5.449639320373535, "learning_rate": 0.0002, "loss": 1.6059, "step": 154230 }, { "epoch": 0.63, "grad_norm": 3.191948652267456, "learning_rate": 0.0002, "loss": 1.6859, "step": 154240 }, { "epoch": 0.63, "grad_norm": 2.9690544605255127, "learning_rate": 0.0002, "loss": 1.706, "step": 154250 }, { "epoch": 0.63, "grad_norm": 2.8933610916137695, "learning_rate": 0.0002, "loss": 1.7394, "step": 154260 }, { "epoch": 0.63, "grad_norm": 2.749030113220215, "learning_rate": 0.0002, "loss": 1.6685, "step": 154270 }, { "epoch": 0.63, "grad_norm": 1.6564527750015259, "learning_rate": 0.0002, "loss": 1.4431, "step": 154280 }, { "epoch": 0.63, "grad_norm": 3.9780569076538086, "learning_rate": 0.0002, "loss": 1.5334, "step": 154290 }, { "epoch": 0.63, "grad_norm": 2.4720449447631836, "learning_rate": 0.0002, "loss": 1.5965, "step": 154300 }, { "epoch": 0.63, "grad_norm": 2.374720811843872, "learning_rate": 0.0002, "loss": 1.4532, "step": 154310 }, { "epoch": 0.63, "grad_norm": 5.576540946960449, "learning_rate": 0.0002, "loss": 1.4683, "step": 154320 }, { "epoch": 0.63, "grad_norm": 3.3979625701904297, "learning_rate": 0.0002, "loss": 1.5725, "step": 154330 }, { "epoch": 0.63, "grad_norm": 2.6773881912231445, "learning_rate": 0.0002, "loss": 1.6008, "step": 154340 }, { "epoch": 0.63, "grad_norm": 4.917464733123779, "learning_rate": 0.0002, "loss": 1.6354, "step": 154350 }, { "epoch": 0.63, "grad_norm": 4.425872325897217, "learning_rate": 0.0002, "loss": 1.4701, "step": 154360 }, { "epoch": 0.63, "grad_norm": 4.403124809265137, "learning_rate": 0.0002, "loss": 1.663, "step": 154370 }, { "epoch": 0.63, "grad_norm": 3.38728404045105, "learning_rate": 0.0002, "loss": 1.6015, "step": 154380 }, { "epoch": 0.63, "grad_norm": 2.7412922382354736, "learning_rate": 0.0002, "loss": 1.6545, "step": 154390 }, { "epoch": 0.63, "grad_norm": 2.3453991413116455, "learning_rate": 0.0002, "loss": 1.5187, "step": 154400 }, { "epoch": 0.63, "grad_norm": 2.764592409133911, "learning_rate": 0.0002, "loss": 1.4945, "step": 154410 }, { "epoch": 0.63, "grad_norm": 3.289961576461792, "learning_rate": 0.0002, "loss": 1.5873, "step": 154420 }, { "epoch": 0.63, "grad_norm": 3.2342875003814697, "learning_rate": 0.0002, "loss": 1.5979, "step": 154430 }, { "epoch": 0.63, "grad_norm": 3.278644561767578, "learning_rate": 0.0002, "loss": 1.4093, "step": 154440 }, { "epoch": 0.63, "grad_norm": 3.208996534347534, "learning_rate": 0.0002, "loss": 1.861, "step": 154450 }, { "epoch": 0.63, "grad_norm": 2.981734037399292, "learning_rate": 0.0002, "loss": 1.6734, "step": 154460 }, { "epoch": 0.63, "grad_norm": 4.9429168701171875, "learning_rate": 0.0002, "loss": 1.7418, "step": 154470 }, { "epoch": 0.63, "grad_norm": 3.0715525150299072, "learning_rate": 0.0002, "loss": 1.5999, "step": 154480 }, { "epoch": 0.63, "grad_norm": 6.912446975708008, "learning_rate": 0.0002, "loss": 1.5855, "step": 154490 }, { "epoch": 0.63, "grad_norm": 3.875518798828125, "learning_rate": 0.0002, "loss": 1.7613, "step": 154500 }, { "epoch": 0.63, "grad_norm": 2.10840106010437, "learning_rate": 0.0002, "loss": 1.8292, "step": 154510 }, { "epoch": 0.63, "grad_norm": 2.290471076965332, "learning_rate": 0.0002, "loss": 1.3977, "step": 154520 }, { "epoch": 0.63, "grad_norm": 2.241252899169922, "learning_rate": 0.0002, "loss": 1.7583, "step": 154530 }, { "epoch": 0.63, "grad_norm": 6.034826278686523, "learning_rate": 0.0002, "loss": 1.668, "step": 154540 }, { "epoch": 0.63, "grad_norm": 2.4884815216064453, "learning_rate": 0.0002, "loss": 1.7434, "step": 154550 }, { "epoch": 0.63, "grad_norm": 10.150384902954102, "learning_rate": 0.0002, "loss": 1.6223, "step": 154560 }, { "epoch": 0.63, "grad_norm": 3.13134765625, "learning_rate": 0.0002, "loss": 1.5693, "step": 154570 }, { "epoch": 0.63, "grad_norm": 2.310029983520508, "learning_rate": 0.0002, "loss": 1.4356, "step": 154580 }, { "epoch": 0.63, "grad_norm": 3.055889844894409, "learning_rate": 0.0002, "loss": 1.516, "step": 154590 }, { "epoch": 0.63, "grad_norm": 3.2186734676361084, "learning_rate": 0.0002, "loss": 1.4651, "step": 154600 }, { "epoch": 0.63, "grad_norm": 8.9699068069458, "learning_rate": 0.0002, "loss": 1.6002, "step": 154610 }, { "epoch": 0.63, "grad_norm": 3.0710155963897705, "learning_rate": 0.0002, "loss": 1.4868, "step": 154620 }, { "epoch": 0.63, "grad_norm": 3.2883851528167725, "learning_rate": 0.0002, "loss": 1.8315, "step": 154630 }, { "epoch": 0.63, "grad_norm": 2.0245985984802246, "learning_rate": 0.0002, "loss": 1.6466, "step": 154640 }, { "epoch": 0.63, "grad_norm": 2.832796573638916, "learning_rate": 0.0002, "loss": 1.6299, "step": 154650 }, { "epoch": 0.63, "grad_norm": 9.686689376831055, "learning_rate": 0.0002, "loss": 1.5993, "step": 154660 }, { "epoch": 0.63, "grad_norm": 3.7554192543029785, "learning_rate": 0.0002, "loss": 1.87, "step": 154670 }, { "epoch": 0.63, "grad_norm": 3.8481876850128174, "learning_rate": 0.0002, "loss": 1.5448, "step": 154680 }, { "epoch": 0.63, "grad_norm": 5.657331466674805, "learning_rate": 0.0002, "loss": 2.0604, "step": 154690 }, { "epoch": 0.63, "grad_norm": 3.8424291610717773, "learning_rate": 0.0002, "loss": 1.659, "step": 154700 }, { "epoch": 0.63, "grad_norm": 2.6474740505218506, "learning_rate": 0.0002, "loss": 1.7376, "step": 154710 }, { "epoch": 0.63, "grad_norm": 2.9556095600128174, "learning_rate": 0.0002, "loss": 1.6892, "step": 154720 }, { "epoch": 0.63, "grad_norm": 2.2114028930664062, "learning_rate": 0.0002, "loss": 1.5787, "step": 154730 }, { "epoch": 0.63, "grad_norm": 3.685225248336792, "learning_rate": 0.0002, "loss": 1.7005, "step": 154740 }, { "epoch": 0.63, "grad_norm": 3.4020912647247314, "learning_rate": 0.0002, "loss": 1.5371, "step": 154750 }, { "epoch": 0.63, "grad_norm": 3.348710536956787, "learning_rate": 0.0002, "loss": 1.5472, "step": 154760 }, { "epoch": 0.63, "grad_norm": 2.163834571838379, "learning_rate": 0.0002, "loss": 1.4524, "step": 154770 }, { "epoch": 0.63, "grad_norm": 1.7155072689056396, "learning_rate": 0.0002, "loss": 1.462, "step": 154780 }, { "epoch": 0.63, "grad_norm": 2.1894984245300293, "learning_rate": 0.0002, "loss": 1.61, "step": 154790 }, { "epoch": 0.63, "grad_norm": 1.765462875366211, "learning_rate": 0.0002, "loss": 1.7403, "step": 154800 }, { "epoch": 0.63, "grad_norm": 1.5863603353500366, "learning_rate": 0.0002, "loss": 1.6618, "step": 154810 }, { "epoch": 0.63, "grad_norm": 2.660144329071045, "learning_rate": 0.0002, "loss": 1.449, "step": 154820 }, { "epoch": 0.63, "grad_norm": 1.837830662727356, "learning_rate": 0.0002, "loss": 1.6304, "step": 154830 }, { "epoch": 0.63, "grad_norm": 4.079137802124023, "learning_rate": 0.0002, "loss": 1.8196, "step": 154840 }, { "epoch": 0.63, "grad_norm": 1.3080178499221802, "learning_rate": 0.0002, "loss": 1.6843, "step": 154850 }, { "epoch": 0.63, "grad_norm": 3.3657243251800537, "learning_rate": 0.0002, "loss": 1.6655, "step": 154860 }, { "epoch": 0.63, "grad_norm": 2.4506239891052246, "learning_rate": 0.0002, "loss": 1.4071, "step": 154870 }, { "epoch": 0.63, "grad_norm": 3.353120803833008, "learning_rate": 0.0002, "loss": 1.6593, "step": 154880 }, { "epoch": 0.63, "grad_norm": 2.91253924369812, "learning_rate": 0.0002, "loss": 1.4852, "step": 154890 }, { "epoch": 0.63, "grad_norm": 2.9498798847198486, "learning_rate": 0.0002, "loss": 1.5131, "step": 154900 }, { "epoch": 0.63, "grad_norm": 2.8181161880493164, "learning_rate": 0.0002, "loss": 1.5113, "step": 154910 }, { "epoch": 0.63, "grad_norm": 1.9906741380691528, "learning_rate": 0.0002, "loss": 1.7502, "step": 154920 }, { "epoch": 0.63, "grad_norm": 3.7077853679656982, "learning_rate": 0.0002, "loss": 1.6639, "step": 154930 }, { "epoch": 0.63, "grad_norm": 1.392402172088623, "learning_rate": 0.0002, "loss": 1.4979, "step": 154940 }, { "epoch": 0.63, "grad_norm": 2.6343936920166016, "learning_rate": 0.0002, "loss": 1.6877, "step": 154950 }, { "epoch": 0.63, "grad_norm": 3.049607276916504, "learning_rate": 0.0002, "loss": 1.5231, "step": 154960 }, { "epoch": 0.63, "grad_norm": 3.67531418800354, "learning_rate": 0.0002, "loss": 1.2251, "step": 154970 }, { "epoch": 0.63, "grad_norm": 1.9765610694885254, "learning_rate": 0.0002, "loss": 1.4034, "step": 154980 }, { "epoch": 0.63, "grad_norm": 3.852198600769043, "learning_rate": 0.0002, "loss": 1.6948, "step": 154990 }, { "epoch": 0.63, "grad_norm": 2.017271041870117, "learning_rate": 0.0002, "loss": 1.7239, "step": 155000 }, { "epoch": 0.63, "grad_norm": 3.293076753616333, "learning_rate": 0.0002, "loss": 1.4856, "step": 155010 }, { "epoch": 0.63, "grad_norm": 3.4008851051330566, "learning_rate": 0.0002, "loss": 1.6468, "step": 155020 }, { "epoch": 0.63, "grad_norm": 1.3717855215072632, "learning_rate": 0.0002, "loss": 1.6489, "step": 155030 }, { "epoch": 0.63, "grad_norm": 4.1904520988464355, "learning_rate": 0.0002, "loss": 1.8045, "step": 155040 }, { "epoch": 0.63, "grad_norm": 2.2213690280914307, "learning_rate": 0.0002, "loss": 1.5672, "step": 155050 }, { "epoch": 0.63, "grad_norm": 2.3228988647460938, "learning_rate": 0.0002, "loss": 1.6061, "step": 155060 }, { "epoch": 0.63, "grad_norm": 2.090040922164917, "learning_rate": 0.0002, "loss": 1.4489, "step": 155070 }, { "epoch": 0.63, "grad_norm": 2.682194232940674, "learning_rate": 0.0002, "loss": 1.6458, "step": 155080 }, { "epoch": 0.63, "grad_norm": 2.526167869567871, "learning_rate": 0.0002, "loss": 1.5669, "step": 155090 }, { "epoch": 0.63, "grad_norm": 2.755002498626709, "learning_rate": 0.0002, "loss": 1.7496, "step": 155100 }, { "epoch": 0.63, "grad_norm": 1.6452058553695679, "learning_rate": 0.0002, "loss": 1.4061, "step": 155110 }, { "epoch": 0.63, "grad_norm": 1.5461806058883667, "learning_rate": 0.0002, "loss": 1.5335, "step": 155120 }, { "epoch": 0.63, "grad_norm": 4.263041973114014, "learning_rate": 0.0002, "loss": 1.5828, "step": 155130 }, { "epoch": 0.63, "grad_norm": 3.008366823196411, "learning_rate": 0.0002, "loss": 1.5457, "step": 155140 }, { "epoch": 0.63, "grad_norm": 3.4436256885528564, "learning_rate": 0.0002, "loss": 1.5306, "step": 155150 }, { "epoch": 0.63, "grad_norm": 2.1439695358276367, "learning_rate": 0.0002, "loss": 1.4954, "step": 155160 }, { "epoch": 0.63, "grad_norm": 2.0115294456481934, "learning_rate": 0.0002, "loss": 1.585, "step": 155170 }, { "epoch": 0.63, "grad_norm": 6.219908714294434, "learning_rate": 0.0002, "loss": 1.1675, "step": 155180 }, { "epoch": 0.63, "grad_norm": 3.2038722038269043, "learning_rate": 0.0002, "loss": 1.6083, "step": 155190 }, { "epoch": 0.63, "grad_norm": 3.4564568996429443, "learning_rate": 0.0002, "loss": 1.6179, "step": 155200 }, { "epoch": 0.63, "grad_norm": 2.305777072906494, "learning_rate": 0.0002, "loss": 1.6297, "step": 155210 }, { "epoch": 0.63, "grad_norm": 2.2326109409332275, "learning_rate": 0.0002, "loss": 1.6963, "step": 155220 }, { "epoch": 0.63, "grad_norm": 2.165618419647217, "learning_rate": 0.0002, "loss": 1.5269, "step": 155230 }, { "epoch": 0.63, "grad_norm": 4.867171287536621, "learning_rate": 0.0002, "loss": 1.6001, "step": 155240 }, { "epoch": 0.63, "grad_norm": 2.8649232387542725, "learning_rate": 0.0002, "loss": 1.2373, "step": 155250 }, { "epoch": 0.63, "grad_norm": 2.537785291671753, "learning_rate": 0.0002, "loss": 1.5013, "step": 155260 }, { "epoch": 0.63, "grad_norm": 2.9683914184570312, "learning_rate": 0.0002, "loss": 1.8581, "step": 155270 }, { "epoch": 0.63, "grad_norm": 4.158030033111572, "learning_rate": 0.0002, "loss": 1.4524, "step": 155280 }, { "epoch": 0.63, "grad_norm": 2.7850847244262695, "learning_rate": 0.0002, "loss": 1.3792, "step": 155290 }, { "epoch": 0.63, "grad_norm": 2.9012858867645264, "learning_rate": 0.0002, "loss": 1.6443, "step": 155300 }, { "epoch": 0.63, "grad_norm": 4.737707614898682, "learning_rate": 0.0002, "loss": 1.235, "step": 155310 }, { "epoch": 0.63, "grad_norm": 1.976138949394226, "learning_rate": 0.0002, "loss": 1.5172, "step": 155320 }, { "epoch": 0.63, "grad_norm": 2.358412027359009, "learning_rate": 0.0002, "loss": 1.5806, "step": 155330 }, { "epoch": 0.63, "grad_norm": 2.9157018661499023, "learning_rate": 0.0002, "loss": 1.5816, "step": 155340 }, { "epoch": 0.63, "grad_norm": 2.9740633964538574, "learning_rate": 0.0002, "loss": 1.5992, "step": 155350 }, { "epoch": 0.63, "grad_norm": 3.524510622024536, "learning_rate": 0.0002, "loss": 1.4426, "step": 155360 }, { "epoch": 0.63, "grad_norm": 3.449174404144287, "learning_rate": 0.0002, "loss": 1.4545, "step": 155370 }, { "epoch": 0.63, "grad_norm": 2.006702423095703, "learning_rate": 0.0002, "loss": 1.3894, "step": 155380 }, { "epoch": 0.63, "grad_norm": 2.0104544162750244, "learning_rate": 0.0002, "loss": 1.6035, "step": 155390 }, { "epoch": 0.63, "grad_norm": 2.4637291431427, "learning_rate": 0.0002, "loss": 1.5777, "step": 155400 }, { "epoch": 0.63, "grad_norm": 2.9710693359375, "learning_rate": 0.0002, "loss": 1.5662, "step": 155410 }, { "epoch": 0.63, "grad_norm": 5.815410614013672, "learning_rate": 0.0002, "loss": 1.7339, "step": 155420 }, { "epoch": 0.63, "grad_norm": 4.111191272735596, "learning_rate": 0.0002, "loss": 1.6821, "step": 155430 }, { "epoch": 0.63, "grad_norm": 2.649852991104126, "learning_rate": 0.0002, "loss": 1.8673, "step": 155440 }, { "epoch": 0.63, "grad_norm": 2.5763585567474365, "learning_rate": 0.0002, "loss": 1.5745, "step": 155450 }, { "epoch": 0.63, "grad_norm": 3.4603941440582275, "learning_rate": 0.0002, "loss": 1.9137, "step": 155460 }, { "epoch": 0.63, "grad_norm": 1.1350719928741455, "learning_rate": 0.0002, "loss": 1.5164, "step": 155470 }, { "epoch": 0.63, "grad_norm": 3.6546239852905273, "learning_rate": 0.0002, "loss": 1.7851, "step": 155480 }, { "epoch": 0.63, "grad_norm": 1.7173335552215576, "learning_rate": 0.0002, "loss": 1.6929, "step": 155490 }, { "epoch": 0.63, "grad_norm": 4.936279773712158, "learning_rate": 0.0002, "loss": 1.886, "step": 155500 }, { "epoch": 0.63, "grad_norm": 1.5209527015686035, "learning_rate": 0.0002, "loss": 1.5848, "step": 155510 }, { "epoch": 0.63, "grad_norm": 2.637274742126465, "learning_rate": 0.0002, "loss": 1.5682, "step": 155520 }, { "epoch": 0.63, "grad_norm": 2.8114945888519287, "learning_rate": 0.0002, "loss": 1.4711, "step": 155530 }, { "epoch": 0.63, "grad_norm": 2.9545862674713135, "learning_rate": 0.0002, "loss": 1.6887, "step": 155540 }, { "epoch": 0.63, "grad_norm": 1.250443935394287, "learning_rate": 0.0002, "loss": 1.7554, "step": 155550 }, { "epoch": 0.63, "grad_norm": 1.9644880294799805, "learning_rate": 0.0002, "loss": 1.7798, "step": 155560 }, { "epoch": 0.63, "grad_norm": 3.4807183742523193, "learning_rate": 0.0002, "loss": 1.5524, "step": 155570 }, { "epoch": 0.63, "grad_norm": 2.062145471572876, "learning_rate": 0.0002, "loss": 1.3418, "step": 155580 }, { "epoch": 0.63, "grad_norm": 6.926460266113281, "learning_rate": 0.0002, "loss": 1.6912, "step": 155590 }, { "epoch": 0.63, "grad_norm": 2.7955195903778076, "learning_rate": 0.0002, "loss": 1.5488, "step": 155600 }, { "epoch": 0.63, "grad_norm": 3.4262535572052, "learning_rate": 0.0002, "loss": 1.7075, "step": 155610 }, { "epoch": 0.63, "grad_norm": 3.433317184448242, "learning_rate": 0.0002, "loss": 1.5653, "step": 155620 }, { "epoch": 0.63, "grad_norm": 3.0537376403808594, "learning_rate": 0.0002, "loss": 1.6387, "step": 155630 }, { "epoch": 0.63, "grad_norm": 3.014238119125366, "learning_rate": 0.0002, "loss": 1.5715, "step": 155640 }, { "epoch": 0.63, "grad_norm": 5.527846813201904, "learning_rate": 0.0002, "loss": 1.3834, "step": 155650 }, { "epoch": 0.63, "grad_norm": 4.131349086761475, "learning_rate": 0.0002, "loss": 1.6426, "step": 155660 }, { "epoch": 0.63, "grad_norm": 3.727421760559082, "learning_rate": 0.0002, "loss": 1.6517, "step": 155670 }, { "epoch": 0.63, "grad_norm": 3.2558822631835938, "learning_rate": 0.0002, "loss": 1.7994, "step": 155680 }, { "epoch": 0.63, "grad_norm": 4.1484222412109375, "learning_rate": 0.0002, "loss": 1.5573, "step": 155690 }, { "epoch": 0.63, "grad_norm": 2.822035074234009, "learning_rate": 0.0002, "loss": 1.5054, "step": 155700 }, { "epoch": 0.63, "grad_norm": 5.31950044631958, "learning_rate": 0.0002, "loss": 1.7295, "step": 155710 }, { "epoch": 0.63, "grad_norm": 3.228050708770752, "learning_rate": 0.0002, "loss": 1.5772, "step": 155720 }, { "epoch": 0.63, "grad_norm": 2.501227617263794, "learning_rate": 0.0002, "loss": 1.753, "step": 155730 }, { "epoch": 0.63, "grad_norm": 2.3880374431610107, "learning_rate": 0.0002, "loss": 1.6584, "step": 155740 }, { "epoch": 0.63, "grad_norm": 2.151507616043091, "learning_rate": 0.0002, "loss": 1.6328, "step": 155750 }, { "epoch": 0.63, "grad_norm": 1.6976367235183716, "learning_rate": 0.0002, "loss": 1.644, "step": 155760 }, { "epoch": 0.63, "grad_norm": 2.788862943649292, "learning_rate": 0.0002, "loss": 1.5091, "step": 155770 }, { "epoch": 0.63, "grad_norm": 3.9761412143707275, "learning_rate": 0.0002, "loss": 1.4924, "step": 155780 }, { "epoch": 0.63, "grad_norm": 4.030069828033447, "learning_rate": 0.0002, "loss": 1.8543, "step": 155790 }, { "epoch": 0.63, "grad_norm": 3.2415199279785156, "learning_rate": 0.0002, "loss": 1.5031, "step": 155800 }, { "epoch": 0.63, "grad_norm": 3.3116049766540527, "learning_rate": 0.0002, "loss": 1.7282, "step": 155810 }, { "epoch": 0.63, "grad_norm": 2.444819688796997, "learning_rate": 0.0002, "loss": 1.7541, "step": 155820 }, { "epoch": 0.63, "grad_norm": 3.4956836700439453, "learning_rate": 0.0002, "loss": 1.3904, "step": 155830 }, { "epoch": 0.63, "grad_norm": 2.2695772647857666, "learning_rate": 0.0002, "loss": 1.7534, "step": 155840 }, { "epoch": 0.63, "grad_norm": 4.6701226234436035, "learning_rate": 0.0002, "loss": 1.6414, "step": 155850 }, { "epoch": 0.63, "grad_norm": 2.6600403785705566, "learning_rate": 0.0002, "loss": 1.7601, "step": 155860 }, { "epoch": 0.63, "grad_norm": 3.7679364681243896, "learning_rate": 0.0002, "loss": 1.5815, "step": 155870 }, { "epoch": 0.63, "grad_norm": 2.4515652656555176, "learning_rate": 0.0002, "loss": 1.774, "step": 155880 }, { "epoch": 0.63, "grad_norm": 3.2977137565612793, "learning_rate": 0.0002, "loss": 1.5224, "step": 155890 }, { "epoch": 0.63, "grad_norm": 2.9706690311431885, "learning_rate": 0.0002, "loss": 1.6759, "step": 155900 }, { "epoch": 0.63, "grad_norm": 3.7340235710144043, "learning_rate": 0.0002, "loss": 1.5005, "step": 155910 }, { "epoch": 0.63, "grad_norm": 2.306703805923462, "learning_rate": 0.0002, "loss": 1.5102, "step": 155920 }, { "epoch": 0.63, "grad_norm": 2.517432928085327, "learning_rate": 0.0002, "loss": 1.3694, "step": 155930 }, { "epoch": 0.63, "grad_norm": 3.409757614135742, "learning_rate": 0.0002, "loss": 1.5276, "step": 155940 }, { "epoch": 0.63, "grad_norm": 3.1935641765594482, "learning_rate": 0.0002, "loss": 1.5758, "step": 155950 }, { "epoch": 0.63, "grad_norm": 2.5028369426727295, "learning_rate": 0.0002, "loss": 1.5, "step": 155960 }, { "epoch": 0.63, "grad_norm": 2.6910347938537598, "learning_rate": 0.0002, "loss": 1.4609, "step": 155970 }, { "epoch": 0.63, "grad_norm": 3.4302492141723633, "learning_rate": 0.0002, "loss": 1.6167, "step": 155980 }, { "epoch": 0.64, "grad_norm": 2.61527943611145, "learning_rate": 0.0002, "loss": 1.5646, "step": 155990 }, { "epoch": 0.64, "grad_norm": 2.0617895126342773, "learning_rate": 0.0002, "loss": 1.3588, "step": 156000 }, { "epoch": 0.64, "grad_norm": 2.852100133895874, "learning_rate": 0.0002, "loss": 1.5735, "step": 156010 }, { "epoch": 0.64, "grad_norm": 1.809827208518982, "learning_rate": 0.0002, "loss": 1.2918, "step": 156020 }, { "epoch": 0.64, "grad_norm": 3.3997604846954346, "learning_rate": 0.0002, "loss": 1.6391, "step": 156030 }, { "epoch": 0.64, "grad_norm": 3.227433204650879, "learning_rate": 0.0002, "loss": 1.482, "step": 156040 }, { "epoch": 0.64, "grad_norm": 4.24947452545166, "learning_rate": 0.0002, "loss": 1.5477, "step": 156050 }, { "epoch": 0.64, "grad_norm": 3.293088912963867, "learning_rate": 0.0002, "loss": 1.7688, "step": 156060 }, { "epoch": 0.64, "grad_norm": 1.8151041269302368, "learning_rate": 0.0002, "loss": 1.3543, "step": 156070 }, { "epoch": 0.64, "grad_norm": 3.783226728439331, "learning_rate": 0.0002, "loss": 1.6294, "step": 156080 }, { "epoch": 0.64, "grad_norm": 2.5331077575683594, "learning_rate": 0.0002, "loss": 1.5655, "step": 156090 }, { "epoch": 0.64, "grad_norm": 3.406585454940796, "learning_rate": 0.0002, "loss": 1.845, "step": 156100 }, { "epoch": 0.64, "grad_norm": 3.0554041862487793, "learning_rate": 0.0002, "loss": 1.6193, "step": 156110 }, { "epoch": 0.64, "grad_norm": 2.720430850982666, "learning_rate": 0.0002, "loss": 1.5803, "step": 156120 }, { "epoch": 0.64, "grad_norm": 2.5640132427215576, "learning_rate": 0.0002, "loss": 1.627, "step": 156130 }, { "epoch": 0.64, "grad_norm": 2.4440157413482666, "learning_rate": 0.0002, "loss": 1.4281, "step": 156140 }, { "epoch": 0.64, "grad_norm": 3.815140962600708, "learning_rate": 0.0002, "loss": 1.6557, "step": 156150 }, { "epoch": 0.64, "grad_norm": 3.1551353931427, "learning_rate": 0.0002, "loss": 1.7439, "step": 156160 }, { "epoch": 0.64, "grad_norm": 5.664883136749268, "learning_rate": 0.0002, "loss": 1.6287, "step": 156170 }, { "epoch": 0.64, "grad_norm": 3.876948118209839, "learning_rate": 0.0002, "loss": 1.575, "step": 156180 }, { "epoch": 0.64, "grad_norm": 4.201984882354736, "learning_rate": 0.0002, "loss": 1.6458, "step": 156190 }, { "epoch": 0.64, "grad_norm": 2.041316032409668, "learning_rate": 0.0002, "loss": 1.5861, "step": 156200 }, { "epoch": 0.64, "grad_norm": 3.2168610095977783, "learning_rate": 0.0002, "loss": 1.6675, "step": 156210 }, { "epoch": 0.64, "grad_norm": 2.641000270843506, "learning_rate": 0.0002, "loss": 1.6041, "step": 156220 }, { "epoch": 0.64, "grad_norm": 2.629302740097046, "learning_rate": 0.0002, "loss": 1.3946, "step": 156230 }, { "epoch": 0.64, "grad_norm": 1.096467137336731, "learning_rate": 0.0002, "loss": 1.6128, "step": 156240 }, { "epoch": 0.64, "grad_norm": 9.919772148132324, "learning_rate": 0.0002, "loss": 1.5555, "step": 156250 }, { "epoch": 0.64, "grad_norm": 3.2426645755767822, "learning_rate": 0.0002, "loss": 1.62, "step": 156260 }, { "epoch": 0.64, "grad_norm": 9.111732482910156, "learning_rate": 0.0002, "loss": 1.792, "step": 156270 }, { "epoch": 0.64, "grad_norm": 2.9060065746307373, "learning_rate": 0.0002, "loss": 1.4984, "step": 156280 }, { "epoch": 0.64, "grad_norm": 3.942086935043335, "learning_rate": 0.0002, "loss": 1.6989, "step": 156290 }, { "epoch": 0.64, "grad_norm": 2.2348599433898926, "learning_rate": 0.0002, "loss": 1.7223, "step": 156300 }, { "epoch": 0.64, "grad_norm": 3.5548932552337646, "learning_rate": 0.0002, "loss": 1.5115, "step": 156310 }, { "epoch": 0.64, "grad_norm": 2.8848273754119873, "learning_rate": 0.0002, "loss": 1.5731, "step": 156320 }, { "epoch": 0.64, "grad_norm": 4.018462657928467, "learning_rate": 0.0002, "loss": 1.5474, "step": 156330 }, { "epoch": 0.64, "grad_norm": 1.8377950191497803, "learning_rate": 0.0002, "loss": 1.4085, "step": 156340 }, { "epoch": 0.64, "grad_norm": 2.9741334915161133, "learning_rate": 0.0002, "loss": 1.6817, "step": 156350 }, { "epoch": 0.64, "grad_norm": 3.3723230361938477, "learning_rate": 0.0002, "loss": 1.7618, "step": 156360 }, { "epoch": 0.64, "grad_norm": 1.4126170873641968, "learning_rate": 0.0002, "loss": 1.4831, "step": 156370 }, { "epoch": 0.64, "grad_norm": 5.087194919586182, "learning_rate": 0.0002, "loss": 1.6887, "step": 156380 }, { "epoch": 0.64, "grad_norm": 2.7124905586242676, "learning_rate": 0.0002, "loss": 1.5154, "step": 156390 }, { "epoch": 0.64, "grad_norm": 2.38338041305542, "learning_rate": 0.0002, "loss": 1.4933, "step": 156400 }, { "epoch": 0.64, "grad_norm": 5.579502105712891, "learning_rate": 0.0002, "loss": 1.477, "step": 156410 }, { "epoch": 0.64, "grad_norm": 3.7228527069091797, "learning_rate": 0.0002, "loss": 1.6257, "step": 156420 }, { "epoch": 0.64, "grad_norm": 2.5542681217193604, "learning_rate": 0.0002, "loss": 1.5508, "step": 156430 }, { "epoch": 0.64, "grad_norm": 2.4752373695373535, "learning_rate": 0.0002, "loss": 1.7623, "step": 156440 }, { "epoch": 0.64, "grad_norm": 2.44022274017334, "learning_rate": 0.0002, "loss": 1.6228, "step": 156450 }, { "epoch": 0.64, "grad_norm": 2.7133944034576416, "learning_rate": 0.0002, "loss": 1.3104, "step": 156460 }, { "epoch": 0.64, "grad_norm": 1.7852052450180054, "learning_rate": 0.0002, "loss": 1.4285, "step": 156470 }, { "epoch": 0.64, "grad_norm": 3.8319578170776367, "learning_rate": 0.0002, "loss": 1.3437, "step": 156480 }, { "epoch": 0.64, "grad_norm": 1.558742642402649, "learning_rate": 0.0002, "loss": 1.7247, "step": 156490 }, { "epoch": 0.64, "grad_norm": 1.4063079357147217, "learning_rate": 0.0002, "loss": 1.5633, "step": 156500 }, { "epoch": 0.64, "grad_norm": 9.4176664352417, "learning_rate": 0.0002, "loss": 1.4668, "step": 156510 }, { "epoch": 0.64, "grad_norm": 3.8476803302764893, "learning_rate": 0.0002, "loss": 1.4739, "step": 156520 }, { "epoch": 0.64, "grad_norm": 1.8494975566864014, "learning_rate": 0.0002, "loss": 1.5986, "step": 156530 }, { "epoch": 0.64, "grad_norm": 1.8279238939285278, "learning_rate": 0.0002, "loss": 1.7461, "step": 156540 }, { "epoch": 0.64, "grad_norm": 2.398982048034668, "learning_rate": 0.0002, "loss": 1.4996, "step": 156550 }, { "epoch": 0.64, "grad_norm": 4.468385696411133, "learning_rate": 0.0002, "loss": 1.4661, "step": 156560 }, { "epoch": 0.64, "grad_norm": 3.1198153495788574, "learning_rate": 0.0002, "loss": 1.7029, "step": 156570 }, { "epoch": 0.64, "grad_norm": 2.186910629272461, "learning_rate": 0.0002, "loss": 1.5156, "step": 156580 }, { "epoch": 0.64, "grad_norm": 2.889813184738159, "learning_rate": 0.0002, "loss": 1.6214, "step": 156590 }, { "epoch": 0.64, "grad_norm": 10.578300476074219, "learning_rate": 0.0002, "loss": 1.6475, "step": 156600 }, { "epoch": 0.64, "grad_norm": 3.3597052097320557, "learning_rate": 0.0002, "loss": 1.8741, "step": 156610 }, { "epoch": 0.64, "grad_norm": 2.6976349353790283, "learning_rate": 0.0002, "loss": 1.7702, "step": 156620 }, { "epoch": 0.64, "grad_norm": 2.9843790531158447, "learning_rate": 0.0002, "loss": 1.6329, "step": 156630 }, { "epoch": 0.64, "grad_norm": 2.9854085445404053, "learning_rate": 0.0002, "loss": 1.4251, "step": 156640 }, { "epoch": 0.64, "grad_norm": 3.958927631378174, "learning_rate": 0.0002, "loss": 1.4544, "step": 156650 }, { "epoch": 0.64, "grad_norm": 3.6251935958862305, "learning_rate": 0.0002, "loss": 1.5641, "step": 156660 }, { "epoch": 0.64, "grad_norm": 2.485640287399292, "learning_rate": 0.0002, "loss": 1.6025, "step": 156670 }, { "epoch": 0.64, "grad_norm": 4.242908477783203, "learning_rate": 0.0002, "loss": 1.8846, "step": 156680 }, { "epoch": 0.64, "grad_norm": 4.110714912414551, "learning_rate": 0.0002, "loss": 1.5016, "step": 156690 }, { "epoch": 0.64, "grad_norm": 2.844069480895996, "learning_rate": 0.0002, "loss": 1.6425, "step": 156700 }, { "epoch": 0.64, "grad_norm": 2.3401620388031006, "learning_rate": 0.0002, "loss": 1.7223, "step": 156710 }, { "epoch": 0.64, "grad_norm": 1.992521047592163, "learning_rate": 0.0002, "loss": 1.789, "step": 156720 }, { "epoch": 0.64, "grad_norm": 2.36780047416687, "learning_rate": 0.0002, "loss": 1.4971, "step": 156730 }, { "epoch": 0.64, "grad_norm": 2.3494722843170166, "learning_rate": 0.0002, "loss": 1.8159, "step": 156740 }, { "epoch": 0.64, "grad_norm": 2.921380043029785, "learning_rate": 0.0002, "loss": 1.6858, "step": 156750 }, { "epoch": 0.64, "grad_norm": 8.518877029418945, "learning_rate": 0.0002, "loss": 1.5714, "step": 156760 }, { "epoch": 0.64, "grad_norm": 3.6718626022338867, "learning_rate": 0.0002, "loss": 1.4837, "step": 156770 }, { "epoch": 0.64, "grad_norm": 3.3048627376556396, "learning_rate": 0.0002, "loss": 1.8407, "step": 156780 }, { "epoch": 0.64, "grad_norm": 3.7398006916046143, "learning_rate": 0.0002, "loss": 1.3663, "step": 156790 }, { "epoch": 0.64, "grad_norm": 4.406425476074219, "learning_rate": 0.0002, "loss": 1.473, "step": 156800 }, { "epoch": 0.64, "grad_norm": 3.3441150188446045, "learning_rate": 0.0002, "loss": 1.6845, "step": 156810 }, { "epoch": 0.64, "grad_norm": 2.294980764389038, "learning_rate": 0.0002, "loss": 1.4777, "step": 156820 }, { "epoch": 0.64, "grad_norm": 3.9672234058380127, "learning_rate": 0.0002, "loss": 1.3855, "step": 156830 }, { "epoch": 0.64, "grad_norm": 1.4254158735275269, "learning_rate": 0.0002, "loss": 1.47, "step": 156840 }, { "epoch": 0.64, "grad_norm": 2.9081037044525146, "learning_rate": 0.0002, "loss": 1.7996, "step": 156850 }, { "epoch": 0.64, "grad_norm": 3.2476463317871094, "learning_rate": 0.0002, "loss": 1.5584, "step": 156860 }, { "epoch": 0.64, "grad_norm": 2.312072515487671, "learning_rate": 0.0002, "loss": 1.6903, "step": 156870 }, { "epoch": 0.64, "grad_norm": 2.788555383682251, "learning_rate": 0.0002, "loss": 1.5048, "step": 156880 }, { "epoch": 0.64, "grad_norm": 2.460386276245117, "learning_rate": 0.0002, "loss": 1.4978, "step": 156890 }, { "epoch": 0.64, "grad_norm": 2.859201669692993, "learning_rate": 0.0002, "loss": 1.5212, "step": 156900 }, { "epoch": 0.64, "grad_norm": 4.049281120300293, "learning_rate": 0.0002, "loss": 1.5095, "step": 156910 }, { "epoch": 0.64, "grad_norm": 2.5202040672302246, "learning_rate": 0.0002, "loss": 1.7026, "step": 156920 }, { "epoch": 0.64, "grad_norm": 1.8282203674316406, "learning_rate": 0.0002, "loss": 1.7721, "step": 156930 }, { "epoch": 0.64, "grad_norm": 4.434998035430908, "learning_rate": 0.0002, "loss": 1.2813, "step": 156940 }, { "epoch": 0.64, "grad_norm": 3.2214622497558594, "learning_rate": 0.0002, "loss": 1.7526, "step": 156950 }, { "epoch": 0.64, "grad_norm": 3.3553524017333984, "learning_rate": 0.0002, "loss": 1.5135, "step": 156960 }, { "epoch": 0.64, "grad_norm": 2.0696938037872314, "learning_rate": 0.0002, "loss": 1.7198, "step": 156970 }, { "epoch": 0.64, "grad_norm": 3.063798189163208, "learning_rate": 0.0002, "loss": 1.7351, "step": 156980 }, { "epoch": 0.64, "grad_norm": 11.178763389587402, "learning_rate": 0.0002, "loss": 1.5659, "step": 156990 }, { "epoch": 0.64, "grad_norm": 3.32138729095459, "learning_rate": 0.0002, "loss": 1.3655, "step": 157000 }, { "epoch": 0.64, "grad_norm": 2.9434056282043457, "learning_rate": 0.0002, "loss": 1.4627, "step": 157010 }, { "epoch": 0.64, "grad_norm": 3.5980796813964844, "learning_rate": 0.0002, "loss": 1.5431, "step": 157020 }, { "epoch": 0.64, "grad_norm": 4.056769847869873, "learning_rate": 0.0002, "loss": 1.5545, "step": 157030 }, { "epoch": 0.64, "grad_norm": 2.460744857788086, "learning_rate": 0.0002, "loss": 1.5559, "step": 157040 }, { "epoch": 0.64, "grad_norm": 5.103504180908203, "learning_rate": 0.0002, "loss": 1.5994, "step": 157050 }, { "epoch": 0.64, "grad_norm": 3.285383939743042, "learning_rate": 0.0002, "loss": 1.3658, "step": 157060 }, { "epoch": 0.64, "grad_norm": 3.013474941253662, "learning_rate": 0.0002, "loss": 1.6387, "step": 157070 }, { "epoch": 0.64, "grad_norm": 2.029218912124634, "learning_rate": 0.0002, "loss": 1.8059, "step": 157080 }, { "epoch": 0.64, "grad_norm": 3.3858022689819336, "learning_rate": 0.0002, "loss": 1.6437, "step": 157090 }, { "epoch": 0.64, "grad_norm": 3.4851579666137695, "learning_rate": 0.0002, "loss": 1.6792, "step": 157100 }, { "epoch": 0.64, "grad_norm": 1.9345059394836426, "learning_rate": 0.0002, "loss": 1.5117, "step": 157110 }, { "epoch": 0.64, "grad_norm": 2.8763480186462402, "learning_rate": 0.0002, "loss": 1.6802, "step": 157120 }, { "epoch": 0.64, "grad_norm": 2.9423747062683105, "learning_rate": 0.0002, "loss": 1.2775, "step": 157130 }, { "epoch": 0.64, "grad_norm": 2.5803706645965576, "learning_rate": 0.0002, "loss": 1.287, "step": 157140 }, { "epoch": 0.64, "grad_norm": 2.521075963973999, "learning_rate": 0.0002, "loss": 1.4595, "step": 157150 }, { "epoch": 0.64, "grad_norm": 2.7805287837982178, "learning_rate": 0.0002, "loss": 1.5681, "step": 157160 }, { "epoch": 0.64, "grad_norm": 2.4862186908721924, "learning_rate": 0.0002, "loss": 1.5961, "step": 157170 }, { "epoch": 0.64, "grad_norm": 3.1180579662323, "learning_rate": 0.0002, "loss": 1.6584, "step": 157180 }, { "epoch": 0.64, "grad_norm": 2.763265609741211, "learning_rate": 0.0002, "loss": 1.5968, "step": 157190 }, { "epoch": 0.64, "grad_norm": 2.6518614292144775, "learning_rate": 0.0002, "loss": 1.6705, "step": 157200 }, { "epoch": 0.64, "grad_norm": 2.257551670074463, "learning_rate": 0.0002, "loss": 1.8502, "step": 157210 }, { "epoch": 0.64, "grad_norm": 3.906198740005493, "learning_rate": 0.0002, "loss": 1.9062, "step": 157220 }, { "epoch": 0.64, "grad_norm": 2.7704312801361084, "learning_rate": 0.0002, "loss": 1.9453, "step": 157230 }, { "epoch": 0.64, "grad_norm": 3.045769214630127, "learning_rate": 0.0002, "loss": 1.4808, "step": 157240 }, { "epoch": 0.64, "grad_norm": 2.7765657901763916, "learning_rate": 0.0002, "loss": 1.4987, "step": 157250 }, { "epoch": 0.64, "grad_norm": 2.383965015411377, "learning_rate": 0.0002, "loss": 1.5841, "step": 157260 }, { "epoch": 0.64, "grad_norm": 3.031186819076538, "learning_rate": 0.0002, "loss": 1.4834, "step": 157270 }, { "epoch": 0.64, "grad_norm": 2.110487461090088, "learning_rate": 0.0002, "loss": 1.7778, "step": 157280 }, { "epoch": 0.64, "grad_norm": 3.133455991744995, "learning_rate": 0.0002, "loss": 1.4012, "step": 157290 }, { "epoch": 0.64, "grad_norm": 3.4955568313598633, "learning_rate": 0.0002, "loss": 1.6145, "step": 157300 }, { "epoch": 0.64, "grad_norm": 3.0896921157836914, "learning_rate": 0.0002, "loss": 1.7676, "step": 157310 }, { "epoch": 0.64, "grad_norm": 9.345869064331055, "learning_rate": 0.0002, "loss": 1.37, "step": 157320 }, { "epoch": 0.64, "grad_norm": 3.255830764770508, "learning_rate": 0.0002, "loss": 1.5528, "step": 157330 }, { "epoch": 0.64, "grad_norm": 3.640488624572754, "learning_rate": 0.0002, "loss": 1.6033, "step": 157340 }, { "epoch": 0.64, "grad_norm": 3.4460935592651367, "learning_rate": 0.0002, "loss": 1.5888, "step": 157350 }, { "epoch": 0.64, "grad_norm": 2.927966833114624, "learning_rate": 0.0002, "loss": 1.4251, "step": 157360 }, { "epoch": 0.64, "grad_norm": 3.229487419128418, "learning_rate": 0.0002, "loss": 1.7974, "step": 157370 }, { "epoch": 0.64, "grad_norm": 2.3627710342407227, "learning_rate": 0.0002, "loss": 1.5536, "step": 157380 }, { "epoch": 0.64, "grad_norm": 1.8822959661483765, "learning_rate": 0.0002, "loss": 1.5852, "step": 157390 }, { "epoch": 0.64, "grad_norm": 3.435269594192505, "learning_rate": 0.0002, "loss": 1.8071, "step": 157400 }, { "epoch": 0.64, "grad_norm": 3.496285915374756, "learning_rate": 0.0002, "loss": 1.4905, "step": 157410 }, { "epoch": 0.64, "grad_norm": 2.635159969329834, "learning_rate": 0.0002, "loss": 1.3476, "step": 157420 }, { "epoch": 0.64, "grad_norm": 4.299561977386475, "learning_rate": 0.0002, "loss": 1.2504, "step": 157430 }, { "epoch": 0.64, "grad_norm": 3.713716506958008, "learning_rate": 0.0002, "loss": 1.4538, "step": 157440 }, { "epoch": 0.64, "grad_norm": 2.8876428604125977, "learning_rate": 0.0002, "loss": 1.6666, "step": 157450 }, { "epoch": 0.64, "grad_norm": 5.390398025512695, "learning_rate": 0.0002, "loss": 1.5949, "step": 157460 }, { "epoch": 0.64, "grad_norm": 4.224001407623291, "learning_rate": 0.0002, "loss": 1.6938, "step": 157470 }, { "epoch": 0.64, "grad_norm": 4.108601093292236, "learning_rate": 0.0002, "loss": 1.5821, "step": 157480 }, { "epoch": 0.64, "grad_norm": 2.658362865447998, "learning_rate": 0.0002, "loss": 1.4761, "step": 157490 }, { "epoch": 0.64, "grad_norm": 3.268836498260498, "learning_rate": 0.0002, "loss": 1.6372, "step": 157500 }, { "epoch": 0.64, "grad_norm": 1.9225096702575684, "learning_rate": 0.0002, "loss": 1.5805, "step": 157510 }, { "epoch": 0.64, "grad_norm": 4.6292009353637695, "learning_rate": 0.0002, "loss": 1.6363, "step": 157520 }, { "epoch": 0.64, "grad_norm": 2.9058172702789307, "learning_rate": 0.0002, "loss": 1.5414, "step": 157530 }, { "epoch": 0.64, "grad_norm": 3.2610011100769043, "learning_rate": 0.0002, "loss": 1.5832, "step": 157540 }, { "epoch": 0.64, "grad_norm": 4.819775581359863, "learning_rate": 0.0002, "loss": 1.5481, "step": 157550 }, { "epoch": 0.64, "grad_norm": 2.5523219108581543, "learning_rate": 0.0002, "loss": 1.5436, "step": 157560 }, { "epoch": 0.64, "grad_norm": 3.3678371906280518, "learning_rate": 0.0002, "loss": 1.7846, "step": 157570 }, { "epoch": 0.64, "grad_norm": 3.03602933883667, "learning_rate": 0.0002, "loss": 1.353, "step": 157580 }, { "epoch": 0.64, "grad_norm": 3.812056064605713, "learning_rate": 0.0002, "loss": 1.4824, "step": 157590 }, { "epoch": 0.64, "grad_norm": 2.2936618328094482, "learning_rate": 0.0002, "loss": 1.7809, "step": 157600 }, { "epoch": 0.64, "grad_norm": 3.059674024581909, "learning_rate": 0.0002, "loss": 1.4362, "step": 157610 }, { "epoch": 0.64, "grad_norm": 2.7278683185577393, "learning_rate": 0.0002, "loss": 1.5517, "step": 157620 }, { "epoch": 0.64, "grad_norm": 3.069347620010376, "learning_rate": 0.0002, "loss": 1.5881, "step": 157630 }, { "epoch": 0.64, "grad_norm": 3.1823105812072754, "learning_rate": 0.0002, "loss": 1.574, "step": 157640 }, { "epoch": 0.64, "grad_norm": 3.6070263385772705, "learning_rate": 0.0002, "loss": 1.3003, "step": 157650 }, { "epoch": 0.64, "grad_norm": 3.04984188079834, "learning_rate": 0.0002, "loss": 1.6013, "step": 157660 }, { "epoch": 0.64, "grad_norm": 3.1533203125, "learning_rate": 0.0002, "loss": 1.7256, "step": 157670 }, { "epoch": 0.64, "grad_norm": 3.3095297813415527, "learning_rate": 0.0002, "loss": 1.7345, "step": 157680 }, { "epoch": 0.64, "grad_norm": 2.801454544067383, "learning_rate": 0.0002, "loss": 1.632, "step": 157690 }, { "epoch": 0.64, "grad_norm": 3.6125733852386475, "learning_rate": 0.0002, "loss": 1.4503, "step": 157700 }, { "epoch": 0.64, "grad_norm": 2.4827651977539062, "learning_rate": 0.0002, "loss": 1.9369, "step": 157710 }, { "epoch": 0.64, "grad_norm": 6.277704238891602, "learning_rate": 0.0002, "loss": 1.4838, "step": 157720 }, { "epoch": 0.64, "grad_norm": 2.653515100479126, "learning_rate": 0.0002, "loss": 1.6774, "step": 157730 }, { "epoch": 0.64, "grad_norm": 3.556131601333618, "learning_rate": 0.0002, "loss": 1.7749, "step": 157740 }, { "epoch": 0.64, "grad_norm": 4.2535400390625, "learning_rate": 0.0002, "loss": 1.6598, "step": 157750 }, { "epoch": 0.64, "grad_norm": 3.071883201599121, "learning_rate": 0.0002, "loss": 1.4017, "step": 157760 }, { "epoch": 0.64, "grad_norm": 2.74787974357605, "learning_rate": 0.0002, "loss": 1.5534, "step": 157770 }, { "epoch": 0.64, "grad_norm": 3.7678158283233643, "learning_rate": 0.0002, "loss": 1.6445, "step": 157780 }, { "epoch": 0.64, "grad_norm": 4.486494541168213, "learning_rate": 0.0002, "loss": 1.5298, "step": 157790 }, { "epoch": 0.64, "grad_norm": 3.230464220046997, "learning_rate": 0.0002, "loss": 1.6097, "step": 157800 }, { "epoch": 0.64, "grad_norm": 2.517320156097412, "learning_rate": 0.0002, "loss": 1.6757, "step": 157810 }, { "epoch": 0.64, "grad_norm": 2.9574785232543945, "learning_rate": 0.0002, "loss": 1.5102, "step": 157820 }, { "epoch": 0.64, "grad_norm": 4.038328170776367, "learning_rate": 0.0002, "loss": 1.6875, "step": 157830 }, { "epoch": 0.64, "grad_norm": 3.8401618003845215, "learning_rate": 0.0002, "loss": 1.5195, "step": 157840 }, { "epoch": 0.64, "grad_norm": 2.439553737640381, "learning_rate": 0.0002, "loss": 1.7232, "step": 157850 }, { "epoch": 0.64, "grad_norm": 1.7169777154922485, "learning_rate": 0.0002, "loss": 1.4989, "step": 157860 }, { "epoch": 0.64, "grad_norm": 4.019560813903809, "learning_rate": 0.0002, "loss": 1.8282, "step": 157870 }, { "epoch": 0.64, "grad_norm": 1.987762212753296, "learning_rate": 0.0002, "loss": 1.4757, "step": 157880 }, { "epoch": 0.64, "grad_norm": 1.97746741771698, "learning_rate": 0.0002, "loss": 1.5716, "step": 157890 }, { "epoch": 0.64, "grad_norm": 3.6139259338378906, "learning_rate": 0.0002, "loss": 1.6638, "step": 157900 }, { "epoch": 0.64, "grad_norm": 5.764078617095947, "learning_rate": 0.0002, "loss": 1.6348, "step": 157910 }, { "epoch": 0.64, "grad_norm": 3.6978328227996826, "learning_rate": 0.0002, "loss": 1.7294, "step": 157920 }, { "epoch": 0.64, "grad_norm": 2.9424219131469727, "learning_rate": 0.0002, "loss": 1.6077, "step": 157930 }, { "epoch": 0.64, "grad_norm": 6.918271541595459, "learning_rate": 0.0002, "loss": 1.7401, "step": 157940 }, { "epoch": 0.64, "grad_norm": 2.626575231552124, "learning_rate": 0.0002, "loss": 1.3305, "step": 157950 }, { "epoch": 0.64, "grad_norm": 2.6437084674835205, "learning_rate": 0.0002, "loss": 1.6688, "step": 157960 }, { "epoch": 0.64, "grad_norm": 4.180349349975586, "learning_rate": 0.0002, "loss": 1.6645, "step": 157970 }, { "epoch": 0.64, "grad_norm": 4.278696060180664, "learning_rate": 0.0002, "loss": 1.7627, "step": 157980 }, { "epoch": 0.64, "grad_norm": 3.446733236312866, "learning_rate": 0.0002, "loss": 1.7408, "step": 157990 }, { "epoch": 0.64, "grad_norm": 2.866140604019165, "learning_rate": 0.0002, "loss": 1.6495, "step": 158000 }, { "epoch": 0.64, "grad_norm": 3.6382505893707275, "learning_rate": 0.0002, "loss": 1.6727, "step": 158010 }, { "epoch": 0.64, "grad_norm": 3.0810344219207764, "learning_rate": 0.0002, "loss": 1.4916, "step": 158020 }, { "epoch": 0.64, "grad_norm": 2.35878849029541, "learning_rate": 0.0002, "loss": 1.645, "step": 158030 }, { "epoch": 0.64, "grad_norm": 10.351143836975098, "learning_rate": 0.0002, "loss": 1.5213, "step": 158040 }, { "epoch": 0.64, "grad_norm": 3.3252453804016113, "learning_rate": 0.0002, "loss": 1.9523, "step": 158050 }, { "epoch": 0.64, "grad_norm": 3.5585384368896484, "learning_rate": 0.0002, "loss": 1.4906, "step": 158060 }, { "epoch": 0.64, "grad_norm": 4.009501934051514, "learning_rate": 0.0002, "loss": 1.5615, "step": 158070 }, { "epoch": 0.64, "grad_norm": 2.9796736240386963, "learning_rate": 0.0002, "loss": 1.5855, "step": 158080 }, { "epoch": 0.64, "grad_norm": 2.942408323287964, "learning_rate": 0.0002, "loss": 1.8026, "step": 158090 }, { "epoch": 0.64, "grad_norm": 3.16119384765625, "learning_rate": 0.0002, "loss": 1.7588, "step": 158100 }, { "epoch": 0.64, "grad_norm": 2.8422508239746094, "learning_rate": 0.0002, "loss": 1.3851, "step": 158110 }, { "epoch": 0.64, "grad_norm": 3.612584352493286, "learning_rate": 0.0002, "loss": 1.4187, "step": 158120 }, { "epoch": 0.64, "grad_norm": 3.3720152378082275, "learning_rate": 0.0002, "loss": 1.6057, "step": 158130 }, { "epoch": 0.64, "grad_norm": 3.2874746322631836, "learning_rate": 0.0002, "loss": 1.5097, "step": 158140 }, { "epoch": 0.64, "grad_norm": 2.9509079456329346, "learning_rate": 0.0002, "loss": 1.6935, "step": 158150 }, { "epoch": 0.64, "grad_norm": 2.0655903816223145, "learning_rate": 0.0002, "loss": 1.6822, "step": 158160 }, { "epoch": 0.64, "grad_norm": 3.60198712348938, "learning_rate": 0.0002, "loss": 1.6075, "step": 158170 }, { "epoch": 0.64, "grad_norm": 3.3161489963531494, "learning_rate": 0.0002, "loss": 1.6585, "step": 158180 }, { "epoch": 0.64, "grad_norm": 2.970623731613159, "learning_rate": 0.0002, "loss": 1.5398, "step": 158190 }, { "epoch": 0.64, "grad_norm": 3.093229055404663, "learning_rate": 0.0002, "loss": 1.3995, "step": 158200 }, { "epoch": 0.64, "grad_norm": 3.2724039554595947, "learning_rate": 0.0002, "loss": 1.7215, "step": 158210 }, { "epoch": 0.64, "grad_norm": 3.5829038619995117, "learning_rate": 0.0002, "loss": 1.4522, "step": 158220 }, { "epoch": 0.64, "grad_norm": 3.546207904815674, "learning_rate": 0.0002, "loss": 1.5329, "step": 158230 }, { "epoch": 0.64, "grad_norm": 2.9817252159118652, "learning_rate": 0.0002, "loss": 1.6767, "step": 158240 }, { "epoch": 0.64, "grad_norm": 2.8400614261627197, "learning_rate": 0.0002, "loss": 1.5882, "step": 158250 }, { "epoch": 0.64, "grad_norm": 2.891526222229004, "learning_rate": 0.0002, "loss": 1.6418, "step": 158260 }, { "epoch": 0.64, "grad_norm": 3.835864543914795, "learning_rate": 0.0002, "loss": 1.7086, "step": 158270 }, { "epoch": 0.64, "grad_norm": 3.5216097831726074, "learning_rate": 0.0002, "loss": 1.5998, "step": 158280 }, { "epoch": 0.64, "grad_norm": 3.219888687133789, "learning_rate": 0.0002, "loss": 1.602, "step": 158290 }, { "epoch": 0.64, "grad_norm": 3.2459871768951416, "learning_rate": 0.0002, "loss": 1.596, "step": 158300 }, { "epoch": 0.64, "grad_norm": 7.849851131439209, "learning_rate": 0.0002, "loss": 1.5084, "step": 158310 }, { "epoch": 0.64, "grad_norm": 5.0082879066467285, "learning_rate": 0.0002, "loss": 1.6428, "step": 158320 }, { "epoch": 0.64, "grad_norm": 7.482524394989014, "learning_rate": 0.0002, "loss": 1.4734, "step": 158330 }, { "epoch": 0.64, "grad_norm": 2.968834161758423, "learning_rate": 0.0002, "loss": 1.6059, "step": 158340 }, { "epoch": 0.64, "grad_norm": 4.33762264251709, "learning_rate": 0.0002, "loss": 1.549, "step": 158350 }, { "epoch": 0.64, "grad_norm": 4.2329583168029785, "learning_rate": 0.0002, "loss": 1.7224, "step": 158360 }, { "epoch": 0.64, "grad_norm": 2.6698129177093506, "learning_rate": 0.0002, "loss": 1.68, "step": 158370 }, { "epoch": 0.64, "grad_norm": 2.2222392559051514, "learning_rate": 0.0002, "loss": 1.4599, "step": 158380 }, { "epoch": 0.64, "grad_norm": 2.4165267944335938, "learning_rate": 0.0002, "loss": 1.3677, "step": 158390 }, { "epoch": 0.64, "grad_norm": 2.3581435680389404, "learning_rate": 0.0002, "loss": 1.7783, "step": 158400 }, { "epoch": 0.64, "grad_norm": 3.642780303955078, "learning_rate": 0.0002, "loss": 1.6122, "step": 158410 }, { "epoch": 0.64, "grad_norm": 3.1512138843536377, "learning_rate": 0.0002, "loss": 1.5711, "step": 158420 }, { "epoch": 0.64, "grad_norm": 2.145496368408203, "learning_rate": 0.0002, "loss": 1.6072, "step": 158430 }, { "epoch": 0.64, "grad_norm": 2.8938820362091064, "learning_rate": 0.0002, "loss": 1.6496, "step": 158440 }, { "epoch": 0.65, "grad_norm": 1.5438770055770874, "learning_rate": 0.0002, "loss": 1.6094, "step": 158450 }, { "epoch": 0.65, "grad_norm": 4.0602521896362305, "learning_rate": 0.0002, "loss": 1.5512, "step": 158460 }, { "epoch": 0.65, "grad_norm": 2.739358901977539, "learning_rate": 0.0002, "loss": 1.4461, "step": 158470 }, { "epoch": 0.65, "grad_norm": 4.124687194824219, "learning_rate": 0.0002, "loss": 1.6336, "step": 158480 }, { "epoch": 0.65, "grad_norm": 2.056865930557251, "learning_rate": 0.0002, "loss": 1.5053, "step": 158490 }, { "epoch": 0.65, "grad_norm": 1.8796707391738892, "learning_rate": 0.0002, "loss": 1.5651, "step": 158500 }, { "epoch": 0.65, "grad_norm": 2.5019729137420654, "learning_rate": 0.0002, "loss": 1.4537, "step": 158510 }, { "epoch": 0.65, "grad_norm": 4.186634540557861, "learning_rate": 0.0002, "loss": 1.4647, "step": 158520 }, { "epoch": 0.65, "grad_norm": 1.7829190492630005, "learning_rate": 0.0002, "loss": 1.3556, "step": 158530 }, { "epoch": 0.65, "grad_norm": 4.045193195343018, "learning_rate": 0.0002, "loss": 1.5439, "step": 158540 }, { "epoch": 0.65, "grad_norm": 2.830651044845581, "learning_rate": 0.0002, "loss": 1.5731, "step": 158550 }, { "epoch": 0.65, "grad_norm": 3.1228010654449463, "learning_rate": 0.0002, "loss": 1.6228, "step": 158560 }, { "epoch": 0.65, "grad_norm": 3.383971691131592, "learning_rate": 0.0002, "loss": 1.779, "step": 158570 }, { "epoch": 0.65, "grad_norm": 4.328105449676514, "learning_rate": 0.0002, "loss": 1.6834, "step": 158580 }, { "epoch": 0.65, "grad_norm": 3.0723419189453125, "learning_rate": 0.0002, "loss": 1.5213, "step": 158590 }, { "epoch": 0.65, "grad_norm": 2.6080453395843506, "learning_rate": 0.0002, "loss": 1.4212, "step": 158600 }, { "epoch": 0.65, "grad_norm": 3.0295212268829346, "learning_rate": 0.0002, "loss": 1.3811, "step": 158610 }, { "epoch": 0.65, "grad_norm": 3.502345085144043, "learning_rate": 0.0002, "loss": 1.7163, "step": 158620 }, { "epoch": 0.65, "grad_norm": 3.8010191917419434, "learning_rate": 0.0002, "loss": 1.5686, "step": 158630 }, { "epoch": 0.65, "grad_norm": 2.778822660446167, "learning_rate": 0.0002, "loss": 1.3795, "step": 158640 }, { "epoch": 0.65, "grad_norm": 7.294536590576172, "learning_rate": 0.0002, "loss": 1.7092, "step": 158650 }, { "epoch": 0.65, "grad_norm": 2.728757619857788, "learning_rate": 0.0002, "loss": 1.5747, "step": 158660 }, { "epoch": 0.65, "grad_norm": 2.2978830337524414, "learning_rate": 0.0002, "loss": 1.7675, "step": 158670 }, { "epoch": 0.65, "grad_norm": 7.171021461486816, "learning_rate": 0.0002, "loss": 1.7385, "step": 158680 }, { "epoch": 0.65, "grad_norm": 3.79963755607605, "learning_rate": 0.0002, "loss": 1.803, "step": 158690 }, { "epoch": 0.65, "grad_norm": 2.1400740146636963, "learning_rate": 0.0002, "loss": 1.6614, "step": 158700 }, { "epoch": 0.65, "grad_norm": 3.0211570262908936, "learning_rate": 0.0002, "loss": 1.4202, "step": 158710 }, { "epoch": 0.65, "grad_norm": 3.359254837036133, "learning_rate": 0.0002, "loss": 1.6874, "step": 158720 }, { "epoch": 0.65, "grad_norm": 3.4006495475769043, "learning_rate": 0.0002, "loss": 1.5632, "step": 158730 }, { "epoch": 0.65, "grad_norm": 3.878798484802246, "learning_rate": 0.0002, "loss": 1.6359, "step": 158740 }, { "epoch": 0.65, "grad_norm": 3.402251720428467, "learning_rate": 0.0002, "loss": 1.4914, "step": 158750 }, { "epoch": 0.65, "grad_norm": 4.565312385559082, "learning_rate": 0.0002, "loss": 1.5106, "step": 158760 }, { "epoch": 0.65, "grad_norm": 2.6975626945495605, "learning_rate": 0.0002, "loss": 1.7555, "step": 158770 }, { "epoch": 0.65, "grad_norm": 2.307974338531494, "learning_rate": 0.0002, "loss": 1.4864, "step": 158780 }, { "epoch": 0.65, "grad_norm": 3.1474177837371826, "learning_rate": 0.0002, "loss": 1.6786, "step": 158790 }, { "epoch": 0.65, "grad_norm": 4.535497665405273, "learning_rate": 0.0002, "loss": 1.6844, "step": 158800 }, { "epoch": 0.65, "grad_norm": 2.5188541412353516, "learning_rate": 0.0002, "loss": 1.7638, "step": 158810 }, { "epoch": 0.65, "grad_norm": 2.6477293968200684, "learning_rate": 0.0002, "loss": 1.5831, "step": 158820 }, { "epoch": 0.65, "grad_norm": 4.769632339477539, "learning_rate": 0.0002, "loss": 1.8061, "step": 158830 }, { "epoch": 0.65, "grad_norm": 2.813589334487915, "learning_rate": 0.0002, "loss": 1.4581, "step": 158840 }, { "epoch": 0.65, "grad_norm": 4.303106784820557, "learning_rate": 0.0002, "loss": 1.4281, "step": 158850 }, { "epoch": 0.65, "grad_norm": 4.073075771331787, "learning_rate": 0.0002, "loss": 1.7812, "step": 158860 }, { "epoch": 0.65, "grad_norm": 12.433136940002441, "learning_rate": 0.0002, "loss": 1.5546, "step": 158870 }, { "epoch": 0.65, "grad_norm": 3.1490931510925293, "learning_rate": 0.0002, "loss": 1.6071, "step": 158880 }, { "epoch": 0.65, "grad_norm": 2.3046326637268066, "learning_rate": 0.0002, "loss": 1.4868, "step": 158890 }, { "epoch": 0.65, "grad_norm": 4.207827091217041, "learning_rate": 0.0002, "loss": 1.8107, "step": 158900 }, { "epoch": 0.65, "grad_norm": 2.452641010284424, "learning_rate": 0.0002, "loss": 1.8488, "step": 158910 }, { "epoch": 0.65, "grad_norm": 2.9169554710388184, "learning_rate": 0.0002, "loss": 1.392, "step": 158920 }, { "epoch": 0.65, "grad_norm": 4.256577968597412, "learning_rate": 0.0002, "loss": 1.8227, "step": 158930 }, { "epoch": 0.65, "grad_norm": 2.1872177124023438, "learning_rate": 0.0002, "loss": 1.4251, "step": 158940 }, { "epoch": 0.65, "grad_norm": 2.6488802433013916, "learning_rate": 0.0002, "loss": 1.4266, "step": 158950 }, { "epoch": 0.65, "grad_norm": 2.0755553245544434, "learning_rate": 0.0002, "loss": 1.4558, "step": 158960 }, { "epoch": 0.65, "grad_norm": 1.4990671873092651, "learning_rate": 0.0002, "loss": 1.672, "step": 158970 }, { "epoch": 0.65, "grad_norm": 3.5921154022216797, "learning_rate": 0.0002, "loss": 1.5768, "step": 158980 }, { "epoch": 0.65, "grad_norm": 2.460170269012451, "learning_rate": 0.0002, "loss": 1.456, "step": 158990 }, { "epoch": 0.65, "grad_norm": 4.5220866203308105, "learning_rate": 0.0002, "loss": 1.4257, "step": 159000 }, { "epoch": 0.65, "grad_norm": 3.543290853500366, "learning_rate": 0.0002, "loss": 1.5209, "step": 159010 }, { "epoch": 0.65, "grad_norm": 2.856208324432373, "learning_rate": 0.0002, "loss": 1.7174, "step": 159020 }, { "epoch": 0.65, "grad_norm": 1.951111912727356, "learning_rate": 0.0002, "loss": 1.7195, "step": 159030 }, { "epoch": 0.65, "grad_norm": 2.321514368057251, "learning_rate": 0.0002, "loss": 1.5432, "step": 159040 }, { "epoch": 0.65, "grad_norm": 3.5454986095428467, "learning_rate": 0.0002, "loss": 1.4906, "step": 159050 }, { "epoch": 0.65, "grad_norm": 3.381910562515259, "learning_rate": 0.0002, "loss": 1.893, "step": 159060 }, { "epoch": 0.65, "grad_norm": 3.2340126037597656, "learning_rate": 0.0002, "loss": 1.7172, "step": 159070 }, { "epoch": 0.65, "grad_norm": 2.587470293045044, "learning_rate": 0.0002, "loss": 1.6063, "step": 159080 }, { "epoch": 0.65, "grad_norm": 3.170886278152466, "learning_rate": 0.0002, "loss": 1.8052, "step": 159090 }, { "epoch": 0.65, "grad_norm": 3.0999114513397217, "learning_rate": 0.0002, "loss": 1.6356, "step": 159100 }, { "epoch": 0.65, "grad_norm": 3.8451449871063232, "learning_rate": 0.0002, "loss": 1.4278, "step": 159110 }, { "epoch": 0.65, "grad_norm": 2.9959847927093506, "learning_rate": 0.0002, "loss": 1.2915, "step": 159120 }, { "epoch": 0.65, "grad_norm": 2.8795857429504395, "learning_rate": 0.0002, "loss": 1.5581, "step": 159130 }, { "epoch": 0.65, "grad_norm": 2.337057590484619, "learning_rate": 0.0002, "loss": 1.4612, "step": 159140 }, { "epoch": 0.65, "grad_norm": 2.03078293800354, "learning_rate": 0.0002, "loss": 1.4569, "step": 159150 }, { "epoch": 0.65, "grad_norm": 2.6995861530303955, "learning_rate": 0.0002, "loss": 1.5224, "step": 159160 }, { "epoch": 0.65, "grad_norm": 1.9932739734649658, "learning_rate": 0.0002, "loss": 1.5724, "step": 159170 }, { "epoch": 0.65, "grad_norm": 4.505687236785889, "learning_rate": 0.0002, "loss": 1.5486, "step": 159180 }, { "epoch": 0.65, "grad_norm": 2.8517935276031494, "learning_rate": 0.0002, "loss": 1.7223, "step": 159190 }, { "epoch": 0.65, "grad_norm": 3.6785550117492676, "learning_rate": 0.0002, "loss": 1.4849, "step": 159200 }, { "epoch": 0.65, "grad_norm": 3.872734308242798, "learning_rate": 0.0002, "loss": 1.4845, "step": 159210 }, { "epoch": 0.65, "grad_norm": 2.736053466796875, "learning_rate": 0.0002, "loss": 1.6853, "step": 159220 }, { "epoch": 0.65, "grad_norm": 2.5217397212982178, "learning_rate": 0.0002, "loss": 1.5318, "step": 159230 }, { "epoch": 0.65, "grad_norm": 3.204699754714966, "learning_rate": 0.0002, "loss": 1.4794, "step": 159240 }, { "epoch": 0.65, "grad_norm": 3.1817073822021484, "learning_rate": 0.0002, "loss": 1.6113, "step": 159250 }, { "epoch": 0.65, "grad_norm": 2.3952231407165527, "learning_rate": 0.0002, "loss": 1.7656, "step": 159260 }, { "epoch": 0.65, "grad_norm": 2.7497830390930176, "learning_rate": 0.0002, "loss": 1.3123, "step": 159270 }, { "epoch": 0.65, "grad_norm": 1.9939082860946655, "learning_rate": 0.0002, "loss": 1.5599, "step": 159280 }, { "epoch": 0.65, "grad_norm": 3.4338245391845703, "learning_rate": 0.0002, "loss": 1.6449, "step": 159290 }, { "epoch": 0.65, "grad_norm": 2.924072027206421, "learning_rate": 0.0002, "loss": 1.7966, "step": 159300 }, { "epoch": 0.65, "grad_norm": 4.4363884925842285, "learning_rate": 0.0002, "loss": 1.5326, "step": 159310 }, { "epoch": 0.65, "grad_norm": 2.435405969619751, "learning_rate": 0.0002, "loss": 1.6655, "step": 159320 }, { "epoch": 0.65, "grad_norm": 2.606203556060791, "learning_rate": 0.0002, "loss": 1.5898, "step": 159330 }, { "epoch": 0.65, "grad_norm": 3.251495599746704, "learning_rate": 0.0002, "loss": 1.6113, "step": 159340 }, { "epoch": 0.65, "grad_norm": 2.0577504634857178, "learning_rate": 0.0002, "loss": 1.6451, "step": 159350 }, { "epoch": 0.65, "grad_norm": 2.0834736824035645, "learning_rate": 0.0002, "loss": 1.4538, "step": 159360 }, { "epoch": 0.65, "grad_norm": 2.78428316116333, "learning_rate": 0.0002, "loss": 1.7657, "step": 159370 }, { "epoch": 0.65, "grad_norm": 1.9749082326889038, "learning_rate": 0.0002, "loss": 1.4007, "step": 159380 }, { "epoch": 0.65, "grad_norm": 3.8402538299560547, "learning_rate": 0.0002, "loss": 1.4048, "step": 159390 }, { "epoch": 0.65, "grad_norm": 2.7194883823394775, "learning_rate": 0.0002, "loss": 1.7028, "step": 159400 }, { "epoch": 0.65, "grad_norm": 1.8893730640411377, "learning_rate": 0.0002, "loss": 1.6517, "step": 159410 }, { "epoch": 0.65, "grad_norm": 2.4794344902038574, "learning_rate": 0.0002, "loss": 1.6073, "step": 159420 }, { "epoch": 0.65, "grad_norm": 2.735318660736084, "learning_rate": 0.0002, "loss": 1.5252, "step": 159430 }, { "epoch": 0.65, "grad_norm": 2.466630458831787, "learning_rate": 0.0002, "loss": 1.6271, "step": 159440 }, { "epoch": 0.65, "grad_norm": 3.052061080932617, "learning_rate": 0.0002, "loss": 1.7024, "step": 159450 }, { "epoch": 0.65, "grad_norm": 2.648667335510254, "learning_rate": 0.0002, "loss": 1.4734, "step": 159460 }, { "epoch": 0.65, "grad_norm": 4.1127543449401855, "learning_rate": 0.0002, "loss": 1.636, "step": 159470 }, { "epoch": 0.65, "grad_norm": 5.237707138061523, "learning_rate": 0.0002, "loss": 1.6727, "step": 159480 }, { "epoch": 0.65, "grad_norm": 2.864912986755371, "learning_rate": 0.0002, "loss": 1.5632, "step": 159490 }, { "epoch": 0.65, "grad_norm": 2.7884256839752197, "learning_rate": 0.0002, "loss": 1.8592, "step": 159500 }, { "epoch": 0.65, "grad_norm": 3.2523128986358643, "learning_rate": 0.0002, "loss": 1.6695, "step": 159510 }, { "epoch": 0.65, "grad_norm": 4.286017894744873, "learning_rate": 0.0002, "loss": 1.5024, "step": 159520 }, { "epoch": 0.65, "grad_norm": 2.1159372329711914, "learning_rate": 0.0002, "loss": 1.4998, "step": 159530 }, { "epoch": 0.65, "grad_norm": 2.6298959255218506, "learning_rate": 0.0002, "loss": 1.5672, "step": 159540 }, { "epoch": 0.65, "grad_norm": 2.530414581298828, "learning_rate": 0.0002, "loss": 1.7921, "step": 159550 }, { "epoch": 0.65, "grad_norm": 2.7953755855560303, "learning_rate": 0.0002, "loss": 1.5342, "step": 159560 }, { "epoch": 0.65, "grad_norm": 2.4555604457855225, "learning_rate": 0.0002, "loss": 1.6097, "step": 159570 }, { "epoch": 0.65, "grad_norm": 2.9102261066436768, "learning_rate": 0.0002, "loss": 1.692, "step": 159580 }, { "epoch": 0.65, "grad_norm": 1.7507591247558594, "learning_rate": 0.0002, "loss": 1.353, "step": 159590 }, { "epoch": 0.65, "grad_norm": 1.4408477544784546, "learning_rate": 0.0002, "loss": 1.5297, "step": 159600 }, { "epoch": 0.65, "grad_norm": 2.2842721939086914, "learning_rate": 0.0002, "loss": 1.6088, "step": 159610 }, { "epoch": 0.65, "grad_norm": 2.56829571723938, "learning_rate": 0.0002, "loss": 1.6918, "step": 159620 }, { "epoch": 0.65, "grad_norm": 3.7881922721862793, "learning_rate": 0.0002, "loss": 1.4398, "step": 159630 }, { "epoch": 0.65, "grad_norm": 3.851752758026123, "learning_rate": 0.0002, "loss": 1.6281, "step": 159640 }, { "epoch": 0.65, "grad_norm": 2.7614598274230957, "learning_rate": 0.0002, "loss": 1.5796, "step": 159650 }, { "epoch": 0.65, "grad_norm": 4.704785346984863, "learning_rate": 0.0002, "loss": 1.6799, "step": 159660 }, { "epoch": 0.65, "grad_norm": 3.2798900604248047, "learning_rate": 0.0002, "loss": 1.4867, "step": 159670 }, { "epoch": 0.65, "grad_norm": 4.216997146606445, "learning_rate": 0.0002, "loss": 1.6962, "step": 159680 }, { "epoch": 0.65, "grad_norm": 3.4013454914093018, "learning_rate": 0.0002, "loss": 1.4835, "step": 159690 }, { "epoch": 0.65, "grad_norm": 3.4198596477508545, "learning_rate": 0.0002, "loss": 1.4484, "step": 159700 }, { "epoch": 0.65, "grad_norm": 2.362722873687744, "learning_rate": 0.0002, "loss": 1.6721, "step": 159710 }, { "epoch": 0.65, "grad_norm": 2.432231903076172, "learning_rate": 0.0002, "loss": 1.5118, "step": 159720 }, { "epoch": 0.65, "grad_norm": 5.205719947814941, "learning_rate": 0.0002, "loss": 1.4746, "step": 159730 }, { "epoch": 0.65, "grad_norm": 3.4818954467773438, "learning_rate": 0.0002, "loss": 1.7199, "step": 159740 }, { "epoch": 0.65, "grad_norm": 3.7185285091400146, "learning_rate": 0.0002, "loss": 1.7089, "step": 159750 }, { "epoch": 0.65, "grad_norm": 4.593902587890625, "learning_rate": 0.0002, "loss": 1.7462, "step": 159760 }, { "epoch": 0.65, "grad_norm": 3.8348793983459473, "learning_rate": 0.0002, "loss": 1.4221, "step": 159770 }, { "epoch": 0.65, "grad_norm": 1.8938382863998413, "learning_rate": 0.0002, "loss": 1.4648, "step": 159780 }, { "epoch": 0.65, "grad_norm": 4.251233100891113, "learning_rate": 0.0002, "loss": 1.7451, "step": 159790 }, { "epoch": 0.65, "grad_norm": 2.6905508041381836, "learning_rate": 0.0002, "loss": 1.721, "step": 159800 }, { "epoch": 0.65, "grad_norm": 2.688171148300171, "learning_rate": 0.0002, "loss": 1.378, "step": 159810 }, { "epoch": 0.65, "grad_norm": 4.245928764343262, "learning_rate": 0.0002, "loss": 1.9112, "step": 159820 }, { "epoch": 0.65, "grad_norm": 2.8296360969543457, "learning_rate": 0.0002, "loss": 1.5094, "step": 159830 }, { "epoch": 0.65, "grad_norm": 2.1353602409362793, "learning_rate": 0.0002, "loss": 1.5407, "step": 159840 }, { "epoch": 0.65, "grad_norm": 3.8084206581115723, "learning_rate": 0.0002, "loss": 1.7627, "step": 159850 }, { "epoch": 0.65, "grad_norm": 3.8480374813079834, "learning_rate": 0.0002, "loss": 1.746, "step": 159860 }, { "epoch": 0.65, "grad_norm": 3.094020366668701, "learning_rate": 0.0002, "loss": 1.3938, "step": 159870 }, { "epoch": 0.65, "grad_norm": 2.7135047912597656, "learning_rate": 0.0002, "loss": 1.422, "step": 159880 }, { "epoch": 0.65, "grad_norm": 1.4662179946899414, "learning_rate": 0.0002, "loss": 1.6283, "step": 159890 }, { "epoch": 0.65, "grad_norm": 4.429464817047119, "learning_rate": 0.0002, "loss": 1.5832, "step": 159900 }, { "epoch": 0.65, "grad_norm": 3.116748332977295, "learning_rate": 0.0002, "loss": 1.7688, "step": 159910 }, { "epoch": 0.65, "grad_norm": 2.8450372219085693, "learning_rate": 0.0002, "loss": 1.6816, "step": 159920 }, { "epoch": 0.65, "grad_norm": 4.6109619140625, "learning_rate": 0.0002, "loss": 1.6206, "step": 159930 }, { "epoch": 0.65, "grad_norm": 3.087880849838257, "learning_rate": 0.0002, "loss": 1.4299, "step": 159940 }, { "epoch": 0.65, "grad_norm": 3.346928119659424, "learning_rate": 0.0002, "loss": 1.6459, "step": 159950 }, { "epoch": 0.65, "grad_norm": 4.235134124755859, "learning_rate": 0.0002, "loss": 1.6823, "step": 159960 }, { "epoch": 0.65, "grad_norm": 2.7380380630493164, "learning_rate": 0.0002, "loss": 1.5882, "step": 159970 }, { "epoch": 0.65, "grad_norm": 2.618563413619995, "learning_rate": 0.0002, "loss": 1.562, "step": 159980 }, { "epoch": 0.65, "grad_norm": 3.4186580181121826, "learning_rate": 0.0002, "loss": 1.8139, "step": 159990 }, { "epoch": 0.65, "grad_norm": 4.645101547241211, "learning_rate": 0.0002, "loss": 1.527, "step": 160000 }, { "epoch": 0.65, "grad_norm": 1.6952687501907349, "learning_rate": 0.0002, "loss": 1.5172, "step": 160010 }, { "epoch": 0.65, "grad_norm": 3.161025047302246, "learning_rate": 0.0002, "loss": 1.644, "step": 160020 }, { "epoch": 0.65, "grad_norm": 2.638880968093872, "learning_rate": 0.0002, "loss": 1.68, "step": 160030 }, { "epoch": 0.65, "grad_norm": 2.249993085861206, "learning_rate": 0.0002, "loss": 1.7962, "step": 160040 }, { "epoch": 0.65, "grad_norm": 2.2327067852020264, "learning_rate": 0.0002, "loss": 1.7886, "step": 160050 }, { "epoch": 0.65, "grad_norm": 3.54140567779541, "learning_rate": 0.0002, "loss": 1.477, "step": 160060 }, { "epoch": 0.65, "grad_norm": 4.678021430969238, "learning_rate": 0.0002, "loss": 1.5998, "step": 160070 }, { "epoch": 0.65, "grad_norm": 2.249377489089966, "learning_rate": 0.0002, "loss": 1.5927, "step": 160080 }, { "epoch": 0.65, "grad_norm": 3.549837589263916, "learning_rate": 0.0002, "loss": 1.5149, "step": 160090 }, { "epoch": 0.65, "grad_norm": 2.795117139816284, "learning_rate": 0.0002, "loss": 1.6103, "step": 160100 }, { "epoch": 0.65, "grad_norm": 3.0463712215423584, "learning_rate": 0.0002, "loss": 1.5677, "step": 160110 }, { "epoch": 0.65, "grad_norm": 2.920090675354004, "learning_rate": 0.0002, "loss": 1.7482, "step": 160120 }, { "epoch": 0.65, "grad_norm": 2.9775218963623047, "learning_rate": 0.0002, "loss": 1.5135, "step": 160130 }, { "epoch": 0.65, "grad_norm": 3.198240041732788, "learning_rate": 0.0002, "loss": 1.5383, "step": 160140 }, { "epoch": 0.65, "grad_norm": 2.900938034057617, "learning_rate": 0.0002, "loss": 1.5751, "step": 160150 }, { "epoch": 0.65, "grad_norm": 3.7970123291015625, "learning_rate": 0.0002, "loss": 1.4826, "step": 160160 }, { "epoch": 0.65, "grad_norm": 3.5867981910705566, "learning_rate": 0.0002, "loss": 1.4989, "step": 160170 }, { "epoch": 0.65, "grad_norm": 2.310471534729004, "learning_rate": 0.0002, "loss": 1.4407, "step": 160180 }, { "epoch": 0.65, "grad_norm": 3.852421283721924, "learning_rate": 0.0002, "loss": 1.8528, "step": 160190 }, { "epoch": 0.65, "grad_norm": 3.3764400482177734, "learning_rate": 0.0002, "loss": 1.7072, "step": 160200 }, { "epoch": 0.65, "grad_norm": 4.247435092926025, "learning_rate": 0.0002, "loss": 1.4988, "step": 160210 }, { "epoch": 0.65, "grad_norm": 4.240201473236084, "learning_rate": 0.0002, "loss": 1.7221, "step": 160220 }, { "epoch": 0.65, "grad_norm": 2.7142715454101562, "learning_rate": 0.0002, "loss": 1.6161, "step": 160230 }, { "epoch": 0.65, "grad_norm": 2.7471325397491455, "learning_rate": 0.0002, "loss": 1.4225, "step": 160240 }, { "epoch": 0.65, "grad_norm": 3.059462308883667, "learning_rate": 0.0002, "loss": 1.6248, "step": 160250 }, { "epoch": 0.65, "grad_norm": 3.7498905658721924, "learning_rate": 0.0002, "loss": 1.5479, "step": 160260 }, { "epoch": 0.65, "grad_norm": 2.5642030239105225, "learning_rate": 0.0002, "loss": 1.3189, "step": 160270 }, { "epoch": 0.65, "grad_norm": 2.3542492389678955, "learning_rate": 0.0002, "loss": 1.3432, "step": 160280 }, { "epoch": 0.65, "grad_norm": 2.6049516201019287, "learning_rate": 0.0002, "loss": 1.6267, "step": 160290 }, { "epoch": 0.65, "grad_norm": 4.037672519683838, "learning_rate": 0.0002, "loss": 1.7974, "step": 160300 }, { "epoch": 0.65, "grad_norm": 3.1814143657684326, "learning_rate": 0.0002, "loss": 1.7415, "step": 160310 }, { "epoch": 0.65, "grad_norm": 3.063180446624756, "learning_rate": 0.0002, "loss": 1.4118, "step": 160320 }, { "epoch": 0.65, "grad_norm": 4.462960243225098, "learning_rate": 0.0002, "loss": 1.3525, "step": 160330 }, { "epoch": 0.65, "grad_norm": 3.3375446796417236, "learning_rate": 0.0002, "loss": 1.5419, "step": 160340 }, { "epoch": 0.65, "grad_norm": 3.185105562210083, "learning_rate": 0.0002, "loss": 1.5678, "step": 160350 }, { "epoch": 0.65, "grad_norm": 2.625293493270874, "learning_rate": 0.0002, "loss": 1.5784, "step": 160360 }, { "epoch": 0.65, "grad_norm": 2.7037014961242676, "learning_rate": 0.0002, "loss": 1.6151, "step": 160370 }, { "epoch": 0.65, "grad_norm": 4.189154624938965, "learning_rate": 0.0002, "loss": 1.6507, "step": 160380 }, { "epoch": 0.65, "grad_norm": 3.914550542831421, "learning_rate": 0.0002, "loss": 1.8354, "step": 160390 }, { "epoch": 0.65, "grad_norm": 2.816901683807373, "learning_rate": 0.0002, "loss": 1.5718, "step": 160400 }, { "epoch": 0.65, "grad_norm": 2.445573091506958, "learning_rate": 0.0002, "loss": 1.599, "step": 160410 }, { "epoch": 0.65, "grad_norm": 2.970463275909424, "learning_rate": 0.0002, "loss": 1.8526, "step": 160420 }, { "epoch": 0.65, "grad_norm": 3.761524200439453, "learning_rate": 0.0002, "loss": 1.4828, "step": 160430 }, { "epoch": 0.65, "grad_norm": 2.714813232421875, "learning_rate": 0.0002, "loss": 1.4117, "step": 160440 }, { "epoch": 0.65, "grad_norm": 2.5452957153320312, "learning_rate": 0.0002, "loss": 1.6591, "step": 160450 }, { "epoch": 0.65, "grad_norm": 3.724083185195923, "learning_rate": 0.0002, "loss": 1.3762, "step": 160460 }, { "epoch": 0.65, "grad_norm": 7.871354103088379, "learning_rate": 0.0002, "loss": 1.7416, "step": 160470 }, { "epoch": 0.65, "grad_norm": 2.7780370712280273, "learning_rate": 0.0002, "loss": 1.4729, "step": 160480 }, { "epoch": 0.65, "grad_norm": 3.932361602783203, "learning_rate": 0.0002, "loss": 1.6834, "step": 160490 }, { "epoch": 0.65, "grad_norm": 2.569064140319824, "learning_rate": 0.0002, "loss": 1.6369, "step": 160500 }, { "epoch": 0.65, "grad_norm": 6.4423346519470215, "learning_rate": 0.0002, "loss": 1.4858, "step": 160510 }, { "epoch": 0.65, "grad_norm": 3.5479161739349365, "learning_rate": 0.0002, "loss": 1.5954, "step": 160520 }, { "epoch": 0.65, "grad_norm": 3.8392271995544434, "learning_rate": 0.0002, "loss": 1.5373, "step": 160530 }, { "epoch": 0.65, "grad_norm": 3.56085467338562, "learning_rate": 0.0002, "loss": 1.5509, "step": 160540 }, { "epoch": 0.65, "grad_norm": 3.682300329208374, "learning_rate": 0.0002, "loss": 1.6115, "step": 160550 }, { "epoch": 0.65, "grad_norm": 2.9036576747894287, "learning_rate": 0.0002, "loss": 1.7315, "step": 160560 }, { "epoch": 0.65, "grad_norm": 4.3700456619262695, "learning_rate": 0.0002, "loss": 1.6399, "step": 160570 }, { "epoch": 0.65, "grad_norm": 2.181211233139038, "learning_rate": 0.0002, "loss": 1.6452, "step": 160580 }, { "epoch": 0.65, "grad_norm": 4.668014049530029, "learning_rate": 0.0002, "loss": 1.5536, "step": 160590 }, { "epoch": 0.65, "grad_norm": 3.212998628616333, "learning_rate": 0.0002, "loss": 1.427, "step": 160600 }, { "epoch": 0.65, "grad_norm": 2.828003168106079, "learning_rate": 0.0002, "loss": 1.6581, "step": 160610 }, { "epoch": 0.65, "grad_norm": 4.147526741027832, "learning_rate": 0.0002, "loss": 1.7978, "step": 160620 }, { "epoch": 0.65, "grad_norm": 3.797283411026001, "learning_rate": 0.0002, "loss": 1.3899, "step": 160630 }, { "epoch": 0.65, "grad_norm": 2.7518045902252197, "learning_rate": 0.0002, "loss": 1.7594, "step": 160640 }, { "epoch": 0.65, "grad_norm": 1.8520044088363647, "learning_rate": 0.0002, "loss": 1.5863, "step": 160650 }, { "epoch": 0.65, "grad_norm": 2.7305359840393066, "learning_rate": 0.0002, "loss": 1.711, "step": 160660 }, { "epoch": 0.65, "grad_norm": 2.4591610431671143, "learning_rate": 0.0002, "loss": 1.5848, "step": 160670 }, { "epoch": 0.65, "grad_norm": 2.9875974655151367, "learning_rate": 0.0002, "loss": 1.435, "step": 160680 }, { "epoch": 0.65, "grad_norm": 2.4615466594696045, "learning_rate": 0.0002, "loss": 1.8255, "step": 160690 }, { "epoch": 0.65, "grad_norm": 3.0219504833221436, "learning_rate": 0.0002, "loss": 1.5904, "step": 160700 }, { "epoch": 0.65, "grad_norm": 2.294325828552246, "learning_rate": 0.0002, "loss": 1.5228, "step": 160710 }, { "epoch": 0.65, "grad_norm": 1.8716578483581543, "learning_rate": 0.0002, "loss": 1.6792, "step": 160720 }, { "epoch": 0.65, "grad_norm": 4.627278804779053, "learning_rate": 0.0002, "loss": 1.6447, "step": 160730 }, { "epoch": 0.65, "grad_norm": 2.2101080417633057, "learning_rate": 0.0002, "loss": 1.4855, "step": 160740 }, { "epoch": 0.65, "grad_norm": 2.719318389892578, "learning_rate": 0.0002, "loss": 1.5589, "step": 160750 }, { "epoch": 0.65, "grad_norm": 1.5920993089675903, "learning_rate": 0.0002, "loss": 1.5349, "step": 160760 }, { "epoch": 0.65, "grad_norm": 2.994871139526367, "learning_rate": 0.0002, "loss": 1.5857, "step": 160770 }, { "epoch": 0.65, "grad_norm": 4.636584281921387, "learning_rate": 0.0002, "loss": 1.4885, "step": 160780 }, { "epoch": 0.65, "grad_norm": 3.5923216342926025, "learning_rate": 0.0002, "loss": 1.5193, "step": 160790 }, { "epoch": 0.65, "grad_norm": 3.088114023208618, "learning_rate": 0.0002, "loss": 1.7472, "step": 160800 }, { "epoch": 0.65, "grad_norm": 2.7513601779937744, "learning_rate": 0.0002, "loss": 1.6636, "step": 160810 }, { "epoch": 0.65, "grad_norm": 3.863649606704712, "learning_rate": 0.0002, "loss": 1.3829, "step": 160820 }, { "epoch": 0.65, "grad_norm": 4.028121471405029, "learning_rate": 0.0002, "loss": 1.5916, "step": 160830 }, { "epoch": 0.65, "grad_norm": 2.704259157180786, "learning_rate": 0.0002, "loss": 1.5818, "step": 160840 }, { "epoch": 0.65, "grad_norm": 2.968730926513672, "learning_rate": 0.0002, "loss": 1.6028, "step": 160850 }, { "epoch": 0.65, "grad_norm": 2.83687686920166, "learning_rate": 0.0002, "loss": 1.3514, "step": 160860 }, { "epoch": 0.65, "grad_norm": 1.4831806421279907, "learning_rate": 0.0002, "loss": 1.4864, "step": 160870 }, { "epoch": 0.65, "grad_norm": 3.180657386779785, "learning_rate": 0.0002, "loss": 1.5417, "step": 160880 }, { "epoch": 0.65, "grad_norm": 5.212709426879883, "learning_rate": 0.0002, "loss": 1.5331, "step": 160890 }, { "epoch": 0.66, "grad_norm": 4.253506660461426, "learning_rate": 0.0002, "loss": 1.7363, "step": 160900 }, { "epoch": 0.66, "grad_norm": 3.20082688331604, "learning_rate": 0.0002, "loss": 1.4198, "step": 160910 }, { "epoch": 0.66, "grad_norm": 3.0981719493865967, "learning_rate": 0.0002, "loss": 1.6628, "step": 160920 }, { "epoch": 0.66, "grad_norm": 3.3876967430114746, "learning_rate": 0.0002, "loss": 1.5518, "step": 160930 }, { "epoch": 0.66, "grad_norm": 2.599250555038452, "learning_rate": 0.0002, "loss": 1.5722, "step": 160940 }, { "epoch": 0.66, "grad_norm": 0.9135868549346924, "learning_rate": 0.0002, "loss": 1.3673, "step": 160950 }, { "epoch": 0.66, "grad_norm": 2.5345098972320557, "learning_rate": 0.0002, "loss": 1.5313, "step": 160960 }, { "epoch": 0.66, "grad_norm": 3.0786197185516357, "learning_rate": 0.0002, "loss": 1.5578, "step": 160970 }, { "epoch": 0.66, "grad_norm": 2.3380484580993652, "learning_rate": 0.0002, "loss": 1.4588, "step": 160980 }, { "epoch": 0.66, "grad_norm": 1.6438528299331665, "learning_rate": 0.0002, "loss": 1.4544, "step": 160990 }, { "epoch": 0.66, "grad_norm": 1.9110404253005981, "learning_rate": 0.0002, "loss": 1.4422, "step": 161000 }, { "epoch": 0.66, "grad_norm": 2.8603620529174805, "learning_rate": 0.0002, "loss": 1.6414, "step": 161010 }, { "epoch": 0.66, "grad_norm": 5.057913303375244, "learning_rate": 0.0002, "loss": 1.4642, "step": 161020 }, { "epoch": 0.66, "grad_norm": 2.2170557975769043, "learning_rate": 0.0002, "loss": 1.66, "step": 161030 }, { "epoch": 0.66, "grad_norm": 4.88305139541626, "learning_rate": 0.0002, "loss": 1.5493, "step": 161040 }, { "epoch": 0.66, "grad_norm": 1.9737170934677124, "learning_rate": 0.0002, "loss": 1.6006, "step": 161050 }, { "epoch": 0.66, "grad_norm": 2.977966785430908, "learning_rate": 0.0002, "loss": 1.5505, "step": 161060 }, { "epoch": 0.66, "grad_norm": 4.008727073669434, "learning_rate": 0.0002, "loss": 1.8218, "step": 161070 }, { "epoch": 0.66, "grad_norm": 2.4465930461883545, "learning_rate": 0.0002, "loss": 1.5516, "step": 161080 }, { "epoch": 0.66, "grad_norm": 2.9579432010650635, "learning_rate": 0.0002, "loss": 1.5411, "step": 161090 }, { "epoch": 0.66, "grad_norm": 4.75169038772583, "learning_rate": 0.0002, "loss": 1.6359, "step": 161100 }, { "epoch": 0.66, "grad_norm": 2.61555814743042, "learning_rate": 0.0002, "loss": 1.6654, "step": 161110 }, { "epoch": 0.66, "grad_norm": 2.876262664794922, "learning_rate": 0.0002, "loss": 1.507, "step": 161120 }, { "epoch": 0.66, "grad_norm": 2.038069009780884, "learning_rate": 0.0002, "loss": 1.5773, "step": 161130 }, { "epoch": 0.66, "grad_norm": 4.582350254058838, "learning_rate": 0.0002, "loss": 1.7624, "step": 161140 }, { "epoch": 0.66, "grad_norm": 2.029794216156006, "learning_rate": 0.0002, "loss": 1.5203, "step": 161150 }, { "epoch": 0.66, "grad_norm": 5.9558024406433105, "learning_rate": 0.0002, "loss": 1.6194, "step": 161160 }, { "epoch": 0.66, "grad_norm": 3.4707682132720947, "learning_rate": 0.0002, "loss": 1.7357, "step": 161170 }, { "epoch": 0.66, "grad_norm": 1.9815634489059448, "learning_rate": 0.0002, "loss": 1.3903, "step": 161180 }, { "epoch": 0.66, "grad_norm": 2.6589601039886475, "learning_rate": 0.0002, "loss": 1.4418, "step": 161190 }, { "epoch": 0.66, "grad_norm": 2.8160343170166016, "learning_rate": 0.0002, "loss": 1.3699, "step": 161200 }, { "epoch": 0.66, "grad_norm": 3.4528751373291016, "learning_rate": 0.0002, "loss": 1.628, "step": 161210 }, { "epoch": 0.66, "grad_norm": 2.3091163635253906, "learning_rate": 0.0002, "loss": 1.6656, "step": 161220 }, { "epoch": 0.66, "grad_norm": 2.689474582672119, "learning_rate": 0.0002, "loss": 1.6031, "step": 161230 }, { "epoch": 0.66, "grad_norm": 1.8577313423156738, "learning_rate": 0.0002, "loss": 1.712, "step": 161240 }, { "epoch": 0.66, "grad_norm": 4.554281711578369, "learning_rate": 0.0002, "loss": 1.792, "step": 161250 }, { "epoch": 0.66, "grad_norm": 3.624692678451538, "learning_rate": 0.0002, "loss": 1.8524, "step": 161260 }, { "epoch": 0.66, "grad_norm": 4.01756477355957, "learning_rate": 0.0002, "loss": 1.5566, "step": 161270 }, { "epoch": 0.66, "grad_norm": 3.3305587768554688, "learning_rate": 0.0002, "loss": 1.7023, "step": 161280 }, { "epoch": 0.66, "grad_norm": 3.9321117401123047, "learning_rate": 0.0002, "loss": 1.6157, "step": 161290 }, { "epoch": 0.66, "grad_norm": 3.5022811889648438, "learning_rate": 0.0002, "loss": 1.7568, "step": 161300 }, { "epoch": 0.66, "grad_norm": 2.0099570751190186, "learning_rate": 0.0002, "loss": 1.5631, "step": 161310 }, { "epoch": 0.66, "grad_norm": 2.1934494972229004, "learning_rate": 0.0002, "loss": 1.4192, "step": 161320 }, { "epoch": 0.66, "grad_norm": 3.2361884117126465, "learning_rate": 0.0002, "loss": 1.3397, "step": 161330 }, { "epoch": 0.66, "grad_norm": 3.870049238204956, "learning_rate": 0.0002, "loss": 1.5868, "step": 161340 }, { "epoch": 0.66, "grad_norm": 2.8813891410827637, "learning_rate": 0.0002, "loss": 1.6086, "step": 161350 }, { "epoch": 0.66, "grad_norm": 2.014721393585205, "learning_rate": 0.0002, "loss": 1.588, "step": 161360 }, { "epoch": 0.66, "grad_norm": 3.626493453979492, "learning_rate": 0.0002, "loss": 1.4248, "step": 161370 }, { "epoch": 0.66, "grad_norm": 3.694509267807007, "learning_rate": 0.0002, "loss": 1.6806, "step": 161380 }, { "epoch": 0.66, "grad_norm": 2.388087272644043, "learning_rate": 0.0002, "loss": 1.4754, "step": 161390 }, { "epoch": 0.66, "grad_norm": 3.028679847717285, "learning_rate": 0.0002, "loss": 1.3972, "step": 161400 }, { "epoch": 0.66, "grad_norm": 1.8776706457138062, "learning_rate": 0.0002, "loss": 1.6473, "step": 161410 }, { "epoch": 0.66, "grad_norm": 3.9480249881744385, "learning_rate": 0.0002, "loss": 1.619, "step": 161420 }, { "epoch": 0.66, "grad_norm": 3.7481703758239746, "learning_rate": 0.0002, "loss": 1.6184, "step": 161430 }, { "epoch": 0.66, "grad_norm": 2.911562204360962, "learning_rate": 0.0002, "loss": 1.5584, "step": 161440 }, { "epoch": 0.66, "grad_norm": 3.3174383640289307, "learning_rate": 0.0002, "loss": 1.3509, "step": 161450 }, { "epoch": 0.66, "grad_norm": 2.0612387657165527, "learning_rate": 0.0002, "loss": 1.5975, "step": 161460 }, { "epoch": 0.66, "grad_norm": 3.1418676376342773, "learning_rate": 0.0002, "loss": 1.7265, "step": 161470 }, { "epoch": 0.66, "grad_norm": 2.6858620643615723, "learning_rate": 0.0002, "loss": 1.6681, "step": 161480 }, { "epoch": 0.66, "grad_norm": 2.8856589794158936, "learning_rate": 0.0002, "loss": 1.5057, "step": 161490 }, { "epoch": 0.66, "grad_norm": 2.1517059803009033, "learning_rate": 0.0002, "loss": 1.6209, "step": 161500 }, { "epoch": 0.66, "grad_norm": 3.2018356323242188, "learning_rate": 0.0002, "loss": 1.4062, "step": 161510 }, { "epoch": 0.66, "grad_norm": 2.996392250061035, "learning_rate": 0.0002, "loss": 1.5359, "step": 161520 }, { "epoch": 0.66, "grad_norm": 2.518583297729492, "learning_rate": 0.0002, "loss": 1.512, "step": 161530 }, { "epoch": 0.66, "grad_norm": 1.8407328128814697, "learning_rate": 0.0002, "loss": 1.7307, "step": 161540 }, { "epoch": 0.66, "grad_norm": 4.727161407470703, "learning_rate": 0.0002, "loss": 1.6364, "step": 161550 }, { "epoch": 0.66, "grad_norm": 1.4418635368347168, "learning_rate": 0.0002, "loss": 1.8403, "step": 161560 }, { "epoch": 0.66, "grad_norm": 2.076965808868408, "learning_rate": 0.0002, "loss": 1.3723, "step": 161570 }, { "epoch": 0.66, "grad_norm": 1.6568878889083862, "learning_rate": 0.0002, "loss": 1.4848, "step": 161580 }, { "epoch": 0.66, "grad_norm": 4.598419666290283, "learning_rate": 0.0002, "loss": 1.5934, "step": 161590 }, { "epoch": 0.66, "grad_norm": 1.5474447011947632, "learning_rate": 0.0002, "loss": 1.5409, "step": 161600 }, { "epoch": 0.66, "grad_norm": 3.0079050064086914, "learning_rate": 0.0002, "loss": 1.6701, "step": 161610 }, { "epoch": 0.66, "grad_norm": 2.808368444442749, "learning_rate": 0.0002, "loss": 1.7104, "step": 161620 }, { "epoch": 0.66, "grad_norm": 2.107346534729004, "learning_rate": 0.0002, "loss": 1.3792, "step": 161630 }, { "epoch": 0.66, "grad_norm": 3.216759204864502, "learning_rate": 0.0002, "loss": 1.3897, "step": 161640 }, { "epoch": 0.66, "grad_norm": 2.2517201900482178, "learning_rate": 0.0002, "loss": 1.4812, "step": 161650 }, { "epoch": 0.66, "grad_norm": 4.040902137756348, "learning_rate": 0.0002, "loss": 1.6243, "step": 161660 }, { "epoch": 0.66, "grad_norm": 2.8277227878570557, "learning_rate": 0.0002, "loss": 1.435, "step": 161670 }, { "epoch": 0.66, "grad_norm": 2.8008873462677, "learning_rate": 0.0002, "loss": 1.8491, "step": 161680 }, { "epoch": 0.66, "grad_norm": 3.82378888130188, "learning_rate": 0.0002, "loss": 1.5276, "step": 161690 }, { "epoch": 0.66, "grad_norm": 1.9973150491714478, "learning_rate": 0.0002, "loss": 1.8736, "step": 161700 }, { "epoch": 0.66, "grad_norm": 3.63181734085083, "learning_rate": 0.0002, "loss": 1.3648, "step": 161710 }, { "epoch": 0.66, "grad_norm": 2.0784943103790283, "learning_rate": 0.0002, "loss": 1.486, "step": 161720 }, { "epoch": 0.66, "grad_norm": 2.1960561275482178, "learning_rate": 0.0002, "loss": 1.4609, "step": 161730 }, { "epoch": 0.66, "grad_norm": 3.1347756385803223, "learning_rate": 0.0002, "loss": 1.5207, "step": 161740 }, { "epoch": 0.66, "grad_norm": 5.7960100173950195, "learning_rate": 0.0002, "loss": 1.53, "step": 161750 }, { "epoch": 0.66, "grad_norm": 2.3651304244995117, "learning_rate": 0.0002, "loss": 1.7729, "step": 161760 }, { "epoch": 0.66, "grad_norm": 3.654642105102539, "learning_rate": 0.0002, "loss": 1.6066, "step": 161770 }, { "epoch": 0.66, "grad_norm": 2.169605255126953, "learning_rate": 0.0002, "loss": 1.6077, "step": 161780 }, { "epoch": 0.66, "grad_norm": 2.4638214111328125, "learning_rate": 0.0002, "loss": 1.6754, "step": 161790 }, { "epoch": 0.66, "grad_norm": 2.7315754890441895, "learning_rate": 0.0002, "loss": 1.7364, "step": 161800 }, { "epoch": 0.66, "grad_norm": 2.380138397216797, "learning_rate": 0.0002, "loss": 1.6666, "step": 161810 }, { "epoch": 0.66, "grad_norm": 2.492229700088501, "learning_rate": 0.0002, "loss": 1.5131, "step": 161820 }, { "epoch": 0.66, "grad_norm": 4.843387126922607, "learning_rate": 0.0002, "loss": 1.425, "step": 161830 }, { "epoch": 0.66, "grad_norm": 3.033961534500122, "learning_rate": 0.0002, "loss": 1.698, "step": 161840 }, { "epoch": 0.66, "grad_norm": 4.49305534362793, "learning_rate": 0.0002, "loss": 1.3791, "step": 161850 }, { "epoch": 0.66, "grad_norm": 3.5529470443725586, "learning_rate": 0.0002, "loss": 1.8698, "step": 161860 }, { "epoch": 0.66, "grad_norm": 2.911254405975342, "learning_rate": 0.0002, "loss": 1.8677, "step": 161870 }, { "epoch": 0.66, "grad_norm": 2.769259214401245, "learning_rate": 0.0002, "loss": 1.4722, "step": 161880 }, { "epoch": 0.66, "grad_norm": 3.5684516429901123, "learning_rate": 0.0002, "loss": 1.6401, "step": 161890 }, { "epoch": 0.66, "grad_norm": 3.350658655166626, "learning_rate": 0.0002, "loss": 1.5971, "step": 161900 }, { "epoch": 0.66, "grad_norm": 2.3373050689697266, "learning_rate": 0.0002, "loss": 1.6406, "step": 161910 }, { "epoch": 0.66, "grad_norm": 6.891529083251953, "learning_rate": 0.0002, "loss": 1.7373, "step": 161920 }, { "epoch": 0.66, "grad_norm": 1.7242426872253418, "learning_rate": 0.0002, "loss": 1.3381, "step": 161930 }, { "epoch": 0.66, "grad_norm": 1.7090281248092651, "learning_rate": 0.0002, "loss": 1.4978, "step": 161940 }, { "epoch": 0.66, "grad_norm": 2.9377336502075195, "learning_rate": 0.0002, "loss": 1.4079, "step": 161950 }, { "epoch": 0.66, "grad_norm": 3.7579879760742188, "learning_rate": 0.0002, "loss": 1.3698, "step": 161960 }, { "epoch": 0.66, "grad_norm": 2.3947572708129883, "learning_rate": 0.0002, "loss": 1.5158, "step": 161970 }, { "epoch": 0.66, "grad_norm": 3.0401721000671387, "learning_rate": 0.0002, "loss": 1.5262, "step": 161980 }, { "epoch": 0.66, "grad_norm": 3.08130145072937, "learning_rate": 0.0002, "loss": 1.7905, "step": 161990 }, { "epoch": 0.66, "grad_norm": 2.0393636226654053, "learning_rate": 0.0002, "loss": 1.5983, "step": 162000 }, { "epoch": 0.66, "grad_norm": 2.350208282470703, "learning_rate": 0.0002, "loss": 1.578, "step": 162010 }, { "epoch": 0.66, "grad_norm": 3.339165687561035, "learning_rate": 0.0002, "loss": 1.7215, "step": 162020 }, { "epoch": 0.66, "grad_norm": 1.791212797164917, "learning_rate": 0.0002, "loss": 1.5684, "step": 162030 }, { "epoch": 0.66, "grad_norm": 2.5930423736572266, "learning_rate": 0.0002, "loss": 1.6504, "step": 162040 }, { "epoch": 0.66, "grad_norm": 5.375638484954834, "learning_rate": 0.0002, "loss": 1.4356, "step": 162050 }, { "epoch": 0.66, "grad_norm": 2.249345541000366, "learning_rate": 0.0002, "loss": 1.5933, "step": 162060 }, { "epoch": 0.66, "grad_norm": 4.171838760375977, "learning_rate": 0.0002, "loss": 1.5324, "step": 162070 }, { "epoch": 0.66, "grad_norm": 2.106755495071411, "learning_rate": 0.0002, "loss": 1.7798, "step": 162080 }, { "epoch": 0.66, "grad_norm": 3.4051353931427, "learning_rate": 0.0002, "loss": 1.5516, "step": 162090 }, { "epoch": 0.66, "grad_norm": 3.307018280029297, "learning_rate": 0.0002, "loss": 1.6522, "step": 162100 }, { "epoch": 0.66, "grad_norm": 3.809542179107666, "learning_rate": 0.0002, "loss": 1.4602, "step": 162110 }, { "epoch": 0.66, "grad_norm": 3.2576823234558105, "learning_rate": 0.0002, "loss": 1.4231, "step": 162120 }, { "epoch": 0.66, "grad_norm": 2.7344939708709717, "learning_rate": 0.0002, "loss": 1.7007, "step": 162130 }, { "epoch": 0.66, "grad_norm": 3.5093297958374023, "learning_rate": 0.0002, "loss": 1.7261, "step": 162140 }, { "epoch": 0.66, "grad_norm": 2.220548629760742, "learning_rate": 0.0002, "loss": 1.6461, "step": 162150 }, { "epoch": 0.66, "grad_norm": 6.187688827514648, "learning_rate": 0.0002, "loss": 1.5524, "step": 162160 }, { "epoch": 0.66, "grad_norm": 4.7051920890808105, "learning_rate": 0.0002, "loss": 1.6725, "step": 162170 }, { "epoch": 0.66, "grad_norm": 4.416228771209717, "learning_rate": 0.0002, "loss": 1.5171, "step": 162180 }, { "epoch": 0.66, "grad_norm": 2.2477612495422363, "learning_rate": 0.0002, "loss": 1.5254, "step": 162190 }, { "epoch": 0.66, "grad_norm": 2.5554606914520264, "learning_rate": 0.0002, "loss": 1.5989, "step": 162200 }, { "epoch": 0.66, "grad_norm": 2.8444406986236572, "learning_rate": 0.0002, "loss": 1.484, "step": 162210 }, { "epoch": 0.66, "grad_norm": 3.6120553016662598, "learning_rate": 0.0002, "loss": 1.5668, "step": 162220 }, { "epoch": 0.66, "grad_norm": 4.091746807098389, "learning_rate": 0.0002, "loss": 1.4786, "step": 162230 }, { "epoch": 0.66, "grad_norm": 3.638951539993286, "learning_rate": 0.0002, "loss": 1.4761, "step": 162240 }, { "epoch": 0.66, "grad_norm": 2.8267998695373535, "learning_rate": 0.0002, "loss": 1.7098, "step": 162250 }, { "epoch": 0.66, "grad_norm": 3.5206210613250732, "learning_rate": 0.0002, "loss": 1.8042, "step": 162260 }, { "epoch": 0.66, "grad_norm": 3.5749659538269043, "learning_rate": 0.0002, "loss": 1.76, "step": 162270 }, { "epoch": 0.66, "grad_norm": 4.9489946365356445, "learning_rate": 0.0002, "loss": 1.1735, "step": 162280 }, { "epoch": 0.66, "grad_norm": 2.9598045349121094, "learning_rate": 0.0002, "loss": 1.6179, "step": 162290 }, { "epoch": 0.66, "grad_norm": 3.133746385574341, "learning_rate": 0.0002, "loss": 1.4605, "step": 162300 }, { "epoch": 0.66, "grad_norm": 3.268720865249634, "learning_rate": 0.0002, "loss": 1.5178, "step": 162310 }, { "epoch": 0.66, "grad_norm": 3.3290512561798096, "learning_rate": 0.0002, "loss": 1.6074, "step": 162320 }, { "epoch": 0.66, "grad_norm": 2.961751699447632, "learning_rate": 0.0002, "loss": 1.5774, "step": 162330 }, { "epoch": 0.66, "grad_norm": 2.479933500289917, "learning_rate": 0.0002, "loss": 1.6767, "step": 162340 }, { "epoch": 0.66, "grad_norm": 4.722728729248047, "learning_rate": 0.0002, "loss": 1.5975, "step": 162350 }, { "epoch": 0.66, "grad_norm": 3.2336392402648926, "learning_rate": 0.0002, "loss": 1.6598, "step": 162360 }, { "epoch": 0.66, "grad_norm": 2.2877719402313232, "learning_rate": 0.0002, "loss": 1.6132, "step": 162370 }, { "epoch": 0.66, "grad_norm": 2.263803243637085, "learning_rate": 0.0002, "loss": 1.8263, "step": 162380 }, { "epoch": 0.66, "grad_norm": 2.2289485931396484, "learning_rate": 0.0002, "loss": 1.677, "step": 162390 }, { "epoch": 0.66, "grad_norm": 2.3351686000823975, "learning_rate": 0.0002, "loss": 1.5872, "step": 162400 }, { "epoch": 0.66, "grad_norm": 4.636143207550049, "learning_rate": 0.0002, "loss": 1.3209, "step": 162410 }, { "epoch": 0.66, "grad_norm": 3.4034383296966553, "learning_rate": 0.0002, "loss": 1.7139, "step": 162420 }, { "epoch": 0.66, "grad_norm": 2.450813055038452, "learning_rate": 0.0002, "loss": 1.6858, "step": 162430 }, { "epoch": 0.66, "grad_norm": 3.7901806831359863, "learning_rate": 0.0002, "loss": 1.5466, "step": 162440 }, { "epoch": 0.66, "grad_norm": 2.619263172149658, "learning_rate": 0.0002, "loss": 1.2962, "step": 162450 }, { "epoch": 0.66, "grad_norm": 3.1600968837738037, "learning_rate": 0.0002, "loss": 1.8372, "step": 162460 }, { "epoch": 0.66, "grad_norm": 3.0036356449127197, "learning_rate": 0.0002, "loss": 1.6988, "step": 162470 }, { "epoch": 0.66, "grad_norm": 9.79708194732666, "learning_rate": 0.0002, "loss": 1.4027, "step": 162480 }, { "epoch": 0.66, "grad_norm": 3.2421393394470215, "learning_rate": 0.0002, "loss": 1.4122, "step": 162490 }, { "epoch": 0.66, "grad_norm": 2.6464033126831055, "learning_rate": 0.0002, "loss": 1.5126, "step": 162500 }, { "epoch": 0.66, "grad_norm": 3.041693925857544, "learning_rate": 0.0002, "loss": 1.5425, "step": 162510 }, { "epoch": 0.66, "grad_norm": 3.0383880138397217, "learning_rate": 0.0002, "loss": 1.4431, "step": 162520 }, { "epoch": 0.66, "grad_norm": 4.3286566734313965, "learning_rate": 0.0002, "loss": 1.5341, "step": 162530 }, { "epoch": 0.66, "grad_norm": 2.607023239135742, "learning_rate": 0.0002, "loss": 1.6231, "step": 162540 }, { "epoch": 0.66, "grad_norm": 3.1081509590148926, "learning_rate": 0.0002, "loss": 1.282, "step": 162550 }, { "epoch": 0.66, "grad_norm": 2.7434332370758057, "learning_rate": 0.0002, "loss": 1.5471, "step": 162560 }, { "epoch": 0.66, "grad_norm": 2.237480401992798, "learning_rate": 0.0002, "loss": 1.5853, "step": 162570 }, { "epoch": 0.66, "grad_norm": 3.2911648750305176, "learning_rate": 0.0002, "loss": 1.7513, "step": 162580 }, { "epoch": 0.66, "grad_norm": 2.5280075073242188, "learning_rate": 0.0002, "loss": 1.4311, "step": 162590 }, { "epoch": 0.66, "grad_norm": 3.572789192199707, "learning_rate": 0.0002, "loss": 1.7279, "step": 162600 }, { "epoch": 0.66, "grad_norm": 2.284120798110962, "learning_rate": 0.0002, "loss": 1.5878, "step": 162610 }, { "epoch": 0.66, "grad_norm": 3.6307222843170166, "learning_rate": 0.0002, "loss": 1.6033, "step": 162620 }, { "epoch": 0.66, "grad_norm": 3.4426820278167725, "learning_rate": 0.0002, "loss": 1.7198, "step": 162630 }, { "epoch": 0.66, "grad_norm": 3.6639394760131836, "learning_rate": 0.0002, "loss": 1.6574, "step": 162640 }, { "epoch": 0.66, "grad_norm": 2.358281373977661, "learning_rate": 0.0002, "loss": 1.5093, "step": 162650 }, { "epoch": 0.66, "grad_norm": 3.8108770847320557, "learning_rate": 0.0002, "loss": 1.6115, "step": 162660 }, { "epoch": 0.66, "grad_norm": 3.379340887069702, "learning_rate": 0.0002, "loss": 1.427, "step": 162670 }, { "epoch": 0.66, "grad_norm": 5.413537502288818, "learning_rate": 0.0002, "loss": 1.3412, "step": 162680 }, { "epoch": 0.66, "grad_norm": 2.373467206954956, "learning_rate": 0.0002, "loss": 1.4772, "step": 162690 }, { "epoch": 0.66, "grad_norm": 2.7301981449127197, "learning_rate": 0.0002, "loss": 1.5122, "step": 162700 }, { "epoch": 0.66, "grad_norm": 2.248159885406494, "learning_rate": 0.0002, "loss": 1.3456, "step": 162710 }, { "epoch": 0.66, "grad_norm": 3.871711492538452, "learning_rate": 0.0002, "loss": 1.3913, "step": 162720 }, { "epoch": 0.66, "grad_norm": 3.0870113372802734, "learning_rate": 0.0002, "loss": 1.4249, "step": 162730 }, { "epoch": 0.66, "grad_norm": 2.826869487762451, "learning_rate": 0.0002, "loss": 1.5778, "step": 162740 }, { "epoch": 0.66, "grad_norm": 2.9142138957977295, "learning_rate": 0.0002, "loss": 1.5061, "step": 162750 }, { "epoch": 0.66, "grad_norm": 2.007847547531128, "learning_rate": 0.0002, "loss": 1.5976, "step": 162760 }, { "epoch": 0.66, "grad_norm": 1.9988051652908325, "learning_rate": 0.0002, "loss": 1.5109, "step": 162770 }, { "epoch": 0.66, "grad_norm": 3.6250455379486084, "learning_rate": 0.0002, "loss": 1.5482, "step": 162780 }, { "epoch": 0.66, "grad_norm": 2.512165069580078, "learning_rate": 0.0002, "loss": 1.6708, "step": 162790 }, { "epoch": 0.66, "grad_norm": 3.124678611755371, "learning_rate": 0.0002, "loss": 1.6896, "step": 162800 }, { "epoch": 0.66, "grad_norm": 3.191190719604492, "learning_rate": 0.0002, "loss": 1.9808, "step": 162810 }, { "epoch": 0.66, "grad_norm": 3.2991974353790283, "learning_rate": 0.0002, "loss": 1.3424, "step": 162820 }, { "epoch": 0.66, "grad_norm": 3.7759945392608643, "learning_rate": 0.0002, "loss": 1.5266, "step": 162830 }, { "epoch": 0.66, "grad_norm": 2.0723400115966797, "learning_rate": 0.0002, "loss": 1.6299, "step": 162840 }, { "epoch": 0.66, "grad_norm": 2.2614126205444336, "learning_rate": 0.0002, "loss": 1.6532, "step": 162850 }, { "epoch": 0.66, "grad_norm": 4.127065658569336, "learning_rate": 0.0002, "loss": 1.5413, "step": 162860 }, { "epoch": 0.66, "grad_norm": 3.015022039413452, "learning_rate": 0.0002, "loss": 1.5715, "step": 162870 }, { "epoch": 0.66, "grad_norm": 3.297319173812866, "learning_rate": 0.0002, "loss": 1.3001, "step": 162880 }, { "epoch": 0.66, "grad_norm": 2.9934465885162354, "learning_rate": 0.0002, "loss": 1.5755, "step": 162890 }, { "epoch": 0.66, "grad_norm": 1.5868397951126099, "learning_rate": 0.0002, "loss": 1.5504, "step": 162900 }, { "epoch": 0.66, "grad_norm": 2.1584441661834717, "learning_rate": 0.0002, "loss": 1.3772, "step": 162910 }, { "epoch": 0.66, "grad_norm": 3.747344732284546, "learning_rate": 0.0002, "loss": 1.8825, "step": 162920 }, { "epoch": 0.66, "grad_norm": 3.116081714630127, "learning_rate": 0.0002, "loss": 1.6846, "step": 162930 }, { "epoch": 0.66, "grad_norm": 4.351200580596924, "learning_rate": 0.0002, "loss": 1.5226, "step": 162940 }, { "epoch": 0.66, "grad_norm": 2.5426619052886963, "learning_rate": 0.0002, "loss": 1.688, "step": 162950 }, { "epoch": 0.66, "grad_norm": 2.7718117237091064, "learning_rate": 0.0002, "loss": 1.6109, "step": 162960 }, { "epoch": 0.66, "grad_norm": 3.1860108375549316, "learning_rate": 0.0002, "loss": 1.4121, "step": 162970 }, { "epoch": 0.66, "grad_norm": 3.601494789123535, "learning_rate": 0.0002, "loss": 1.5411, "step": 162980 }, { "epoch": 0.66, "grad_norm": 2.957686185836792, "learning_rate": 0.0002, "loss": 1.4541, "step": 162990 }, { "epoch": 0.66, "grad_norm": 3.275465726852417, "learning_rate": 0.0002, "loss": 1.5608, "step": 163000 }, { "epoch": 0.66, "grad_norm": 3.4598848819732666, "learning_rate": 0.0002, "loss": 1.5024, "step": 163010 }, { "epoch": 0.66, "grad_norm": 1.8429795503616333, "learning_rate": 0.0002, "loss": 1.6955, "step": 163020 }, { "epoch": 0.66, "grad_norm": 3.4549660682678223, "learning_rate": 0.0002, "loss": 1.42, "step": 163030 }, { "epoch": 0.66, "grad_norm": 3.1181282997131348, "learning_rate": 0.0002, "loss": 1.4877, "step": 163040 }, { "epoch": 0.66, "grad_norm": 1.8216649293899536, "learning_rate": 0.0002, "loss": 1.5105, "step": 163050 }, { "epoch": 0.66, "grad_norm": 2.764288902282715, "learning_rate": 0.0002, "loss": 1.4761, "step": 163060 }, { "epoch": 0.66, "grad_norm": 3.297121286392212, "learning_rate": 0.0002, "loss": 1.596, "step": 163070 }, { "epoch": 0.66, "grad_norm": 2.1534640789031982, "learning_rate": 0.0002, "loss": 1.5885, "step": 163080 }, { "epoch": 0.66, "grad_norm": 3.5444815158843994, "learning_rate": 0.0002, "loss": 1.6536, "step": 163090 }, { "epoch": 0.66, "grad_norm": 2.6729629039764404, "learning_rate": 0.0002, "loss": 1.6004, "step": 163100 }, { "epoch": 0.66, "grad_norm": 2.44442081451416, "learning_rate": 0.0002, "loss": 1.7026, "step": 163110 }, { "epoch": 0.66, "grad_norm": 2.7971489429473877, "learning_rate": 0.0002, "loss": 1.6122, "step": 163120 }, { "epoch": 0.66, "grad_norm": 4.030086040496826, "learning_rate": 0.0002, "loss": 1.2858, "step": 163130 }, { "epoch": 0.66, "grad_norm": 6.031049728393555, "learning_rate": 0.0002, "loss": 1.8498, "step": 163140 }, { "epoch": 0.66, "grad_norm": 2.5091445446014404, "learning_rate": 0.0002, "loss": 1.3716, "step": 163150 }, { "epoch": 0.66, "grad_norm": 2.3896920680999756, "learning_rate": 0.0002, "loss": 1.3954, "step": 163160 }, { "epoch": 0.66, "grad_norm": 3.2110519409179688, "learning_rate": 0.0002, "loss": 1.7358, "step": 163170 }, { "epoch": 0.66, "grad_norm": 2.2092368602752686, "learning_rate": 0.0002, "loss": 1.579, "step": 163180 }, { "epoch": 0.66, "grad_norm": 2.7266008853912354, "learning_rate": 0.0002, "loss": 1.5098, "step": 163190 }, { "epoch": 0.66, "grad_norm": 2.388849973678589, "learning_rate": 0.0002, "loss": 1.6331, "step": 163200 }, { "epoch": 0.66, "grad_norm": 3.3889074325561523, "learning_rate": 0.0002, "loss": 1.5899, "step": 163210 }, { "epoch": 0.66, "grad_norm": 1.5079748630523682, "learning_rate": 0.0002, "loss": 1.7488, "step": 163220 }, { "epoch": 0.66, "grad_norm": 2.8208529949188232, "learning_rate": 0.0002, "loss": 1.5717, "step": 163230 }, { "epoch": 0.66, "grad_norm": 3.302074909210205, "learning_rate": 0.0002, "loss": 1.71, "step": 163240 }, { "epoch": 0.66, "grad_norm": 5.532672882080078, "learning_rate": 0.0002, "loss": 1.6118, "step": 163250 }, { "epoch": 0.66, "grad_norm": 2.985549211502075, "learning_rate": 0.0002, "loss": 1.6674, "step": 163260 }, { "epoch": 0.66, "grad_norm": 2.593029499053955, "learning_rate": 0.0002, "loss": 1.8106, "step": 163270 }, { "epoch": 0.66, "grad_norm": 2.751685857772827, "learning_rate": 0.0002, "loss": 1.696, "step": 163280 }, { "epoch": 0.66, "grad_norm": 2.319561004638672, "learning_rate": 0.0002, "loss": 1.6049, "step": 163290 }, { "epoch": 0.66, "grad_norm": 2.6811909675598145, "learning_rate": 0.0002, "loss": 1.6988, "step": 163300 }, { "epoch": 0.66, "grad_norm": 2.1478281021118164, "learning_rate": 0.0002, "loss": 1.7771, "step": 163310 }, { "epoch": 0.66, "grad_norm": 2.749423027038574, "learning_rate": 0.0002, "loss": 1.6317, "step": 163320 }, { "epoch": 0.66, "grad_norm": 3.3141751289367676, "learning_rate": 0.0002, "loss": 1.6808, "step": 163330 }, { "epoch": 0.66, "grad_norm": 2.646650791168213, "learning_rate": 0.0002, "loss": 1.4006, "step": 163340 }, { "epoch": 0.66, "grad_norm": 1.6795872449874878, "learning_rate": 0.0002, "loss": 1.5134, "step": 163350 }, { "epoch": 0.67, "grad_norm": 2.7062432765960693, "learning_rate": 0.0002, "loss": 1.4739, "step": 163360 }, { "epoch": 0.67, "grad_norm": 2.7180404663085938, "learning_rate": 0.0002, "loss": 1.5617, "step": 163370 }, { "epoch": 0.67, "grad_norm": 2.7724664211273193, "learning_rate": 0.0002, "loss": 1.3989, "step": 163380 }, { "epoch": 0.67, "grad_norm": 3.3611161708831787, "learning_rate": 0.0002, "loss": 1.9408, "step": 163390 }, { "epoch": 0.67, "grad_norm": 2.805130958557129, "learning_rate": 0.0002, "loss": 1.7655, "step": 163400 }, { "epoch": 0.67, "grad_norm": 3.133411407470703, "learning_rate": 0.0002, "loss": 1.5998, "step": 163410 }, { "epoch": 0.67, "grad_norm": 2.5954062938690186, "learning_rate": 0.0002, "loss": 1.7164, "step": 163420 }, { "epoch": 0.67, "grad_norm": 5.2047905921936035, "learning_rate": 0.0002, "loss": 1.5835, "step": 163430 }, { "epoch": 0.67, "grad_norm": 3.227052927017212, "learning_rate": 0.0002, "loss": 1.5422, "step": 163440 }, { "epoch": 0.67, "grad_norm": 2.6658854484558105, "learning_rate": 0.0002, "loss": 1.6208, "step": 163450 }, { "epoch": 0.67, "grad_norm": 2.8470427989959717, "learning_rate": 0.0002, "loss": 1.6572, "step": 163460 }, { "epoch": 0.67, "grad_norm": 3.9727249145507812, "learning_rate": 0.0002, "loss": 1.418, "step": 163470 }, { "epoch": 0.67, "grad_norm": 2.2817673683166504, "learning_rate": 0.0002, "loss": 1.3987, "step": 163480 }, { "epoch": 0.67, "grad_norm": 2.617185592651367, "learning_rate": 0.0002, "loss": 1.6094, "step": 163490 }, { "epoch": 0.67, "grad_norm": 5.200794696807861, "learning_rate": 0.0002, "loss": 1.6706, "step": 163500 }, { "epoch": 0.67, "grad_norm": 3.38417649269104, "learning_rate": 0.0002, "loss": 1.487, "step": 163510 }, { "epoch": 0.67, "grad_norm": 2.5170230865478516, "learning_rate": 0.0002, "loss": 1.442, "step": 163520 }, { "epoch": 0.67, "grad_norm": 4.243866920471191, "learning_rate": 0.0002, "loss": 1.6067, "step": 163530 }, { "epoch": 0.67, "grad_norm": 2.2122995853424072, "learning_rate": 0.0002, "loss": 1.7375, "step": 163540 }, { "epoch": 0.67, "grad_norm": 7.5101423263549805, "learning_rate": 0.0002, "loss": 1.8232, "step": 163550 }, { "epoch": 0.67, "grad_norm": 2.022958517074585, "learning_rate": 0.0002, "loss": 1.6527, "step": 163560 }, { "epoch": 0.67, "grad_norm": 2.6604108810424805, "learning_rate": 0.0002, "loss": 1.4643, "step": 163570 }, { "epoch": 0.67, "grad_norm": 2.644199848175049, "learning_rate": 0.0002, "loss": 1.5545, "step": 163580 }, { "epoch": 0.67, "grad_norm": 3.317112684249878, "learning_rate": 0.0002, "loss": 1.5565, "step": 163590 }, { "epoch": 0.67, "grad_norm": 8.706257820129395, "learning_rate": 0.0002, "loss": 1.4537, "step": 163600 }, { "epoch": 0.67, "grad_norm": 3.362508773803711, "learning_rate": 0.0002, "loss": 1.409, "step": 163610 }, { "epoch": 0.67, "grad_norm": 2.1975274085998535, "learning_rate": 0.0002, "loss": 1.5218, "step": 163620 }, { "epoch": 0.67, "grad_norm": 2.1917102336883545, "learning_rate": 0.0002, "loss": 1.5725, "step": 163630 }, { "epoch": 0.67, "grad_norm": 3.5325210094451904, "learning_rate": 0.0002, "loss": 1.8894, "step": 163640 }, { "epoch": 0.67, "grad_norm": 2.729050874710083, "learning_rate": 0.0002, "loss": 1.3866, "step": 163650 }, { "epoch": 0.67, "grad_norm": 3.257514476776123, "learning_rate": 0.0002, "loss": 1.5362, "step": 163660 }, { "epoch": 0.67, "grad_norm": 3.8363640308380127, "learning_rate": 0.0002, "loss": 1.3739, "step": 163670 }, { "epoch": 0.67, "grad_norm": 3.2043240070343018, "learning_rate": 0.0002, "loss": 1.5823, "step": 163680 }, { "epoch": 0.67, "grad_norm": 3.2149717807769775, "learning_rate": 0.0002, "loss": 1.5831, "step": 163690 }, { "epoch": 0.67, "grad_norm": 1.2341179847717285, "learning_rate": 0.0002, "loss": 1.4995, "step": 163700 }, { "epoch": 0.67, "grad_norm": 3.489734411239624, "learning_rate": 0.0002, "loss": 1.4146, "step": 163710 }, { "epoch": 0.67, "grad_norm": 2.2780942916870117, "learning_rate": 0.0002, "loss": 1.3671, "step": 163720 }, { "epoch": 0.67, "grad_norm": 3.119434356689453, "learning_rate": 0.0002, "loss": 1.6003, "step": 163730 }, { "epoch": 0.67, "grad_norm": 3.082611083984375, "learning_rate": 0.0002, "loss": 1.6396, "step": 163740 }, { "epoch": 0.67, "grad_norm": 2.068809747695923, "learning_rate": 0.0002, "loss": 1.4902, "step": 163750 }, { "epoch": 0.67, "grad_norm": 1.7660990953445435, "learning_rate": 0.0002, "loss": 1.7484, "step": 163760 }, { "epoch": 0.67, "grad_norm": 2.6437196731567383, "learning_rate": 0.0002, "loss": 1.5254, "step": 163770 }, { "epoch": 0.67, "grad_norm": 2.4558939933776855, "learning_rate": 0.0002, "loss": 1.6848, "step": 163780 }, { "epoch": 0.67, "grad_norm": 3.8664190769195557, "learning_rate": 0.0002, "loss": 1.9407, "step": 163790 }, { "epoch": 0.67, "grad_norm": 3.7189371585845947, "learning_rate": 0.0002, "loss": 1.7534, "step": 163800 }, { "epoch": 0.67, "grad_norm": 2.8827755451202393, "learning_rate": 0.0002, "loss": 1.7887, "step": 163810 }, { "epoch": 0.67, "grad_norm": 4.383185863494873, "learning_rate": 0.0002, "loss": 1.5903, "step": 163820 }, { "epoch": 0.67, "grad_norm": 2.5085864067077637, "learning_rate": 0.0002, "loss": 1.6831, "step": 163830 }, { "epoch": 0.67, "grad_norm": 3.9249050617218018, "learning_rate": 0.0002, "loss": 1.6262, "step": 163840 }, { "epoch": 0.67, "grad_norm": 1.3463643789291382, "learning_rate": 0.0002, "loss": 1.4457, "step": 163850 }, { "epoch": 0.67, "grad_norm": 3.8084027767181396, "learning_rate": 0.0002, "loss": 1.6107, "step": 163860 }, { "epoch": 0.67, "grad_norm": 3.523890256881714, "learning_rate": 0.0002, "loss": 1.516, "step": 163870 }, { "epoch": 0.67, "grad_norm": 2.39062762260437, "learning_rate": 0.0002, "loss": 1.4792, "step": 163880 }, { "epoch": 0.67, "grad_norm": 2.712502956390381, "learning_rate": 0.0002, "loss": 1.6875, "step": 163890 }, { "epoch": 0.67, "grad_norm": 4.830347061157227, "learning_rate": 0.0002, "loss": 1.5333, "step": 163900 }, { "epoch": 0.67, "grad_norm": 2.6721293926239014, "learning_rate": 0.0002, "loss": 1.647, "step": 163910 }, { "epoch": 0.67, "grad_norm": 2.826788902282715, "learning_rate": 0.0002, "loss": 1.5992, "step": 163920 }, { "epoch": 0.67, "grad_norm": 3.89115571975708, "learning_rate": 0.0002, "loss": 1.7017, "step": 163930 }, { "epoch": 0.67, "grad_norm": 2.115626096725464, "learning_rate": 0.0002, "loss": 1.5369, "step": 163940 }, { "epoch": 0.67, "grad_norm": 4.140992164611816, "learning_rate": 0.0002, "loss": 1.6658, "step": 163950 }, { "epoch": 0.67, "grad_norm": 3.9823663234710693, "learning_rate": 0.0002, "loss": 1.294, "step": 163960 }, { "epoch": 0.67, "grad_norm": 2.1943702697753906, "learning_rate": 0.0002, "loss": 1.7495, "step": 163970 }, { "epoch": 0.67, "grad_norm": 1.8222965002059937, "learning_rate": 0.0002, "loss": 1.4661, "step": 163980 }, { "epoch": 0.67, "grad_norm": 3.2726082801818848, "learning_rate": 0.0002, "loss": 1.3743, "step": 163990 }, { "epoch": 0.67, "grad_norm": 3.7946479320526123, "learning_rate": 0.0002, "loss": 1.6012, "step": 164000 }, { "epoch": 0.67, "grad_norm": 3.0101261138916016, "learning_rate": 0.0002, "loss": 1.609, "step": 164010 }, { "epoch": 0.67, "grad_norm": 2.6906344890594482, "learning_rate": 0.0002, "loss": 1.3595, "step": 164020 }, { "epoch": 0.67, "grad_norm": 7.495233535766602, "learning_rate": 0.0002, "loss": 1.6276, "step": 164030 }, { "epoch": 0.67, "grad_norm": 4.726022243499756, "learning_rate": 0.0002, "loss": 1.6683, "step": 164040 }, { "epoch": 0.67, "grad_norm": 5.4571213722229, "learning_rate": 0.0002, "loss": 1.3762, "step": 164050 }, { "epoch": 0.67, "grad_norm": 3.208211660385132, "learning_rate": 0.0002, "loss": 1.4354, "step": 164060 }, { "epoch": 0.67, "grad_norm": 2.39022159576416, "learning_rate": 0.0002, "loss": 1.5062, "step": 164070 }, { "epoch": 0.67, "grad_norm": 1.9404475688934326, "learning_rate": 0.0002, "loss": 1.5469, "step": 164080 }, { "epoch": 0.67, "grad_norm": 2.639061212539673, "learning_rate": 0.0002, "loss": 1.4048, "step": 164090 }, { "epoch": 0.67, "grad_norm": 1.6967952251434326, "learning_rate": 0.0002, "loss": 1.7165, "step": 164100 }, { "epoch": 0.67, "grad_norm": 4.647598743438721, "learning_rate": 0.0002, "loss": 1.6677, "step": 164110 }, { "epoch": 0.67, "grad_norm": 3.6328420639038086, "learning_rate": 0.0002, "loss": 1.8206, "step": 164120 }, { "epoch": 0.67, "grad_norm": 2.90693998336792, "learning_rate": 0.0002, "loss": 1.5343, "step": 164130 }, { "epoch": 0.67, "grad_norm": 2.0209243297576904, "learning_rate": 0.0002, "loss": 1.4006, "step": 164140 }, { "epoch": 0.67, "grad_norm": 2.68622088432312, "learning_rate": 0.0002, "loss": 1.1724, "step": 164150 }, { "epoch": 0.67, "grad_norm": 5.150204181671143, "learning_rate": 0.0002, "loss": 1.4693, "step": 164160 }, { "epoch": 0.67, "grad_norm": 3.048419952392578, "learning_rate": 0.0002, "loss": 1.6036, "step": 164170 }, { "epoch": 0.67, "grad_norm": 3.2245912551879883, "learning_rate": 0.0002, "loss": 1.4925, "step": 164180 }, { "epoch": 0.67, "grad_norm": 3.2751553058624268, "learning_rate": 0.0002, "loss": 1.6567, "step": 164190 }, { "epoch": 0.67, "grad_norm": 2.290022611618042, "learning_rate": 0.0002, "loss": 1.4637, "step": 164200 }, { "epoch": 0.67, "grad_norm": 2.051100730895996, "learning_rate": 0.0002, "loss": 1.4713, "step": 164210 }, { "epoch": 0.67, "grad_norm": 2.465515613555908, "learning_rate": 0.0002, "loss": 1.7631, "step": 164220 }, { "epoch": 0.67, "grad_norm": 3.7202258110046387, "learning_rate": 0.0002, "loss": 1.5701, "step": 164230 }, { "epoch": 0.67, "grad_norm": 2.318035840988159, "learning_rate": 0.0002, "loss": 1.6028, "step": 164240 }, { "epoch": 0.67, "grad_norm": 2.984025478363037, "learning_rate": 0.0002, "loss": 1.6966, "step": 164250 }, { "epoch": 0.67, "grad_norm": 2.2269012928009033, "learning_rate": 0.0002, "loss": 1.3606, "step": 164260 }, { "epoch": 0.67, "grad_norm": 3.1498818397521973, "learning_rate": 0.0002, "loss": 1.6081, "step": 164270 }, { "epoch": 0.67, "grad_norm": 3.7821996212005615, "learning_rate": 0.0002, "loss": 1.6522, "step": 164280 }, { "epoch": 0.67, "grad_norm": 2.737281560897827, "learning_rate": 0.0002, "loss": 1.5811, "step": 164290 }, { "epoch": 0.67, "grad_norm": 3.613229513168335, "learning_rate": 0.0002, "loss": 1.6829, "step": 164300 }, { "epoch": 0.67, "grad_norm": 3.708885431289673, "learning_rate": 0.0002, "loss": 1.6233, "step": 164310 }, { "epoch": 0.67, "grad_norm": 2.5908238887786865, "learning_rate": 0.0002, "loss": 1.6457, "step": 164320 }, { "epoch": 0.67, "grad_norm": 1.9895954132080078, "learning_rate": 0.0002, "loss": 1.5585, "step": 164330 }, { "epoch": 0.67, "grad_norm": 3.5386064052581787, "learning_rate": 0.0002, "loss": 1.5791, "step": 164340 }, { "epoch": 0.67, "grad_norm": 2.764197587966919, "learning_rate": 0.0002, "loss": 1.5407, "step": 164350 }, { "epoch": 0.67, "grad_norm": 1.795423150062561, "learning_rate": 0.0002, "loss": 1.3407, "step": 164360 }, { "epoch": 0.67, "grad_norm": 4.022106170654297, "learning_rate": 0.0002, "loss": 1.6972, "step": 164370 }, { "epoch": 0.67, "grad_norm": 2.778502941131592, "learning_rate": 0.0002, "loss": 1.6693, "step": 164380 }, { "epoch": 0.67, "grad_norm": 2.237910032272339, "learning_rate": 0.0002, "loss": 1.9309, "step": 164390 }, { "epoch": 0.67, "grad_norm": 3.8220839500427246, "learning_rate": 0.0002, "loss": 1.4681, "step": 164400 }, { "epoch": 0.67, "grad_norm": 4.410398960113525, "learning_rate": 0.0002, "loss": 1.5938, "step": 164410 }, { "epoch": 0.67, "grad_norm": 3.956890821456909, "learning_rate": 0.0002, "loss": 1.6996, "step": 164420 }, { "epoch": 0.67, "grad_norm": 2.4926798343658447, "learning_rate": 0.0002, "loss": 1.4552, "step": 164430 }, { "epoch": 0.67, "grad_norm": 4.278395652770996, "learning_rate": 0.0002, "loss": 1.5678, "step": 164440 }, { "epoch": 0.67, "grad_norm": 2.394545555114746, "learning_rate": 0.0002, "loss": 1.6623, "step": 164450 }, { "epoch": 0.67, "grad_norm": 3.2981996536254883, "learning_rate": 0.0002, "loss": 1.4164, "step": 164460 }, { "epoch": 0.67, "grad_norm": 3.2793033123016357, "learning_rate": 0.0002, "loss": 1.4508, "step": 164470 }, { "epoch": 0.67, "grad_norm": 3.4157092571258545, "learning_rate": 0.0002, "loss": 1.4699, "step": 164480 }, { "epoch": 0.67, "grad_norm": 3.26296067237854, "learning_rate": 0.0002, "loss": 1.5492, "step": 164490 }, { "epoch": 0.67, "grad_norm": 3.75435209274292, "learning_rate": 0.0002, "loss": 1.3791, "step": 164500 }, { "epoch": 0.67, "grad_norm": 2.4023067951202393, "learning_rate": 0.0002, "loss": 1.48, "step": 164510 }, { "epoch": 0.67, "grad_norm": 2.681678533554077, "learning_rate": 0.0002, "loss": 1.4908, "step": 164520 }, { "epoch": 0.67, "grad_norm": 2.440370559692383, "learning_rate": 0.0002, "loss": 1.3468, "step": 164530 }, { "epoch": 0.67, "grad_norm": 6.11726713180542, "learning_rate": 0.0002, "loss": 1.4285, "step": 164540 }, { "epoch": 0.67, "grad_norm": 2.9712822437286377, "learning_rate": 0.0002, "loss": 1.7592, "step": 164550 }, { "epoch": 0.67, "grad_norm": 1.880728006362915, "learning_rate": 0.0002, "loss": 1.3938, "step": 164560 }, { "epoch": 0.67, "grad_norm": 4.567353248596191, "learning_rate": 0.0002, "loss": 1.544, "step": 164570 }, { "epoch": 0.67, "grad_norm": 2.787982702255249, "learning_rate": 0.0002, "loss": 1.4273, "step": 164580 }, { "epoch": 0.67, "grad_norm": 3.4853322505950928, "learning_rate": 0.0002, "loss": 1.6014, "step": 164590 }, { "epoch": 0.67, "grad_norm": 2.940673589706421, "learning_rate": 0.0002, "loss": 1.4592, "step": 164600 }, { "epoch": 0.67, "grad_norm": 3.237868547439575, "learning_rate": 0.0002, "loss": 1.5508, "step": 164610 }, { "epoch": 0.67, "grad_norm": 1.9868226051330566, "learning_rate": 0.0002, "loss": 1.3916, "step": 164620 }, { "epoch": 0.67, "grad_norm": 2.983602523803711, "learning_rate": 0.0002, "loss": 1.868, "step": 164630 }, { "epoch": 0.67, "grad_norm": 2.48893404006958, "learning_rate": 0.0002, "loss": 1.6287, "step": 164640 }, { "epoch": 0.67, "grad_norm": 3.5634164810180664, "learning_rate": 0.0002, "loss": 1.8776, "step": 164650 }, { "epoch": 0.67, "grad_norm": 3.0354321002960205, "learning_rate": 0.0002, "loss": 1.6361, "step": 164660 }, { "epoch": 0.67, "grad_norm": 2.1277904510498047, "learning_rate": 0.0002, "loss": 1.7297, "step": 164670 }, { "epoch": 0.67, "grad_norm": 1.9558218717575073, "learning_rate": 0.0002, "loss": 1.6845, "step": 164680 }, { "epoch": 0.67, "grad_norm": 3.8533120155334473, "learning_rate": 0.0002, "loss": 1.4084, "step": 164690 }, { "epoch": 0.67, "grad_norm": 2.3551523685455322, "learning_rate": 0.0002, "loss": 1.5853, "step": 164700 }, { "epoch": 0.67, "grad_norm": 3.4785475730895996, "learning_rate": 0.0002, "loss": 1.405, "step": 164710 }, { "epoch": 0.67, "grad_norm": 2.8471195697784424, "learning_rate": 0.0002, "loss": 1.4733, "step": 164720 }, { "epoch": 0.67, "grad_norm": 3.853548765182495, "learning_rate": 0.0002, "loss": 1.7058, "step": 164730 }, { "epoch": 0.67, "grad_norm": 6.046029090881348, "learning_rate": 0.0002, "loss": 1.6581, "step": 164740 }, { "epoch": 0.67, "grad_norm": 2.697542428970337, "learning_rate": 0.0002, "loss": 1.5602, "step": 164750 }, { "epoch": 0.67, "grad_norm": 2.368257999420166, "learning_rate": 0.0002, "loss": 1.7454, "step": 164760 }, { "epoch": 0.67, "grad_norm": 1.9917625188827515, "learning_rate": 0.0002, "loss": 1.6598, "step": 164770 }, { "epoch": 0.67, "grad_norm": 2.3636956214904785, "learning_rate": 0.0002, "loss": 1.4455, "step": 164780 }, { "epoch": 0.67, "grad_norm": 2.7068800926208496, "learning_rate": 0.0002, "loss": 1.6398, "step": 164790 }, { "epoch": 0.67, "grad_norm": 3.413970708847046, "learning_rate": 0.0002, "loss": 1.5831, "step": 164800 }, { "epoch": 0.67, "grad_norm": 2.137728214263916, "learning_rate": 0.0002, "loss": 1.6982, "step": 164810 }, { "epoch": 0.67, "grad_norm": 3.6580421924591064, "learning_rate": 0.0002, "loss": 1.7804, "step": 164820 }, { "epoch": 0.67, "grad_norm": 2.2133636474609375, "learning_rate": 0.0002, "loss": 1.5976, "step": 164830 }, { "epoch": 0.67, "grad_norm": 3.0701608657836914, "learning_rate": 0.0002, "loss": 1.6775, "step": 164840 }, { "epoch": 0.67, "grad_norm": 3.306816577911377, "learning_rate": 0.0002, "loss": 1.462, "step": 164850 }, { "epoch": 0.67, "grad_norm": 2.973745822906494, "learning_rate": 0.0002, "loss": 1.559, "step": 164860 }, { "epoch": 0.67, "grad_norm": 2.654414653778076, "learning_rate": 0.0002, "loss": 1.6064, "step": 164870 }, { "epoch": 0.67, "grad_norm": 2.510869264602661, "learning_rate": 0.0002, "loss": 1.4346, "step": 164880 }, { "epoch": 0.67, "grad_norm": 3.10628342628479, "learning_rate": 0.0002, "loss": 1.5012, "step": 164890 }, { "epoch": 0.67, "grad_norm": 2.367849111557007, "learning_rate": 0.0002, "loss": 1.3408, "step": 164900 }, { "epoch": 0.67, "grad_norm": 4.3809895515441895, "learning_rate": 0.0002, "loss": 1.6278, "step": 164910 }, { "epoch": 0.67, "grad_norm": 3.739783763885498, "learning_rate": 0.0002, "loss": 1.6747, "step": 164920 }, { "epoch": 0.67, "grad_norm": 2.30348801612854, "learning_rate": 0.0002, "loss": 1.5343, "step": 164930 }, { "epoch": 0.67, "grad_norm": 3.1784327030181885, "learning_rate": 0.0002, "loss": 1.7041, "step": 164940 }, { "epoch": 0.67, "grad_norm": 2.3175113201141357, "learning_rate": 0.0002, "loss": 1.5872, "step": 164950 }, { "epoch": 0.67, "grad_norm": 2.513777017593384, "learning_rate": 0.0002, "loss": 1.5677, "step": 164960 }, { "epoch": 0.67, "grad_norm": 3.1769237518310547, "learning_rate": 0.0002, "loss": 1.6649, "step": 164970 }, { "epoch": 0.67, "grad_norm": 2.9598195552825928, "learning_rate": 0.0002, "loss": 1.7475, "step": 164980 }, { "epoch": 0.67, "grad_norm": 4.27944803237915, "learning_rate": 0.0002, "loss": 1.8687, "step": 164990 }, { "epoch": 0.67, "grad_norm": 5.239758491516113, "learning_rate": 0.0002, "loss": 1.5998, "step": 165000 }, { "epoch": 0.67, "grad_norm": 2.5476489067077637, "learning_rate": 0.0002, "loss": 1.4895, "step": 165010 }, { "epoch": 0.67, "grad_norm": 3.3189663887023926, "learning_rate": 0.0002, "loss": 1.5312, "step": 165020 }, { "epoch": 0.67, "grad_norm": 4.947011470794678, "learning_rate": 0.0002, "loss": 1.6893, "step": 165030 }, { "epoch": 0.67, "grad_norm": 2.5499343872070312, "learning_rate": 0.0002, "loss": 1.4146, "step": 165040 }, { "epoch": 0.67, "grad_norm": 3.6132991313934326, "learning_rate": 0.0002, "loss": 1.6128, "step": 165050 }, { "epoch": 0.67, "grad_norm": 1.834839940071106, "learning_rate": 0.0002, "loss": 1.4652, "step": 165060 }, { "epoch": 0.67, "grad_norm": 4.857144355773926, "learning_rate": 0.0002, "loss": 1.6051, "step": 165070 }, { "epoch": 0.67, "grad_norm": 2.409113645553589, "learning_rate": 0.0002, "loss": 1.6499, "step": 165080 }, { "epoch": 0.67, "grad_norm": 3.264035940170288, "learning_rate": 0.0002, "loss": 1.7387, "step": 165090 }, { "epoch": 0.67, "grad_norm": 2.280750036239624, "learning_rate": 0.0002, "loss": 1.5357, "step": 165100 }, { "epoch": 0.67, "grad_norm": 2.3680124282836914, "learning_rate": 0.0002, "loss": 1.4975, "step": 165110 }, { "epoch": 0.67, "grad_norm": 3.9899463653564453, "learning_rate": 0.0002, "loss": 1.5209, "step": 165120 }, { "epoch": 0.67, "grad_norm": 2.521571159362793, "learning_rate": 0.0002, "loss": 1.6602, "step": 165130 }, { "epoch": 0.67, "grad_norm": 2.942967176437378, "learning_rate": 0.0002, "loss": 1.5209, "step": 165140 }, { "epoch": 0.67, "grad_norm": 2.441573143005371, "learning_rate": 0.0002, "loss": 1.4541, "step": 165150 }, { "epoch": 0.67, "grad_norm": 3.9899520874023438, "learning_rate": 0.0002, "loss": 1.3904, "step": 165160 }, { "epoch": 0.67, "grad_norm": 3.4055488109588623, "learning_rate": 0.0002, "loss": 1.6118, "step": 165170 }, { "epoch": 0.67, "grad_norm": 2.852893352508545, "learning_rate": 0.0002, "loss": 1.3353, "step": 165180 }, { "epoch": 0.67, "grad_norm": 3.7114219665527344, "learning_rate": 0.0002, "loss": 1.6031, "step": 165190 }, { "epoch": 0.67, "grad_norm": 2.632453203201294, "learning_rate": 0.0002, "loss": 1.4521, "step": 165200 }, { "epoch": 0.67, "grad_norm": 2.872720956802368, "learning_rate": 0.0002, "loss": 1.4008, "step": 165210 }, { "epoch": 0.67, "grad_norm": 2.8711462020874023, "learning_rate": 0.0002, "loss": 1.7288, "step": 165220 }, { "epoch": 0.67, "grad_norm": 3.485095262527466, "learning_rate": 0.0002, "loss": 1.4442, "step": 165230 }, { "epoch": 0.67, "grad_norm": 3.2580695152282715, "learning_rate": 0.0002, "loss": 1.6106, "step": 165240 }, { "epoch": 0.67, "grad_norm": 1.9341013431549072, "learning_rate": 0.0002, "loss": 1.4272, "step": 165250 }, { "epoch": 0.67, "grad_norm": 4.17338228225708, "learning_rate": 0.0002, "loss": 1.9657, "step": 165260 }, { "epoch": 0.67, "grad_norm": 3.378917932510376, "learning_rate": 0.0002, "loss": 1.617, "step": 165270 }, { "epoch": 0.67, "grad_norm": 4.72146463394165, "learning_rate": 0.0002, "loss": 1.7459, "step": 165280 }, { "epoch": 0.67, "grad_norm": 2.8077433109283447, "learning_rate": 0.0002, "loss": 1.6572, "step": 165290 }, { "epoch": 0.67, "grad_norm": 2.569164752960205, "learning_rate": 0.0002, "loss": 1.7726, "step": 165300 }, { "epoch": 0.67, "grad_norm": 3.3426730632781982, "learning_rate": 0.0002, "loss": 1.8848, "step": 165310 }, { "epoch": 0.67, "grad_norm": 5.239786148071289, "learning_rate": 0.0002, "loss": 1.6363, "step": 165320 }, { "epoch": 0.67, "grad_norm": 4.688346862792969, "learning_rate": 0.0002, "loss": 1.5486, "step": 165330 }, { "epoch": 0.67, "grad_norm": 1.8663896322250366, "learning_rate": 0.0002, "loss": 1.6575, "step": 165340 }, { "epoch": 0.67, "grad_norm": 3.078774929046631, "learning_rate": 0.0002, "loss": 1.5215, "step": 165350 }, { "epoch": 0.67, "grad_norm": 2.9312281608581543, "learning_rate": 0.0002, "loss": 1.4484, "step": 165360 }, { "epoch": 0.67, "grad_norm": 2.957585334777832, "learning_rate": 0.0002, "loss": 1.6698, "step": 165370 }, { "epoch": 0.67, "grad_norm": 3.133967399597168, "learning_rate": 0.0002, "loss": 1.5892, "step": 165380 }, { "epoch": 0.67, "grad_norm": 3.7130823135375977, "learning_rate": 0.0002, "loss": 1.7242, "step": 165390 }, { "epoch": 0.67, "grad_norm": 2.180253505706787, "learning_rate": 0.0002, "loss": 1.5664, "step": 165400 }, { "epoch": 0.67, "grad_norm": 5.477969169616699, "learning_rate": 0.0002, "loss": 1.7666, "step": 165410 }, { "epoch": 0.67, "grad_norm": 2.663839340209961, "learning_rate": 0.0002, "loss": 1.5401, "step": 165420 }, { "epoch": 0.67, "grad_norm": 5.3880391120910645, "learning_rate": 0.0002, "loss": 1.5882, "step": 165430 }, { "epoch": 0.67, "grad_norm": 3.787151575088501, "learning_rate": 0.0002, "loss": 1.6274, "step": 165440 }, { "epoch": 0.67, "grad_norm": 5.2675981521606445, "learning_rate": 0.0002, "loss": 1.6097, "step": 165450 }, { "epoch": 0.67, "grad_norm": 3.245321035385132, "learning_rate": 0.0002, "loss": 1.496, "step": 165460 }, { "epoch": 0.67, "grad_norm": 4.443258285522461, "learning_rate": 0.0002, "loss": 1.5176, "step": 165470 }, { "epoch": 0.67, "grad_norm": 3.331322193145752, "learning_rate": 0.0002, "loss": 1.553, "step": 165480 }, { "epoch": 0.67, "grad_norm": 3.8617544174194336, "learning_rate": 0.0002, "loss": 1.6098, "step": 165490 }, { "epoch": 0.67, "grad_norm": 2.7440664768218994, "learning_rate": 0.0002, "loss": 1.3656, "step": 165500 }, { "epoch": 0.67, "grad_norm": 1.5491083860397339, "learning_rate": 0.0002, "loss": 1.4316, "step": 165510 }, { "epoch": 0.67, "grad_norm": 3.032503366470337, "learning_rate": 0.0002, "loss": 1.7135, "step": 165520 }, { "epoch": 0.67, "grad_norm": 3.477656364440918, "learning_rate": 0.0002, "loss": 1.5932, "step": 165530 }, { "epoch": 0.67, "grad_norm": 3.755432605743408, "learning_rate": 0.0002, "loss": 1.6054, "step": 165540 }, { "epoch": 0.67, "grad_norm": 2.7345659732818604, "learning_rate": 0.0002, "loss": 1.5883, "step": 165550 }, { "epoch": 0.67, "grad_norm": 2.006901502609253, "learning_rate": 0.0002, "loss": 1.4725, "step": 165560 }, { "epoch": 0.67, "grad_norm": 3.5755715370178223, "learning_rate": 0.0002, "loss": 1.6368, "step": 165570 }, { "epoch": 0.67, "grad_norm": 2.646549940109253, "learning_rate": 0.0002, "loss": 1.4939, "step": 165580 }, { "epoch": 0.67, "grad_norm": 4.0451555252075195, "learning_rate": 0.0002, "loss": 1.6607, "step": 165590 }, { "epoch": 0.67, "grad_norm": 4.84345817565918, "learning_rate": 0.0002, "loss": 1.5477, "step": 165600 }, { "epoch": 0.67, "grad_norm": 2.546067237854004, "learning_rate": 0.0002, "loss": 1.6232, "step": 165610 }, { "epoch": 0.67, "grad_norm": 2.4184043407440186, "learning_rate": 0.0002, "loss": 1.6137, "step": 165620 }, { "epoch": 0.67, "grad_norm": 2.2447073459625244, "learning_rate": 0.0002, "loss": 1.9056, "step": 165630 }, { "epoch": 0.67, "grad_norm": 3.117354154586792, "learning_rate": 0.0002, "loss": 1.6533, "step": 165640 }, { "epoch": 0.67, "grad_norm": 2.855177164077759, "learning_rate": 0.0002, "loss": 1.4424, "step": 165650 }, { "epoch": 0.67, "grad_norm": 3.702023983001709, "learning_rate": 0.0002, "loss": 1.4018, "step": 165660 }, { "epoch": 0.67, "grad_norm": 3.9081952571868896, "learning_rate": 0.0002, "loss": 1.3635, "step": 165670 }, { "epoch": 0.67, "grad_norm": 3.207153558731079, "learning_rate": 0.0002, "loss": 1.4977, "step": 165680 }, { "epoch": 0.67, "grad_norm": 3.110138177871704, "learning_rate": 0.0002, "loss": 1.9881, "step": 165690 }, { "epoch": 0.67, "grad_norm": 3.622352123260498, "learning_rate": 0.0002, "loss": 1.6921, "step": 165700 }, { "epoch": 0.67, "grad_norm": 3.795764923095703, "learning_rate": 0.0002, "loss": 1.8452, "step": 165710 }, { "epoch": 0.67, "grad_norm": 2.688857316970825, "learning_rate": 0.0002, "loss": 1.558, "step": 165720 }, { "epoch": 0.67, "grad_norm": 4.1015849113464355, "learning_rate": 0.0002, "loss": 1.4485, "step": 165730 }, { "epoch": 0.67, "grad_norm": 6.045797824859619, "learning_rate": 0.0002, "loss": 1.4518, "step": 165740 }, { "epoch": 0.67, "grad_norm": 2.4102675914764404, "learning_rate": 0.0002, "loss": 1.5439, "step": 165750 }, { "epoch": 0.67, "grad_norm": 5.49129056930542, "learning_rate": 0.0002, "loss": 1.6028, "step": 165760 }, { "epoch": 0.67, "grad_norm": 2.8344433307647705, "learning_rate": 0.0002, "loss": 1.8353, "step": 165770 }, { "epoch": 0.67, "grad_norm": 3.0828568935394287, "learning_rate": 0.0002, "loss": 1.6271, "step": 165780 }, { "epoch": 0.67, "grad_norm": 4.130350112915039, "learning_rate": 0.0002, "loss": 1.589, "step": 165790 }, { "epoch": 0.67, "grad_norm": 2.6162641048431396, "learning_rate": 0.0002, "loss": 1.607, "step": 165800 }, { "epoch": 0.68, "grad_norm": 2.2357027530670166, "learning_rate": 0.0002, "loss": 1.5972, "step": 165810 }, { "epoch": 0.68, "grad_norm": 2.8452181816101074, "learning_rate": 0.0002, "loss": 1.7575, "step": 165820 }, { "epoch": 0.68, "grad_norm": 2.40333890914917, "learning_rate": 0.0002, "loss": 1.4562, "step": 165830 }, { "epoch": 0.68, "grad_norm": 2.018955707550049, "learning_rate": 0.0002, "loss": 1.6522, "step": 165840 }, { "epoch": 0.68, "grad_norm": 3.170280694961548, "learning_rate": 0.0002, "loss": 1.7038, "step": 165850 }, { "epoch": 0.68, "grad_norm": 1.7772819995880127, "learning_rate": 0.0002, "loss": 1.7496, "step": 165860 }, { "epoch": 0.68, "grad_norm": 2.7904927730560303, "learning_rate": 0.0002, "loss": 1.416, "step": 165870 }, { "epoch": 0.68, "grad_norm": 1.6969034671783447, "learning_rate": 0.0002, "loss": 1.4724, "step": 165880 }, { "epoch": 0.68, "grad_norm": 4.9716057777404785, "learning_rate": 0.0002, "loss": 1.7362, "step": 165890 }, { "epoch": 0.68, "grad_norm": 3.2092485427856445, "learning_rate": 0.0002, "loss": 1.613, "step": 165900 }, { "epoch": 0.68, "grad_norm": 4.60104513168335, "learning_rate": 0.0002, "loss": 1.525, "step": 165910 }, { "epoch": 0.68, "grad_norm": 2.7749571800231934, "learning_rate": 0.0002, "loss": 1.4729, "step": 165920 }, { "epoch": 0.68, "grad_norm": 2.632779836654663, "learning_rate": 0.0002, "loss": 1.5481, "step": 165930 }, { "epoch": 0.68, "grad_norm": 2.423701047897339, "learning_rate": 0.0002, "loss": 1.6573, "step": 165940 }, { "epoch": 0.68, "grad_norm": 3.8473525047302246, "learning_rate": 0.0002, "loss": 1.4952, "step": 165950 }, { "epoch": 0.68, "grad_norm": 3.5629026889801025, "learning_rate": 0.0002, "loss": 1.4901, "step": 165960 }, { "epoch": 0.68, "grad_norm": 3.429617166519165, "learning_rate": 0.0002, "loss": 1.635, "step": 165970 }, { "epoch": 0.68, "grad_norm": 10.194046020507812, "learning_rate": 0.0002, "loss": 1.5197, "step": 165980 }, { "epoch": 0.68, "grad_norm": 3.31669282913208, "learning_rate": 0.0002, "loss": 1.3178, "step": 165990 }, { "epoch": 0.68, "grad_norm": 3.008794069290161, "learning_rate": 0.0002, "loss": 1.7084, "step": 166000 }, { "epoch": 0.68, "grad_norm": 2.4255964756011963, "learning_rate": 0.0002, "loss": 1.6136, "step": 166010 }, { "epoch": 0.68, "grad_norm": 4.2979583740234375, "learning_rate": 0.0002, "loss": 1.7269, "step": 166020 }, { "epoch": 0.68, "grad_norm": 2.9536638259887695, "learning_rate": 0.0002, "loss": 1.6066, "step": 166030 }, { "epoch": 0.68, "grad_norm": 2.6240108013153076, "learning_rate": 0.0002, "loss": 1.3174, "step": 166040 }, { "epoch": 0.68, "grad_norm": 2.6472508907318115, "learning_rate": 0.0002, "loss": 1.5541, "step": 166050 }, { "epoch": 0.68, "grad_norm": 2.465038299560547, "learning_rate": 0.0002, "loss": 1.4174, "step": 166060 }, { "epoch": 0.68, "grad_norm": 3.0769619941711426, "learning_rate": 0.0002, "loss": 1.6214, "step": 166070 }, { "epoch": 0.68, "grad_norm": 1.7990258932113647, "learning_rate": 0.0002, "loss": 1.5037, "step": 166080 }, { "epoch": 0.68, "grad_norm": 3.8069381713867188, "learning_rate": 0.0002, "loss": 1.9146, "step": 166090 }, { "epoch": 0.68, "grad_norm": 1.9607388973236084, "learning_rate": 0.0002, "loss": 1.6836, "step": 166100 }, { "epoch": 0.68, "grad_norm": 3.6496198177337646, "learning_rate": 0.0002, "loss": 1.8588, "step": 166110 }, { "epoch": 0.68, "grad_norm": 2.961881399154663, "learning_rate": 0.0002, "loss": 1.5407, "step": 166120 }, { "epoch": 0.68, "grad_norm": 2.9614853858947754, "learning_rate": 0.0002, "loss": 1.7389, "step": 166130 }, { "epoch": 0.68, "grad_norm": 3.23294997215271, "learning_rate": 0.0002, "loss": 1.7675, "step": 166140 }, { "epoch": 0.68, "grad_norm": 3.510352373123169, "learning_rate": 0.0002, "loss": 1.5897, "step": 166150 }, { "epoch": 0.68, "grad_norm": 2.024439573287964, "learning_rate": 0.0002, "loss": 1.7356, "step": 166160 }, { "epoch": 0.68, "grad_norm": 2.7368810176849365, "learning_rate": 0.0002, "loss": 1.3665, "step": 166170 }, { "epoch": 0.68, "grad_norm": 3.4089808464050293, "learning_rate": 0.0002, "loss": 1.7801, "step": 166180 }, { "epoch": 0.68, "grad_norm": 2.4578659534454346, "learning_rate": 0.0002, "loss": 1.5464, "step": 166190 }, { "epoch": 0.68, "grad_norm": 3.1136279106140137, "learning_rate": 0.0002, "loss": 1.8085, "step": 166200 }, { "epoch": 0.68, "grad_norm": 3.3037359714508057, "learning_rate": 0.0002, "loss": 1.4031, "step": 166210 }, { "epoch": 0.68, "grad_norm": 2.5358827114105225, "learning_rate": 0.0002, "loss": 1.6666, "step": 166220 }, { "epoch": 0.68, "grad_norm": 2.629476547241211, "learning_rate": 0.0002, "loss": 1.7003, "step": 166230 }, { "epoch": 0.68, "grad_norm": 5.250633716583252, "learning_rate": 0.0002, "loss": 1.5449, "step": 166240 }, { "epoch": 0.68, "grad_norm": 2.228276014328003, "learning_rate": 0.0002, "loss": 1.2713, "step": 166250 }, { "epoch": 0.68, "grad_norm": 3.155714988708496, "learning_rate": 0.0002, "loss": 1.6969, "step": 166260 }, { "epoch": 0.68, "grad_norm": 2.7449851036071777, "learning_rate": 0.0002, "loss": 1.8908, "step": 166270 }, { "epoch": 0.68, "grad_norm": 5.938347816467285, "learning_rate": 0.0002, "loss": 1.9121, "step": 166280 }, { "epoch": 0.68, "grad_norm": 3.389111042022705, "learning_rate": 0.0002, "loss": 1.638, "step": 166290 }, { "epoch": 0.68, "grad_norm": 3.7294607162475586, "learning_rate": 0.0002, "loss": 1.4461, "step": 166300 }, { "epoch": 0.68, "grad_norm": 2.530064582824707, "learning_rate": 0.0002, "loss": 1.6878, "step": 166310 }, { "epoch": 0.68, "grad_norm": 3.0249600410461426, "learning_rate": 0.0002, "loss": 1.2011, "step": 166320 }, { "epoch": 0.68, "grad_norm": 3.925996780395508, "learning_rate": 0.0002, "loss": 1.4365, "step": 166330 }, { "epoch": 0.68, "grad_norm": 2.863518476486206, "learning_rate": 0.0002, "loss": 1.4855, "step": 166340 }, { "epoch": 0.68, "grad_norm": 3.131408452987671, "learning_rate": 0.0002, "loss": 1.7913, "step": 166350 }, { "epoch": 0.68, "grad_norm": 4.865170478820801, "learning_rate": 0.0002, "loss": 1.8361, "step": 166360 }, { "epoch": 0.68, "grad_norm": 4.146879196166992, "learning_rate": 0.0002, "loss": 1.6465, "step": 166370 }, { "epoch": 0.68, "grad_norm": 3.4434897899627686, "learning_rate": 0.0002, "loss": 1.6364, "step": 166380 }, { "epoch": 0.68, "grad_norm": 2.4621644020080566, "learning_rate": 0.0002, "loss": 1.7223, "step": 166390 }, { "epoch": 0.68, "grad_norm": 3.261357307434082, "learning_rate": 0.0002, "loss": 1.5695, "step": 166400 }, { "epoch": 0.68, "grad_norm": 3.407879114151001, "learning_rate": 0.0002, "loss": 1.5327, "step": 166410 }, { "epoch": 0.68, "grad_norm": 1.9794929027557373, "learning_rate": 0.0002, "loss": 1.7312, "step": 166420 }, { "epoch": 0.68, "grad_norm": 3.190948724746704, "learning_rate": 0.0002, "loss": 1.7098, "step": 166430 }, { "epoch": 0.68, "grad_norm": 2.9526307582855225, "learning_rate": 0.0002, "loss": 1.8346, "step": 166440 }, { "epoch": 0.68, "grad_norm": 2.699805736541748, "learning_rate": 0.0002, "loss": 1.5557, "step": 166450 }, { "epoch": 0.68, "grad_norm": 2.5860793590545654, "learning_rate": 0.0002, "loss": 1.5152, "step": 166460 }, { "epoch": 0.68, "grad_norm": 2.7213571071624756, "learning_rate": 0.0002, "loss": 1.9036, "step": 166470 }, { "epoch": 0.68, "grad_norm": 2.1825058460235596, "learning_rate": 0.0002, "loss": 1.4144, "step": 166480 }, { "epoch": 0.68, "grad_norm": 3.092174768447876, "learning_rate": 0.0002, "loss": 1.5658, "step": 166490 }, { "epoch": 0.68, "grad_norm": 4.131333827972412, "learning_rate": 0.0002, "loss": 1.525, "step": 166500 }, { "epoch": 0.68, "grad_norm": 3.0104384422302246, "learning_rate": 0.0002, "loss": 1.6009, "step": 166510 }, { "epoch": 0.68, "grad_norm": 3.1912875175476074, "learning_rate": 0.0002, "loss": 1.5909, "step": 166520 }, { "epoch": 0.68, "grad_norm": 4.75732421875, "learning_rate": 0.0002, "loss": 1.5816, "step": 166530 }, { "epoch": 0.68, "grad_norm": 2.598529100418091, "learning_rate": 0.0002, "loss": 1.6074, "step": 166540 }, { "epoch": 0.68, "grad_norm": 3.9551424980163574, "learning_rate": 0.0002, "loss": 1.4637, "step": 166550 }, { "epoch": 0.68, "grad_norm": 4.7344069480896, "learning_rate": 0.0002, "loss": 1.4767, "step": 166560 }, { "epoch": 0.68, "grad_norm": 2.7803454399108887, "learning_rate": 0.0002, "loss": 1.3425, "step": 166570 }, { "epoch": 0.68, "grad_norm": 2.4125564098358154, "learning_rate": 0.0002, "loss": 1.5689, "step": 166580 }, { "epoch": 0.68, "grad_norm": 2.421487331390381, "learning_rate": 0.0002, "loss": 1.4941, "step": 166590 }, { "epoch": 0.68, "grad_norm": 2.1469483375549316, "learning_rate": 0.0002, "loss": 1.3123, "step": 166600 }, { "epoch": 0.68, "grad_norm": 2.553145408630371, "learning_rate": 0.0002, "loss": 1.5878, "step": 166610 }, { "epoch": 0.68, "grad_norm": 3.3504254817962646, "learning_rate": 0.0002, "loss": 1.8, "step": 166620 }, { "epoch": 0.68, "grad_norm": 3.2419514656066895, "learning_rate": 0.0002, "loss": 1.7602, "step": 166630 }, { "epoch": 0.68, "grad_norm": 2.822709798812866, "learning_rate": 0.0002, "loss": 1.4081, "step": 166640 }, { "epoch": 0.68, "grad_norm": 2.856365442276001, "learning_rate": 0.0002, "loss": 1.7078, "step": 166650 }, { "epoch": 0.68, "grad_norm": 4.061096668243408, "learning_rate": 0.0002, "loss": 1.524, "step": 166660 }, { "epoch": 0.68, "grad_norm": 2.3773882389068604, "learning_rate": 0.0002, "loss": 1.536, "step": 166670 }, { "epoch": 0.68, "grad_norm": 3.296092987060547, "learning_rate": 0.0002, "loss": 1.474, "step": 166680 }, { "epoch": 0.68, "grad_norm": 1.5348453521728516, "learning_rate": 0.0002, "loss": 1.5693, "step": 166690 }, { "epoch": 0.68, "grad_norm": 1.9342492818832397, "learning_rate": 0.0002, "loss": 1.4971, "step": 166700 }, { "epoch": 0.68, "grad_norm": 3.4165942668914795, "learning_rate": 0.0002, "loss": 1.6941, "step": 166710 }, { "epoch": 0.68, "grad_norm": 2.663304090499878, "learning_rate": 0.0002, "loss": 1.8387, "step": 166720 }, { "epoch": 0.68, "grad_norm": 1.6556395292282104, "learning_rate": 0.0002, "loss": 1.6504, "step": 166730 }, { "epoch": 0.68, "grad_norm": 2.2121284008026123, "learning_rate": 0.0002, "loss": 1.5795, "step": 166740 }, { "epoch": 0.68, "grad_norm": 3.942087173461914, "learning_rate": 0.0002, "loss": 1.6422, "step": 166750 }, { "epoch": 0.68, "grad_norm": 3.7092137336730957, "learning_rate": 0.0002, "loss": 1.5629, "step": 166760 }, { "epoch": 0.68, "grad_norm": 2.916489362716675, "learning_rate": 0.0002, "loss": 1.374, "step": 166770 }, { "epoch": 0.68, "grad_norm": 3.925781726837158, "learning_rate": 0.0002, "loss": 1.6544, "step": 166780 }, { "epoch": 0.68, "grad_norm": 2.15047287940979, "learning_rate": 0.0002, "loss": 1.7071, "step": 166790 }, { "epoch": 0.68, "grad_norm": 3.39642071723938, "learning_rate": 0.0002, "loss": 1.5772, "step": 166800 }, { "epoch": 0.68, "grad_norm": 1.393959641456604, "learning_rate": 0.0002, "loss": 1.8632, "step": 166810 }, { "epoch": 0.68, "grad_norm": 6.618443489074707, "learning_rate": 0.0002, "loss": 1.6959, "step": 166820 }, { "epoch": 0.68, "grad_norm": 2.639251947402954, "learning_rate": 0.0002, "loss": 1.5844, "step": 166830 }, { "epoch": 0.68, "grad_norm": 4.357324123382568, "learning_rate": 0.0002, "loss": 1.6335, "step": 166840 }, { "epoch": 0.68, "grad_norm": 1.9588541984558105, "learning_rate": 0.0002, "loss": 1.544, "step": 166850 }, { "epoch": 0.68, "grad_norm": 6.334938049316406, "learning_rate": 0.0002, "loss": 1.6727, "step": 166860 }, { "epoch": 0.68, "grad_norm": 2.510880708694458, "learning_rate": 0.0002, "loss": 1.8027, "step": 166870 }, { "epoch": 0.68, "grad_norm": 3.3577628135681152, "learning_rate": 0.0002, "loss": 1.6177, "step": 166880 }, { "epoch": 0.68, "grad_norm": 2.862171173095703, "learning_rate": 0.0002, "loss": 1.4881, "step": 166890 }, { "epoch": 0.68, "grad_norm": 2.9600982666015625, "learning_rate": 0.0002, "loss": 1.992, "step": 166900 }, { "epoch": 0.68, "grad_norm": 3.103634834289551, "learning_rate": 0.0002, "loss": 1.5902, "step": 166910 }, { "epoch": 0.68, "grad_norm": 2.7392494678497314, "learning_rate": 0.0002, "loss": 1.7019, "step": 166920 }, { "epoch": 0.68, "grad_norm": 2.796637535095215, "learning_rate": 0.0002, "loss": 1.8114, "step": 166930 }, { "epoch": 0.68, "grad_norm": 3.5950229167938232, "learning_rate": 0.0002, "loss": 1.6551, "step": 166940 }, { "epoch": 0.68, "grad_norm": 3.153045177459717, "learning_rate": 0.0002, "loss": 1.5135, "step": 166950 }, { "epoch": 0.68, "grad_norm": 3.1643130779266357, "learning_rate": 0.0002, "loss": 1.9088, "step": 166960 }, { "epoch": 0.68, "grad_norm": 3.737532615661621, "learning_rate": 0.0002, "loss": 1.4456, "step": 166970 }, { "epoch": 0.68, "grad_norm": 2.737680196762085, "learning_rate": 0.0002, "loss": 1.5231, "step": 166980 }, { "epoch": 0.68, "grad_norm": 2.326249599456787, "learning_rate": 0.0002, "loss": 1.5246, "step": 166990 }, { "epoch": 0.68, "grad_norm": 2.7627944946289062, "learning_rate": 0.0002, "loss": 1.7025, "step": 167000 }, { "epoch": 0.68, "grad_norm": 1.7869800329208374, "learning_rate": 0.0002, "loss": 1.4389, "step": 167010 }, { "epoch": 0.68, "grad_norm": 3.5036795139312744, "learning_rate": 0.0002, "loss": 1.7126, "step": 167020 }, { "epoch": 0.68, "grad_norm": 2.9777064323425293, "learning_rate": 0.0002, "loss": 1.3547, "step": 167030 }, { "epoch": 0.68, "grad_norm": 1.5996460914611816, "learning_rate": 0.0002, "loss": 1.6699, "step": 167040 }, { "epoch": 0.68, "grad_norm": 4.727224826812744, "learning_rate": 0.0002, "loss": 1.5243, "step": 167050 }, { "epoch": 0.68, "grad_norm": 2.097031593322754, "learning_rate": 0.0002, "loss": 1.5605, "step": 167060 }, { "epoch": 0.68, "grad_norm": 2.225911855697632, "learning_rate": 0.0002, "loss": 1.4169, "step": 167070 }, { "epoch": 0.68, "grad_norm": 3.196988344192505, "learning_rate": 0.0002, "loss": 1.4448, "step": 167080 }, { "epoch": 0.68, "grad_norm": 3.8725247383117676, "learning_rate": 0.0002, "loss": 1.4047, "step": 167090 }, { "epoch": 0.68, "grad_norm": 3.5175650119781494, "learning_rate": 0.0002, "loss": 1.472, "step": 167100 }, { "epoch": 0.68, "grad_norm": 5.409758567810059, "learning_rate": 0.0002, "loss": 1.6331, "step": 167110 }, { "epoch": 0.68, "grad_norm": 3.8150389194488525, "learning_rate": 0.0002, "loss": 1.4107, "step": 167120 }, { "epoch": 0.68, "grad_norm": 3.5180749893188477, "learning_rate": 0.0002, "loss": 1.4691, "step": 167130 }, { "epoch": 0.68, "grad_norm": 1.8615542650222778, "learning_rate": 0.0002, "loss": 1.8275, "step": 167140 }, { "epoch": 0.68, "grad_norm": 3.570030927658081, "learning_rate": 0.0002, "loss": 1.5892, "step": 167150 }, { "epoch": 0.68, "grad_norm": 2.736778736114502, "learning_rate": 0.0002, "loss": 1.6257, "step": 167160 }, { "epoch": 0.68, "grad_norm": 3.6884377002716064, "learning_rate": 0.0002, "loss": 1.3623, "step": 167170 }, { "epoch": 0.68, "grad_norm": 4.200292587280273, "learning_rate": 0.0002, "loss": 1.8605, "step": 167180 }, { "epoch": 0.68, "grad_norm": 3.3143117427825928, "learning_rate": 0.0002, "loss": 1.3351, "step": 167190 }, { "epoch": 0.68, "grad_norm": 2.7951574325561523, "learning_rate": 0.0002, "loss": 1.4472, "step": 167200 }, { "epoch": 0.68, "grad_norm": 2.7124722003936768, "learning_rate": 0.0002, "loss": 1.7653, "step": 167210 }, { "epoch": 0.68, "grad_norm": 2.477187156677246, "learning_rate": 0.0002, "loss": 1.6139, "step": 167220 }, { "epoch": 0.68, "grad_norm": 2.203295946121216, "learning_rate": 0.0002, "loss": 1.5825, "step": 167230 }, { "epoch": 0.68, "grad_norm": 2.6347360610961914, "learning_rate": 0.0002, "loss": 1.6062, "step": 167240 }, { "epoch": 0.68, "grad_norm": 3.0976998805999756, "learning_rate": 0.0002, "loss": 1.4972, "step": 167250 }, { "epoch": 0.68, "grad_norm": 2.8824198246002197, "learning_rate": 0.0002, "loss": 1.8377, "step": 167260 }, { "epoch": 0.68, "grad_norm": 3.050633430480957, "learning_rate": 0.0002, "loss": 1.3864, "step": 167270 }, { "epoch": 0.68, "grad_norm": 2.645897388458252, "learning_rate": 0.0002, "loss": 1.6925, "step": 167280 }, { "epoch": 0.68, "grad_norm": 1.9556450843811035, "learning_rate": 0.0002, "loss": 1.4811, "step": 167290 }, { "epoch": 0.68, "grad_norm": 2.869597911834717, "learning_rate": 0.0002, "loss": 1.6613, "step": 167300 }, { "epoch": 0.68, "grad_norm": 2.5627827644348145, "learning_rate": 0.0002, "loss": 1.6942, "step": 167310 }, { "epoch": 0.68, "grad_norm": 3.3502986431121826, "learning_rate": 0.0002, "loss": 1.7786, "step": 167320 }, { "epoch": 0.68, "grad_norm": 2.275763511657715, "learning_rate": 0.0002, "loss": 1.6195, "step": 167330 }, { "epoch": 0.68, "grad_norm": 3.333251714706421, "learning_rate": 0.0002, "loss": 1.4748, "step": 167340 }, { "epoch": 0.68, "grad_norm": 3.0611283779144287, "learning_rate": 0.0002, "loss": 1.771, "step": 167350 }, { "epoch": 0.68, "grad_norm": 3.467893362045288, "learning_rate": 0.0002, "loss": 1.6434, "step": 167360 }, { "epoch": 0.68, "grad_norm": 2.3132164478302, "learning_rate": 0.0002, "loss": 1.7958, "step": 167370 }, { "epoch": 0.68, "grad_norm": 4.871222496032715, "learning_rate": 0.0002, "loss": 1.5283, "step": 167380 }, { "epoch": 0.68, "grad_norm": 2.7857186794281006, "learning_rate": 0.0002, "loss": 1.5802, "step": 167390 }, { "epoch": 0.68, "grad_norm": 2.3697402477264404, "learning_rate": 0.0002, "loss": 1.5656, "step": 167400 }, { "epoch": 0.68, "grad_norm": 2.8133726119995117, "learning_rate": 0.0002, "loss": 1.4715, "step": 167410 }, { "epoch": 0.68, "grad_norm": 3.978304624557495, "learning_rate": 0.0002, "loss": 1.6445, "step": 167420 }, { "epoch": 0.68, "grad_norm": 3.2314271926879883, "learning_rate": 0.0002, "loss": 1.7784, "step": 167430 }, { "epoch": 0.68, "grad_norm": 6.944593906402588, "learning_rate": 0.0002, "loss": 1.5207, "step": 167440 }, { "epoch": 0.68, "grad_norm": 1.6062463521957397, "learning_rate": 0.0002, "loss": 1.6282, "step": 167450 }, { "epoch": 0.68, "grad_norm": 2.24826979637146, "learning_rate": 0.0002, "loss": 1.7754, "step": 167460 }, { "epoch": 0.68, "grad_norm": 1.6743320226669312, "learning_rate": 0.0002, "loss": 1.6951, "step": 167470 }, { "epoch": 0.68, "grad_norm": 3.2588798999786377, "learning_rate": 0.0002, "loss": 1.6472, "step": 167480 }, { "epoch": 0.68, "grad_norm": 2.9107205867767334, "learning_rate": 0.0002, "loss": 1.5456, "step": 167490 }, { "epoch": 0.68, "grad_norm": 2.394970417022705, "learning_rate": 0.0002, "loss": 1.6456, "step": 167500 }, { "epoch": 0.68, "grad_norm": 2.5005943775177, "learning_rate": 0.0002, "loss": 1.6431, "step": 167510 }, { "epoch": 0.68, "grad_norm": 3.0531160831451416, "learning_rate": 0.0002, "loss": 1.3052, "step": 167520 }, { "epoch": 0.68, "grad_norm": 2.055713176727295, "learning_rate": 0.0002, "loss": 1.5876, "step": 167530 }, { "epoch": 0.68, "grad_norm": 5.625297546386719, "learning_rate": 0.0002, "loss": 1.8255, "step": 167540 }, { "epoch": 0.68, "grad_norm": 2.7612104415893555, "learning_rate": 0.0002, "loss": 1.3427, "step": 167550 }, { "epoch": 0.68, "grad_norm": 2.5342330932617188, "learning_rate": 0.0002, "loss": 1.3796, "step": 167560 }, { "epoch": 0.68, "grad_norm": 4.195702075958252, "learning_rate": 0.0002, "loss": 1.7262, "step": 167570 }, { "epoch": 0.68, "grad_norm": 6.005864143371582, "learning_rate": 0.0002, "loss": 1.6636, "step": 167580 }, { "epoch": 0.68, "grad_norm": 2.0079243183135986, "learning_rate": 0.0002, "loss": 1.5161, "step": 167590 }, { "epoch": 0.68, "grad_norm": 2.347827434539795, "learning_rate": 0.0002, "loss": 1.7752, "step": 167600 }, { "epoch": 0.68, "grad_norm": 2.6982786655426025, "learning_rate": 0.0002, "loss": 1.6743, "step": 167610 }, { "epoch": 0.68, "grad_norm": 2.953775405883789, "learning_rate": 0.0002, "loss": 1.7272, "step": 167620 }, { "epoch": 0.68, "grad_norm": 3.7829575538635254, "learning_rate": 0.0002, "loss": 1.7342, "step": 167630 }, { "epoch": 0.68, "grad_norm": 2.541173219680786, "learning_rate": 0.0002, "loss": 1.3728, "step": 167640 }, { "epoch": 0.68, "grad_norm": 3.010526418685913, "learning_rate": 0.0002, "loss": 1.7233, "step": 167650 }, { "epoch": 0.68, "grad_norm": 3.0684115886688232, "learning_rate": 0.0002, "loss": 1.7082, "step": 167660 }, { "epoch": 0.68, "grad_norm": 2.529209852218628, "learning_rate": 0.0002, "loss": 1.5115, "step": 167670 }, { "epoch": 0.68, "grad_norm": 2.5414679050445557, "learning_rate": 0.0002, "loss": 1.5345, "step": 167680 }, { "epoch": 0.68, "grad_norm": 7.452814102172852, "learning_rate": 0.0002, "loss": 1.68, "step": 167690 }, { "epoch": 0.68, "grad_norm": 2.371164560317993, "learning_rate": 0.0002, "loss": 1.6203, "step": 167700 }, { "epoch": 0.68, "grad_norm": 2.190263509750366, "learning_rate": 0.0002, "loss": 1.4088, "step": 167710 }, { "epoch": 0.68, "grad_norm": 5.294126033782959, "learning_rate": 0.0002, "loss": 1.6079, "step": 167720 }, { "epoch": 0.68, "grad_norm": 3.719963312149048, "learning_rate": 0.0002, "loss": 1.8173, "step": 167730 }, { "epoch": 0.68, "grad_norm": 2.409308671951294, "learning_rate": 0.0002, "loss": 1.7625, "step": 167740 }, { "epoch": 0.68, "grad_norm": 3.0943732261657715, "learning_rate": 0.0002, "loss": 1.6523, "step": 167750 }, { "epoch": 0.68, "grad_norm": 2.7427268028259277, "learning_rate": 0.0002, "loss": 1.6793, "step": 167760 }, { "epoch": 0.68, "grad_norm": 2.4915995597839355, "learning_rate": 0.0002, "loss": 1.6674, "step": 167770 }, { "epoch": 0.68, "grad_norm": 3.642749547958374, "learning_rate": 0.0002, "loss": 1.6706, "step": 167780 }, { "epoch": 0.68, "grad_norm": 2.4793543815612793, "learning_rate": 0.0002, "loss": 1.4109, "step": 167790 }, { "epoch": 0.68, "grad_norm": 4.206832408905029, "learning_rate": 0.0002, "loss": 1.7169, "step": 167800 }, { "epoch": 0.68, "grad_norm": 2.821732759475708, "learning_rate": 0.0002, "loss": 1.5469, "step": 167810 }, { "epoch": 0.68, "grad_norm": 5.434319019317627, "learning_rate": 0.0002, "loss": 1.5063, "step": 167820 }, { "epoch": 0.68, "grad_norm": 2.241321563720703, "learning_rate": 0.0002, "loss": 1.4462, "step": 167830 }, { "epoch": 0.68, "grad_norm": 1.8327786922454834, "learning_rate": 0.0002, "loss": 1.5331, "step": 167840 }, { "epoch": 0.68, "grad_norm": 6.890905857086182, "learning_rate": 0.0002, "loss": 1.5907, "step": 167850 }, { "epoch": 0.68, "grad_norm": 3.637774705886841, "learning_rate": 0.0002, "loss": 1.7045, "step": 167860 }, { "epoch": 0.68, "grad_norm": 1.8253164291381836, "learning_rate": 0.0002, "loss": 1.6429, "step": 167870 }, { "epoch": 0.68, "grad_norm": 2.995123863220215, "learning_rate": 0.0002, "loss": 1.8044, "step": 167880 }, { "epoch": 0.68, "grad_norm": 3.6188745498657227, "learning_rate": 0.0002, "loss": 1.5619, "step": 167890 }, { "epoch": 0.68, "grad_norm": 3.9083008766174316, "learning_rate": 0.0002, "loss": 1.4919, "step": 167900 }, { "epoch": 0.68, "grad_norm": 4.948090076446533, "learning_rate": 0.0002, "loss": 1.6693, "step": 167910 }, { "epoch": 0.68, "grad_norm": 4.167623996734619, "learning_rate": 0.0002, "loss": 1.7103, "step": 167920 }, { "epoch": 0.68, "grad_norm": 1.151566505432129, "learning_rate": 0.0002, "loss": 1.6259, "step": 167930 }, { "epoch": 0.68, "grad_norm": 2.3258066177368164, "learning_rate": 0.0002, "loss": 1.7231, "step": 167940 }, { "epoch": 0.68, "grad_norm": 2.99316143989563, "learning_rate": 0.0002, "loss": 1.3936, "step": 167950 }, { "epoch": 0.68, "grad_norm": 2.50327730178833, "learning_rate": 0.0002, "loss": 1.7606, "step": 167960 }, { "epoch": 0.68, "grad_norm": 3.0142531394958496, "learning_rate": 0.0002, "loss": 1.4861, "step": 167970 }, { "epoch": 0.68, "grad_norm": 2.0239884853363037, "learning_rate": 0.0002, "loss": 1.3968, "step": 167980 }, { "epoch": 0.68, "grad_norm": 4.207311630249023, "learning_rate": 0.0002, "loss": 1.5412, "step": 167990 }, { "epoch": 0.68, "grad_norm": 3.624851942062378, "learning_rate": 0.0002, "loss": 1.5261, "step": 168000 }, { "epoch": 0.68, "grad_norm": 2.756228446960449, "learning_rate": 0.0002, "loss": 1.5403, "step": 168010 }, { "epoch": 0.68, "grad_norm": 2.1196377277374268, "learning_rate": 0.0002, "loss": 1.3592, "step": 168020 }, { "epoch": 0.68, "grad_norm": 2.838832378387451, "learning_rate": 0.0002, "loss": 1.6335, "step": 168030 }, { "epoch": 0.68, "grad_norm": 2.0203797817230225, "learning_rate": 0.0002, "loss": 1.5143, "step": 168040 }, { "epoch": 0.68, "grad_norm": 3.2401773929595947, "learning_rate": 0.0002, "loss": 1.8202, "step": 168050 }, { "epoch": 0.68, "grad_norm": 2.114551067352295, "learning_rate": 0.0002, "loss": 1.7747, "step": 168060 }, { "epoch": 0.68, "grad_norm": 2.8362693786621094, "learning_rate": 0.0002, "loss": 1.5276, "step": 168070 }, { "epoch": 0.68, "grad_norm": 4.675114631652832, "learning_rate": 0.0002, "loss": 1.4971, "step": 168080 }, { "epoch": 0.68, "grad_norm": 2.297612190246582, "learning_rate": 0.0002, "loss": 1.7356, "step": 168090 }, { "epoch": 0.68, "grad_norm": 2.5175399780273438, "learning_rate": 0.0002, "loss": 1.6273, "step": 168100 }, { "epoch": 0.68, "grad_norm": 5.830643177032471, "learning_rate": 0.0002, "loss": 1.5742, "step": 168110 }, { "epoch": 0.68, "grad_norm": 2.6935155391693115, "learning_rate": 0.0002, "loss": 1.8234, "step": 168120 }, { "epoch": 0.68, "grad_norm": 3.1687533855438232, "learning_rate": 0.0002, "loss": 1.4995, "step": 168130 }, { "epoch": 0.68, "grad_norm": 2.92995285987854, "learning_rate": 0.0002, "loss": 1.8307, "step": 168140 }, { "epoch": 0.68, "grad_norm": 3.569578170776367, "learning_rate": 0.0002, "loss": 1.476, "step": 168150 }, { "epoch": 0.68, "grad_norm": 3.321682929992676, "learning_rate": 0.0002, "loss": 1.6237, "step": 168160 }, { "epoch": 0.68, "grad_norm": 3.511754035949707, "learning_rate": 0.0002, "loss": 1.4572, "step": 168170 }, { "epoch": 0.68, "grad_norm": 4.35184383392334, "learning_rate": 0.0002, "loss": 1.4892, "step": 168180 }, { "epoch": 0.68, "grad_norm": 4.337796211242676, "learning_rate": 0.0002, "loss": 1.6254, "step": 168190 }, { "epoch": 0.68, "grad_norm": 2.6852738857269287, "learning_rate": 0.0002, "loss": 1.4646, "step": 168200 }, { "epoch": 0.68, "grad_norm": 1.947412371635437, "learning_rate": 0.0002, "loss": 1.665, "step": 168210 }, { "epoch": 0.68, "grad_norm": 3.2835395336151123, "learning_rate": 0.0002, "loss": 1.7899, "step": 168220 }, { "epoch": 0.68, "grad_norm": 2.141939640045166, "learning_rate": 0.0002, "loss": 1.561, "step": 168230 }, { "epoch": 0.68, "grad_norm": 2.840747356414795, "learning_rate": 0.0002, "loss": 1.5083, "step": 168240 }, { "epoch": 0.68, "grad_norm": 2.9017913341522217, "learning_rate": 0.0002, "loss": 1.6605, "step": 168250 }, { "epoch": 0.68, "grad_norm": 9.220938682556152, "learning_rate": 0.0002, "loss": 1.4946, "step": 168260 }, { "epoch": 0.69, "grad_norm": 1.9591721296310425, "learning_rate": 0.0002, "loss": 1.6314, "step": 168270 }, { "epoch": 0.69, "grad_norm": 4.0718512535095215, "learning_rate": 0.0002, "loss": 1.6871, "step": 168280 }, { "epoch": 0.69, "grad_norm": 4.364752292633057, "learning_rate": 0.0002, "loss": 1.69, "step": 168290 }, { "epoch": 0.69, "grad_norm": 3.3762032985687256, "learning_rate": 0.0002, "loss": 1.3478, "step": 168300 }, { "epoch": 0.69, "grad_norm": 2.843116521835327, "learning_rate": 0.0002, "loss": 1.7644, "step": 168310 }, { "epoch": 0.69, "grad_norm": 4.362547874450684, "learning_rate": 0.0002, "loss": 1.5019, "step": 168320 }, { "epoch": 0.69, "grad_norm": 3.402597665786743, "learning_rate": 0.0002, "loss": 1.5774, "step": 168330 }, { "epoch": 0.69, "grad_norm": 1.9700065851211548, "learning_rate": 0.0002, "loss": 1.6881, "step": 168340 }, { "epoch": 0.69, "grad_norm": 4.172196865081787, "learning_rate": 0.0002, "loss": 1.6953, "step": 168350 }, { "epoch": 0.69, "grad_norm": 2.152170181274414, "learning_rate": 0.0002, "loss": 1.618, "step": 168360 }, { "epoch": 0.69, "grad_norm": 4.2363786697387695, "learning_rate": 0.0002, "loss": 1.3545, "step": 168370 }, { "epoch": 0.69, "grad_norm": 3.194319009780884, "learning_rate": 0.0002, "loss": 1.6278, "step": 168380 }, { "epoch": 0.69, "grad_norm": 3.652212619781494, "learning_rate": 0.0002, "loss": 1.5016, "step": 168390 }, { "epoch": 0.69, "grad_norm": 3.49796986579895, "learning_rate": 0.0002, "loss": 1.4283, "step": 168400 }, { "epoch": 0.69, "grad_norm": 3.0717406272888184, "learning_rate": 0.0002, "loss": 1.5128, "step": 168410 }, { "epoch": 0.69, "grad_norm": 2.5040009021759033, "learning_rate": 0.0002, "loss": 1.5374, "step": 168420 }, { "epoch": 0.69, "grad_norm": 2.5006985664367676, "learning_rate": 0.0002, "loss": 1.6705, "step": 168430 }, { "epoch": 0.69, "grad_norm": 2.962254285812378, "learning_rate": 0.0002, "loss": 1.789, "step": 168440 }, { "epoch": 0.69, "grad_norm": 4.016177177429199, "learning_rate": 0.0002, "loss": 1.5224, "step": 168450 }, { "epoch": 0.69, "grad_norm": 3.4046621322631836, "learning_rate": 0.0002, "loss": 1.5341, "step": 168460 }, { "epoch": 0.69, "grad_norm": 2.1107981204986572, "learning_rate": 0.0002, "loss": 1.6951, "step": 168470 }, { "epoch": 0.69, "grad_norm": 5.278545379638672, "learning_rate": 0.0002, "loss": 1.7186, "step": 168480 }, { "epoch": 0.69, "grad_norm": 1.84572434425354, "learning_rate": 0.0002, "loss": 1.7044, "step": 168490 }, { "epoch": 0.69, "grad_norm": 3.4369125366210938, "learning_rate": 0.0002, "loss": 1.6759, "step": 168500 }, { "epoch": 0.69, "grad_norm": 3.3526411056518555, "learning_rate": 0.0002, "loss": 1.3348, "step": 168510 }, { "epoch": 0.69, "grad_norm": 2.3334009647369385, "learning_rate": 0.0002, "loss": 1.5543, "step": 168520 }, { "epoch": 0.69, "grad_norm": 3.0685510635375977, "learning_rate": 0.0002, "loss": 1.7049, "step": 168530 }, { "epoch": 0.69, "grad_norm": 4.742043972015381, "learning_rate": 0.0002, "loss": 1.5725, "step": 168540 }, { "epoch": 0.69, "grad_norm": 3.1755402088165283, "learning_rate": 0.0002, "loss": 1.344, "step": 168550 }, { "epoch": 0.69, "grad_norm": 4.603555679321289, "learning_rate": 0.0002, "loss": 1.5717, "step": 168560 }, { "epoch": 0.69, "grad_norm": 2.1877853870391846, "learning_rate": 0.0002, "loss": 1.5741, "step": 168570 }, { "epoch": 0.69, "grad_norm": 3.254749059677124, "learning_rate": 0.0002, "loss": 1.7603, "step": 168580 }, { "epoch": 0.69, "grad_norm": 1.8229204416275024, "learning_rate": 0.0002, "loss": 1.4909, "step": 168590 }, { "epoch": 0.69, "grad_norm": 2.5544886589050293, "learning_rate": 0.0002, "loss": 1.4568, "step": 168600 }, { "epoch": 0.69, "grad_norm": 3.7293314933776855, "learning_rate": 0.0002, "loss": 1.5233, "step": 168610 }, { "epoch": 0.69, "grad_norm": 3.108595609664917, "learning_rate": 0.0002, "loss": 1.6563, "step": 168620 }, { "epoch": 0.69, "grad_norm": 2.6527533531188965, "learning_rate": 0.0002, "loss": 1.5923, "step": 168630 }, { "epoch": 0.69, "grad_norm": 2.9892828464508057, "learning_rate": 0.0002, "loss": 1.5765, "step": 168640 }, { "epoch": 0.69, "grad_norm": 2.7790772914886475, "learning_rate": 0.0002, "loss": 1.7372, "step": 168650 }, { "epoch": 0.69, "grad_norm": 3.1142148971557617, "learning_rate": 0.0002, "loss": 1.9994, "step": 168660 }, { "epoch": 0.69, "grad_norm": 2.4063093662261963, "learning_rate": 0.0002, "loss": 1.5663, "step": 168670 }, { "epoch": 0.69, "grad_norm": 2.640291452407837, "learning_rate": 0.0002, "loss": 1.4717, "step": 168680 }, { "epoch": 0.69, "grad_norm": 3.3479554653167725, "learning_rate": 0.0002, "loss": 1.6057, "step": 168690 }, { "epoch": 0.69, "grad_norm": 2.793802499771118, "learning_rate": 0.0002, "loss": 1.4791, "step": 168700 }, { "epoch": 0.69, "grad_norm": 2.749962329864502, "learning_rate": 0.0002, "loss": 1.67, "step": 168710 }, { "epoch": 0.69, "grad_norm": 2.3663132190704346, "learning_rate": 0.0002, "loss": 1.4964, "step": 168720 }, { "epoch": 0.69, "grad_norm": 3.7566661834716797, "learning_rate": 0.0002, "loss": 1.5506, "step": 168730 }, { "epoch": 0.69, "grad_norm": 4.171689510345459, "learning_rate": 0.0002, "loss": 1.6125, "step": 168740 }, { "epoch": 0.69, "grad_norm": 7.921569347381592, "learning_rate": 0.0002, "loss": 1.7313, "step": 168750 }, { "epoch": 0.69, "grad_norm": 1.307134747505188, "learning_rate": 0.0002, "loss": 1.4144, "step": 168760 }, { "epoch": 0.69, "grad_norm": 2.6142120361328125, "learning_rate": 0.0002, "loss": 1.5151, "step": 168770 }, { "epoch": 0.69, "grad_norm": 7.122247695922852, "learning_rate": 0.0002, "loss": 1.4425, "step": 168780 }, { "epoch": 0.69, "grad_norm": 2.921114683151245, "learning_rate": 0.0002, "loss": 1.6551, "step": 168790 }, { "epoch": 0.69, "grad_norm": 2.431142568588257, "learning_rate": 0.0002, "loss": 1.5159, "step": 168800 }, { "epoch": 0.69, "grad_norm": 2.62093448638916, "learning_rate": 0.0002, "loss": 1.5508, "step": 168810 }, { "epoch": 0.69, "grad_norm": 4.09503173828125, "learning_rate": 0.0002, "loss": 1.5022, "step": 168820 }, { "epoch": 0.69, "grad_norm": 2.4935638904571533, "learning_rate": 0.0002, "loss": 1.5457, "step": 168830 }, { "epoch": 0.69, "grad_norm": 3.4721128940582275, "learning_rate": 0.0002, "loss": 1.5842, "step": 168840 }, { "epoch": 0.69, "grad_norm": 3.6439218521118164, "learning_rate": 0.0002, "loss": 1.5035, "step": 168850 }, { "epoch": 0.69, "grad_norm": 3.8236799240112305, "learning_rate": 0.0002, "loss": 1.6962, "step": 168860 }, { "epoch": 0.69, "grad_norm": 3.161172389984131, "learning_rate": 0.0002, "loss": 1.6443, "step": 168870 }, { "epoch": 0.69, "grad_norm": 12.27932071685791, "learning_rate": 0.0002, "loss": 1.3012, "step": 168880 }, { "epoch": 0.69, "grad_norm": 3.8243894577026367, "learning_rate": 0.0002, "loss": 1.5767, "step": 168890 }, { "epoch": 0.69, "grad_norm": 1.6737624406814575, "learning_rate": 0.0002, "loss": 1.6133, "step": 168900 }, { "epoch": 0.69, "grad_norm": 2.8798279762268066, "learning_rate": 0.0002, "loss": 1.5452, "step": 168910 }, { "epoch": 0.69, "grad_norm": 2.0989627838134766, "learning_rate": 0.0002, "loss": 1.7041, "step": 168920 }, { "epoch": 0.69, "grad_norm": 2.603851556777954, "learning_rate": 0.0002, "loss": 1.6068, "step": 168930 }, { "epoch": 0.69, "grad_norm": 2.3326539993286133, "learning_rate": 0.0002, "loss": 1.3296, "step": 168940 }, { "epoch": 0.69, "grad_norm": 5.323826789855957, "learning_rate": 0.0002, "loss": 1.3523, "step": 168950 }, { "epoch": 0.69, "grad_norm": 5.64208459854126, "learning_rate": 0.0002, "loss": 1.6951, "step": 168960 }, { "epoch": 0.69, "grad_norm": 1.9581063985824585, "learning_rate": 0.0002, "loss": 1.6406, "step": 168970 }, { "epoch": 0.69, "grad_norm": 2.9269049167633057, "learning_rate": 0.0002, "loss": 1.4817, "step": 168980 }, { "epoch": 0.69, "grad_norm": 4.410707473754883, "learning_rate": 0.0002, "loss": 1.5688, "step": 168990 }, { "epoch": 0.69, "grad_norm": 3.3551132678985596, "learning_rate": 0.0002, "loss": 1.6069, "step": 169000 }, { "epoch": 0.69, "grad_norm": 2.1477267742156982, "learning_rate": 0.0002, "loss": 1.599, "step": 169010 }, { "epoch": 0.69, "grad_norm": 3.4253573417663574, "learning_rate": 0.0002, "loss": 1.4438, "step": 169020 }, { "epoch": 0.69, "grad_norm": 3.6051197052001953, "learning_rate": 0.0002, "loss": 1.4218, "step": 169030 }, { "epoch": 0.69, "grad_norm": 3.0321896076202393, "learning_rate": 0.0002, "loss": 1.2965, "step": 169040 }, { "epoch": 0.69, "grad_norm": 2.1960690021514893, "learning_rate": 0.0002, "loss": 1.5859, "step": 169050 }, { "epoch": 0.69, "grad_norm": 1.9338569641113281, "learning_rate": 0.0002, "loss": 1.4587, "step": 169060 }, { "epoch": 0.69, "grad_norm": 4.546993255615234, "learning_rate": 0.0002, "loss": 1.5483, "step": 169070 }, { "epoch": 0.69, "grad_norm": 2.2698326110839844, "learning_rate": 0.0002, "loss": 1.571, "step": 169080 }, { "epoch": 0.69, "grad_norm": 7.3747687339782715, "learning_rate": 0.0002, "loss": 1.7112, "step": 169090 }, { "epoch": 0.69, "grad_norm": 2.7749249935150146, "learning_rate": 0.0002, "loss": 1.2612, "step": 169100 }, { "epoch": 0.69, "grad_norm": 2.99015212059021, "learning_rate": 0.0002, "loss": 1.7063, "step": 169110 }, { "epoch": 0.69, "grad_norm": 2.8967907428741455, "learning_rate": 0.0002, "loss": 1.6124, "step": 169120 }, { "epoch": 0.69, "grad_norm": 2.717888593673706, "learning_rate": 0.0002, "loss": 1.5139, "step": 169130 }, { "epoch": 0.69, "grad_norm": 4.976190567016602, "learning_rate": 0.0002, "loss": 1.7613, "step": 169140 }, { "epoch": 0.69, "grad_norm": 4.556543350219727, "learning_rate": 0.0002, "loss": 1.4766, "step": 169150 }, { "epoch": 0.69, "grad_norm": 3.644571304321289, "learning_rate": 0.0002, "loss": 1.5771, "step": 169160 }, { "epoch": 0.69, "grad_norm": 3.9064042568206787, "learning_rate": 0.0002, "loss": 1.5395, "step": 169170 }, { "epoch": 0.69, "grad_norm": 2.791428327560425, "learning_rate": 0.0002, "loss": 1.4615, "step": 169180 }, { "epoch": 0.69, "grad_norm": 1.9994834661483765, "learning_rate": 0.0002, "loss": 1.5313, "step": 169190 }, { "epoch": 0.69, "grad_norm": 4.427884101867676, "learning_rate": 0.0002, "loss": 1.738, "step": 169200 }, { "epoch": 0.69, "grad_norm": 5.577940464019775, "learning_rate": 0.0002, "loss": 1.5386, "step": 169210 }, { "epoch": 0.69, "grad_norm": 1.8063132762908936, "learning_rate": 0.0002, "loss": 1.6508, "step": 169220 }, { "epoch": 0.69, "grad_norm": 5.858205318450928, "learning_rate": 0.0002, "loss": 1.7093, "step": 169230 }, { "epoch": 0.69, "grad_norm": 4.347449779510498, "learning_rate": 0.0002, "loss": 1.7461, "step": 169240 }, { "epoch": 0.69, "grad_norm": 5.223968505859375, "learning_rate": 0.0002, "loss": 1.6491, "step": 169250 }, { "epoch": 0.69, "grad_norm": 4.347543239593506, "learning_rate": 0.0002, "loss": 1.6078, "step": 169260 }, { "epoch": 0.69, "grad_norm": 3.934845447540283, "learning_rate": 0.0002, "loss": 1.6431, "step": 169270 }, { "epoch": 0.69, "grad_norm": 3.2990305423736572, "learning_rate": 0.0002, "loss": 1.8018, "step": 169280 }, { "epoch": 0.69, "grad_norm": 2.2783873081207275, "learning_rate": 0.0002, "loss": 1.761, "step": 169290 }, { "epoch": 0.69, "grad_norm": 1.982016921043396, "learning_rate": 0.0002, "loss": 1.6747, "step": 169300 }, { "epoch": 0.69, "grad_norm": 3.3293416500091553, "learning_rate": 0.0002, "loss": 1.5685, "step": 169310 }, { "epoch": 0.69, "grad_norm": 3.5387685298919678, "learning_rate": 0.0002, "loss": 1.7135, "step": 169320 }, { "epoch": 0.69, "grad_norm": 2.4664978981018066, "learning_rate": 0.0002, "loss": 1.7512, "step": 169330 }, { "epoch": 0.69, "grad_norm": 3.2609806060791016, "learning_rate": 0.0002, "loss": 1.5169, "step": 169340 }, { "epoch": 0.69, "grad_norm": 3.587684392929077, "learning_rate": 0.0002, "loss": 1.505, "step": 169350 }, { "epoch": 0.69, "grad_norm": 2.0793397426605225, "learning_rate": 0.0002, "loss": 1.6303, "step": 169360 }, { "epoch": 0.69, "grad_norm": 1.7949327230453491, "learning_rate": 0.0002, "loss": 1.6733, "step": 169370 }, { "epoch": 0.69, "grad_norm": 3.606363534927368, "learning_rate": 0.0002, "loss": 1.4506, "step": 169380 }, { "epoch": 0.69, "grad_norm": 4.598589897155762, "learning_rate": 0.0002, "loss": 1.6032, "step": 169390 }, { "epoch": 0.69, "grad_norm": 2.1340057849884033, "learning_rate": 0.0002, "loss": 1.5625, "step": 169400 }, { "epoch": 0.69, "grad_norm": 2.2741663455963135, "learning_rate": 0.0002, "loss": 1.5532, "step": 169410 }, { "epoch": 0.69, "grad_norm": 2.9293270111083984, "learning_rate": 0.0002, "loss": 1.6171, "step": 169420 }, { "epoch": 0.69, "grad_norm": 4.729479789733887, "learning_rate": 0.0002, "loss": 1.7655, "step": 169430 }, { "epoch": 0.69, "grad_norm": 4.562866687774658, "learning_rate": 0.0002, "loss": 1.6688, "step": 169440 }, { "epoch": 0.69, "grad_norm": 3.7974836826324463, "learning_rate": 0.0002, "loss": 1.5759, "step": 169450 }, { "epoch": 0.69, "grad_norm": 3.10404896736145, "learning_rate": 0.0002, "loss": 1.5837, "step": 169460 }, { "epoch": 0.69, "grad_norm": 1.9343676567077637, "learning_rate": 0.0002, "loss": 1.745, "step": 169470 }, { "epoch": 0.69, "grad_norm": 3.57464861869812, "learning_rate": 0.0002, "loss": 1.6127, "step": 169480 }, { "epoch": 0.69, "grad_norm": 3.1837031841278076, "learning_rate": 0.0002, "loss": 1.6497, "step": 169490 }, { "epoch": 0.69, "grad_norm": 3.8972439765930176, "learning_rate": 0.0002, "loss": 1.4062, "step": 169500 }, { "epoch": 0.69, "grad_norm": 2.7254233360290527, "learning_rate": 0.0002, "loss": 1.3793, "step": 169510 }, { "epoch": 0.69, "grad_norm": 3.3773510456085205, "learning_rate": 0.0002, "loss": 1.6882, "step": 169520 }, { "epoch": 0.69, "grad_norm": 2.790379047393799, "learning_rate": 0.0002, "loss": 1.4905, "step": 169530 }, { "epoch": 0.69, "grad_norm": 1.7292778491973877, "learning_rate": 0.0002, "loss": 1.5, "step": 169540 }, { "epoch": 0.69, "grad_norm": 2.7487664222717285, "learning_rate": 0.0002, "loss": 1.2336, "step": 169550 }, { "epoch": 0.69, "grad_norm": 2.377206563949585, "learning_rate": 0.0002, "loss": 1.3749, "step": 169560 }, { "epoch": 0.69, "grad_norm": 3.429953098297119, "learning_rate": 0.0002, "loss": 1.4526, "step": 169570 }, { "epoch": 0.69, "grad_norm": 2.897766590118408, "learning_rate": 0.0002, "loss": 1.4939, "step": 169580 }, { "epoch": 0.69, "grad_norm": 3.056272506713867, "learning_rate": 0.0002, "loss": 1.3214, "step": 169590 }, { "epoch": 0.69, "grad_norm": 3.3463938236236572, "learning_rate": 0.0002, "loss": 1.3901, "step": 169600 }, { "epoch": 0.69, "grad_norm": 2.1560258865356445, "learning_rate": 0.0002, "loss": 1.6344, "step": 169610 }, { "epoch": 0.69, "grad_norm": 3.401873826980591, "learning_rate": 0.0002, "loss": 1.8709, "step": 169620 }, { "epoch": 0.69, "grad_norm": 3.2575860023498535, "learning_rate": 0.0002, "loss": 1.5548, "step": 169630 }, { "epoch": 0.69, "grad_norm": 2.5490944385528564, "learning_rate": 0.0002, "loss": 1.5549, "step": 169640 }, { "epoch": 0.69, "grad_norm": 4.518015384674072, "learning_rate": 0.0002, "loss": 1.5529, "step": 169650 }, { "epoch": 0.69, "grad_norm": 2.1389265060424805, "learning_rate": 0.0002, "loss": 1.4706, "step": 169660 }, { "epoch": 0.69, "grad_norm": 1.6925967931747437, "learning_rate": 0.0002, "loss": 1.688, "step": 169670 }, { "epoch": 0.69, "grad_norm": 3.757178783416748, "learning_rate": 0.0002, "loss": 1.5908, "step": 169680 }, { "epoch": 0.69, "grad_norm": 2.828331470489502, "learning_rate": 0.0002, "loss": 1.6388, "step": 169690 }, { "epoch": 0.69, "grad_norm": 2.8567733764648438, "learning_rate": 0.0002, "loss": 1.5532, "step": 169700 }, { "epoch": 0.69, "grad_norm": 4.757110595703125, "learning_rate": 0.0002, "loss": 1.6664, "step": 169710 }, { "epoch": 0.69, "grad_norm": 8.206643104553223, "learning_rate": 0.0002, "loss": 1.5874, "step": 169720 }, { "epoch": 0.69, "grad_norm": 2.928314685821533, "learning_rate": 0.0002, "loss": 1.6188, "step": 169730 }, { "epoch": 0.69, "grad_norm": 3.18015718460083, "learning_rate": 0.0002, "loss": 1.6566, "step": 169740 }, { "epoch": 0.69, "grad_norm": 1.8785721063613892, "learning_rate": 0.0002, "loss": 1.4146, "step": 169750 }, { "epoch": 0.69, "grad_norm": 1.8908920288085938, "learning_rate": 0.0002, "loss": 1.6359, "step": 169760 }, { "epoch": 0.69, "grad_norm": 3.313075304031372, "learning_rate": 0.0002, "loss": 1.3832, "step": 169770 }, { "epoch": 0.69, "grad_norm": 2.2250099182128906, "learning_rate": 0.0002, "loss": 1.5614, "step": 169780 }, { "epoch": 0.69, "grad_norm": 5.077352046966553, "learning_rate": 0.0002, "loss": 1.7297, "step": 169790 }, { "epoch": 0.69, "grad_norm": 3.1504454612731934, "learning_rate": 0.0002, "loss": 1.5233, "step": 169800 }, { "epoch": 0.69, "grad_norm": 4.565423965454102, "learning_rate": 0.0002, "loss": 1.5512, "step": 169810 }, { "epoch": 0.69, "grad_norm": 2.890019416809082, "learning_rate": 0.0002, "loss": 1.4106, "step": 169820 }, { "epoch": 0.69, "grad_norm": 2.7447659969329834, "learning_rate": 0.0002, "loss": 1.5945, "step": 169830 }, { "epoch": 0.69, "grad_norm": 2.750469446182251, "learning_rate": 0.0002, "loss": 1.7661, "step": 169840 }, { "epoch": 0.69, "grad_norm": 2.529383659362793, "learning_rate": 0.0002, "loss": 1.9151, "step": 169850 }, { "epoch": 0.69, "grad_norm": 3.060189723968506, "learning_rate": 0.0002, "loss": 1.6316, "step": 169860 }, { "epoch": 0.69, "grad_norm": 4.061463832855225, "learning_rate": 0.0002, "loss": 1.6076, "step": 169870 }, { "epoch": 0.69, "grad_norm": 2.5039196014404297, "learning_rate": 0.0002, "loss": 1.8868, "step": 169880 }, { "epoch": 0.69, "grad_norm": 2.8558149337768555, "learning_rate": 0.0002, "loss": 1.6939, "step": 169890 }, { "epoch": 0.69, "grad_norm": 2.857255220413208, "learning_rate": 0.0002, "loss": 1.6319, "step": 169900 }, { "epoch": 0.69, "grad_norm": 3.4015231132507324, "learning_rate": 0.0002, "loss": 1.6376, "step": 169910 }, { "epoch": 0.69, "grad_norm": 3.2462799549102783, "learning_rate": 0.0002, "loss": 1.4131, "step": 169920 }, { "epoch": 0.69, "grad_norm": 2.221423625946045, "learning_rate": 0.0002, "loss": 1.749, "step": 169930 }, { "epoch": 0.69, "grad_norm": 2.599156618118286, "learning_rate": 0.0002, "loss": 1.79, "step": 169940 }, { "epoch": 0.69, "grad_norm": 2.879258871078491, "learning_rate": 0.0002, "loss": 1.526, "step": 169950 }, { "epoch": 0.69, "grad_norm": 2.559170961380005, "learning_rate": 0.0002, "loss": 1.5906, "step": 169960 }, { "epoch": 0.69, "grad_norm": 2.7496588230133057, "learning_rate": 0.0002, "loss": 1.5844, "step": 169970 }, { "epoch": 0.69, "grad_norm": 3.4695382118225098, "learning_rate": 0.0002, "loss": 1.3872, "step": 169980 }, { "epoch": 0.69, "grad_norm": 1.580311894416809, "learning_rate": 0.0002, "loss": 1.6222, "step": 169990 }, { "epoch": 0.69, "grad_norm": 2.889002561569214, "learning_rate": 0.0002, "loss": 1.7689, "step": 170000 }, { "epoch": 0.69, "grad_norm": 3.2554280757904053, "learning_rate": 0.0002, "loss": 1.6002, "step": 170010 }, { "epoch": 0.69, "grad_norm": 3.9201035499572754, "learning_rate": 0.0002, "loss": 1.5922, "step": 170020 }, { "epoch": 0.69, "grad_norm": 4.241007328033447, "learning_rate": 0.0002, "loss": 1.6606, "step": 170030 }, { "epoch": 0.69, "grad_norm": 3.302300214767456, "learning_rate": 0.0002, "loss": 1.755, "step": 170040 }, { "epoch": 0.69, "grad_norm": 1.9682797193527222, "learning_rate": 0.0002, "loss": 1.3555, "step": 170050 }, { "epoch": 0.69, "grad_norm": 3.5239555835723877, "learning_rate": 0.0002, "loss": 1.7038, "step": 170060 }, { "epoch": 0.69, "grad_norm": 4.767122745513916, "learning_rate": 0.0002, "loss": 1.5442, "step": 170070 }, { "epoch": 0.69, "grad_norm": 4.238733768463135, "learning_rate": 0.0002, "loss": 1.5336, "step": 170080 }, { "epoch": 0.69, "grad_norm": 2.8224103450775146, "learning_rate": 0.0002, "loss": 1.4674, "step": 170090 }, { "epoch": 0.69, "grad_norm": 2.581343650817871, "learning_rate": 0.0002, "loss": 1.4489, "step": 170100 }, { "epoch": 0.69, "grad_norm": 2.57303524017334, "learning_rate": 0.0002, "loss": 1.5014, "step": 170110 }, { "epoch": 0.69, "grad_norm": 3.1475765705108643, "learning_rate": 0.0002, "loss": 1.3851, "step": 170120 }, { "epoch": 0.69, "grad_norm": 4.898166179656982, "learning_rate": 0.0002, "loss": 1.6122, "step": 170130 }, { "epoch": 0.69, "grad_norm": 3.8156840801239014, "learning_rate": 0.0002, "loss": 1.515, "step": 170140 }, { "epoch": 0.69, "grad_norm": 2.877545118331909, "learning_rate": 0.0002, "loss": 1.5465, "step": 170150 }, { "epoch": 0.69, "grad_norm": 2.6484158039093018, "learning_rate": 0.0002, "loss": 1.5286, "step": 170160 }, { "epoch": 0.69, "grad_norm": 5.045298099517822, "learning_rate": 0.0002, "loss": 1.5099, "step": 170170 }, { "epoch": 0.69, "grad_norm": 3.1546435356140137, "learning_rate": 0.0002, "loss": 1.4006, "step": 170180 }, { "epoch": 0.69, "grad_norm": 2.559525489807129, "learning_rate": 0.0002, "loss": 1.5899, "step": 170190 }, { "epoch": 0.69, "grad_norm": 2.382859706878662, "learning_rate": 0.0002, "loss": 1.536, "step": 170200 }, { "epoch": 0.69, "grad_norm": 3.568248748779297, "learning_rate": 0.0002, "loss": 1.9335, "step": 170210 }, { "epoch": 0.69, "grad_norm": 3.372068166732788, "learning_rate": 0.0002, "loss": 1.5188, "step": 170220 }, { "epoch": 0.69, "grad_norm": 3.6186211109161377, "learning_rate": 0.0002, "loss": 1.6878, "step": 170230 }, { "epoch": 0.69, "grad_norm": 3.808192491531372, "learning_rate": 0.0002, "loss": 1.5067, "step": 170240 }, { "epoch": 0.69, "grad_norm": 3.60632061958313, "learning_rate": 0.0002, "loss": 1.5022, "step": 170250 }, { "epoch": 0.69, "grad_norm": 3.1286044120788574, "learning_rate": 0.0002, "loss": 1.4685, "step": 170260 }, { "epoch": 0.69, "grad_norm": 1.6505768299102783, "learning_rate": 0.0002, "loss": 1.4677, "step": 170270 }, { "epoch": 0.69, "grad_norm": 3.1119205951690674, "learning_rate": 0.0002, "loss": 1.8302, "step": 170280 }, { "epoch": 0.69, "grad_norm": 2.9312241077423096, "learning_rate": 0.0002, "loss": 1.7003, "step": 170290 }, { "epoch": 0.69, "grad_norm": 2.1474738121032715, "learning_rate": 0.0002, "loss": 1.8619, "step": 170300 }, { "epoch": 0.69, "grad_norm": 3.97959041595459, "learning_rate": 0.0002, "loss": 1.5472, "step": 170310 }, { "epoch": 0.69, "grad_norm": 2.546192169189453, "learning_rate": 0.0002, "loss": 1.7843, "step": 170320 }, { "epoch": 0.69, "grad_norm": 2.069500684738159, "learning_rate": 0.0002, "loss": 1.8936, "step": 170330 }, { "epoch": 0.69, "grad_norm": 2.4205760955810547, "learning_rate": 0.0002, "loss": 1.5358, "step": 170340 }, { "epoch": 0.69, "grad_norm": 4.302454471588135, "learning_rate": 0.0002, "loss": 1.8112, "step": 170350 }, { "epoch": 0.69, "grad_norm": 2.564596652984619, "learning_rate": 0.0002, "loss": 1.679, "step": 170360 }, { "epoch": 0.69, "grad_norm": 3.2570414543151855, "learning_rate": 0.0002, "loss": 1.3683, "step": 170370 }, { "epoch": 0.69, "grad_norm": 3.4275906085968018, "learning_rate": 0.0002, "loss": 1.5771, "step": 170380 }, { "epoch": 0.69, "grad_norm": 4.045681953430176, "learning_rate": 0.0002, "loss": 1.6395, "step": 170390 }, { "epoch": 0.69, "grad_norm": 1.2148983478546143, "learning_rate": 0.0002, "loss": 1.5778, "step": 170400 }, { "epoch": 0.69, "grad_norm": 1.9439024925231934, "learning_rate": 0.0002, "loss": 1.3083, "step": 170410 }, { "epoch": 0.69, "grad_norm": 4.228872299194336, "learning_rate": 0.0002, "loss": 1.5311, "step": 170420 }, { "epoch": 0.69, "grad_norm": 4.102644443511963, "learning_rate": 0.0002, "loss": 1.5806, "step": 170430 }, { "epoch": 0.69, "grad_norm": 2.91615891456604, "learning_rate": 0.0002, "loss": 1.3466, "step": 170440 }, { "epoch": 0.69, "grad_norm": 2.3233962059020996, "learning_rate": 0.0002, "loss": 1.6278, "step": 170450 }, { "epoch": 0.69, "grad_norm": 2.4821014404296875, "learning_rate": 0.0002, "loss": 1.6413, "step": 170460 }, { "epoch": 0.69, "grad_norm": 3.01625657081604, "learning_rate": 0.0002, "loss": 1.6131, "step": 170470 }, { "epoch": 0.69, "grad_norm": 3.5035345554351807, "learning_rate": 0.0002, "loss": 1.4831, "step": 170480 }, { "epoch": 0.69, "grad_norm": 2.451381206512451, "learning_rate": 0.0002, "loss": 1.4546, "step": 170490 }, { "epoch": 0.69, "grad_norm": 2.47367262840271, "learning_rate": 0.0002, "loss": 1.5848, "step": 170500 }, { "epoch": 0.69, "grad_norm": 2.4657163619995117, "learning_rate": 0.0002, "loss": 1.6034, "step": 170510 }, { "epoch": 0.69, "grad_norm": 1.7515558004379272, "learning_rate": 0.0002, "loss": 1.635, "step": 170520 }, { "epoch": 0.69, "grad_norm": 2.968005895614624, "learning_rate": 0.0002, "loss": 1.5241, "step": 170530 }, { "epoch": 0.69, "grad_norm": 3.5524981021881104, "learning_rate": 0.0002, "loss": 1.4538, "step": 170540 }, { "epoch": 0.69, "grad_norm": 2.6318411827087402, "learning_rate": 0.0002, "loss": 1.518, "step": 170550 }, { "epoch": 0.69, "grad_norm": 6.77802038192749, "learning_rate": 0.0002, "loss": 1.5634, "step": 170560 }, { "epoch": 0.69, "grad_norm": 2.9809815883636475, "learning_rate": 0.0002, "loss": 1.6923, "step": 170570 }, { "epoch": 0.69, "grad_norm": 2.2661030292510986, "learning_rate": 0.0002, "loss": 1.4277, "step": 170580 }, { "epoch": 0.69, "grad_norm": 3.489208698272705, "learning_rate": 0.0002, "loss": 1.4164, "step": 170590 }, { "epoch": 0.69, "grad_norm": 3.6374144554138184, "learning_rate": 0.0002, "loss": 1.6336, "step": 170600 }, { "epoch": 0.69, "grad_norm": 2.9412176609039307, "learning_rate": 0.0002, "loss": 1.4687, "step": 170610 }, { "epoch": 0.69, "grad_norm": 2.2900521755218506, "learning_rate": 0.0002, "loss": 1.4938, "step": 170620 }, { "epoch": 0.69, "grad_norm": 3.986776113510132, "learning_rate": 0.0002, "loss": 1.3824, "step": 170630 }, { "epoch": 0.69, "grad_norm": 3.3727948665618896, "learning_rate": 0.0002, "loss": 1.5176, "step": 170640 }, { "epoch": 0.69, "grad_norm": 3.11155104637146, "learning_rate": 0.0002, "loss": 1.6264, "step": 170650 }, { "epoch": 0.69, "grad_norm": 2.707512378692627, "learning_rate": 0.0002, "loss": 1.5372, "step": 170660 }, { "epoch": 0.69, "grad_norm": 3.0914483070373535, "learning_rate": 0.0002, "loss": 1.9798, "step": 170670 }, { "epoch": 0.69, "grad_norm": 5.660788536071777, "learning_rate": 0.0002, "loss": 1.6879, "step": 170680 }, { "epoch": 0.69, "grad_norm": 1.33487069606781, "learning_rate": 0.0002, "loss": 1.6742, "step": 170690 }, { "epoch": 0.69, "grad_norm": 3.3488619327545166, "learning_rate": 0.0002, "loss": 1.7111, "step": 170700 }, { "epoch": 0.69, "grad_norm": 3.847693920135498, "learning_rate": 0.0002, "loss": 1.435, "step": 170710 }, { "epoch": 0.69, "grad_norm": 2.3883719444274902, "learning_rate": 0.0002, "loss": 1.7554, "step": 170720 }, { "epoch": 0.7, "grad_norm": 2.265289306640625, "learning_rate": 0.0002, "loss": 1.5981, "step": 170730 }, { "epoch": 0.7, "grad_norm": 7.700551986694336, "learning_rate": 0.0002, "loss": 1.5259, "step": 170740 }, { "epoch": 0.7, "grad_norm": 2.729527473449707, "learning_rate": 0.0002, "loss": 1.8388, "step": 170750 }, { "epoch": 0.7, "grad_norm": 3.735818386077881, "learning_rate": 0.0002, "loss": 1.7952, "step": 170760 }, { "epoch": 0.7, "grad_norm": 2.586271286010742, "learning_rate": 0.0002, "loss": 1.6601, "step": 170770 }, { "epoch": 0.7, "grad_norm": 2.239863634109497, "learning_rate": 0.0002, "loss": 1.552, "step": 170780 }, { "epoch": 0.7, "grad_norm": 2.3721914291381836, "learning_rate": 0.0002, "loss": 1.8019, "step": 170790 }, { "epoch": 0.7, "grad_norm": 2.932171106338501, "learning_rate": 0.0002, "loss": 1.4124, "step": 170800 }, { "epoch": 0.7, "grad_norm": 2.110156774520874, "learning_rate": 0.0002, "loss": 1.8442, "step": 170810 }, { "epoch": 0.7, "grad_norm": 2.9245078563690186, "learning_rate": 0.0002, "loss": 1.7735, "step": 170820 }, { "epoch": 0.7, "grad_norm": 3.188448905944824, "learning_rate": 0.0002, "loss": 1.3352, "step": 170830 }, { "epoch": 0.7, "grad_norm": 3.4759645462036133, "learning_rate": 0.0002, "loss": 1.4404, "step": 170840 }, { "epoch": 0.7, "grad_norm": 3.4710819721221924, "learning_rate": 0.0002, "loss": 1.3657, "step": 170850 }, { "epoch": 0.7, "grad_norm": 3.9637908935546875, "learning_rate": 0.0002, "loss": 1.5809, "step": 170860 }, { "epoch": 0.7, "grad_norm": 2.6390533447265625, "learning_rate": 0.0002, "loss": 1.6116, "step": 170870 }, { "epoch": 0.7, "grad_norm": 4.259045600891113, "learning_rate": 0.0002, "loss": 1.6109, "step": 170880 }, { "epoch": 0.7, "grad_norm": 3.0362539291381836, "learning_rate": 0.0002, "loss": 1.7971, "step": 170890 }, { "epoch": 0.7, "grad_norm": 2.6806461811065674, "learning_rate": 0.0002, "loss": 1.792, "step": 170900 }, { "epoch": 0.7, "grad_norm": 5.3664960861206055, "learning_rate": 0.0002, "loss": 1.5487, "step": 170910 }, { "epoch": 0.7, "grad_norm": 2.5488550662994385, "learning_rate": 0.0002, "loss": 1.5902, "step": 170920 }, { "epoch": 0.7, "grad_norm": 1.7695406675338745, "learning_rate": 0.0002, "loss": 1.7615, "step": 170930 }, { "epoch": 0.7, "grad_norm": 3.1345672607421875, "learning_rate": 0.0002, "loss": 1.5819, "step": 170940 }, { "epoch": 0.7, "grad_norm": 3.822165012359619, "learning_rate": 0.0002, "loss": 1.6899, "step": 170950 }, { "epoch": 0.7, "grad_norm": 2.5012595653533936, "learning_rate": 0.0002, "loss": 1.5696, "step": 170960 }, { "epoch": 0.7, "grad_norm": 3.1831214427948, "learning_rate": 0.0002, "loss": 1.4338, "step": 170970 }, { "epoch": 0.7, "grad_norm": 3.0543196201324463, "learning_rate": 0.0002, "loss": 1.4564, "step": 170980 }, { "epoch": 0.7, "grad_norm": 3.143256187438965, "learning_rate": 0.0002, "loss": 1.4277, "step": 170990 }, { "epoch": 0.7, "grad_norm": 5.146460056304932, "learning_rate": 0.0002, "loss": 1.5355, "step": 171000 }, { "epoch": 0.7, "grad_norm": 2.2841711044311523, "learning_rate": 0.0002, "loss": 1.5028, "step": 171010 }, { "epoch": 0.7, "grad_norm": 4.1032938957214355, "learning_rate": 0.0002, "loss": 1.6975, "step": 171020 }, { "epoch": 0.7, "grad_norm": 8.990226745605469, "learning_rate": 0.0002, "loss": 1.702, "step": 171030 }, { "epoch": 0.7, "grad_norm": 5.557175159454346, "learning_rate": 0.0002, "loss": 1.5253, "step": 171040 }, { "epoch": 0.7, "grad_norm": 4.695300102233887, "learning_rate": 0.0002, "loss": 1.6377, "step": 171050 }, { "epoch": 0.7, "grad_norm": 2.0055596828460693, "learning_rate": 0.0002, "loss": 1.6012, "step": 171060 }, { "epoch": 0.7, "grad_norm": 2.2802045345306396, "learning_rate": 0.0002, "loss": 1.6421, "step": 171070 }, { "epoch": 0.7, "grad_norm": 2.5170345306396484, "learning_rate": 0.0002, "loss": 1.674, "step": 171080 }, { "epoch": 0.7, "grad_norm": 2.6367197036743164, "learning_rate": 0.0002, "loss": 1.619, "step": 171090 }, { "epoch": 0.7, "grad_norm": 8.763285636901855, "learning_rate": 0.0002, "loss": 1.6046, "step": 171100 }, { "epoch": 0.7, "grad_norm": 2.1123623847961426, "learning_rate": 0.0002, "loss": 1.6037, "step": 171110 }, { "epoch": 0.7, "grad_norm": 3.447157382965088, "learning_rate": 0.0002, "loss": 1.4744, "step": 171120 }, { "epoch": 0.7, "grad_norm": 3.631518840789795, "learning_rate": 0.0002, "loss": 1.6991, "step": 171130 }, { "epoch": 0.7, "grad_norm": 3.2283120155334473, "learning_rate": 0.0002, "loss": 1.484, "step": 171140 }, { "epoch": 0.7, "grad_norm": 2.4005048274993896, "learning_rate": 0.0002, "loss": 1.5611, "step": 171150 }, { "epoch": 0.7, "grad_norm": 1.2379848957061768, "learning_rate": 0.0002, "loss": 1.6018, "step": 171160 }, { "epoch": 0.7, "grad_norm": 7.467668056488037, "learning_rate": 0.0002, "loss": 1.7985, "step": 171170 }, { "epoch": 0.7, "grad_norm": 3.422981023788452, "learning_rate": 0.0002, "loss": 1.6963, "step": 171180 }, { "epoch": 0.7, "grad_norm": 5.278225421905518, "learning_rate": 0.0002, "loss": 1.5064, "step": 171190 }, { "epoch": 0.7, "grad_norm": 3.1315269470214844, "learning_rate": 0.0002, "loss": 1.5965, "step": 171200 }, { "epoch": 0.7, "grad_norm": 6.610055923461914, "learning_rate": 0.0002, "loss": 1.6194, "step": 171210 }, { "epoch": 0.7, "grad_norm": 4.294654846191406, "learning_rate": 0.0002, "loss": 1.6271, "step": 171220 }, { "epoch": 0.7, "grad_norm": 3.5694828033447266, "learning_rate": 0.0002, "loss": 1.7068, "step": 171230 }, { "epoch": 0.7, "grad_norm": 4.259414196014404, "learning_rate": 0.0002, "loss": 1.5172, "step": 171240 }, { "epoch": 0.7, "grad_norm": 2.8117151260375977, "learning_rate": 0.0002, "loss": 1.4909, "step": 171250 }, { "epoch": 0.7, "grad_norm": 2.8819563388824463, "learning_rate": 0.0002, "loss": 1.7717, "step": 171260 }, { "epoch": 0.7, "grad_norm": 2.119952440261841, "learning_rate": 0.0002, "loss": 1.6505, "step": 171270 }, { "epoch": 0.7, "grad_norm": 3.2334179878234863, "learning_rate": 0.0002, "loss": 1.6248, "step": 171280 }, { "epoch": 0.7, "grad_norm": 2.0107929706573486, "learning_rate": 0.0002, "loss": 1.6957, "step": 171290 }, { "epoch": 0.7, "grad_norm": 2.7603471279144287, "learning_rate": 0.0002, "loss": 1.3442, "step": 171300 }, { "epoch": 0.7, "grad_norm": 2.605006694793701, "learning_rate": 0.0002, "loss": 1.6425, "step": 171310 }, { "epoch": 0.7, "grad_norm": 3.8967716693878174, "learning_rate": 0.0002, "loss": 1.6771, "step": 171320 }, { "epoch": 0.7, "grad_norm": 3.8235793113708496, "learning_rate": 0.0002, "loss": 1.5112, "step": 171330 }, { "epoch": 0.7, "grad_norm": 3.3044233322143555, "learning_rate": 0.0002, "loss": 1.5484, "step": 171340 }, { "epoch": 0.7, "grad_norm": 2.7190444469451904, "learning_rate": 0.0002, "loss": 1.4452, "step": 171350 }, { "epoch": 0.7, "grad_norm": 4.089377403259277, "learning_rate": 0.0002, "loss": 1.4749, "step": 171360 }, { "epoch": 0.7, "grad_norm": 3.133145570755005, "learning_rate": 0.0002, "loss": 1.6731, "step": 171370 }, { "epoch": 0.7, "grad_norm": 4.111311435699463, "learning_rate": 0.0002, "loss": 1.7828, "step": 171380 }, { "epoch": 0.7, "grad_norm": 2.37250018119812, "learning_rate": 0.0002, "loss": 1.4968, "step": 171390 }, { "epoch": 0.7, "grad_norm": 3.3370604515075684, "learning_rate": 0.0002, "loss": 1.7157, "step": 171400 }, { "epoch": 0.7, "grad_norm": 3.2264442443847656, "learning_rate": 0.0002, "loss": 1.441, "step": 171410 }, { "epoch": 0.7, "grad_norm": 3.137596845626831, "learning_rate": 0.0002, "loss": 1.4059, "step": 171420 }, { "epoch": 0.7, "grad_norm": 3.4666574001312256, "learning_rate": 0.0002, "loss": 1.755, "step": 171430 }, { "epoch": 0.7, "grad_norm": 4.519732475280762, "learning_rate": 0.0002, "loss": 1.631, "step": 171440 }, { "epoch": 0.7, "grad_norm": 4.766505241394043, "learning_rate": 0.0002, "loss": 1.8761, "step": 171450 }, { "epoch": 0.7, "grad_norm": 2.7869181632995605, "learning_rate": 0.0002, "loss": 1.6289, "step": 171460 }, { "epoch": 0.7, "grad_norm": 2.8819420337677, "learning_rate": 0.0002, "loss": 1.5494, "step": 171470 }, { "epoch": 0.7, "grad_norm": 2.605193614959717, "learning_rate": 0.0002, "loss": 1.783, "step": 171480 }, { "epoch": 0.7, "grad_norm": 2.610884189605713, "learning_rate": 0.0002, "loss": 1.5597, "step": 171490 }, { "epoch": 0.7, "grad_norm": 3.1544110774993896, "learning_rate": 0.0002, "loss": 1.4039, "step": 171500 }, { "epoch": 0.7, "grad_norm": 1.9914072751998901, "learning_rate": 0.0002, "loss": 1.7227, "step": 171510 }, { "epoch": 0.7, "grad_norm": 1.8893061876296997, "learning_rate": 0.0002, "loss": 1.8612, "step": 171520 }, { "epoch": 0.7, "grad_norm": 2.426115036010742, "learning_rate": 0.0002, "loss": 1.7028, "step": 171530 }, { "epoch": 0.7, "grad_norm": 15.358962059020996, "learning_rate": 0.0002, "loss": 1.5616, "step": 171540 }, { "epoch": 0.7, "grad_norm": 4.297152996063232, "learning_rate": 0.0002, "loss": 1.4362, "step": 171550 }, { "epoch": 0.7, "grad_norm": 2.8493006229400635, "learning_rate": 0.0002, "loss": 1.7615, "step": 171560 }, { "epoch": 0.7, "grad_norm": 2.222245216369629, "learning_rate": 0.0002, "loss": 1.589, "step": 171570 }, { "epoch": 0.7, "grad_norm": 1.6432738304138184, "learning_rate": 0.0002, "loss": 1.3619, "step": 171580 }, { "epoch": 0.7, "grad_norm": 3.777804374694824, "learning_rate": 0.0002, "loss": 1.612, "step": 171590 }, { "epoch": 0.7, "grad_norm": 3.2677295207977295, "learning_rate": 0.0002, "loss": 1.6429, "step": 171600 }, { "epoch": 0.7, "grad_norm": 2.8854801654815674, "learning_rate": 0.0002, "loss": 1.6949, "step": 171610 }, { "epoch": 0.7, "grad_norm": 4.1392598152160645, "learning_rate": 0.0002, "loss": 1.5611, "step": 171620 }, { "epoch": 0.7, "grad_norm": 1.5044140815734863, "learning_rate": 0.0002, "loss": 1.4984, "step": 171630 }, { "epoch": 0.7, "grad_norm": 2.6738460063934326, "learning_rate": 0.0002, "loss": 1.6404, "step": 171640 }, { "epoch": 0.7, "grad_norm": 1.6959067583084106, "learning_rate": 0.0002, "loss": 1.7204, "step": 171650 }, { "epoch": 0.7, "grad_norm": 2.122653007507324, "learning_rate": 0.0002, "loss": 1.4799, "step": 171660 }, { "epoch": 0.7, "grad_norm": 2.8478481769561768, "learning_rate": 0.0002, "loss": 1.6044, "step": 171670 }, { "epoch": 0.7, "grad_norm": 6.107132434844971, "learning_rate": 0.0002, "loss": 1.4437, "step": 171680 }, { "epoch": 0.7, "grad_norm": 3.4045817852020264, "learning_rate": 0.0002, "loss": 1.5935, "step": 171690 }, { "epoch": 0.7, "grad_norm": 2.0477914810180664, "learning_rate": 0.0002, "loss": 1.4242, "step": 171700 }, { "epoch": 0.7, "grad_norm": 5.480673789978027, "learning_rate": 0.0002, "loss": 1.5951, "step": 171710 }, { "epoch": 0.7, "grad_norm": 3.0766489505767822, "learning_rate": 0.0002, "loss": 1.5899, "step": 171720 }, { "epoch": 0.7, "grad_norm": 3.09611439704895, "learning_rate": 0.0002, "loss": 1.4996, "step": 171730 }, { "epoch": 0.7, "grad_norm": 3.6288907527923584, "learning_rate": 0.0002, "loss": 1.5704, "step": 171740 }, { "epoch": 0.7, "grad_norm": 3.6328063011169434, "learning_rate": 0.0002, "loss": 1.5578, "step": 171750 }, { "epoch": 0.7, "grad_norm": 3.7122137546539307, "learning_rate": 0.0002, "loss": 1.5857, "step": 171760 }, { "epoch": 0.7, "grad_norm": 5.074295520782471, "learning_rate": 0.0002, "loss": 1.6659, "step": 171770 }, { "epoch": 0.7, "grad_norm": 2.7825522422790527, "learning_rate": 0.0002, "loss": 1.4293, "step": 171780 }, { "epoch": 0.7, "grad_norm": 2.1301567554473877, "learning_rate": 0.0002, "loss": 1.7279, "step": 171790 }, { "epoch": 0.7, "grad_norm": 5.521487236022949, "learning_rate": 0.0002, "loss": 1.5615, "step": 171800 }, { "epoch": 0.7, "grad_norm": 2.9438416957855225, "learning_rate": 0.0002, "loss": 1.7112, "step": 171810 }, { "epoch": 0.7, "grad_norm": 3.3251492977142334, "learning_rate": 0.0002, "loss": 1.7841, "step": 171820 }, { "epoch": 0.7, "grad_norm": 2.5371811389923096, "learning_rate": 0.0002, "loss": 1.4979, "step": 171830 }, { "epoch": 0.7, "grad_norm": 1.433128833770752, "learning_rate": 0.0002, "loss": 1.6207, "step": 171840 }, { "epoch": 0.7, "grad_norm": 3.314222812652588, "learning_rate": 0.0002, "loss": 1.515, "step": 171850 }, { "epoch": 0.7, "grad_norm": 3.276244878768921, "learning_rate": 0.0002, "loss": 1.5615, "step": 171860 }, { "epoch": 0.7, "grad_norm": 2.0773203372955322, "learning_rate": 0.0002, "loss": 1.5807, "step": 171870 }, { "epoch": 0.7, "grad_norm": 3.3194217681884766, "learning_rate": 0.0002, "loss": 1.7318, "step": 171880 }, { "epoch": 0.7, "grad_norm": 4.057121753692627, "learning_rate": 0.0002, "loss": 1.4424, "step": 171890 }, { "epoch": 0.7, "grad_norm": 2.4357473850250244, "learning_rate": 0.0002, "loss": 1.3913, "step": 171900 }, { "epoch": 0.7, "grad_norm": 3.2497739791870117, "learning_rate": 0.0002, "loss": 1.615, "step": 171910 }, { "epoch": 0.7, "grad_norm": 3.401679277420044, "learning_rate": 0.0002, "loss": 1.9659, "step": 171920 }, { "epoch": 0.7, "grad_norm": 3.7436022758483887, "learning_rate": 0.0002, "loss": 1.4611, "step": 171930 }, { "epoch": 0.7, "grad_norm": 2.1282033920288086, "learning_rate": 0.0002, "loss": 1.7811, "step": 171940 }, { "epoch": 0.7, "grad_norm": 3.005495309829712, "learning_rate": 0.0002, "loss": 1.6226, "step": 171950 }, { "epoch": 0.7, "grad_norm": 2.472755193710327, "learning_rate": 0.0002, "loss": 1.7592, "step": 171960 }, { "epoch": 0.7, "grad_norm": 2.263666868209839, "learning_rate": 0.0002, "loss": 1.5326, "step": 171970 }, { "epoch": 0.7, "grad_norm": 4.25425910949707, "learning_rate": 0.0002, "loss": 1.4652, "step": 171980 }, { "epoch": 0.7, "grad_norm": 2.2997891902923584, "learning_rate": 0.0002, "loss": 1.4441, "step": 171990 }, { "epoch": 0.7, "grad_norm": 2.838526964187622, "learning_rate": 0.0002, "loss": 1.5042, "step": 172000 }, { "epoch": 0.7, "grad_norm": 4.187102317810059, "learning_rate": 0.0002, "loss": 1.3942, "step": 172010 }, { "epoch": 0.7, "grad_norm": 4.459667682647705, "learning_rate": 0.0002, "loss": 1.7339, "step": 172020 }, { "epoch": 0.7, "grad_norm": 3.3148915767669678, "learning_rate": 0.0002, "loss": 1.6434, "step": 172030 }, { "epoch": 0.7, "grad_norm": 4.6408467292785645, "learning_rate": 0.0002, "loss": 1.6973, "step": 172040 }, { "epoch": 0.7, "grad_norm": 3.1182687282562256, "learning_rate": 0.0002, "loss": 1.5816, "step": 172050 }, { "epoch": 0.7, "grad_norm": 3.024155616760254, "learning_rate": 0.0002, "loss": 1.9599, "step": 172060 }, { "epoch": 0.7, "grad_norm": 2.9594802856445312, "learning_rate": 0.0002, "loss": 1.6339, "step": 172070 }, { "epoch": 0.7, "grad_norm": 4.645786762237549, "learning_rate": 0.0002, "loss": 1.3899, "step": 172080 }, { "epoch": 0.7, "grad_norm": 2.415483236312866, "learning_rate": 0.0002, "loss": 1.5548, "step": 172090 }, { "epoch": 0.7, "grad_norm": 2.051077127456665, "learning_rate": 0.0002, "loss": 1.4497, "step": 172100 }, { "epoch": 0.7, "grad_norm": 3.261793375015259, "learning_rate": 0.0002, "loss": 1.6461, "step": 172110 }, { "epoch": 0.7, "grad_norm": 2.546954870223999, "learning_rate": 0.0002, "loss": 1.387, "step": 172120 }, { "epoch": 0.7, "grad_norm": 2.9131221771240234, "learning_rate": 0.0002, "loss": 1.4365, "step": 172130 }, { "epoch": 0.7, "grad_norm": 2.5843095779418945, "learning_rate": 0.0002, "loss": 1.6597, "step": 172140 }, { "epoch": 0.7, "grad_norm": 3.0415544509887695, "learning_rate": 0.0002, "loss": 1.6061, "step": 172150 }, { "epoch": 0.7, "grad_norm": 3.990677833557129, "learning_rate": 0.0002, "loss": 1.6528, "step": 172160 }, { "epoch": 0.7, "grad_norm": 2.277785301208496, "learning_rate": 0.0002, "loss": 1.7697, "step": 172170 }, { "epoch": 0.7, "grad_norm": 3.9297397136688232, "learning_rate": 0.0002, "loss": 1.2806, "step": 172180 }, { "epoch": 0.7, "grad_norm": 4.528871059417725, "learning_rate": 0.0002, "loss": 1.6398, "step": 172190 }, { "epoch": 0.7, "grad_norm": 2.760942220687866, "learning_rate": 0.0002, "loss": 1.4815, "step": 172200 }, { "epoch": 0.7, "grad_norm": 2.7604386806488037, "learning_rate": 0.0002, "loss": 1.3708, "step": 172210 }, { "epoch": 0.7, "grad_norm": 2.96976375579834, "learning_rate": 0.0002, "loss": 1.6543, "step": 172220 }, { "epoch": 0.7, "grad_norm": 3.5894904136657715, "learning_rate": 0.0002, "loss": 1.5708, "step": 172230 }, { "epoch": 0.7, "grad_norm": 5.894717693328857, "learning_rate": 0.0002, "loss": 1.5803, "step": 172240 }, { "epoch": 0.7, "grad_norm": 3.2442803382873535, "learning_rate": 0.0002, "loss": 1.5392, "step": 172250 }, { "epoch": 0.7, "grad_norm": 8.773040771484375, "learning_rate": 0.0002, "loss": 1.6596, "step": 172260 }, { "epoch": 0.7, "grad_norm": 3.2055399417877197, "learning_rate": 0.0002, "loss": 1.6161, "step": 172270 }, { "epoch": 0.7, "grad_norm": 3.036733388900757, "learning_rate": 0.0002, "loss": 1.5964, "step": 172280 }, { "epoch": 0.7, "grad_norm": 1.6529792547225952, "learning_rate": 0.0002, "loss": 1.5402, "step": 172290 }, { "epoch": 0.7, "grad_norm": 3.154634475708008, "learning_rate": 0.0002, "loss": 1.5891, "step": 172300 }, { "epoch": 0.7, "grad_norm": 2.3623266220092773, "learning_rate": 0.0002, "loss": 1.732, "step": 172310 }, { "epoch": 0.7, "grad_norm": 2.311772584915161, "learning_rate": 0.0002, "loss": 1.4843, "step": 172320 }, { "epoch": 0.7, "grad_norm": 3.5247719287872314, "learning_rate": 0.0002, "loss": 1.5976, "step": 172330 }, { "epoch": 0.7, "grad_norm": 4.358273983001709, "learning_rate": 0.0002, "loss": 1.6912, "step": 172340 }, { "epoch": 0.7, "grad_norm": 3.5451979637145996, "learning_rate": 0.0002, "loss": 1.7377, "step": 172350 }, { "epoch": 0.7, "grad_norm": 2.796527624130249, "learning_rate": 0.0002, "loss": 1.3239, "step": 172360 }, { "epoch": 0.7, "grad_norm": 2.7986514568328857, "learning_rate": 0.0002, "loss": 1.8615, "step": 172370 }, { "epoch": 0.7, "grad_norm": 2.4107255935668945, "learning_rate": 0.0002, "loss": 1.7351, "step": 172380 }, { "epoch": 0.7, "grad_norm": 1.9782054424285889, "learning_rate": 0.0002, "loss": 1.7388, "step": 172390 }, { "epoch": 0.7, "grad_norm": 4.032543182373047, "learning_rate": 0.0002, "loss": 1.9364, "step": 172400 }, { "epoch": 0.7, "grad_norm": 2.6227035522460938, "learning_rate": 0.0002, "loss": 1.6898, "step": 172410 }, { "epoch": 0.7, "grad_norm": 3.898646354675293, "learning_rate": 0.0002, "loss": 1.4495, "step": 172420 }, { "epoch": 0.7, "grad_norm": 2.7580628395080566, "learning_rate": 0.0002, "loss": 1.4351, "step": 172430 }, { "epoch": 0.7, "grad_norm": 2.9197442531585693, "learning_rate": 0.0002, "loss": 1.5136, "step": 172440 }, { "epoch": 0.7, "grad_norm": 3.9764554500579834, "learning_rate": 0.0002, "loss": 1.4107, "step": 172450 }, { "epoch": 0.7, "grad_norm": 2.608384132385254, "learning_rate": 0.0002, "loss": 1.6808, "step": 172460 }, { "epoch": 0.7, "grad_norm": 3.0931246280670166, "learning_rate": 0.0002, "loss": 1.6708, "step": 172470 }, { "epoch": 0.7, "grad_norm": 4.449082374572754, "learning_rate": 0.0002, "loss": 1.4723, "step": 172480 }, { "epoch": 0.7, "grad_norm": 3.7990527153015137, "learning_rate": 0.0002, "loss": 1.6144, "step": 172490 }, { "epoch": 0.7, "grad_norm": 3.0248003005981445, "learning_rate": 0.0002, "loss": 1.3715, "step": 172500 }, { "epoch": 0.7, "grad_norm": 3.2031750679016113, "learning_rate": 0.0002, "loss": 1.7182, "step": 172510 }, { "epoch": 0.7, "grad_norm": 2.2302303314208984, "learning_rate": 0.0002, "loss": 1.3143, "step": 172520 }, { "epoch": 0.7, "grad_norm": 4.184154033660889, "learning_rate": 0.0002, "loss": 1.6172, "step": 172530 }, { "epoch": 0.7, "grad_norm": 1.5862479209899902, "learning_rate": 0.0002, "loss": 1.4981, "step": 172540 }, { "epoch": 0.7, "grad_norm": 5.244941234588623, "learning_rate": 0.0002, "loss": 1.7054, "step": 172550 }, { "epoch": 0.7, "grad_norm": 2.299433708190918, "learning_rate": 0.0002, "loss": 1.8493, "step": 172560 }, { "epoch": 0.7, "grad_norm": 2.948195219039917, "learning_rate": 0.0002, "loss": 1.3664, "step": 172570 }, { "epoch": 0.7, "grad_norm": 3.840454578399658, "learning_rate": 0.0002, "loss": 1.3129, "step": 172580 }, { "epoch": 0.7, "grad_norm": 3.391700506210327, "learning_rate": 0.0002, "loss": 1.7017, "step": 172590 }, { "epoch": 0.7, "grad_norm": 2.2656404972076416, "learning_rate": 0.0002, "loss": 1.5472, "step": 172600 }, { "epoch": 0.7, "grad_norm": 2.9759249687194824, "learning_rate": 0.0002, "loss": 1.7236, "step": 172610 }, { "epoch": 0.7, "grad_norm": 3.6941232681274414, "learning_rate": 0.0002, "loss": 1.4613, "step": 172620 }, { "epoch": 0.7, "grad_norm": 1.5403445959091187, "learning_rate": 0.0002, "loss": 1.5401, "step": 172630 }, { "epoch": 0.7, "grad_norm": 3.3026416301727295, "learning_rate": 0.0002, "loss": 1.5871, "step": 172640 }, { "epoch": 0.7, "grad_norm": 2.1071484088897705, "learning_rate": 0.0002, "loss": 1.7203, "step": 172650 }, { "epoch": 0.7, "grad_norm": 3.2146012783050537, "learning_rate": 0.0002, "loss": 1.6007, "step": 172660 }, { "epoch": 0.7, "grad_norm": 3.4147281646728516, "learning_rate": 0.0002, "loss": 1.7217, "step": 172670 }, { "epoch": 0.7, "grad_norm": 2.47727370262146, "learning_rate": 0.0002, "loss": 1.6634, "step": 172680 }, { "epoch": 0.7, "grad_norm": 2.792268753051758, "learning_rate": 0.0002, "loss": 1.292, "step": 172690 }, { "epoch": 0.7, "grad_norm": 2.8166284561157227, "learning_rate": 0.0002, "loss": 1.4376, "step": 172700 }, { "epoch": 0.7, "grad_norm": 6.035714149475098, "learning_rate": 0.0002, "loss": 1.5901, "step": 172710 }, { "epoch": 0.7, "grad_norm": 4.375119209289551, "learning_rate": 0.0002, "loss": 1.5268, "step": 172720 }, { "epoch": 0.7, "grad_norm": 3.5566794872283936, "learning_rate": 0.0002, "loss": 1.6998, "step": 172730 }, { "epoch": 0.7, "grad_norm": 1.8404566049575806, "learning_rate": 0.0002, "loss": 1.6534, "step": 172740 }, { "epoch": 0.7, "grad_norm": 2.197866439819336, "learning_rate": 0.0002, "loss": 1.3122, "step": 172750 }, { "epoch": 0.7, "grad_norm": 2.557434558868408, "learning_rate": 0.0002, "loss": 1.8492, "step": 172760 }, { "epoch": 0.7, "grad_norm": 3.5279195308685303, "learning_rate": 0.0002, "loss": 1.56, "step": 172770 }, { "epoch": 0.7, "grad_norm": 3.631254196166992, "learning_rate": 0.0002, "loss": 1.5981, "step": 172780 }, { "epoch": 0.7, "grad_norm": 3.5541775226593018, "learning_rate": 0.0002, "loss": 1.9177, "step": 172790 }, { "epoch": 0.7, "grad_norm": 3.338820457458496, "learning_rate": 0.0002, "loss": 1.4572, "step": 172800 }, { "epoch": 0.7, "grad_norm": 1.909057855606079, "learning_rate": 0.0002, "loss": 1.7171, "step": 172810 }, { "epoch": 0.7, "grad_norm": 2.3363723754882812, "learning_rate": 0.0002, "loss": 1.477, "step": 172820 }, { "epoch": 0.7, "grad_norm": 1.7070788145065308, "learning_rate": 0.0002, "loss": 1.6708, "step": 172830 }, { "epoch": 0.7, "grad_norm": 3.1795754432678223, "learning_rate": 0.0002, "loss": 1.7382, "step": 172840 }, { "epoch": 0.7, "grad_norm": 3.8197662830352783, "learning_rate": 0.0002, "loss": 1.575, "step": 172850 }, { "epoch": 0.7, "grad_norm": 2.8407328128814697, "learning_rate": 0.0002, "loss": 1.8008, "step": 172860 }, { "epoch": 0.7, "grad_norm": 3.6424896717071533, "learning_rate": 0.0002, "loss": 1.6662, "step": 172870 }, { "epoch": 0.7, "grad_norm": 4.093358516693115, "learning_rate": 0.0002, "loss": 1.5777, "step": 172880 }, { "epoch": 0.7, "grad_norm": 2.429948329925537, "learning_rate": 0.0002, "loss": 1.3523, "step": 172890 }, { "epoch": 0.7, "grad_norm": 2.34769606590271, "learning_rate": 0.0002, "loss": 1.7133, "step": 172900 }, { "epoch": 0.7, "grad_norm": 2.72495174407959, "learning_rate": 0.0002, "loss": 1.7391, "step": 172910 }, { "epoch": 0.7, "grad_norm": 4.794466495513916, "learning_rate": 0.0002, "loss": 1.6308, "step": 172920 }, { "epoch": 0.7, "grad_norm": 2.7636332511901855, "learning_rate": 0.0002, "loss": 1.8286, "step": 172930 }, { "epoch": 0.7, "grad_norm": 3.3804574012756348, "learning_rate": 0.0002, "loss": 1.5217, "step": 172940 }, { "epoch": 0.7, "grad_norm": 4.844468593597412, "learning_rate": 0.0002, "loss": 1.761, "step": 172950 }, { "epoch": 0.7, "grad_norm": 4.972886085510254, "learning_rate": 0.0002, "loss": 1.4688, "step": 172960 }, { "epoch": 0.7, "grad_norm": 3.191404104232788, "learning_rate": 0.0002, "loss": 1.5011, "step": 172970 }, { "epoch": 0.7, "grad_norm": 3.419259786605835, "learning_rate": 0.0002, "loss": 1.6053, "step": 172980 }, { "epoch": 0.7, "grad_norm": 3.5194945335388184, "learning_rate": 0.0002, "loss": 1.7698, "step": 172990 }, { "epoch": 0.7, "grad_norm": 4.976685523986816, "learning_rate": 0.0002, "loss": 1.4634, "step": 173000 }, { "epoch": 0.7, "grad_norm": 3.752370834350586, "learning_rate": 0.0002, "loss": 1.6919, "step": 173010 }, { "epoch": 0.7, "grad_norm": 3.155106544494629, "learning_rate": 0.0002, "loss": 1.1915, "step": 173020 }, { "epoch": 0.7, "grad_norm": 4.725892066955566, "learning_rate": 0.0002, "loss": 1.4884, "step": 173030 }, { "epoch": 0.7, "grad_norm": 3.121631383895874, "learning_rate": 0.0002, "loss": 1.5711, "step": 173040 }, { "epoch": 0.7, "grad_norm": 2.8341217041015625, "learning_rate": 0.0002, "loss": 1.556, "step": 173050 }, { "epoch": 0.7, "grad_norm": 2.3452062606811523, "learning_rate": 0.0002, "loss": 1.4074, "step": 173060 }, { "epoch": 0.7, "grad_norm": 3.381596326828003, "learning_rate": 0.0002, "loss": 1.6592, "step": 173070 }, { "epoch": 0.7, "grad_norm": 1.7723054885864258, "learning_rate": 0.0002, "loss": 1.4171, "step": 173080 }, { "epoch": 0.7, "grad_norm": 2.6635866165161133, "learning_rate": 0.0002, "loss": 1.4218, "step": 173090 }, { "epoch": 0.7, "grad_norm": 3.103510618209839, "learning_rate": 0.0002, "loss": 1.5431, "step": 173100 }, { "epoch": 0.7, "grad_norm": 2.6526641845703125, "learning_rate": 0.0002, "loss": 1.5998, "step": 173110 }, { "epoch": 0.7, "grad_norm": 4.198506832122803, "learning_rate": 0.0002, "loss": 1.4541, "step": 173120 }, { "epoch": 0.7, "grad_norm": 1.8707937002182007, "learning_rate": 0.0002, "loss": 1.6077, "step": 173130 }, { "epoch": 0.7, "grad_norm": 2.4602999687194824, "learning_rate": 0.0002, "loss": 1.5099, "step": 173140 }, { "epoch": 0.7, "grad_norm": 4.454916954040527, "learning_rate": 0.0002, "loss": 1.3369, "step": 173150 }, { "epoch": 0.7, "grad_norm": 4.072022914886475, "learning_rate": 0.0002, "loss": 1.5964, "step": 173160 }, { "epoch": 0.7, "grad_norm": 3.0747690200805664, "learning_rate": 0.0002, "loss": 1.6495, "step": 173170 }, { "epoch": 0.71, "grad_norm": 1.8688302040100098, "learning_rate": 0.0002, "loss": 1.8294, "step": 173180 }, { "epoch": 0.71, "grad_norm": 2.3973615169525146, "learning_rate": 0.0002, "loss": 1.7357, "step": 173190 }, { "epoch": 0.71, "grad_norm": 5.580162048339844, "learning_rate": 0.0002, "loss": 1.729, "step": 173200 }, { "epoch": 0.71, "grad_norm": 2.281247615814209, "learning_rate": 0.0002, "loss": 1.3922, "step": 173210 }, { "epoch": 0.71, "grad_norm": 2.4364612102508545, "learning_rate": 0.0002, "loss": 1.5879, "step": 173220 }, { "epoch": 0.71, "grad_norm": 6.368052959442139, "learning_rate": 0.0002, "loss": 1.3371, "step": 173230 }, { "epoch": 0.71, "grad_norm": 2.1597044467926025, "learning_rate": 0.0002, "loss": 1.7336, "step": 173240 }, { "epoch": 0.71, "grad_norm": 4.909510135650635, "learning_rate": 0.0002, "loss": 1.4507, "step": 173250 }, { "epoch": 0.71, "grad_norm": 2.966334342956543, "learning_rate": 0.0002, "loss": 1.5312, "step": 173260 }, { "epoch": 0.71, "grad_norm": 1.142950415611267, "learning_rate": 0.0002, "loss": 1.5229, "step": 173270 }, { "epoch": 0.71, "grad_norm": 5.276647567749023, "learning_rate": 0.0002, "loss": 1.8559, "step": 173280 }, { "epoch": 0.71, "grad_norm": 6.4246063232421875, "learning_rate": 0.0002, "loss": 1.6224, "step": 173290 }, { "epoch": 0.71, "grad_norm": 2.7522099018096924, "learning_rate": 0.0002, "loss": 1.56, "step": 173300 }, { "epoch": 0.71, "grad_norm": 2.348456621170044, "learning_rate": 0.0002, "loss": 1.5641, "step": 173310 }, { "epoch": 0.71, "grad_norm": 3.236518383026123, "learning_rate": 0.0002, "loss": 1.8345, "step": 173320 }, { "epoch": 0.71, "grad_norm": 2.5620534420013428, "learning_rate": 0.0002, "loss": 1.5918, "step": 173330 }, { "epoch": 0.71, "grad_norm": 2.8578240871429443, "learning_rate": 0.0002, "loss": 1.7441, "step": 173340 }, { "epoch": 0.71, "grad_norm": 2.038266181945801, "learning_rate": 0.0002, "loss": 1.4072, "step": 173350 }, { "epoch": 0.71, "grad_norm": 2.6805977821350098, "learning_rate": 0.0002, "loss": 1.4752, "step": 173360 }, { "epoch": 0.71, "grad_norm": 3.7461652755737305, "learning_rate": 0.0002, "loss": 1.5477, "step": 173370 }, { "epoch": 0.71, "grad_norm": 4.681021690368652, "learning_rate": 0.0002, "loss": 1.8305, "step": 173380 }, { "epoch": 0.71, "grad_norm": 2.9091615676879883, "learning_rate": 0.0002, "loss": 1.6192, "step": 173390 }, { "epoch": 0.71, "grad_norm": 3.009350538253784, "learning_rate": 0.0002, "loss": 1.343, "step": 173400 }, { "epoch": 0.71, "grad_norm": 3.379991054534912, "learning_rate": 0.0002, "loss": 1.5628, "step": 173410 }, { "epoch": 0.71, "grad_norm": 2.5634849071502686, "learning_rate": 0.0002, "loss": 1.4136, "step": 173420 }, { "epoch": 0.71, "grad_norm": 2.1817126274108887, "learning_rate": 0.0002, "loss": 1.6697, "step": 173430 }, { "epoch": 0.71, "grad_norm": 3.135977268218994, "learning_rate": 0.0002, "loss": 1.6379, "step": 173440 }, { "epoch": 0.71, "grad_norm": 2.6378817558288574, "learning_rate": 0.0002, "loss": 1.696, "step": 173450 }, { "epoch": 0.71, "grad_norm": 5.177924633026123, "learning_rate": 0.0002, "loss": 1.6729, "step": 173460 }, { "epoch": 0.71, "grad_norm": 2.393543243408203, "learning_rate": 0.0002, "loss": 1.3899, "step": 173470 }, { "epoch": 0.71, "grad_norm": 2.9020285606384277, "learning_rate": 0.0002, "loss": 1.6529, "step": 173480 }, { "epoch": 0.71, "grad_norm": 2.5054969787597656, "learning_rate": 0.0002, "loss": 1.6703, "step": 173490 }, { "epoch": 0.71, "grad_norm": 2.8331682682037354, "learning_rate": 0.0002, "loss": 1.4849, "step": 173500 }, { "epoch": 0.71, "grad_norm": 2.971341371536255, "learning_rate": 0.0002, "loss": 1.7142, "step": 173510 }, { "epoch": 0.71, "grad_norm": 2.9127635955810547, "learning_rate": 0.0002, "loss": 1.7696, "step": 173520 }, { "epoch": 0.71, "grad_norm": 3.0165247917175293, "learning_rate": 0.0002, "loss": 1.4257, "step": 173530 }, { "epoch": 0.71, "grad_norm": 4.018976211547852, "learning_rate": 0.0002, "loss": 1.515, "step": 173540 }, { "epoch": 0.71, "grad_norm": 2.7149524688720703, "learning_rate": 0.0002, "loss": 1.7013, "step": 173550 }, { "epoch": 0.71, "grad_norm": 3.0226032733917236, "learning_rate": 0.0002, "loss": 1.1484, "step": 173560 }, { "epoch": 0.71, "grad_norm": 2.4223594665527344, "learning_rate": 0.0002, "loss": 1.7974, "step": 173570 }, { "epoch": 0.71, "grad_norm": 2.892683267593384, "learning_rate": 0.0002, "loss": 1.4816, "step": 173580 }, { "epoch": 0.71, "grad_norm": 2.6756157875061035, "learning_rate": 0.0002, "loss": 1.4876, "step": 173590 }, { "epoch": 0.71, "grad_norm": 3.209591865539551, "learning_rate": 0.0002, "loss": 1.5135, "step": 173600 }, { "epoch": 0.71, "grad_norm": 4.2288031578063965, "learning_rate": 0.0002, "loss": 1.5325, "step": 173610 }, { "epoch": 0.71, "grad_norm": 3.8504865169525146, "learning_rate": 0.0002, "loss": 1.5011, "step": 173620 }, { "epoch": 0.71, "grad_norm": 4.107960224151611, "learning_rate": 0.0002, "loss": 1.6867, "step": 173630 }, { "epoch": 0.71, "grad_norm": 4.691107273101807, "learning_rate": 0.0002, "loss": 1.5149, "step": 173640 }, { "epoch": 0.71, "grad_norm": 3.041363000869751, "learning_rate": 0.0002, "loss": 1.7204, "step": 173650 }, { "epoch": 0.71, "grad_norm": 2.867110013961792, "learning_rate": 0.0002, "loss": 1.5662, "step": 173660 }, { "epoch": 0.71, "grad_norm": 2.8731510639190674, "learning_rate": 0.0002, "loss": 1.721, "step": 173670 }, { "epoch": 0.71, "grad_norm": 1.8257931470870972, "learning_rate": 0.0002, "loss": 1.4449, "step": 173680 }, { "epoch": 0.71, "grad_norm": 2.766592025756836, "learning_rate": 0.0002, "loss": 1.7177, "step": 173690 }, { "epoch": 0.71, "grad_norm": 2.0729329586029053, "learning_rate": 0.0002, "loss": 1.4258, "step": 173700 }, { "epoch": 0.71, "grad_norm": 1.5845035314559937, "learning_rate": 0.0002, "loss": 1.6301, "step": 173710 }, { "epoch": 0.71, "grad_norm": 2.6668448448181152, "learning_rate": 0.0002, "loss": 1.1311, "step": 173720 }, { "epoch": 0.71, "grad_norm": 2.683232069015503, "learning_rate": 0.0002, "loss": 1.4064, "step": 173730 }, { "epoch": 0.71, "grad_norm": 3.902937412261963, "learning_rate": 0.0002, "loss": 1.601, "step": 173740 }, { "epoch": 0.71, "grad_norm": 3.0376553535461426, "learning_rate": 0.0002, "loss": 1.5014, "step": 173750 }, { "epoch": 0.71, "grad_norm": 2.575011968612671, "learning_rate": 0.0002, "loss": 1.6782, "step": 173760 }, { "epoch": 0.71, "grad_norm": 5.012789726257324, "learning_rate": 0.0002, "loss": 1.6536, "step": 173770 }, { "epoch": 0.71, "grad_norm": 3.51778244972229, "learning_rate": 0.0002, "loss": 1.5337, "step": 173780 }, { "epoch": 0.71, "grad_norm": 3.1978001594543457, "learning_rate": 0.0002, "loss": 1.4423, "step": 173790 }, { "epoch": 0.71, "grad_norm": 2.577828884124756, "learning_rate": 0.0002, "loss": 1.6942, "step": 173800 }, { "epoch": 0.71, "grad_norm": 2.7239692211151123, "learning_rate": 0.0002, "loss": 1.4962, "step": 173810 }, { "epoch": 0.71, "grad_norm": 1.8400700092315674, "learning_rate": 0.0002, "loss": 1.4244, "step": 173820 }, { "epoch": 0.71, "grad_norm": 2.940589427947998, "learning_rate": 0.0002, "loss": 1.5282, "step": 173830 }, { "epoch": 0.71, "grad_norm": 1.8156312704086304, "learning_rate": 0.0002, "loss": 1.6282, "step": 173840 }, { "epoch": 0.71, "grad_norm": 2.8880484104156494, "learning_rate": 0.0002, "loss": 1.7462, "step": 173850 }, { "epoch": 0.71, "grad_norm": 2.7851762771606445, "learning_rate": 0.0002, "loss": 1.6238, "step": 173860 }, { "epoch": 0.71, "grad_norm": 4.498571872711182, "learning_rate": 0.0002, "loss": 1.6321, "step": 173870 }, { "epoch": 0.71, "grad_norm": 3.2393202781677246, "learning_rate": 0.0002, "loss": 1.4034, "step": 173880 }, { "epoch": 0.71, "grad_norm": 3.5682833194732666, "learning_rate": 0.0002, "loss": 1.4211, "step": 173890 }, { "epoch": 0.71, "grad_norm": 4.2268877029418945, "learning_rate": 0.0002, "loss": 1.3804, "step": 173900 }, { "epoch": 0.71, "grad_norm": 4.624118804931641, "learning_rate": 0.0002, "loss": 1.7355, "step": 173910 }, { "epoch": 0.71, "grad_norm": 3.565356731414795, "learning_rate": 0.0002, "loss": 1.4264, "step": 173920 }, { "epoch": 0.71, "grad_norm": 4.721280574798584, "learning_rate": 0.0002, "loss": 1.4404, "step": 173930 }, { "epoch": 0.71, "grad_norm": 2.663740396499634, "learning_rate": 0.0002, "loss": 1.936, "step": 173940 }, { "epoch": 0.71, "grad_norm": 1.8354036808013916, "learning_rate": 0.0002, "loss": 1.7626, "step": 173950 }, { "epoch": 0.71, "grad_norm": 3.739349842071533, "learning_rate": 0.0002, "loss": 1.6131, "step": 173960 }, { "epoch": 0.71, "grad_norm": 3.1977460384368896, "learning_rate": 0.0002, "loss": 1.4094, "step": 173970 }, { "epoch": 0.71, "grad_norm": 2.305372714996338, "learning_rate": 0.0002, "loss": 1.4501, "step": 173980 }, { "epoch": 0.71, "grad_norm": 5.547214984893799, "learning_rate": 0.0002, "loss": 1.4454, "step": 173990 }, { "epoch": 0.71, "grad_norm": 3.2784764766693115, "learning_rate": 0.0002, "loss": 1.422, "step": 174000 }, { "epoch": 0.71, "grad_norm": 2.6275346279144287, "learning_rate": 0.0002, "loss": 1.4464, "step": 174010 }, { "epoch": 0.71, "grad_norm": 6.356273174285889, "learning_rate": 0.0002, "loss": 1.7345, "step": 174020 }, { "epoch": 0.71, "grad_norm": 3.053978204727173, "learning_rate": 0.0002, "loss": 1.5633, "step": 174030 }, { "epoch": 0.71, "grad_norm": 2.8064703941345215, "learning_rate": 0.0002, "loss": 1.9172, "step": 174040 }, { "epoch": 0.71, "grad_norm": 3.129692792892456, "learning_rate": 0.0002, "loss": 1.4895, "step": 174050 }, { "epoch": 0.71, "grad_norm": 3.1333706378936768, "learning_rate": 0.0002, "loss": 1.5555, "step": 174060 }, { "epoch": 0.71, "grad_norm": 3.167874336242676, "learning_rate": 0.0002, "loss": 1.6992, "step": 174070 }, { "epoch": 0.71, "grad_norm": 1.8029693365097046, "learning_rate": 0.0002, "loss": 1.4905, "step": 174080 }, { "epoch": 0.71, "grad_norm": 3.5127954483032227, "learning_rate": 0.0002, "loss": 1.9427, "step": 174090 }, { "epoch": 0.71, "grad_norm": 1.75066339969635, "learning_rate": 0.0002, "loss": 1.6511, "step": 174100 }, { "epoch": 0.71, "grad_norm": 3.2918953895568848, "learning_rate": 0.0002, "loss": 1.7196, "step": 174110 }, { "epoch": 0.71, "grad_norm": 2.727586269378662, "learning_rate": 0.0002, "loss": 1.7061, "step": 174120 }, { "epoch": 0.71, "grad_norm": 4.09667444229126, "learning_rate": 0.0002, "loss": 1.7179, "step": 174130 }, { "epoch": 0.71, "grad_norm": 8.149401664733887, "learning_rate": 0.0002, "loss": 1.5885, "step": 174140 }, { "epoch": 0.71, "grad_norm": 3.246941328048706, "learning_rate": 0.0002, "loss": 1.4936, "step": 174150 }, { "epoch": 0.71, "grad_norm": 3.5971009731292725, "learning_rate": 0.0002, "loss": 1.7895, "step": 174160 }, { "epoch": 0.71, "grad_norm": 3.4297120571136475, "learning_rate": 0.0002, "loss": 1.6061, "step": 174170 }, { "epoch": 0.71, "grad_norm": 2.0482876300811768, "learning_rate": 0.0002, "loss": 1.7696, "step": 174180 }, { "epoch": 0.71, "grad_norm": 2.706624746322632, "learning_rate": 0.0002, "loss": 1.6517, "step": 174190 }, { "epoch": 0.71, "grad_norm": 1.7559386491775513, "learning_rate": 0.0002, "loss": 1.4737, "step": 174200 }, { "epoch": 0.71, "grad_norm": 2.659642457962036, "learning_rate": 0.0002, "loss": 1.511, "step": 174210 }, { "epoch": 0.71, "grad_norm": 3.9291141033172607, "learning_rate": 0.0002, "loss": 1.7816, "step": 174220 }, { "epoch": 0.71, "grad_norm": 2.3317172527313232, "learning_rate": 0.0002, "loss": 1.7972, "step": 174230 }, { "epoch": 0.71, "grad_norm": 3.951488971710205, "learning_rate": 0.0002, "loss": 1.5831, "step": 174240 }, { "epoch": 0.71, "grad_norm": 7.380377769470215, "learning_rate": 0.0002, "loss": 1.7304, "step": 174250 }, { "epoch": 0.71, "grad_norm": 2.514700174331665, "learning_rate": 0.0002, "loss": 1.6897, "step": 174260 }, { "epoch": 0.71, "grad_norm": 2.6140458583831787, "learning_rate": 0.0002, "loss": 1.4896, "step": 174270 }, { "epoch": 0.71, "grad_norm": 4.251332759857178, "learning_rate": 0.0002, "loss": 1.5058, "step": 174280 }, { "epoch": 0.71, "grad_norm": 2.4066083431243896, "learning_rate": 0.0002, "loss": 1.7357, "step": 174290 }, { "epoch": 0.71, "grad_norm": 2.4662740230560303, "learning_rate": 0.0002, "loss": 1.5665, "step": 174300 }, { "epoch": 0.71, "grad_norm": 1.3261812925338745, "learning_rate": 0.0002, "loss": 1.4323, "step": 174310 }, { "epoch": 0.71, "grad_norm": 3.257798433303833, "learning_rate": 0.0002, "loss": 1.2863, "step": 174320 }, { "epoch": 0.71, "grad_norm": 2.2217841148376465, "learning_rate": 0.0002, "loss": 1.7316, "step": 174330 }, { "epoch": 0.71, "grad_norm": 3.1043457984924316, "learning_rate": 0.0002, "loss": 1.3654, "step": 174340 }, { "epoch": 0.71, "grad_norm": 3.332935094833374, "learning_rate": 0.0002, "loss": 1.5829, "step": 174350 }, { "epoch": 0.71, "grad_norm": 4.531470775604248, "learning_rate": 0.0002, "loss": 1.6097, "step": 174360 }, { "epoch": 0.71, "grad_norm": 5.0182576179504395, "learning_rate": 0.0002, "loss": 1.5298, "step": 174370 }, { "epoch": 0.71, "grad_norm": 2.628850221633911, "learning_rate": 0.0002, "loss": 1.6249, "step": 174380 }, { "epoch": 0.71, "grad_norm": 3.2051424980163574, "learning_rate": 0.0002, "loss": 1.4327, "step": 174390 }, { "epoch": 0.71, "grad_norm": 2.750210762023926, "learning_rate": 0.0002, "loss": 1.5289, "step": 174400 }, { "epoch": 0.71, "grad_norm": 3.8537392616271973, "learning_rate": 0.0002, "loss": 1.5696, "step": 174410 }, { "epoch": 0.71, "grad_norm": 2.4247686862945557, "learning_rate": 0.0002, "loss": 1.6698, "step": 174420 }, { "epoch": 0.71, "grad_norm": 5.2328667640686035, "learning_rate": 0.0002, "loss": 1.8095, "step": 174430 }, { "epoch": 0.71, "grad_norm": 4.190642356872559, "learning_rate": 0.0002, "loss": 1.6045, "step": 174440 }, { "epoch": 0.71, "grad_norm": 4.519454479217529, "learning_rate": 0.0002, "loss": 1.634, "step": 174450 }, { "epoch": 0.71, "grad_norm": 4.236530780792236, "learning_rate": 0.0002, "loss": 1.6137, "step": 174460 }, { "epoch": 0.71, "grad_norm": 3.257803201675415, "learning_rate": 0.0002, "loss": 1.4731, "step": 174470 }, { "epoch": 0.71, "grad_norm": 2.095743179321289, "learning_rate": 0.0002, "loss": 1.802, "step": 174480 }, { "epoch": 0.71, "grad_norm": 2.8294880390167236, "learning_rate": 0.0002, "loss": 1.6265, "step": 174490 }, { "epoch": 0.71, "grad_norm": 2.699680805206299, "learning_rate": 0.0002, "loss": 1.6978, "step": 174500 }, { "epoch": 0.71, "grad_norm": 4.172417163848877, "learning_rate": 0.0002, "loss": 1.7606, "step": 174510 }, { "epoch": 0.71, "grad_norm": 3.672366142272949, "learning_rate": 0.0002, "loss": 1.4455, "step": 174520 }, { "epoch": 0.71, "grad_norm": 3.0330188274383545, "learning_rate": 0.0002, "loss": 1.442, "step": 174530 }, { "epoch": 0.71, "grad_norm": 1.92466402053833, "learning_rate": 0.0002, "loss": 1.4655, "step": 174540 }, { "epoch": 0.71, "grad_norm": 3.3539187908172607, "learning_rate": 0.0002, "loss": 1.519, "step": 174550 }, { "epoch": 0.71, "grad_norm": 4.150979042053223, "learning_rate": 0.0002, "loss": 1.6417, "step": 174560 }, { "epoch": 0.71, "grad_norm": 3.840885877609253, "learning_rate": 0.0002, "loss": 1.608, "step": 174570 }, { "epoch": 0.71, "grad_norm": 4.818384170532227, "learning_rate": 0.0002, "loss": 1.4287, "step": 174580 }, { "epoch": 0.71, "grad_norm": 2.6637794971466064, "learning_rate": 0.0002, "loss": 1.5544, "step": 174590 }, { "epoch": 0.71, "grad_norm": 1.9030860662460327, "learning_rate": 0.0002, "loss": 1.1205, "step": 174600 }, { "epoch": 0.71, "grad_norm": 2.033928871154785, "learning_rate": 0.0002, "loss": 1.6994, "step": 174610 }, { "epoch": 0.71, "grad_norm": 2.6064999103546143, "learning_rate": 0.0002, "loss": 1.7168, "step": 174620 }, { "epoch": 0.71, "grad_norm": 3.0404133796691895, "learning_rate": 0.0002, "loss": 1.4149, "step": 174630 }, { "epoch": 0.71, "grad_norm": 1.7558153867721558, "learning_rate": 0.0002, "loss": 1.7844, "step": 174640 }, { "epoch": 0.71, "grad_norm": 3.7191343307495117, "learning_rate": 0.0002, "loss": 1.5432, "step": 174650 }, { "epoch": 0.71, "grad_norm": 3.666804313659668, "learning_rate": 0.0002, "loss": 1.5959, "step": 174660 }, { "epoch": 0.71, "grad_norm": 3.5050692558288574, "learning_rate": 0.0002, "loss": 1.5201, "step": 174670 }, { "epoch": 0.71, "grad_norm": 6.187988758087158, "learning_rate": 0.0002, "loss": 1.7426, "step": 174680 }, { "epoch": 0.71, "grad_norm": 3.055671453475952, "learning_rate": 0.0002, "loss": 1.6126, "step": 174690 }, { "epoch": 0.71, "grad_norm": 4.635411739349365, "learning_rate": 0.0002, "loss": 1.364, "step": 174700 }, { "epoch": 0.71, "grad_norm": 3.208176374435425, "learning_rate": 0.0002, "loss": 1.5256, "step": 174710 }, { "epoch": 0.71, "grad_norm": 2.0576565265655518, "learning_rate": 0.0002, "loss": 1.5202, "step": 174720 }, { "epoch": 0.71, "grad_norm": 3.6194982528686523, "learning_rate": 0.0002, "loss": 1.5792, "step": 174730 }, { "epoch": 0.71, "grad_norm": 3.0793588161468506, "learning_rate": 0.0002, "loss": 1.4163, "step": 174740 }, { "epoch": 0.71, "grad_norm": 3.3826568126678467, "learning_rate": 0.0002, "loss": 1.6294, "step": 174750 }, { "epoch": 0.71, "grad_norm": 2.3754630088806152, "learning_rate": 0.0002, "loss": 1.4287, "step": 174760 }, { "epoch": 0.71, "grad_norm": 1.864548921585083, "learning_rate": 0.0002, "loss": 1.5068, "step": 174770 }, { "epoch": 0.71, "grad_norm": 3.033803939819336, "learning_rate": 0.0002, "loss": 1.4696, "step": 174780 }, { "epoch": 0.71, "grad_norm": 2.2039783000946045, "learning_rate": 0.0002, "loss": 1.6456, "step": 174790 }, { "epoch": 0.71, "grad_norm": 4.061129570007324, "learning_rate": 0.0002, "loss": 1.6105, "step": 174800 }, { "epoch": 0.71, "grad_norm": 2.6118264198303223, "learning_rate": 0.0002, "loss": 1.6215, "step": 174810 }, { "epoch": 0.71, "grad_norm": 2.1486151218414307, "learning_rate": 0.0002, "loss": 1.4987, "step": 174820 }, { "epoch": 0.71, "grad_norm": 2.90248966217041, "learning_rate": 0.0002, "loss": 1.7788, "step": 174830 }, { "epoch": 0.71, "grad_norm": 3.5311930179595947, "learning_rate": 0.0002, "loss": 1.6312, "step": 174840 }, { "epoch": 0.71, "grad_norm": 2.6947333812713623, "learning_rate": 0.0002, "loss": 1.4796, "step": 174850 }, { "epoch": 0.71, "grad_norm": 2.85945463180542, "learning_rate": 0.0002, "loss": 1.8838, "step": 174860 }, { "epoch": 0.71, "grad_norm": 3.800529718399048, "learning_rate": 0.0002, "loss": 1.5785, "step": 174870 }, { "epoch": 0.71, "grad_norm": 4.147684574127197, "learning_rate": 0.0002, "loss": 1.6287, "step": 174880 }, { "epoch": 0.71, "grad_norm": 3.2916314601898193, "learning_rate": 0.0002, "loss": 1.455, "step": 174890 }, { "epoch": 0.71, "grad_norm": 3.112579107284546, "learning_rate": 0.0002, "loss": 1.9334, "step": 174900 }, { "epoch": 0.71, "grad_norm": 3.6797585487365723, "learning_rate": 0.0002, "loss": 1.5467, "step": 174910 }, { "epoch": 0.71, "grad_norm": 3.1198887825012207, "learning_rate": 0.0002, "loss": 1.7868, "step": 174920 }, { "epoch": 0.71, "grad_norm": 3.187610149383545, "learning_rate": 0.0002, "loss": 1.4615, "step": 174930 }, { "epoch": 0.71, "grad_norm": 3.807361602783203, "learning_rate": 0.0002, "loss": 1.7058, "step": 174940 }, { "epoch": 0.71, "grad_norm": 5.411694526672363, "learning_rate": 0.0002, "loss": 1.6044, "step": 174950 }, { "epoch": 0.71, "grad_norm": 3.1447243690490723, "learning_rate": 0.0002, "loss": 1.6056, "step": 174960 }, { "epoch": 0.71, "grad_norm": 3.611814022064209, "learning_rate": 0.0002, "loss": 1.6504, "step": 174970 }, { "epoch": 0.71, "grad_norm": 1.6849061250686646, "learning_rate": 0.0002, "loss": 1.6203, "step": 174980 }, { "epoch": 0.71, "grad_norm": 2.1460373401641846, "learning_rate": 0.0002, "loss": 1.7263, "step": 174990 }, { "epoch": 0.71, "grad_norm": 3.84777569770813, "learning_rate": 0.0002, "loss": 1.4384, "step": 175000 }, { "epoch": 0.71, "grad_norm": 2.482926368713379, "learning_rate": 0.0002, "loss": 1.4806, "step": 175010 }, { "epoch": 0.71, "grad_norm": 2.954190254211426, "learning_rate": 0.0002, "loss": 1.6318, "step": 175020 }, { "epoch": 0.71, "grad_norm": 3.669360637664795, "learning_rate": 0.0002, "loss": 1.62, "step": 175030 }, { "epoch": 0.71, "grad_norm": 3.77034068107605, "learning_rate": 0.0002, "loss": 1.6729, "step": 175040 }, { "epoch": 0.71, "grad_norm": 3.8890137672424316, "learning_rate": 0.0002, "loss": 1.6069, "step": 175050 }, { "epoch": 0.71, "grad_norm": 3.4520366191864014, "learning_rate": 0.0002, "loss": 1.5813, "step": 175060 }, { "epoch": 0.71, "grad_norm": 4.329905033111572, "learning_rate": 0.0002, "loss": 1.7617, "step": 175070 }, { "epoch": 0.71, "grad_norm": 3.618129253387451, "learning_rate": 0.0002, "loss": 1.5413, "step": 175080 }, { "epoch": 0.71, "grad_norm": 2.217633008956909, "learning_rate": 0.0002, "loss": 1.4244, "step": 175090 }, { "epoch": 0.71, "grad_norm": 5.553396224975586, "learning_rate": 0.0002, "loss": 1.6042, "step": 175100 }, { "epoch": 0.71, "grad_norm": 3.293682098388672, "learning_rate": 0.0002, "loss": 1.7188, "step": 175110 }, { "epoch": 0.71, "grad_norm": 4.0289177894592285, "learning_rate": 0.0002, "loss": 1.6399, "step": 175120 }, { "epoch": 0.71, "grad_norm": 2.443075656890869, "learning_rate": 0.0002, "loss": 1.5806, "step": 175130 }, { "epoch": 0.71, "grad_norm": 2.823111057281494, "learning_rate": 0.0002, "loss": 1.6426, "step": 175140 }, { "epoch": 0.71, "grad_norm": 2.865588426589966, "learning_rate": 0.0002, "loss": 1.2465, "step": 175150 }, { "epoch": 0.71, "grad_norm": 2.0986521244049072, "learning_rate": 0.0002, "loss": 1.7855, "step": 175160 }, { "epoch": 0.71, "grad_norm": 3.7341625690460205, "learning_rate": 0.0002, "loss": 1.4723, "step": 175170 }, { "epoch": 0.71, "grad_norm": 2.5952701568603516, "learning_rate": 0.0002, "loss": 1.479, "step": 175180 }, { "epoch": 0.71, "grad_norm": 2.203711986541748, "learning_rate": 0.0002, "loss": 1.5615, "step": 175190 }, { "epoch": 0.71, "grad_norm": 1.8431508541107178, "learning_rate": 0.0002, "loss": 1.7964, "step": 175200 }, { "epoch": 0.71, "grad_norm": 2.217139482498169, "learning_rate": 0.0002, "loss": 1.612, "step": 175210 }, { "epoch": 0.71, "grad_norm": 4.583515167236328, "learning_rate": 0.0002, "loss": 1.3104, "step": 175220 }, { "epoch": 0.71, "grad_norm": 3.2345356941223145, "learning_rate": 0.0002, "loss": 1.5883, "step": 175230 }, { "epoch": 0.71, "grad_norm": 3.6142170429229736, "learning_rate": 0.0002, "loss": 1.5783, "step": 175240 }, { "epoch": 0.71, "grad_norm": 3.7836685180664062, "learning_rate": 0.0002, "loss": 1.4715, "step": 175250 }, { "epoch": 0.71, "grad_norm": 3.480250358581543, "learning_rate": 0.0002, "loss": 1.3338, "step": 175260 }, { "epoch": 0.71, "grad_norm": 3.4493703842163086, "learning_rate": 0.0002, "loss": 1.6539, "step": 175270 }, { "epoch": 0.71, "grad_norm": 2.989610433578491, "learning_rate": 0.0002, "loss": 1.4129, "step": 175280 }, { "epoch": 0.71, "grad_norm": 4.125009059906006, "learning_rate": 0.0002, "loss": 1.6245, "step": 175290 }, { "epoch": 0.71, "grad_norm": 3.2293283939361572, "learning_rate": 0.0002, "loss": 1.3963, "step": 175300 }, { "epoch": 0.71, "grad_norm": 1.645354151725769, "learning_rate": 0.0002, "loss": 1.4497, "step": 175310 }, { "epoch": 0.71, "grad_norm": 3.005908727645874, "learning_rate": 0.0002, "loss": 1.5456, "step": 175320 }, { "epoch": 0.71, "grad_norm": 2.2989864349365234, "learning_rate": 0.0002, "loss": 1.6171, "step": 175330 }, { "epoch": 0.71, "grad_norm": 3.133251905441284, "learning_rate": 0.0002, "loss": 1.7128, "step": 175340 }, { "epoch": 0.71, "grad_norm": 3.357034683227539, "learning_rate": 0.0002, "loss": 1.627, "step": 175350 }, { "epoch": 0.71, "grad_norm": 3.3399696350097656, "learning_rate": 0.0002, "loss": 1.6744, "step": 175360 }, { "epoch": 0.71, "grad_norm": 4.698909759521484, "learning_rate": 0.0002, "loss": 1.2912, "step": 175370 }, { "epoch": 0.71, "grad_norm": 3.8310394287109375, "learning_rate": 0.0002, "loss": 1.5966, "step": 175380 }, { "epoch": 0.71, "grad_norm": 2.271713972091675, "learning_rate": 0.0002, "loss": 1.6287, "step": 175390 }, { "epoch": 0.71, "grad_norm": 2.2886483669281006, "learning_rate": 0.0002, "loss": 1.6825, "step": 175400 }, { "epoch": 0.71, "grad_norm": 2.3253026008605957, "learning_rate": 0.0002, "loss": 1.5235, "step": 175410 }, { "epoch": 0.71, "grad_norm": 3.5594193935394287, "learning_rate": 0.0002, "loss": 1.6718, "step": 175420 }, { "epoch": 0.71, "grad_norm": 3.7448782920837402, "learning_rate": 0.0002, "loss": 1.3337, "step": 175430 }, { "epoch": 0.71, "grad_norm": 2.3216660022735596, "learning_rate": 0.0002, "loss": 1.4037, "step": 175440 }, { "epoch": 0.71, "grad_norm": 2.695072650909424, "learning_rate": 0.0002, "loss": 1.4692, "step": 175450 }, { "epoch": 0.71, "grad_norm": 3.1242263317108154, "learning_rate": 0.0002, "loss": 1.4005, "step": 175460 }, { "epoch": 0.71, "grad_norm": 2.777387857437134, "learning_rate": 0.0002, "loss": 1.4051, "step": 175470 }, { "epoch": 0.71, "grad_norm": 2.433591604232788, "learning_rate": 0.0002, "loss": 1.8611, "step": 175480 }, { "epoch": 0.71, "grad_norm": 4.428089618682861, "learning_rate": 0.0002, "loss": 1.6928, "step": 175490 }, { "epoch": 0.71, "grad_norm": 2.5064315795898438, "learning_rate": 0.0002, "loss": 1.425, "step": 175500 }, { "epoch": 0.71, "grad_norm": 2.612920045852661, "learning_rate": 0.0002, "loss": 1.7552, "step": 175510 }, { "epoch": 0.71, "grad_norm": 2.9938247203826904, "learning_rate": 0.0002, "loss": 1.7376, "step": 175520 }, { "epoch": 0.71, "grad_norm": 2.8446123600006104, "learning_rate": 0.0002, "loss": 1.3962, "step": 175530 }, { "epoch": 0.71, "grad_norm": 6.05838680267334, "learning_rate": 0.0002, "loss": 1.7063, "step": 175540 }, { "epoch": 0.71, "grad_norm": 2.4323291778564453, "learning_rate": 0.0002, "loss": 1.272, "step": 175550 }, { "epoch": 0.71, "grad_norm": 2.9953272342681885, "learning_rate": 0.0002, "loss": 1.711, "step": 175560 }, { "epoch": 0.71, "grad_norm": 3.040679454803467, "learning_rate": 0.0002, "loss": 1.4519, "step": 175570 }, { "epoch": 0.71, "grad_norm": 3.6057302951812744, "learning_rate": 0.0002, "loss": 1.4534, "step": 175580 }, { "epoch": 0.71, "grad_norm": 3.3566789627075195, "learning_rate": 0.0002, "loss": 1.6532, "step": 175590 }, { "epoch": 0.71, "grad_norm": 2.2242791652679443, "learning_rate": 0.0002, "loss": 1.3325, "step": 175600 }, { "epoch": 0.71, "grad_norm": 2.7433199882507324, "learning_rate": 0.0002, "loss": 1.6933, "step": 175610 }, { "epoch": 0.71, "grad_norm": 2.6117305755615234, "learning_rate": 0.0002, "loss": 1.4921, "step": 175620 }, { "epoch": 0.71, "grad_norm": 3.004096269607544, "learning_rate": 0.0002, "loss": 1.6068, "step": 175630 }, { "epoch": 0.72, "grad_norm": 2.4728171825408936, "learning_rate": 0.0002, "loss": 1.26, "step": 175640 }, { "epoch": 0.72, "grad_norm": 3.248668909072876, "learning_rate": 0.0002, "loss": 1.3138, "step": 175650 }, { "epoch": 0.72, "grad_norm": 3.4375367164611816, "learning_rate": 0.0002, "loss": 1.4676, "step": 175660 }, { "epoch": 0.72, "grad_norm": 2.9482362270355225, "learning_rate": 0.0002, "loss": 1.5573, "step": 175670 }, { "epoch": 0.72, "grad_norm": 2.81280779838562, "learning_rate": 0.0002, "loss": 1.4996, "step": 175680 }, { "epoch": 0.72, "grad_norm": 2.253206968307495, "learning_rate": 0.0002, "loss": 1.6474, "step": 175690 }, { "epoch": 0.72, "grad_norm": 2.9251158237457275, "learning_rate": 0.0002, "loss": 1.3966, "step": 175700 }, { "epoch": 0.72, "grad_norm": 3.0354158878326416, "learning_rate": 0.0002, "loss": 1.6063, "step": 175710 }, { "epoch": 0.72, "grad_norm": 3.564624071121216, "learning_rate": 0.0002, "loss": 1.7609, "step": 175720 }, { "epoch": 0.72, "grad_norm": 1.037021279335022, "learning_rate": 0.0002, "loss": 1.8573, "step": 175730 }, { "epoch": 0.72, "grad_norm": 2.7993760108947754, "learning_rate": 0.0002, "loss": 1.5404, "step": 175740 }, { "epoch": 0.72, "grad_norm": 3.878404378890991, "learning_rate": 0.0002, "loss": 1.4955, "step": 175750 }, { "epoch": 0.72, "grad_norm": 2.3950886726379395, "learning_rate": 0.0002, "loss": 1.8045, "step": 175760 }, { "epoch": 0.72, "grad_norm": 2.3375468254089355, "learning_rate": 0.0002, "loss": 1.6506, "step": 175770 }, { "epoch": 0.72, "grad_norm": 2.2934491634368896, "learning_rate": 0.0002, "loss": 1.5993, "step": 175780 }, { "epoch": 0.72, "grad_norm": 2.770716428756714, "learning_rate": 0.0002, "loss": 1.7682, "step": 175790 }, { "epoch": 0.72, "grad_norm": 1.3115469217300415, "learning_rate": 0.0002, "loss": 1.6387, "step": 175800 }, { "epoch": 0.72, "grad_norm": 3.532485008239746, "learning_rate": 0.0002, "loss": 1.6357, "step": 175810 }, { "epoch": 0.72, "grad_norm": 5.027840614318848, "learning_rate": 0.0002, "loss": 1.6322, "step": 175820 }, { "epoch": 0.72, "grad_norm": 2.9469470977783203, "learning_rate": 0.0002, "loss": 1.7796, "step": 175830 }, { "epoch": 0.72, "grad_norm": 2.608353853225708, "learning_rate": 0.0002, "loss": 1.5795, "step": 175840 }, { "epoch": 0.72, "grad_norm": 3.2321226596832275, "learning_rate": 0.0002, "loss": 1.7166, "step": 175850 }, { "epoch": 0.72, "grad_norm": 3.0205342769622803, "learning_rate": 0.0002, "loss": 1.5727, "step": 175860 }, { "epoch": 0.72, "grad_norm": 4.694793701171875, "learning_rate": 0.0002, "loss": 1.3451, "step": 175870 }, { "epoch": 0.72, "grad_norm": 3.852290391921997, "learning_rate": 0.0002, "loss": 1.5755, "step": 175880 }, { "epoch": 0.72, "grad_norm": 3.778146266937256, "learning_rate": 0.0002, "loss": 1.5134, "step": 175890 }, { "epoch": 0.72, "grad_norm": 1.8045482635498047, "learning_rate": 0.0002, "loss": 1.6782, "step": 175900 }, { "epoch": 0.72, "grad_norm": 3.2777531147003174, "learning_rate": 0.0002, "loss": 1.5712, "step": 175910 }, { "epoch": 0.72, "grad_norm": 2.2401297092437744, "learning_rate": 0.0002, "loss": 1.285, "step": 175920 }, { "epoch": 0.72, "grad_norm": 2.7748405933380127, "learning_rate": 0.0002, "loss": 1.6046, "step": 175930 }, { "epoch": 0.72, "grad_norm": 3.353837013244629, "learning_rate": 0.0002, "loss": 1.4585, "step": 175940 }, { "epoch": 0.72, "grad_norm": 4.193062782287598, "learning_rate": 0.0002, "loss": 1.3523, "step": 175950 }, { "epoch": 0.72, "grad_norm": 1.6442155838012695, "learning_rate": 0.0002, "loss": 1.8046, "step": 175960 }, { "epoch": 0.72, "grad_norm": 3.6741225719451904, "learning_rate": 0.0002, "loss": 1.5811, "step": 175970 }, { "epoch": 0.72, "grad_norm": 2.4735939502716064, "learning_rate": 0.0002, "loss": 1.4538, "step": 175980 }, { "epoch": 0.72, "grad_norm": 1.5807552337646484, "learning_rate": 0.0002, "loss": 1.656, "step": 175990 }, { "epoch": 0.72, "grad_norm": 19.35000991821289, "learning_rate": 0.0002, "loss": 1.7885, "step": 176000 }, { "epoch": 0.72, "grad_norm": 3.066769599914551, "learning_rate": 0.0002, "loss": 1.5688, "step": 176010 }, { "epoch": 0.72, "grad_norm": 2.773954153060913, "learning_rate": 0.0002, "loss": 1.4087, "step": 176020 }, { "epoch": 0.72, "grad_norm": 4.235053062438965, "learning_rate": 0.0002, "loss": 1.5964, "step": 176030 }, { "epoch": 0.72, "grad_norm": 2.565558910369873, "learning_rate": 0.0002, "loss": 1.8434, "step": 176040 }, { "epoch": 0.72, "grad_norm": 3.5440797805786133, "learning_rate": 0.0002, "loss": 1.6409, "step": 176050 }, { "epoch": 0.72, "grad_norm": 3.0560975074768066, "learning_rate": 0.0002, "loss": 1.6646, "step": 176060 }, { "epoch": 0.72, "grad_norm": 3.694944381713867, "learning_rate": 0.0002, "loss": 1.6471, "step": 176070 }, { "epoch": 0.72, "grad_norm": 2.1712403297424316, "learning_rate": 0.0002, "loss": 1.5211, "step": 176080 }, { "epoch": 0.72, "grad_norm": 2.6702420711517334, "learning_rate": 0.0002, "loss": 1.5479, "step": 176090 }, { "epoch": 0.72, "grad_norm": 2.9903101921081543, "learning_rate": 0.0002, "loss": 1.5933, "step": 176100 }, { "epoch": 0.72, "grad_norm": 2.8177950382232666, "learning_rate": 0.0002, "loss": 1.5806, "step": 176110 }, { "epoch": 0.72, "grad_norm": 4.042330741882324, "learning_rate": 0.0002, "loss": 1.7201, "step": 176120 }, { "epoch": 0.72, "grad_norm": 3.589801788330078, "learning_rate": 0.0002, "loss": 1.7503, "step": 176130 }, { "epoch": 0.72, "grad_norm": 3.294105291366577, "learning_rate": 0.0002, "loss": 1.6758, "step": 176140 }, { "epoch": 0.72, "grad_norm": 2.0479042530059814, "learning_rate": 0.0002, "loss": 1.3984, "step": 176150 }, { "epoch": 0.72, "grad_norm": 3.593877077102661, "learning_rate": 0.0002, "loss": 1.5721, "step": 176160 }, { "epoch": 0.72, "grad_norm": 1.9997175931930542, "learning_rate": 0.0002, "loss": 1.8784, "step": 176170 }, { "epoch": 0.72, "grad_norm": 4.124565601348877, "learning_rate": 0.0002, "loss": 1.4628, "step": 176180 }, { "epoch": 0.72, "grad_norm": 3.7555835247039795, "learning_rate": 0.0002, "loss": 1.6295, "step": 176190 }, { "epoch": 0.72, "grad_norm": 5.066704750061035, "learning_rate": 0.0002, "loss": 1.9054, "step": 176200 }, { "epoch": 0.72, "grad_norm": 1.8218882083892822, "learning_rate": 0.0002, "loss": 1.5491, "step": 176210 }, { "epoch": 0.72, "grad_norm": 2.6452596187591553, "learning_rate": 0.0002, "loss": 1.5506, "step": 176220 }, { "epoch": 0.72, "grad_norm": 2.6744461059570312, "learning_rate": 0.0002, "loss": 1.5305, "step": 176230 }, { "epoch": 0.72, "grad_norm": 3.0163893699645996, "learning_rate": 0.0002, "loss": 1.5497, "step": 176240 }, { "epoch": 0.72, "grad_norm": 2.9441680908203125, "learning_rate": 0.0002, "loss": 1.4506, "step": 176250 }, { "epoch": 0.72, "grad_norm": 2.976767063140869, "learning_rate": 0.0002, "loss": 1.7443, "step": 176260 }, { "epoch": 0.72, "grad_norm": 4.941463470458984, "learning_rate": 0.0002, "loss": 1.4437, "step": 176270 }, { "epoch": 0.72, "grad_norm": 2.712803363800049, "learning_rate": 0.0002, "loss": 1.767, "step": 176280 }, { "epoch": 0.72, "grad_norm": 2.15639591217041, "learning_rate": 0.0002, "loss": 1.5844, "step": 176290 }, { "epoch": 0.72, "grad_norm": 4.281930446624756, "learning_rate": 0.0002, "loss": 1.5874, "step": 176300 }, { "epoch": 0.72, "grad_norm": 3.2560174465179443, "learning_rate": 0.0002, "loss": 1.8044, "step": 176310 }, { "epoch": 0.72, "grad_norm": 1.8284584283828735, "learning_rate": 0.0002, "loss": 1.6579, "step": 176320 }, { "epoch": 0.72, "grad_norm": 3.413656711578369, "learning_rate": 0.0002, "loss": 1.6233, "step": 176330 }, { "epoch": 0.72, "grad_norm": 3.1352007389068604, "learning_rate": 0.0002, "loss": 1.3963, "step": 176340 }, { "epoch": 0.72, "grad_norm": 2.771313428878784, "learning_rate": 0.0002, "loss": 1.5804, "step": 176350 }, { "epoch": 0.72, "grad_norm": 3.6980717182159424, "learning_rate": 0.0002, "loss": 1.6448, "step": 176360 }, { "epoch": 0.72, "grad_norm": 2.979299306869507, "learning_rate": 0.0002, "loss": 1.4791, "step": 176370 }, { "epoch": 0.72, "grad_norm": 3.027315378189087, "learning_rate": 0.0002, "loss": 1.7738, "step": 176380 }, { "epoch": 0.72, "grad_norm": 5.01724910736084, "learning_rate": 0.0002, "loss": 1.5146, "step": 176390 }, { "epoch": 0.72, "grad_norm": 3.1518678665161133, "learning_rate": 0.0002, "loss": 1.4178, "step": 176400 }, { "epoch": 0.72, "grad_norm": 3.9633491039276123, "learning_rate": 0.0002, "loss": 1.6517, "step": 176410 }, { "epoch": 0.72, "grad_norm": 2.898907423019409, "learning_rate": 0.0002, "loss": 1.7299, "step": 176420 }, { "epoch": 0.72, "grad_norm": 1.714737892150879, "learning_rate": 0.0002, "loss": 1.3863, "step": 176430 }, { "epoch": 0.72, "grad_norm": 3.4445526599884033, "learning_rate": 0.0002, "loss": 1.5864, "step": 176440 }, { "epoch": 0.72, "grad_norm": 3.422537326812744, "learning_rate": 0.0002, "loss": 1.6593, "step": 176450 }, { "epoch": 0.72, "grad_norm": 2.2443864345550537, "learning_rate": 0.0002, "loss": 1.8301, "step": 176460 }, { "epoch": 0.72, "grad_norm": 3.4382827281951904, "learning_rate": 0.0002, "loss": 1.6605, "step": 176470 }, { "epoch": 0.72, "grad_norm": 4.457676887512207, "learning_rate": 0.0002, "loss": 1.5811, "step": 176480 }, { "epoch": 0.72, "grad_norm": 5.089151859283447, "learning_rate": 0.0002, "loss": 1.6211, "step": 176490 }, { "epoch": 0.72, "grad_norm": 3.111541986465454, "learning_rate": 0.0002, "loss": 1.6452, "step": 176500 }, { "epoch": 0.72, "grad_norm": 4.194549083709717, "learning_rate": 0.0002, "loss": 1.6078, "step": 176510 }, { "epoch": 0.72, "grad_norm": 2.9438092708587646, "learning_rate": 0.0002, "loss": 1.6704, "step": 176520 }, { "epoch": 0.72, "grad_norm": 2.462921142578125, "learning_rate": 0.0002, "loss": 1.53, "step": 176530 }, { "epoch": 0.72, "grad_norm": 3.3449621200561523, "learning_rate": 0.0002, "loss": 1.7984, "step": 176540 }, { "epoch": 0.72, "grad_norm": 2.9853501319885254, "learning_rate": 0.0002, "loss": 1.5354, "step": 176550 }, { "epoch": 0.72, "grad_norm": 3.3090946674346924, "learning_rate": 0.0002, "loss": 1.7435, "step": 176560 }, { "epoch": 0.72, "grad_norm": 2.764747142791748, "learning_rate": 0.0002, "loss": 1.585, "step": 176570 }, { "epoch": 0.72, "grad_norm": 4.9324727058410645, "learning_rate": 0.0002, "loss": 1.5259, "step": 176580 }, { "epoch": 0.72, "grad_norm": 2.354231119155884, "learning_rate": 0.0002, "loss": 1.4809, "step": 176590 }, { "epoch": 0.72, "grad_norm": 2.2285208702087402, "learning_rate": 0.0002, "loss": 1.6633, "step": 176600 }, { "epoch": 0.72, "grad_norm": 2.0672292709350586, "learning_rate": 0.0002, "loss": 1.6764, "step": 176610 }, { "epoch": 0.72, "grad_norm": 2.1001460552215576, "learning_rate": 0.0002, "loss": 1.5955, "step": 176620 }, { "epoch": 0.72, "grad_norm": 3.2235798835754395, "learning_rate": 0.0002, "loss": 1.8443, "step": 176630 }, { "epoch": 0.72, "grad_norm": 1.6920233964920044, "learning_rate": 0.0002, "loss": 1.4921, "step": 176640 }, { "epoch": 0.72, "grad_norm": 2.004894256591797, "learning_rate": 0.0002, "loss": 1.5942, "step": 176650 }, { "epoch": 0.72, "grad_norm": 3.0594849586486816, "learning_rate": 0.0002, "loss": 1.6167, "step": 176660 }, { "epoch": 0.72, "grad_norm": 1.762205958366394, "learning_rate": 0.0002, "loss": 1.4817, "step": 176670 }, { "epoch": 0.72, "grad_norm": 3.8120007514953613, "learning_rate": 0.0002, "loss": 1.7138, "step": 176680 }, { "epoch": 0.72, "grad_norm": 3.7907252311706543, "learning_rate": 0.0002, "loss": 1.5115, "step": 176690 }, { "epoch": 0.72, "grad_norm": 3.4354701042175293, "learning_rate": 0.0002, "loss": 1.5741, "step": 176700 }, { "epoch": 0.72, "grad_norm": 3.5661802291870117, "learning_rate": 0.0002, "loss": 1.4131, "step": 176710 }, { "epoch": 0.72, "grad_norm": 4.177687168121338, "learning_rate": 0.0002, "loss": 1.5318, "step": 176720 }, { "epoch": 0.72, "grad_norm": 2.7779078483581543, "learning_rate": 0.0002, "loss": 1.3475, "step": 176730 }, { "epoch": 0.72, "grad_norm": 4.758575439453125, "learning_rate": 0.0002, "loss": 1.4279, "step": 176740 }, { "epoch": 0.72, "grad_norm": 2.964186906814575, "learning_rate": 0.0002, "loss": 1.7613, "step": 176750 }, { "epoch": 0.72, "grad_norm": 2.87749981880188, "learning_rate": 0.0002, "loss": 1.384, "step": 176760 }, { "epoch": 0.72, "grad_norm": 2.555217981338501, "learning_rate": 0.0002, "loss": 1.4998, "step": 176770 }, { "epoch": 0.72, "grad_norm": 3.0862784385681152, "learning_rate": 0.0002, "loss": 1.6852, "step": 176780 }, { "epoch": 0.72, "grad_norm": 2.590216636657715, "learning_rate": 0.0002, "loss": 1.6551, "step": 176790 }, { "epoch": 0.72, "grad_norm": 2.659970998764038, "learning_rate": 0.0002, "loss": 1.8442, "step": 176800 }, { "epoch": 0.72, "grad_norm": 1.9715439081192017, "learning_rate": 0.0002, "loss": 1.497, "step": 176810 }, { "epoch": 0.72, "grad_norm": 1.7412614822387695, "learning_rate": 0.0002, "loss": 1.5569, "step": 176820 }, { "epoch": 0.72, "grad_norm": 2.5267131328582764, "learning_rate": 0.0002, "loss": 1.6786, "step": 176830 }, { "epoch": 0.72, "grad_norm": 4.737471103668213, "learning_rate": 0.0002, "loss": 1.708, "step": 176840 }, { "epoch": 0.72, "grad_norm": 4.277928829193115, "learning_rate": 0.0002, "loss": 1.7392, "step": 176850 }, { "epoch": 0.72, "grad_norm": 3.5199129581451416, "learning_rate": 0.0002, "loss": 1.5565, "step": 176860 }, { "epoch": 0.72, "grad_norm": 2.738516330718994, "learning_rate": 0.0002, "loss": 1.4777, "step": 176870 }, { "epoch": 0.72, "grad_norm": 3.2038581371307373, "learning_rate": 0.0002, "loss": 1.4433, "step": 176880 }, { "epoch": 0.72, "grad_norm": 1.463962197303772, "learning_rate": 0.0002, "loss": 1.5441, "step": 176890 }, { "epoch": 0.72, "grad_norm": 1.7501229047775269, "learning_rate": 0.0002, "loss": 1.6139, "step": 176900 }, { "epoch": 0.72, "grad_norm": 3.762932777404785, "learning_rate": 0.0002, "loss": 1.6077, "step": 176910 }, { "epoch": 0.72, "grad_norm": 4.924373626708984, "learning_rate": 0.0002, "loss": 1.709, "step": 176920 }, { "epoch": 0.72, "grad_norm": 2.2265448570251465, "learning_rate": 0.0002, "loss": 1.7256, "step": 176930 }, { "epoch": 0.72, "grad_norm": 3.4714951515197754, "learning_rate": 0.0002, "loss": 1.4609, "step": 176940 }, { "epoch": 0.72, "grad_norm": 1.2881922721862793, "learning_rate": 0.0002, "loss": 1.4089, "step": 176950 }, { "epoch": 0.72, "grad_norm": 1.5045981407165527, "learning_rate": 0.0002, "loss": 1.5457, "step": 176960 }, { "epoch": 0.72, "grad_norm": 1.815807580947876, "learning_rate": 0.0002, "loss": 1.4221, "step": 176970 }, { "epoch": 0.72, "grad_norm": 3.236862897872925, "learning_rate": 0.0002, "loss": 1.449, "step": 176980 }, { "epoch": 0.72, "grad_norm": 2.549346923828125, "learning_rate": 0.0002, "loss": 1.7644, "step": 176990 }, { "epoch": 0.72, "grad_norm": 2.3607699871063232, "learning_rate": 0.0002, "loss": 1.4239, "step": 177000 }, { "epoch": 0.72, "grad_norm": 2.1309022903442383, "learning_rate": 0.0002, "loss": 1.3926, "step": 177010 }, { "epoch": 0.72, "grad_norm": 3.8865370750427246, "learning_rate": 0.0002, "loss": 1.7766, "step": 177020 }, { "epoch": 0.72, "grad_norm": 1.55144202709198, "learning_rate": 0.0002, "loss": 1.5986, "step": 177030 }, { "epoch": 0.72, "grad_norm": 2.2231051921844482, "learning_rate": 0.0002, "loss": 1.5806, "step": 177040 }, { "epoch": 0.72, "grad_norm": 2.7006959915161133, "learning_rate": 0.0002, "loss": 1.6659, "step": 177050 }, { "epoch": 0.72, "grad_norm": 2.469484329223633, "learning_rate": 0.0002, "loss": 1.671, "step": 177060 }, { "epoch": 0.72, "grad_norm": 2.237330436706543, "learning_rate": 0.0002, "loss": 1.3491, "step": 177070 }, { "epoch": 0.72, "grad_norm": 2.309904098510742, "learning_rate": 0.0002, "loss": 1.3945, "step": 177080 }, { "epoch": 0.72, "grad_norm": 4.090519428253174, "learning_rate": 0.0002, "loss": 1.5431, "step": 177090 }, { "epoch": 0.72, "grad_norm": 2.1216249465942383, "learning_rate": 0.0002, "loss": 1.6836, "step": 177100 }, { "epoch": 0.72, "grad_norm": 3.2791779041290283, "learning_rate": 0.0002, "loss": 1.7487, "step": 177110 }, { "epoch": 0.72, "grad_norm": 3.021336317062378, "learning_rate": 0.0002, "loss": 1.7487, "step": 177120 }, { "epoch": 0.72, "grad_norm": 4.554561138153076, "learning_rate": 0.0002, "loss": 1.6707, "step": 177130 }, { "epoch": 0.72, "grad_norm": 3.56301212310791, "learning_rate": 0.0002, "loss": 1.5947, "step": 177140 }, { "epoch": 0.72, "grad_norm": 4.725766658782959, "learning_rate": 0.0002, "loss": 1.618, "step": 177150 }, { "epoch": 0.72, "grad_norm": 3.73282527923584, "learning_rate": 0.0002, "loss": 1.6467, "step": 177160 }, { "epoch": 0.72, "grad_norm": 1.8295255899429321, "learning_rate": 0.0002, "loss": 1.5399, "step": 177170 }, { "epoch": 0.72, "grad_norm": 2.8282277584075928, "learning_rate": 0.0002, "loss": 1.6228, "step": 177180 }, { "epoch": 0.72, "grad_norm": 3.2337875366210938, "learning_rate": 0.0002, "loss": 1.7511, "step": 177190 }, { "epoch": 0.72, "grad_norm": 12.657002449035645, "learning_rate": 0.0002, "loss": 1.6187, "step": 177200 }, { "epoch": 0.72, "grad_norm": 3.9555366039276123, "learning_rate": 0.0002, "loss": 1.7475, "step": 177210 }, { "epoch": 0.72, "grad_norm": 3.0737829208374023, "learning_rate": 0.0002, "loss": 1.6339, "step": 177220 }, { "epoch": 0.72, "grad_norm": 2.640019178390503, "learning_rate": 0.0002, "loss": 1.682, "step": 177230 }, { "epoch": 0.72, "grad_norm": 2.756545066833496, "learning_rate": 0.0002, "loss": 1.5455, "step": 177240 }, { "epoch": 0.72, "grad_norm": 3.5616490840911865, "learning_rate": 0.0002, "loss": 1.7157, "step": 177250 }, { "epoch": 0.72, "grad_norm": 2.8497533798217773, "learning_rate": 0.0002, "loss": 1.4876, "step": 177260 }, { "epoch": 0.72, "grad_norm": 2.763305902481079, "learning_rate": 0.0002, "loss": 1.5742, "step": 177270 }, { "epoch": 0.72, "grad_norm": 3.5073533058166504, "learning_rate": 0.0002, "loss": 1.7107, "step": 177280 }, { "epoch": 0.72, "grad_norm": 2.3625540733337402, "learning_rate": 0.0002, "loss": 1.5788, "step": 177290 }, { "epoch": 0.72, "grad_norm": 3.616455554962158, "learning_rate": 0.0002, "loss": 1.6609, "step": 177300 }, { "epoch": 0.72, "grad_norm": 3.0695557594299316, "learning_rate": 0.0002, "loss": 1.7219, "step": 177310 }, { "epoch": 0.72, "grad_norm": 2.505291700363159, "learning_rate": 0.0002, "loss": 1.4475, "step": 177320 }, { "epoch": 0.72, "grad_norm": 5.611968517303467, "learning_rate": 0.0002, "loss": 1.6317, "step": 177330 }, { "epoch": 0.72, "grad_norm": 2.5421080589294434, "learning_rate": 0.0002, "loss": 1.7056, "step": 177340 }, { "epoch": 0.72, "grad_norm": 3.6499202251434326, "learning_rate": 0.0002, "loss": 1.4315, "step": 177350 }, { "epoch": 0.72, "grad_norm": 3.661827564239502, "learning_rate": 0.0002, "loss": 1.3824, "step": 177360 }, { "epoch": 0.72, "grad_norm": 4.17420768737793, "learning_rate": 0.0002, "loss": 1.6689, "step": 177370 }, { "epoch": 0.72, "grad_norm": 2.9565532207489014, "learning_rate": 0.0002, "loss": 1.5776, "step": 177380 }, { "epoch": 0.72, "grad_norm": 2.685168743133545, "learning_rate": 0.0002, "loss": 1.4628, "step": 177390 }, { "epoch": 0.72, "grad_norm": 3.315148115158081, "learning_rate": 0.0002, "loss": 1.6701, "step": 177400 }, { "epoch": 0.72, "grad_norm": 2.742072343826294, "learning_rate": 0.0002, "loss": 1.5935, "step": 177410 }, { "epoch": 0.72, "grad_norm": 2.347588062286377, "learning_rate": 0.0002, "loss": 1.441, "step": 177420 }, { "epoch": 0.72, "grad_norm": 2.889016628265381, "learning_rate": 0.0002, "loss": 1.394, "step": 177430 }, { "epoch": 0.72, "grad_norm": 2.9928927421569824, "learning_rate": 0.0002, "loss": 1.6522, "step": 177440 }, { "epoch": 0.72, "grad_norm": 4.433704376220703, "learning_rate": 0.0002, "loss": 1.4255, "step": 177450 }, { "epoch": 0.72, "grad_norm": 2.4502618312835693, "learning_rate": 0.0002, "loss": 1.7041, "step": 177460 }, { "epoch": 0.72, "grad_norm": 3.1793181896209717, "learning_rate": 0.0002, "loss": 1.7319, "step": 177470 }, { "epoch": 0.72, "grad_norm": 3.0818419456481934, "learning_rate": 0.0002, "loss": 1.3961, "step": 177480 }, { "epoch": 0.72, "grad_norm": 3.1138463020324707, "learning_rate": 0.0002, "loss": 1.6142, "step": 177490 }, { "epoch": 0.72, "grad_norm": 2.695016860961914, "learning_rate": 0.0002, "loss": 1.6933, "step": 177500 }, { "epoch": 0.72, "grad_norm": 2.0243701934814453, "learning_rate": 0.0002, "loss": 1.6108, "step": 177510 }, { "epoch": 0.72, "grad_norm": 3.592440605163574, "learning_rate": 0.0002, "loss": 1.7221, "step": 177520 }, { "epoch": 0.72, "grad_norm": 2.7245285511016846, "learning_rate": 0.0002, "loss": 1.6882, "step": 177530 }, { "epoch": 0.72, "grad_norm": 3.389141321182251, "learning_rate": 0.0002, "loss": 1.5036, "step": 177540 }, { "epoch": 0.72, "grad_norm": 3.020914316177368, "learning_rate": 0.0002, "loss": 1.5526, "step": 177550 }, { "epoch": 0.72, "grad_norm": 7.071706295013428, "learning_rate": 0.0002, "loss": 1.7992, "step": 177560 }, { "epoch": 0.72, "grad_norm": 3.106369972229004, "learning_rate": 0.0002, "loss": 1.5113, "step": 177570 }, { "epoch": 0.72, "grad_norm": 2.7712347507476807, "learning_rate": 0.0002, "loss": 1.645, "step": 177580 }, { "epoch": 0.72, "grad_norm": 4.278432369232178, "learning_rate": 0.0002, "loss": 1.5412, "step": 177590 }, { "epoch": 0.72, "grad_norm": 2.353189468383789, "learning_rate": 0.0002, "loss": 1.6438, "step": 177600 }, { "epoch": 0.72, "grad_norm": 2.7659294605255127, "learning_rate": 0.0002, "loss": 1.5669, "step": 177610 }, { "epoch": 0.72, "grad_norm": 5.179714679718018, "learning_rate": 0.0002, "loss": 1.4864, "step": 177620 }, { "epoch": 0.72, "grad_norm": 1.9283677339553833, "learning_rate": 0.0002, "loss": 1.3932, "step": 177630 }, { "epoch": 0.72, "grad_norm": 4.0926361083984375, "learning_rate": 0.0002, "loss": 1.5029, "step": 177640 }, { "epoch": 0.72, "grad_norm": 1.7713422775268555, "learning_rate": 0.0002, "loss": 1.7552, "step": 177650 }, { "epoch": 0.72, "grad_norm": 3.239042043685913, "learning_rate": 0.0002, "loss": 1.6554, "step": 177660 }, { "epoch": 0.72, "grad_norm": 3.1528117656707764, "learning_rate": 0.0002, "loss": 1.9736, "step": 177670 }, { "epoch": 0.72, "grad_norm": 2.459120750427246, "learning_rate": 0.0002, "loss": 1.6055, "step": 177680 }, { "epoch": 0.72, "grad_norm": 2.573056221008301, "learning_rate": 0.0002, "loss": 1.6083, "step": 177690 }, { "epoch": 0.72, "grad_norm": 3.363847255706787, "learning_rate": 0.0002, "loss": 1.7207, "step": 177700 }, { "epoch": 0.72, "grad_norm": 3.1179280281066895, "learning_rate": 0.0002, "loss": 1.4462, "step": 177710 }, { "epoch": 0.72, "grad_norm": 1.2214878797531128, "learning_rate": 0.0002, "loss": 1.5092, "step": 177720 }, { "epoch": 0.72, "grad_norm": 2.490241289138794, "learning_rate": 0.0002, "loss": 1.6084, "step": 177730 }, { "epoch": 0.72, "grad_norm": 2.8958380222320557, "learning_rate": 0.0002, "loss": 1.6382, "step": 177740 }, { "epoch": 0.72, "grad_norm": 5.285848140716553, "learning_rate": 0.0002, "loss": 1.5286, "step": 177750 }, { "epoch": 0.72, "grad_norm": 3.978590250015259, "learning_rate": 0.0002, "loss": 1.5141, "step": 177760 }, { "epoch": 0.72, "grad_norm": 2.394026756286621, "learning_rate": 0.0002, "loss": 1.419, "step": 177770 }, { "epoch": 0.72, "grad_norm": 2.2887632846832275, "learning_rate": 0.0002, "loss": 1.4972, "step": 177780 }, { "epoch": 0.72, "grad_norm": 2.877366304397583, "learning_rate": 0.0002, "loss": 1.7673, "step": 177790 }, { "epoch": 0.72, "grad_norm": 3.6445255279541016, "learning_rate": 0.0002, "loss": 1.4016, "step": 177800 }, { "epoch": 0.72, "grad_norm": 4.305093288421631, "learning_rate": 0.0002, "loss": 1.4899, "step": 177810 }, { "epoch": 0.72, "grad_norm": 4.786158084869385, "learning_rate": 0.0002, "loss": 1.6592, "step": 177820 }, { "epoch": 0.72, "grad_norm": 2.541843891143799, "learning_rate": 0.0002, "loss": 1.6123, "step": 177830 }, { "epoch": 0.72, "grad_norm": 3.453258514404297, "learning_rate": 0.0002, "loss": 1.8193, "step": 177840 }, { "epoch": 0.72, "grad_norm": 1.8222177028656006, "learning_rate": 0.0002, "loss": 1.5404, "step": 177850 }, { "epoch": 0.72, "grad_norm": 3.400737762451172, "learning_rate": 0.0002, "loss": 1.5118, "step": 177860 }, { "epoch": 0.72, "grad_norm": 4.599906921386719, "learning_rate": 0.0002, "loss": 1.6248, "step": 177870 }, { "epoch": 0.72, "grad_norm": 2.0891597270965576, "learning_rate": 0.0002, "loss": 1.508, "step": 177880 }, { "epoch": 0.72, "grad_norm": 3.4473447799682617, "learning_rate": 0.0002, "loss": 1.3677, "step": 177890 }, { "epoch": 0.72, "grad_norm": 1.6039010286331177, "learning_rate": 0.0002, "loss": 1.4012, "step": 177900 }, { "epoch": 0.72, "grad_norm": 3.590688705444336, "learning_rate": 0.0002, "loss": 1.8608, "step": 177910 }, { "epoch": 0.72, "grad_norm": 2.1331491470336914, "learning_rate": 0.0002, "loss": 1.6725, "step": 177920 }, { "epoch": 0.72, "grad_norm": 2.0691654682159424, "learning_rate": 0.0002, "loss": 1.535, "step": 177930 }, { "epoch": 0.72, "grad_norm": 3.5326061248779297, "learning_rate": 0.0002, "loss": 1.5331, "step": 177940 }, { "epoch": 0.72, "grad_norm": 3.1250040531158447, "learning_rate": 0.0002, "loss": 1.7409, "step": 177950 }, { "epoch": 0.72, "grad_norm": 2.2163476943969727, "learning_rate": 0.0002, "loss": 1.5237, "step": 177960 }, { "epoch": 0.72, "grad_norm": 5.746660232543945, "learning_rate": 0.0002, "loss": 1.4456, "step": 177970 }, { "epoch": 0.72, "grad_norm": 2.5959510803222656, "learning_rate": 0.0002, "loss": 1.6274, "step": 177980 }, { "epoch": 0.72, "grad_norm": 3.108877420425415, "learning_rate": 0.0002, "loss": 1.4415, "step": 177990 }, { "epoch": 0.72, "grad_norm": 3.1706624031066895, "learning_rate": 0.0002, "loss": 1.8603, "step": 178000 }, { "epoch": 0.72, "grad_norm": 3.2562923431396484, "learning_rate": 0.0002, "loss": 1.5136, "step": 178010 }, { "epoch": 0.72, "grad_norm": 2.4700801372528076, "learning_rate": 0.0002, "loss": 1.7072, "step": 178020 }, { "epoch": 0.72, "grad_norm": 3.191987991333008, "learning_rate": 0.0002, "loss": 1.5436, "step": 178030 }, { "epoch": 0.72, "grad_norm": 4.146881580352783, "learning_rate": 0.0002, "loss": 1.5838, "step": 178040 }, { "epoch": 0.72, "grad_norm": 2.433130979537964, "learning_rate": 0.0002, "loss": 1.5405, "step": 178050 }, { "epoch": 0.72, "grad_norm": 4.278594970703125, "learning_rate": 0.0002, "loss": 1.4696, "step": 178060 }, { "epoch": 0.72, "grad_norm": 3.254669427871704, "learning_rate": 0.0002, "loss": 1.3433, "step": 178070 }, { "epoch": 0.72, "grad_norm": 3.5643229484558105, "learning_rate": 0.0002, "loss": 1.3429, "step": 178080 }, { "epoch": 0.72, "grad_norm": 2.5222761631011963, "learning_rate": 0.0002, "loss": 1.6514, "step": 178090 }, { "epoch": 0.73, "grad_norm": 3.104678153991699, "learning_rate": 0.0002, "loss": 1.5565, "step": 178100 }, { "epoch": 0.73, "grad_norm": 4.620092868804932, "learning_rate": 0.0002, "loss": 1.3747, "step": 178110 }, { "epoch": 0.73, "grad_norm": 3.4478468894958496, "learning_rate": 0.0002, "loss": 1.6034, "step": 178120 }, { "epoch": 0.73, "grad_norm": 3.7104012966156006, "learning_rate": 0.0002, "loss": 1.3574, "step": 178130 }, { "epoch": 0.73, "grad_norm": 3.2408547401428223, "learning_rate": 0.0002, "loss": 1.6626, "step": 178140 }, { "epoch": 0.73, "grad_norm": 3.7718541622161865, "learning_rate": 0.0002, "loss": 1.6665, "step": 178150 }, { "epoch": 0.73, "grad_norm": 1.7874799966812134, "learning_rate": 0.0002, "loss": 1.8545, "step": 178160 }, { "epoch": 0.73, "grad_norm": 2.4828310012817383, "learning_rate": 0.0002, "loss": 1.6197, "step": 178170 }, { "epoch": 0.73, "grad_norm": 2.510516881942749, "learning_rate": 0.0002, "loss": 1.329, "step": 178180 }, { "epoch": 0.73, "grad_norm": 2.910830497741699, "learning_rate": 0.0002, "loss": 1.7541, "step": 178190 }, { "epoch": 0.73, "grad_norm": 2.7705323696136475, "learning_rate": 0.0002, "loss": 1.5885, "step": 178200 }, { "epoch": 0.73, "grad_norm": 2.414452314376831, "learning_rate": 0.0002, "loss": 1.5169, "step": 178210 }, { "epoch": 0.73, "grad_norm": 1.9388967752456665, "learning_rate": 0.0002, "loss": 1.3502, "step": 178220 }, { "epoch": 0.73, "grad_norm": 3.645810127258301, "learning_rate": 0.0002, "loss": 1.7867, "step": 178230 }, { "epoch": 0.73, "grad_norm": 5.22769021987915, "learning_rate": 0.0002, "loss": 1.541, "step": 178240 }, { "epoch": 0.73, "grad_norm": 4.719672679901123, "learning_rate": 0.0002, "loss": 2.1463, "step": 178250 }, { "epoch": 0.73, "grad_norm": 2.5798110961914062, "learning_rate": 0.0002, "loss": 1.3424, "step": 178260 }, { "epoch": 0.73, "grad_norm": 3.261307716369629, "learning_rate": 0.0002, "loss": 1.435, "step": 178270 }, { "epoch": 0.73, "grad_norm": 2.3260834217071533, "learning_rate": 0.0002, "loss": 1.4805, "step": 178280 }, { "epoch": 0.73, "grad_norm": 2.234563112258911, "learning_rate": 0.0002, "loss": 1.5135, "step": 178290 }, { "epoch": 0.73, "grad_norm": 2.263779878616333, "learning_rate": 0.0002, "loss": 1.4256, "step": 178300 }, { "epoch": 0.73, "grad_norm": 3.896476984024048, "learning_rate": 0.0002, "loss": 1.6222, "step": 178310 }, { "epoch": 0.73, "grad_norm": 4.220394611358643, "learning_rate": 0.0002, "loss": 1.7536, "step": 178320 }, { "epoch": 0.73, "grad_norm": 2.874246597290039, "learning_rate": 0.0002, "loss": 1.5518, "step": 178330 }, { "epoch": 0.73, "grad_norm": 4.098221778869629, "learning_rate": 0.0002, "loss": 1.6647, "step": 178340 }, { "epoch": 0.73, "grad_norm": 2.4681954383850098, "learning_rate": 0.0002, "loss": 1.4915, "step": 178350 }, { "epoch": 0.73, "grad_norm": 2.754438638687134, "learning_rate": 0.0002, "loss": 1.494, "step": 178360 }, { "epoch": 0.73, "grad_norm": 3.552952527999878, "learning_rate": 0.0002, "loss": 1.7731, "step": 178370 }, { "epoch": 0.73, "grad_norm": 2.765810489654541, "learning_rate": 0.0002, "loss": 1.6539, "step": 178380 }, { "epoch": 0.73, "grad_norm": 3.502798080444336, "learning_rate": 0.0002, "loss": 1.3673, "step": 178390 }, { "epoch": 0.73, "grad_norm": 2.5528905391693115, "learning_rate": 0.0002, "loss": 1.3868, "step": 178400 }, { "epoch": 0.73, "grad_norm": 3.3696112632751465, "learning_rate": 0.0002, "loss": 1.6805, "step": 178410 }, { "epoch": 0.73, "grad_norm": 4.028824806213379, "learning_rate": 0.0002, "loss": 1.9734, "step": 178420 }, { "epoch": 0.73, "grad_norm": 1.543294072151184, "learning_rate": 0.0002, "loss": 1.4571, "step": 178430 }, { "epoch": 0.73, "grad_norm": 3.1101112365722656, "learning_rate": 0.0002, "loss": 1.3854, "step": 178440 }, { "epoch": 0.73, "grad_norm": 2.4800736904144287, "learning_rate": 0.0002, "loss": 1.4912, "step": 178450 }, { "epoch": 0.73, "grad_norm": 2.2103466987609863, "learning_rate": 0.0002, "loss": 1.6764, "step": 178460 }, { "epoch": 0.73, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.7521, "step": 178470 }, { "epoch": 0.73, "grad_norm": 9.323504447937012, "learning_rate": 0.0002, "loss": 1.4623, "step": 178480 }, { "epoch": 0.73, "grad_norm": 2.2618048191070557, "learning_rate": 0.0002, "loss": 1.9287, "step": 178490 }, { "epoch": 0.73, "grad_norm": 3.6087534427642822, "learning_rate": 0.0002, "loss": 1.8366, "step": 178500 }, { "epoch": 0.73, "grad_norm": 2.4722721576690674, "learning_rate": 0.0002, "loss": 1.8237, "step": 178510 }, { "epoch": 0.73, "grad_norm": 3.1509146690368652, "learning_rate": 0.0002, "loss": 1.6855, "step": 178520 }, { "epoch": 0.73, "grad_norm": 2.374321699142456, "learning_rate": 0.0002, "loss": 1.7114, "step": 178530 }, { "epoch": 0.73, "grad_norm": 3.4489665031433105, "learning_rate": 0.0002, "loss": 1.5556, "step": 178540 }, { "epoch": 0.73, "grad_norm": 4.353233814239502, "learning_rate": 0.0002, "loss": 1.6227, "step": 178550 }, { "epoch": 0.73, "grad_norm": 2.6366117000579834, "learning_rate": 0.0002, "loss": 1.5857, "step": 178560 }, { "epoch": 0.73, "grad_norm": 3.1283864974975586, "learning_rate": 0.0002, "loss": 1.6161, "step": 178570 }, { "epoch": 0.73, "grad_norm": 2.3533875942230225, "learning_rate": 0.0002, "loss": 1.7021, "step": 178580 }, { "epoch": 0.73, "grad_norm": 3.265563488006592, "learning_rate": 0.0002, "loss": 1.6399, "step": 178590 }, { "epoch": 0.73, "grad_norm": 2.8433005809783936, "learning_rate": 0.0002, "loss": 1.3515, "step": 178600 }, { "epoch": 0.73, "grad_norm": 4.243879795074463, "learning_rate": 0.0002, "loss": 1.5797, "step": 178610 }, { "epoch": 0.73, "grad_norm": 3.5691115856170654, "learning_rate": 0.0002, "loss": 1.6005, "step": 178620 }, { "epoch": 0.73, "grad_norm": 2.5533535480499268, "learning_rate": 0.0002, "loss": 1.8434, "step": 178630 }, { "epoch": 0.73, "grad_norm": 3.545374631881714, "learning_rate": 0.0002, "loss": 1.6677, "step": 178640 }, { "epoch": 0.73, "grad_norm": 2.5732154846191406, "learning_rate": 0.0002, "loss": 1.4836, "step": 178650 }, { "epoch": 0.73, "grad_norm": 3.7723746299743652, "learning_rate": 0.0002, "loss": 1.5447, "step": 178660 }, { "epoch": 0.73, "grad_norm": 3.0117807388305664, "learning_rate": 0.0002, "loss": 1.6345, "step": 178670 }, { "epoch": 0.73, "grad_norm": 2.021956205368042, "learning_rate": 0.0002, "loss": 1.7928, "step": 178680 }, { "epoch": 0.73, "grad_norm": 4.478428363800049, "learning_rate": 0.0002, "loss": 1.2482, "step": 178690 }, { "epoch": 0.73, "grad_norm": 2.4271838665008545, "learning_rate": 0.0002, "loss": 1.6273, "step": 178700 }, { "epoch": 0.73, "grad_norm": 4.429361343383789, "learning_rate": 0.0002, "loss": 1.4647, "step": 178710 }, { "epoch": 0.73, "grad_norm": 4.046855926513672, "learning_rate": 0.0002, "loss": 1.4356, "step": 178720 }, { "epoch": 0.73, "grad_norm": 1.4702237844467163, "learning_rate": 0.0002, "loss": 1.4254, "step": 178730 }, { "epoch": 0.73, "grad_norm": 5.608347415924072, "learning_rate": 0.0002, "loss": 1.4148, "step": 178740 }, { "epoch": 0.73, "grad_norm": 3.022338628768921, "learning_rate": 0.0002, "loss": 1.7573, "step": 178750 }, { "epoch": 0.73, "grad_norm": 2.9090912342071533, "learning_rate": 0.0002, "loss": 1.5738, "step": 178760 }, { "epoch": 0.73, "grad_norm": 2.48525333404541, "learning_rate": 0.0002, "loss": 1.5246, "step": 178770 }, { "epoch": 0.73, "grad_norm": 2.726358652114868, "learning_rate": 0.0002, "loss": 1.3087, "step": 178780 }, { "epoch": 0.73, "grad_norm": 2.672905445098877, "learning_rate": 0.0002, "loss": 1.5013, "step": 178790 }, { "epoch": 0.73, "grad_norm": 1.9711557626724243, "learning_rate": 0.0002, "loss": 1.7284, "step": 178800 }, { "epoch": 0.73, "grad_norm": 2.766909122467041, "learning_rate": 0.0002, "loss": 1.4655, "step": 178810 }, { "epoch": 0.73, "grad_norm": 3.3339545726776123, "learning_rate": 0.0002, "loss": 1.7337, "step": 178820 }, { "epoch": 0.73, "grad_norm": 2.8008084297180176, "learning_rate": 0.0002, "loss": 1.6108, "step": 178830 }, { "epoch": 0.73, "grad_norm": 3.732943296432495, "learning_rate": 0.0002, "loss": 1.5874, "step": 178840 }, { "epoch": 0.73, "grad_norm": 4.365828514099121, "learning_rate": 0.0002, "loss": 1.524, "step": 178850 }, { "epoch": 0.73, "grad_norm": 2.9669554233551025, "learning_rate": 0.0002, "loss": 1.5237, "step": 178860 }, { "epoch": 0.73, "grad_norm": 3.1473898887634277, "learning_rate": 0.0002, "loss": 1.6758, "step": 178870 }, { "epoch": 0.73, "grad_norm": 3.206143617630005, "learning_rate": 0.0002, "loss": 1.6369, "step": 178880 }, { "epoch": 0.73, "grad_norm": 4.215508460998535, "learning_rate": 0.0002, "loss": 1.7051, "step": 178890 }, { "epoch": 0.73, "grad_norm": 3.0886852741241455, "learning_rate": 0.0002, "loss": 1.5741, "step": 178900 }, { "epoch": 0.73, "grad_norm": 3.1497015953063965, "learning_rate": 0.0002, "loss": 1.542, "step": 178910 }, { "epoch": 0.73, "grad_norm": 3.058835029602051, "learning_rate": 0.0002, "loss": 1.7185, "step": 178920 }, { "epoch": 0.73, "grad_norm": 3.3975753784179688, "learning_rate": 0.0002, "loss": 1.7182, "step": 178930 }, { "epoch": 0.73, "grad_norm": 4.737101078033447, "learning_rate": 0.0002, "loss": 1.8361, "step": 178940 }, { "epoch": 0.73, "grad_norm": 4.476284503936768, "learning_rate": 0.0002, "loss": 1.5851, "step": 178950 }, { "epoch": 0.73, "grad_norm": 2.3712549209594727, "learning_rate": 0.0002, "loss": 1.6015, "step": 178960 }, { "epoch": 0.73, "grad_norm": 2.8071324825286865, "learning_rate": 0.0002, "loss": 1.4997, "step": 178970 }, { "epoch": 0.73, "grad_norm": 1.473318099975586, "learning_rate": 0.0002, "loss": 1.772, "step": 178980 }, { "epoch": 0.73, "grad_norm": 4.115954399108887, "learning_rate": 0.0002, "loss": 1.4477, "step": 178990 }, { "epoch": 0.73, "grad_norm": 2.2208681106567383, "learning_rate": 0.0002, "loss": 1.6705, "step": 179000 }, { "epoch": 0.73, "grad_norm": 2.3683712482452393, "learning_rate": 0.0002, "loss": 1.3523, "step": 179010 }, { "epoch": 0.73, "grad_norm": 3.0301096439361572, "learning_rate": 0.0002, "loss": 1.6445, "step": 179020 }, { "epoch": 0.73, "grad_norm": 2.521789312362671, "learning_rate": 0.0002, "loss": 1.4772, "step": 179030 }, { "epoch": 0.73, "grad_norm": 4.1501264572143555, "learning_rate": 0.0002, "loss": 1.7771, "step": 179040 }, { "epoch": 0.73, "grad_norm": 3.352013349533081, "learning_rate": 0.0002, "loss": 1.2557, "step": 179050 }, { "epoch": 0.73, "grad_norm": 5.120577335357666, "learning_rate": 0.0002, "loss": 1.6914, "step": 179060 }, { "epoch": 0.73, "grad_norm": 2.8100972175598145, "learning_rate": 0.0002, "loss": 1.5989, "step": 179070 }, { "epoch": 0.73, "grad_norm": 3.096782922744751, "learning_rate": 0.0002, "loss": 1.7305, "step": 179080 }, { "epoch": 0.73, "grad_norm": 2.451970100402832, "learning_rate": 0.0002, "loss": 1.6731, "step": 179090 }, { "epoch": 0.73, "grad_norm": 5.984165191650391, "learning_rate": 0.0002, "loss": 1.528, "step": 179100 }, { "epoch": 0.73, "grad_norm": 2.503628730773926, "learning_rate": 0.0002, "loss": 1.6381, "step": 179110 }, { "epoch": 0.73, "grad_norm": 1.9384123086929321, "learning_rate": 0.0002, "loss": 1.7667, "step": 179120 }, { "epoch": 0.73, "grad_norm": 4.118961811065674, "learning_rate": 0.0002, "loss": 1.6782, "step": 179130 }, { "epoch": 0.73, "grad_norm": 6.1031880378723145, "learning_rate": 0.0002, "loss": 1.2991, "step": 179140 }, { "epoch": 0.73, "grad_norm": 3.2311062812805176, "learning_rate": 0.0002, "loss": 1.5854, "step": 179150 }, { "epoch": 0.73, "grad_norm": 2.295111656188965, "learning_rate": 0.0002, "loss": 1.7292, "step": 179160 }, { "epoch": 0.73, "grad_norm": 2.3932175636291504, "learning_rate": 0.0002, "loss": 1.8119, "step": 179170 }, { "epoch": 0.73, "grad_norm": 2.434662103652954, "learning_rate": 0.0002, "loss": 1.826, "step": 179180 }, { "epoch": 0.73, "grad_norm": 4.135616779327393, "learning_rate": 0.0002, "loss": 1.5777, "step": 179190 }, { "epoch": 0.73, "grad_norm": 4.152095317840576, "learning_rate": 0.0002, "loss": 1.5927, "step": 179200 }, { "epoch": 0.73, "grad_norm": 3.5903847217559814, "learning_rate": 0.0002, "loss": 1.4826, "step": 179210 }, { "epoch": 0.73, "grad_norm": 2.7877864837646484, "learning_rate": 0.0002, "loss": 1.7642, "step": 179220 }, { "epoch": 0.73, "grad_norm": 3.8874871730804443, "learning_rate": 0.0002, "loss": 1.7951, "step": 179230 }, { "epoch": 0.73, "grad_norm": 4.510670185089111, "learning_rate": 0.0002, "loss": 1.502, "step": 179240 }, { "epoch": 0.73, "grad_norm": 1.7195603847503662, "learning_rate": 0.0002, "loss": 1.7267, "step": 179250 }, { "epoch": 0.73, "grad_norm": 3.8636081218719482, "learning_rate": 0.0002, "loss": 1.5922, "step": 179260 }, { "epoch": 0.73, "grad_norm": 3.116262435913086, "learning_rate": 0.0002, "loss": 1.6647, "step": 179270 }, { "epoch": 0.73, "grad_norm": 2.8864500522613525, "learning_rate": 0.0002, "loss": 1.6701, "step": 179280 }, { "epoch": 0.73, "grad_norm": 2.775778293609619, "learning_rate": 0.0002, "loss": 1.6172, "step": 179290 }, { "epoch": 0.73, "grad_norm": 3.151132106781006, "learning_rate": 0.0002, "loss": 1.4367, "step": 179300 }, { "epoch": 0.73, "grad_norm": 3.295041561126709, "learning_rate": 0.0002, "loss": 1.4887, "step": 179310 }, { "epoch": 0.73, "grad_norm": 3.1377084255218506, "learning_rate": 0.0002, "loss": 1.4114, "step": 179320 }, { "epoch": 0.73, "grad_norm": 3.5675413608551025, "learning_rate": 0.0002, "loss": 1.5122, "step": 179330 }, { "epoch": 0.73, "grad_norm": 3.300825834274292, "learning_rate": 0.0002, "loss": 1.4286, "step": 179340 }, { "epoch": 0.73, "grad_norm": 1.7515249252319336, "learning_rate": 0.0002, "loss": 1.3933, "step": 179350 }, { "epoch": 0.73, "grad_norm": 3.6949853897094727, "learning_rate": 0.0002, "loss": 1.6791, "step": 179360 }, { "epoch": 0.73, "grad_norm": 2.468031883239746, "learning_rate": 0.0002, "loss": 1.5304, "step": 179370 }, { "epoch": 0.73, "grad_norm": 3.0459728240966797, "learning_rate": 0.0002, "loss": 1.7886, "step": 179380 }, { "epoch": 0.73, "grad_norm": 2.714860439300537, "learning_rate": 0.0002, "loss": 1.5395, "step": 179390 }, { "epoch": 0.73, "grad_norm": 5.867851734161377, "learning_rate": 0.0002, "loss": 1.5813, "step": 179400 }, { "epoch": 0.73, "grad_norm": 4.468993663787842, "learning_rate": 0.0002, "loss": 1.5941, "step": 179410 }, { "epoch": 0.73, "grad_norm": 2.725095510482788, "learning_rate": 0.0002, "loss": 1.5682, "step": 179420 }, { "epoch": 0.73, "grad_norm": 2.312440872192383, "learning_rate": 0.0002, "loss": 1.2728, "step": 179430 }, { "epoch": 0.73, "grad_norm": 3.652158260345459, "learning_rate": 0.0002, "loss": 1.8426, "step": 179440 }, { "epoch": 0.73, "grad_norm": 4.7728118896484375, "learning_rate": 0.0002, "loss": 1.6677, "step": 179450 }, { "epoch": 0.73, "grad_norm": 2.305568218231201, "learning_rate": 0.0002, "loss": 1.5666, "step": 179460 }, { "epoch": 0.73, "grad_norm": 3.816152811050415, "learning_rate": 0.0002, "loss": 1.7577, "step": 179470 }, { "epoch": 0.73, "grad_norm": 4.500756740570068, "learning_rate": 0.0002, "loss": 1.4421, "step": 179480 }, { "epoch": 0.73, "grad_norm": 2.4182286262512207, "learning_rate": 0.0002, "loss": 1.6133, "step": 179490 }, { "epoch": 0.73, "grad_norm": 2.9447033405303955, "learning_rate": 0.0002, "loss": 1.4281, "step": 179500 }, { "epoch": 0.73, "grad_norm": 3.0783393383026123, "learning_rate": 0.0002, "loss": 1.4831, "step": 179510 }, { "epoch": 0.73, "grad_norm": 4.32903528213501, "learning_rate": 0.0002, "loss": 1.5312, "step": 179520 }, { "epoch": 0.73, "grad_norm": 2.100053071975708, "learning_rate": 0.0002, "loss": 1.5899, "step": 179530 }, { "epoch": 0.73, "grad_norm": 3.2206714153289795, "learning_rate": 0.0002, "loss": 1.5753, "step": 179540 }, { "epoch": 0.73, "grad_norm": 1.800010323524475, "learning_rate": 0.0002, "loss": 1.493, "step": 179550 }, { "epoch": 0.73, "grad_norm": 5.13792610168457, "learning_rate": 0.0002, "loss": 1.4746, "step": 179560 }, { "epoch": 0.73, "grad_norm": 2.9019057750701904, "learning_rate": 0.0002, "loss": 1.6775, "step": 179570 }, { "epoch": 0.73, "grad_norm": 2.7794642448425293, "learning_rate": 0.0002, "loss": 1.2743, "step": 179580 }, { "epoch": 0.73, "grad_norm": 4.009284019470215, "learning_rate": 0.0002, "loss": 1.7092, "step": 179590 }, { "epoch": 0.73, "grad_norm": 3.3271446228027344, "learning_rate": 0.0002, "loss": 1.455, "step": 179600 }, { "epoch": 0.73, "grad_norm": 2.7290031909942627, "learning_rate": 0.0002, "loss": 1.3941, "step": 179610 }, { "epoch": 0.73, "grad_norm": 2.296945571899414, "learning_rate": 0.0002, "loss": 1.7465, "step": 179620 }, { "epoch": 0.73, "grad_norm": 9.469307899475098, "learning_rate": 0.0002, "loss": 1.529, "step": 179630 }, { "epoch": 0.73, "grad_norm": 3.5347039699554443, "learning_rate": 0.0002, "loss": 1.74, "step": 179640 }, { "epoch": 0.73, "grad_norm": 2.744863748550415, "learning_rate": 0.0002, "loss": 1.7081, "step": 179650 }, { "epoch": 0.73, "grad_norm": 2.494772434234619, "learning_rate": 0.0002, "loss": 1.7657, "step": 179660 }, { "epoch": 0.73, "grad_norm": 2.5530128479003906, "learning_rate": 0.0002, "loss": 1.6043, "step": 179670 }, { "epoch": 0.73, "grad_norm": 3.1111578941345215, "learning_rate": 0.0002, "loss": 1.8162, "step": 179680 }, { "epoch": 0.73, "grad_norm": 2.406949043273926, "learning_rate": 0.0002, "loss": 1.4839, "step": 179690 }, { "epoch": 0.73, "grad_norm": 2.359569787979126, "learning_rate": 0.0002, "loss": 1.5489, "step": 179700 }, { "epoch": 0.73, "grad_norm": 2.3743937015533447, "learning_rate": 0.0002, "loss": 1.4753, "step": 179710 }, { "epoch": 0.73, "grad_norm": 1.3969227075576782, "learning_rate": 0.0002, "loss": 1.5273, "step": 179720 }, { "epoch": 0.73, "grad_norm": 2.4631619453430176, "learning_rate": 0.0002, "loss": 1.207, "step": 179730 }, { "epoch": 0.73, "grad_norm": 4.326956272125244, "learning_rate": 0.0002, "loss": 1.7775, "step": 179740 }, { "epoch": 0.73, "grad_norm": 8.064003944396973, "learning_rate": 0.0002, "loss": 1.5603, "step": 179750 }, { "epoch": 0.73, "grad_norm": 5.633877754211426, "learning_rate": 0.0002, "loss": 1.7638, "step": 179760 }, { "epoch": 0.73, "grad_norm": 2.619779109954834, "learning_rate": 0.0002, "loss": 1.4362, "step": 179770 }, { "epoch": 0.73, "grad_norm": 7.8272624015808105, "learning_rate": 0.0002, "loss": 1.7753, "step": 179780 }, { "epoch": 0.73, "grad_norm": 2.8135781288146973, "learning_rate": 0.0002, "loss": 1.6521, "step": 179790 }, { "epoch": 0.73, "grad_norm": 2.483442783355713, "learning_rate": 0.0002, "loss": 1.6081, "step": 179800 }, { "epoch": 0.73, "grad_norm": 4.311202526092529, "learning_rate": 0.0002, "loss": 1.56, "step": 179810 }, { "epoch": 0.73, "grad_norm": 2.1905770301818848, "learning_rate": 0.0002, "loss": 1.8393, "step": 179820 }, { "epoch": 0.73, "grad_norm": 2.25823712348938, "learning_rate": 0.0002, "loss": 1.6487, "step": 179830 }, { "epoch": 0.73, "grad_norm": 6.185725212097168, "learning_rate": 0.0002, "loss": 1.2693, "step": 179840 }, { "epoch": 0.73, "grad_norm": 2.455386161804199, "learning_rate": 0.0002, "loss": 1.2827, "step": 179850 }, { "epoch": 0.73, "grad_norm": 3.1959543228149414, "learning_rate": 0.0002, "loss": 1.7729, "step": 179860 }, { "epoch": 0.73, "grad_norm": 2.951213836669922, "learning_rate": 0.0002, "loss": 1.4759, "step": 179870 }, { "epoch": 0.73, "grad_norm": 2.5384349822998047, "learning_rate": 0.0002, "loss": 1.6827, "step": 179880 }, { "epoch": 0.73, "grad_norm": 8.972853660583496, "learning_rate": 0.0002, "loss": 1.3673, "step": 179890 }, { "epoch": 0.73, "grad_norm": 2.0765581130981445, "learning_rate": 0.0002, "loss": 1.3127, "step": 179900 }, { "epoch": 0.73, "grad_norm": 3.9409477710723877, "learning_rate": 0.0002, "loss": 1.665, "step": 179910 }, { "epoch": 0.73, "grad_norm": 2.9090487957000732, "learning_rate": 0.0002, "loss": 1.6778, "step": 179920 }, { "epoch": 0.73, "grad_norm": 2.8748621940612793, "learning_rate": 0.0002, "loss": 1.6037, "step": 179930 }, { "epoch": 0.73, "grad_norm": 2.635507822036743, "learning_rate": 0.0002, "loss": 1.589, "step": 179940 }, { "epoch": 0.73, "grad_norm": 2.4850082397460938, "learning_rate": 0.0002, "loss": 1.4725, "step": 179950 }, { "epoch": 0.73, "grad_norm": 4.530733108520508, "learning_rate": 0.0002, "loss": 1.6051, "step": 179960 }, { "epoch": 0.73, "grad_norm": 2.6331682205200195, "learning_rate": 0.0002, "loss": 1.8141, "step": 179970 }, { "epoch": 0.73, "grad_norm": 6.309206962585449, "learning_rate": 0.0002, "loss": 1.7996, "step": 179980 }, { "epoch": 0.73, "grad_norm": 2.049081325531006, "learning_rate": 0.0002, "loss": 1.7491, "step": 179990 }, { "epoch": 0.73, "grad_norm": 3.6196086406707764, "learning_rate": 0.0002, "loss": 1.689, "step": 180000 }, { "epoch": 0.73, "grad_norm": 4.221643924713135, "learning_rate": 0.0002, "loss": 1.6001, "step": 180010 }, { "epoch": 0.73, "grad_norm": 3.5172414779663086, "learning_rate": 0.0002, "loss": 1.4896, "step": 180020 }, { "epoch": 0.73, "grad_norm": 3.8768653869628906, "learning_rate": 0.0002, "loss": 1.4473, "step": 180030 }, { "epoch": 0.73, "grad_norm": 2.8376643657684326, "learning_rate": 0.0002, "loss": 1.541, "step": 180040 }, { "epoch": 0.73, "grad_norm": 3.2391743659973145, "learning_rate": 0.0002, "loss": 1.5556, "step": 180050 }, { "epoch": 0.73, "grad_norm": 2.8893511295318604, "learning_rate": 0.0002, "loss": 1.7515, "step": 180060 }, { "epoch": 0.73, "grad_norm": 5.2624406814575195, "learning_rate": 0.0002, "loss": 1.6095, "step": 180070 }, { "epoch": 0.73, "grad_norm": 2.1885061264038086, "learning_rate": 0.0002, "loss": 1.5827, "step": 180080 }, { "epoch": 0.73, "grad_norm": 3.5575993061065674, "learning_rate": 0.0002, "loss": 1.7534, "step": 180090 }, { "epoch": 0.73, "grad_norm": 2.4894330501556396, "learning_rate": 0.0002, "loss": 1.437, "step": 180100 }, { "epoch": 0.73, "grad_norm": 3.143601179122925, "learning_rate": 0.0002, "loss": 1.5317, "step": 180110 }, { "epoch": 0.73, "grad_norm": 2.7696244716644287, "learning_rate": 0.0002, "loss": 1.4231, "step": 180120 }, { "epoch": 0.73, "grad_norm": 2.1698977947235107, "learning_rate": 0.0002, "loss": 1.7421, "step": 180130 }, { "epoch": 0.73, "grad_norm": 2.4833247661590576, "learning_rate": 0.0002, "loss": 1.5538, "step": 180140 }, { "epoch": 0.73, "grad_norm": 2.09519362449646, "learning_rate": 0.0002, "loss": 1.6891, "step": 180150 }, { "epoch": 0.73, "grad_norm": 1.6385875940322876, "learning_rate": 0.0002, "loss": 1.5384, "step": 180160 }, { "epoch": 0.73, "grad_norm": 2.3389458656311035, "learning_rate": 0.0002, "loss": 1.4851, "step": 180170 }, { "epoch": 0.73, "grad_norm": 3.913884162902832, "learning_rate": 0.0002, "loss": 1.837, "step": 180180 }, { "epoch": 0.73, "grad_norm": 3.3366127014160156, "learning_rate": 0.0002, "loss": 1.5233, "step": 180190 }, { "epoch": 0.73, "grad_norm": 2.4568705558776855, "learning_rate": 0.0002, "loss": 1.7149, "step": 180200 }, { "epoch": 0.73, "grad_norm": 5.4187822341918945, "learning_rate": 0.0002, "loss": 1.5603, "step": 180210 }, { "epoch": 0.73, "grad_norm": 3.5612130165100098, "learning_rate": 0.0002, "loss": 1.5526, "step": 180220 }, { "epoch": 0.73, "grad_norm": 2.845682382583618, "learning_rate": 0.0002, "loss": 1.7001, "step": 180230 }, { "epoch": 0.73, "grad_norm": 2.888536214828491, "learning_rate": 0.0002, "loss": 1.7369, "step": 180240 }, { "epoch": 0.73, "grad_norm": 3.1229608058929443, "learning_rate": 0.0002, "loss": 1.6338, "step": 180250 }, { "epoch": 0.73, "grad_norm": 3.5902438163757324, "learning_rate": 0.0002, "loss": 1.6262, "step": 180260 }, { "epoch": 0.73, "grad_norm": 2.735365152359009, "learning_rate": 0.0002, "loss": 1.6317, "step": 180270 }, { "epoch": 0.73, "grad_norm": 3.1517746448516846, "learning_rate": 0.0002, "loss": 1.6582, "step": 180280 }, { "epoch": 0.73, "grad_norm": 2.4282257556915283, "learning_rate": 0.0002, "loss": 1.527, "step": 180290 }, { "epoch": 0.73, "grad_norm": 2.6660919189453125, "learning_rate": 0.0002, "loss": 1.449, "step": 180300 }, { "epoch": 0.73, "grad_norm": 4.1952290534973145, "learning_rate": 0.0002, "loss": 1.4924, "step": 180310 }, { "epoch": 0.73, "grad_norm": 8.248017311096191, "learning_rate": 0.0002, "loss": 1.476, "step": 180320 }, { "epoch": 0.73, "grad_norm": 1.8830082416534424, "learning_rate": 0.0002, "loss": 1.4518, "step": 180330 }, { "epoch": 0.73, "grad_norm": 2.1052398681640625, "learning_rate": 0.0002, "loss": 1.7254, "step": 180340 }, { "epoch": 0.73, "grad_norm": 2.3338420391082764, "learning_rate": 0.0002, "loss": 1.6387, "step": 180350 }, { "epoch": 0.73, "grad_norm": 1.9617189168930054, "learning_rate": 0.0002, "loss": 1.5439, "step": 180360 }, { "epoch": 0.73, "grad_norm": 4.454564094543457, "learning_rate": 0.0002, "loss": 1.585, "step": 180370 }, { "epoch": 0.73, "grad_norm": 2.595278739929199, "learning_rate": 0.0002, "loss": 1.7611, "step": 180380 }, { "epoch": 0.73, "grad_norm": 2.2226219177246094, "learning_rate": 0.0002, "loss": 1.3303, "step": 180390 }, { "epoch": 0.73, "grad_norm": 3.547161817550659, "learning_rate": 0.0002, "loss": 1.8755, "step": 180400 }, { "epoch": 0.73, "grad_norm": 2.9746830463409424, "learning_rate": 0.0002, "loss": 1.7482, "step": 180410 }, { "epoch": 0.73, "grad_norm": 2.6905901432037354, "learning_rate": 0.0002, "loss": 1.3566, "step": 180420 }, { "epoch": 0.73, "grad_norm": 1.9361616373062134, "learning_rate": 0.0002, "loss": 1.5647, "step": 180430 }, { "epoch": 0.73, "grad_norm": 3.882986307144165, "learning_rate": 0.0002, "loss": 1.6703, "step": 180440 }, { "epoch": 0.73, "grad_norm": 2.7605702877044678, "learning_rate": 0.0002, "loss": 1.6585, "step": 180450 }, { "epoch": 0.73, "grad_norm": 3.15690279006958, "learning_rate": 0.0002, "loss": 1.5621, "step": 180460 }, { "epoch": 0.73, "grad_norm": 3.6657466888427734, "learning_rate": 0.0002, "loss": 1.6602, "step": 180470 }, { "epoch": 0.73, "grad_norm": 2.2685439586639404, "learning_rate": 0.0002, "loss": 1.6123, "step": 180480 }, { "epoch": 0.73, "grad_norm": 3.079735279083252, "learning_rate": 0.0002, "loss": 1.644, "step": 180490 }, { "epoch": 0.73, "grad_norm": 7.03510046005249, "learning_rate": 0.0002, "loss": 1.805, "step": 180500 }, { "epoch": 0.73, "grad_norm": 4.155401229858398, "learning_rate": 0.0002, "loss": 1.755, "step": 180510 }, { "epoch": 0.73, "grad_norm": 4.969351768493652, "learning_rate": 0.0002, "loss": 1.7315, "step": 180520 }, { "epoch": 0.73, "grad_norm": 4.385404109954834, "learning_rate": 0.0002, "loss": 1.4942, "step": 180530 }, { "epoch": 0.73, "grad_norm": 1.9739153385162354, "learning_rate": 0.0002, "loss": 1.432, "step": 180540 }, { "epoch": 0.74, "grad_norm": 3.4182870388031006, "learning_rate": 0.0002, "loss": 1.5552, "step": 180550 }, { "epoch": 0.74, "grad_norm": 3.728756904602051, "learning_rate": 0.0002, "loss": 1.6809, "step": 180560 }, { "epoch": 0.74, "grad_norm": 3.8335037231445312, "learning_rate": 0.0002, "loss": 1.6093, "step": 180570 }, { "epoch": 0.74, "grad_norm": 2.7219460010528564, "learning_rate": 0.0002, "loss": 1.5974, "step": 180580 }, { "epoch": 0.74, "grad_norm": 2.627298593521118, "learning_rate": 0.0002, "loss": 1.6126, "step": 180590 }, { "epoch": 0.74, "grad_norm": 4.568090438842773, "learning_rate": 0.0002, "loss": 1.6508, "step": 180600 }, { "epoch": 0.74, "grad_norm": 2.3824775218963623, "learning_rate": 0.0002, "loss": 1.3816, "step": 180610 }, { "epoch": 0.74, "grad_norm": 1.6695376634597778, "learning_rate": 0.0002, "loss": 1.4556, "step": 180620 }, { "epoch": 0.74, "grad_norm": 3.470649003982544, "learning_rate": 0.0002, "loss": 1.5956, "step": 180630 }, { "epoch": 0.74, "grad_norm": 3.3351938724517822, "learning_rate": 0.0002, "loss": 1.5072, "step": 180640 }, { "epoch": 0.74, "grad_norm": 2.8693647384643555, "learning_rate": 0.0002, "loss": 1.5508, "step": 180650 }, { "epoch": 0.74, "grad_norm": 2.6618497371673584, "learning_rate": 0.0002, "loss": 1.666, "step": 180660 }, { "epoch": 0.74, "grad_norm": 2.240626811981201, "learning_rate": 0.0002, "loss": 1.4573, "step": 180670 }, { "epoch": 0.74, "grad_norm": 3.6963579654693604, "learning_rate": 0.0002, "loss": 1.5127, "step": 180680 }, { "epoch": 0.74, "grad_norm": 2.872774839401245, "learning_rate": 0.0002, "loss": 1.6369, "step": 180690 }, { "epoch": 0.74, "grad_norm": 2.764457941055298, "learning_rate": 0.0002, "loss": 1.6233, "step": 180700 }, { "epoch": 0.74, "grad_norm": 2.756013870239258, "learning_rate": 0.0002, "loss": 1.5667, "step": 180710 }, { "epoch": 0.74, "grad_norm": 3.087402820587158, "learning_rate": 0.0002, "loss": 1.3566, "step": 180720 }, { "epoch": 0.74, "grad_norm": 3.3194892406463623, "learning_rate": 0.0002, "loss": 1.7242, "step": 180730 }, { "epoch": 0.74, "grad_norm": 3.3253161907196045, "learning_rate": 0.0002, "loss": 1.6422, "step": 180740 }, { "epoch": 0.74, "grad_norm": 2.3030002117156982, "learning_rate": 0.0002, "loss": 1.155, "step": 180750 }, { "epoch": 0.74, "grad_norm": 4.687107086181641, "learning_rate": 0.0002, "loss": 1.6332, "step": 180760 }, { "epoch": 0.74, "grad_norm": 2.9843966960906982, "learning_rate": 0.0002, "loss": 1.4266, "step": 180770 }, { "epoch": 0.74, "grad_norm": 3.8081507682800293, "learning_rate": 0.0002, "loss": 1.5269, "step": 180780 }, { "epoch": 0.74, "grad_norm": 1.4957375526428223, "learning_rate": 0.0002, "loss": 1.3869, "step": 180790 }, { "epoch": 0.74, "grad_norm": 5.3601460456848145, "learning_rate": 0.0002, "loss": 1.4399, "step": 180800 }, { "epoch": 0.74, "grad_norm": 2.701582670211792, "learning_rate": 0.0002, "loss": 1.6246, "step": 180810 }, { "epoch": 0.74, "grad_norm": 3.0769176483154297, "learning_rate": 0.0002, "loss": 1.6223, "step": 180820 }, { "epoch": 0.74, "grad_norm": 4.710756778717041, "learning_rate": 0.0002, "loss": 1.4549, "step": 180830 }, { "epoch": 0.74, "grad_norm": 2.4299192428588867, "learning_rate": 0.0002, "loss": 1.5513, "step": 180840 }, { "epoch": 0.74, "grad_norm": 4.6117939949035645, "learning_rate": 0.0002, "loss": 1.5812, "step": 180850 }, { "epoch": 0.74, "grad_norm": 2.181527614593506, "learning_rate": 0.0002, "loss": 1.4886, "step": 180860 }, { "epoch": 0.74, "grad_norm": 2.2433347702026367, "learning_rate": 0.0002, "loss": 1.4576, "step": 180870 }, { "epoch": 0.74, "grad_norm": 8.724921226501465, "learning_rate": 0.0002, "loss": 1.7206, "step": 180880 }, { "epoch": 0.74, "grad_norm": 7.967584133148193, "learning_rate": 0.0002, "loss": 1.4356, "step": 180890 }, { "epoch": 0.74, "grad_norm": 4.293784141540527, "learning_rate": 0.0002, "loss": 1.4785, "step": 180900 }, { "epoch": 0.74, "grad_norm": 2.000267744064331, "learning_rate": 0.0002, "loss": 1.6915, "step": 180910 }, { "epoch": 0.74, "grad_norm": 2.2859318256378174, "learning_rate": 0.0002, "loss": 1.677, "step": 180920 }, { "epoch": 0.74, "grad_norm": 1.6549079418182373, "learning_rate": 0.0002, "loss": 1.6003, "step": 180930 }, { "epoch": 0.74, "grad_norm": 3.1581413745880127, "learning_rate": 0.0002, "loss": 1.5818, "step": 180940 }, { "epoch": 0.74, "grad_norm": 2.838176965713501, "learning_rate": 0.0002, "loss": 1.4375, "step": 180950 }, { "epoch": 0.74, "grad_norm": 3.555302143096924, "learning_rate": 0.0002, "loss": 1.3482, "step": 180960 }, { "epoch": 0.74, "grad_norm": 3.91518235206604, "learning_rate": 0.0002, "loss": 1.4678, "step": 180970 }, { "epoch": 0.74, "grad_norm": 2.4868693351745605, "learning_rate": 0.0002, "loss": 1.7352, "step": 180980 }, { "epoch": 0.74, "grad_norm": 2.1555731296539307, "learning_rate": 0.0002, "loss": 1.6385, "step": 180990 }, { "epoch": 0.74, "grad_norm": 4.962745189666748, "learning_rate": 0.0002, "loss": 1.8752, "step": 181000 }, { "epoch": 0.74, "grad_norm": 2.1226885318756104, "learning_rate": 0.0002, "loss": 1.5505, "step": 181010 }, { "epoch": 0.74, "grad_norm": 1.8844373226165771, "learning_rate": 0.0002, "loss": 1.408, "step": 181020 }, { "epoch": 0.74, "grad_norm": 2.6483383178710938, "learning_rate": 0.0002, "loss": 1.8277, "step": 181030 }, { "epoch": 0.74, "grad_norm": 5.0598907470703125, "learning_rate": 0.0002, "loss": 1.4692, "step": 181040 }, { "epoch": 0.74, "grad_norm": 2.874191999435425, "learning_rate": 0.0002, "loss": 1.6941, "step": 181050 }, { "epoch": 0.74, "grad_norm": 2.4569363594055176, "learning_rate": 0.0002, "loss": 1.5114, "step": 181060 }, { "epoch": 0.74, "grad_norm": 1.1471946239471436, "learning_rate": 0.0002, "loss": 1.3707, "step": 181070 }, { "epoch": 0.74, "grad_norm": 4.258650302886963, "learning_rate": 0.0002, "loss": 1.6242, "step": 181080 }, { "epoch": 0.74, "grad_norm": 2.5545523166656494, "learning_rate": 0.0002, "loss": 1.7941, "step": 181090 }, { "epoch": 0.74, "grad_norm": 2.788726806640625, "learning_rate": 0.0002, "loss": 1.5234, "step": 181100 }, { "epoch": 0.74, "grad_norm": 2.716792583465576, "learning_rate": 0.0002, "loss": 1.6564, "step": 181110 }, { "epoch": 0.74, "grad_norm": 3.487879514694214, "learning_rate": 0.0002, "loss": 1.5618, "step": 181120 }, { "epoch": 0.74, "grad_norm": 3.467200756072998, "learning_rate": 0.0002, "loss": 1.5841, "step": 181130 }, { "epoch": 0.74, "grad_norm": 3.194697856903076, "learning_rate": 0.0002, "loss": 1.513, "step": 181140 }, { "epoch": 0.74, "grad_norm": 2.430661201477051, "learning_rate": 0.0002, "loss": 1.5029, "step": 181150 }, { "epoch": 0.74, "grad_norm": 3.6080307960510254, "learning_rate": 0.0002, "loss": 1.691, "step": 181160 }, { "epoch": 0.74, "grad_norm": 2.2501718997955322, "learning_rate": 0.0002, "loss": 1.5477, "step": 181170 }, { "epoch": 0.74, "grad_norm": 3.876997947692871, "learning_rate": 0.0002, "loss": 1.736, "step": 181180 }, { "epoch": 0.74, "grad_norm": 4.283198833465576, "learning_rate": 0.0002, "loss": 1.5155, "step": 181190 }, { "epoch": 0.74, "grad_norm": 2.6216793060302734, "learning_rate": 0.0002, "loss": 1.4031, "step": 181200 }, { "epoch": 0.74, "grad_norm": 3.386223077774048, "learning_rate": 0.0002, "loss": 1.4189, "step": 181210 }, { "epoch": 0.74, "grad_norm": 4.807024002075195, "learning_rate": 0.0002, "loss": 1.7718, "step": 181220 }, { "epoch": 0.74, "grad_norm": 3.7690627574920654, "learning_rate": 0.0002, "loss": 1.3648, "step": 181230 }, { "epoch": 0.74, "grad_norm": 3.1276659965515137, "learning_rate": 0.0002, "loss": 1.585, "step": 181240 }, { "epoch": 0.74, "grad_norm": 1.8832372426986694, "learning_rate": 0.0002, "loss": 1.2708, "step": 181250 }, { "epoch": 0.74, "grad_norm": 2.9242875576019287, "learning_rate": 0.0002, "loss": 1.3581, "step": 181260 }, { "epoch": 0.74, "grad_norm": 2.294975757598877, "learning_rate": 0.0002, "loss": 1.6896, "step": 181270 }, { "epoch": 0.74, "grad_norm": 3.6053478717803955, "learning_rate": 0.0002, "loss": 1.7492, "step": 181280 }, { "epoch": 0.74, "grad_norm": 3.5592989921569824, "learning_rate": 0.0002, "loss": 1.5521, "step": 181290 }, { "epoch": 0.74, "grad_norm": 2.774073600769043, "learning_rate": 0.0002, "loss": 1.2885, "step": 181300 }, { "epoch": 0.74, "grad_norm": 2.1760189533233643, "learning_rate": 0.0002, "loss": 1.5623, "step": 181310 }, { "epoch": 0.74, "grad_norm": 5.70097017288208, "learning_rate": 0.0002, "loss": 1.4069, "step": 181320 }, { "epoch": 0.74, "grad_norm": 4.160000801086426, "learning_rate": 0.0002, "loss": 1.6508, "step": 181330 }, { "epoch": 0.74, "grad_norm": 4.151156902313232, "learning_rate": 0.0002, "loss": 1.7357, "step": 181340 }, { "epoch": 0.74, "grad_norm": 2.793700695037842, "learning_rate": 0.0002, "loss": 1.6078, "step": 181350 }, { "epoch": 0.74, "grad_norm": 3.030555248260498, "learning_rate": 0.0002, "loss": 1.5082, "step": 181360 }, { "epoch": 0.74, "grad_norm": 4.142444133758545, "learning_rate": 0.0002, "loss": 1.7433, "step": 181370 }, { "epoch": 0.74, "grad_norm": 5.038829326629639, "learning_rate": 0.0002, "loss": 1.4888, "step": 181380 }, { "epoch": 0.74, "grad_norm": 3.2323992252349854, "learning_rate": 0.0002, "loss": 1.5341, "step": 181390 }, { "epoch": 0.74, "grad_norm": 3.1377944946289062, "learning_rate": 0.0002, "loss": 1.8784, "step": 181400 }, { "epoch": 0.74, "grad_norm": 3.9899063110351562, "learning_rate": 0.0002, "loss": 1.5982, "step": 181410 }, { "epoch": 0.74, "grad_norm": 2.502786874771118, "learning_rate": 0.0002, "loss": 1.518, "step": 181420 }, { "epoch": 0.74, "grad_norm": 2.292006015777588, "learning_rate": 0.0002, "loss": 1.494, "step": 181430 }, { "epoch": 0.74, "grad_norm": 3.7812700271606445, "learning_rate": 0.0002, "loss": 1.6668, "step": 181440 }, { "epoch": 0.74, "grad_norm": 2.718691349029541, "learning_rate": 0.0002, "loss": 1.5225, "step": 181450 }, { "epoch": 0.74, "grad_norm": 2.251457452774048, "learning_rate": 0.0002, "loss": 1.6594, "step": 181460 }, { "epoch": 0.74, "grad_norm": 4.047701835632324, "learning_rate": 0.0002, "loss": 1.9562, "step": 181470 }, { "epoch": 0.74, "grad_norm": 4.0527496337890625, "learning_rate": 0.0002, "loss": 1.607, "step": 181480 }, { "epoch": 0.74, "grad_norm": 2.181497573852539, "learning_rate": 0.0002, "loss": 1.8552, "step": 181490 }, { "epoch": 0.74, "grad_norm": 2.9140520095825195, "learning_rate": 0.0002, "loss": 1.4984, "step": 181500 }, { "epoch": 0.74, "grad_norm": 3.2403321266174316, "learning_rate": 0.0002, "loss": 1.5891, "step": 181510 }, { "epoch": 0.74, "grad_norm": 2.080855131149292, "learning_rate": 0.0002, "loss": 1.5577, "step": 181520 }, { "epoch": 0.74, "grad_norm": 3.8033225536346436, "learning_rate": 0.0002, "loss": 1.5875, "step": 181530 }, { "epoch": 0.74, "grad_norm": 2.5134263038635254, "learning_rate": 0.0002, "loss": 1.5807, "step": 181540 }, { "epoch": 0.74, "grad_norm": 3.364851236343384, "learning_rate": 0.0002, "loss": 1.7305, "step": 181550 }, { "epoch": 0.74, "grad_norm": 2.9783596992492676, "learning_rate": 0.0002, "loss": 1.6385, "step": 181560 }, { "epoch": 0.74, "grad_norm": 2.5225541591644287, "learning_rate": 0.0002, "loss": 1.5866, "step": 181570 }, { "epoch": 0.74, "grad_norm": 3.76515531539917, "learning_rate": 0.0002, "loss": 1.6166, "step": 181580 }, { "epoch": 0.74, "grad_norm": 4.666274547576904, "learning_rate": 0.0002, "loss": 1.4521, "step": 181590 }, { "epoch": 0.74, "grad_norm": 3.8942184448242188, "learning_rate": 0.0002, "loss": 1.7019, "step": 181600 }, { "epoch": 0.74, "grad_norm": 2.5130836963653564, "learning_rate": 0.0002, "loss": 1.4795, "step": 181610 }, { "epoch": 0.74, "grad_norm": 3.413386583328247, "learning_rate": 0.0002, "loss": 1.6847, "step": 181620 }, { "epoch": 0.74, "grad_norm": 1.9063600301742554, "learning_rate": 0.0002, "loss": 1.4932, "step": 181630 }, { "epoch": 0.74, "grad_norm": 3.252364158630371, "learning_rate": 0.0002, "loss": 1.5369, "step": 181640 }, { "epoch": 0.74, "grad_norm": 3.006408929824829, "learning_rate": 0.0002, "loss": 1.5916, "step": 181650 }, { "epoch": 0.74, "grad_norm": 2.621211290359497, "learning_rate": 0.0002, "loss": 1.4982, "step": 181660 }, { "epoch": 0.74, "grad_norm": 2.14700984954834, "learning_rate": 0.0002, "loss": 1.6469, "step": 181670 }, { "epoch": 0.74, "grad_norm": 4.560110569000244, "learning_rate": 0.0002, "loss": 1.5483, "step": 181680 }, { "epoch": 0.74, "grad_norm": 3.262139081954956, "learning_rate": 0.0002, "loss": 1.4853, "step": 181690 }, { "epoch": 0.74, "grad_norm": 2.9396674633026123, "learning_rate": 0.0002, "loss": 1.6779, "step": 181700 }, { "epoch": 0.74, "grad_norm": 2.472778081893921, "learning_rate": 0.0002, "loss": 1.6928, "step": 181710 }, { "epoch": 0.74, "grad_norm": 2.3307933807373047, "learning_rate": 0.0002, "loss": 1.5668, "step": 181720 }, { "epoch": 0.74, "grad_norm": 2.2267346382141113, "learning_rate": 0.0002, "loss": 1.7276, "step": 181730 }, { "epoch": 0.74, "grad_norm": 3.542684316635132, "learning_rate": 0.0002, "loss": 1.6586, "step": 181740 }, { "epoch": 0.74, "grad_norm": 2.622055768966675, "learning_rate": 0.0002, "loss": 1.3725, "step": 181750 }, { "epoch": 0.74, "grad_norm": 2.1135201454162598, "learning_rate": 0.0002, "loss": 1.5651, "step": 181760 }, { "epoch": 0.74, "grad_norm": 3.633913516998291, "learning_rate": 0.0002, "loss": 1.6782, "step": 181770 }, { "epoch": 0.74, "grad_norm": 2.6807777881622314, "learning_rate": 0.0002, "loss": 1.6352, "step": 181780 }, { "epoch": 0.74, "grad_norm": 2.5621602535247803, "learning_rate": 0.0002, "loss": 1.4121, "step": 181790 }, { "epoch": 0.74, "grad_norm": 2.7490389347076416, "learning_rate": 0.0002, "loss": 1.7168, "step": 181800 }, { "epoch": 0.74, "grad_norm": 4.701531887054443, "learning_rate": 0.0002, "loss": 1.5779, "step": 181810 }, { "epoch": 0.74, "grad_norm": 2.9657034873962402, "learning_rate": 0.0002, "loss": 1.6395, "step": 181820 }, { "epoch": 0.74, "grad_norm": 3.148749828338623, "learning_rate": 0.0002, "loss": 1.7216, "step": 181830 }, { "epoch": 0.74, "grad_norm": 6.339113712310791, "learning_rate": 0.0002, "loss": 1.7078, "step": 181840 }, { "epoch": 0.74, "grad_norm": 3.573446273803711, "learning_rate": 0.0002, "loss": 1.5775, "step": 181850 }, { "epoch": 0.74, "grad_norm": 3.5030155181884766, "learning_rate": 0.0002, "loss": 1.505, "step": 181860 }, { "epoch": 0.74, "grad_norm": 3.492725133895874, "learning_rate": 0.0002, "loss": 1.6141, "step": 181870 }, { "epoch": 0.74, "grad_norm": 2.959893226623535, "learning_rate": 0.0002, "loss": 1.3052, "step": 181880 }, { "epoch": 0.74, "grad_norm": 2.8070859909057617, "learning_rate": 0.0002, "loss": 1.7606, "step": 181890 }, { "epoch": 0.74, "grad_norm": 3.4557461738586426, "learning_rate": 0.0002, "loss": 1.5559, "step": 181900 }, { "epoch": 0.74, "grad_norm": 4.385812282562256, "learning_rate": 0.0002, "loss": 1.6313, "step": 181910 }, { "epoch": 0.74, "grad_norm": 3.4297075271606445, "learning_rate": 0.0002, "loss": 1.3442, "step": 181920 }, { "epoch": 0.74, "grad_norm": 3.550576686859131, "learning_rate": 0.0002, "loss": 1.8242, "step": 181930 }, { "epoch": 0.74, "grad_norm": 4.137144088745117, "learning_rate": 0.0002, "loss": 1.4901, "step": 181940 }, { "epoch": 0.74, "grad_norm": 1.9510698318481445, "learning_rate": 0.0002, "loss": 1.3656, "step": 181950 }, { "epoch": 0.74, "grad_norm": 3.0105905532836914, "learning_rate": 0.0002, "loss": 1.5991, "step": 181960 }, { "epoch": 0.74, "grad_norm": 3.638986825942993, "learning_rate": 0.0002, "loss": 1.5477, "step": 181970 }, { "epoch": 0.74, "grad_norm": 2.2237343788146973, "learning_rate": 0.0002, "loss": 1.8266, "step": 181980 }, { "epoch": 0.74, "grad_norm": 2.346665143966675, "learning_rate": 0.0002, "loss": 1.7195, "step": 181990 }, { "epoch": 0.74, "grad_norm": 6.170476913452148, "learning_rate": 0.0002, "loss": 1.569, "step": 182000 }, { "epoch": 0.74, "grad_norm": 2.9540483951568604, "learning_rate": 0.0002, "loss": 1.6901, "step": 182010 }, { "epoch": 0.74, "grad_norm": 2.1167376041412354, "learning_rate": 0.0002, "loss": 1.5413, "step": 182020 }, { "epoch": 0.74, "grad_norm": 2.5940096378326416, "learning_rate": 0.0002, "loss": 1.433, "step": 182030 }, { "epoch": 0.74, "grad_norm": 2.4048194885253906, "learning_rate": 0.0002, "loss": 1.4399, "step": 182040 }, { "epoch": 0.74, "grad_norm": 2.156393527984619, "learning_rate": 0.0002, "loss": 1.5092, "step": 182050 }, { "epoch": 0.74, "grad_norm": 2.0868618488311768, "learning_rate": 0.0002, "loss": 1.4173, "step": 182060 }, { "epoch": 0.74, "grad_norm": 3.772510051727295, "learning_rate": 0.0002, "loss": 1.5572, "step": 182070 }, { "epoch": 0.74, "grad_norm": 1.8148301839828491, "learning_rate": 0.0002, "loss": 1.4318, "step": 182080 }, { "epoch": 0.74, "grad_norm": 3.6736583709716797, "learning_rate": 0.0002, "loss": 1.7186, "step": 182090 }, { "epoch": 0.74, "grad_norm": 2.8845083713531494, "learning_rate": 0.0002, "loss": 1.6338, "step": 182100 }, { "epoch": 0.74, "grad_norm": 2.6831719875335693, "learning_rate": 0.0002, "loss": 1.6033, "step": 182110 }, { "epoch": 0.74, "grad_norm": 3.228976011276245, "learning_rate": 0.0002, "loss": 1.6229, "step": 182120 }, { "epoch": 0.74, "grad_norm": 3.1145172119140625, "learning_rate": 0.0002, "loss": 1.6306, "step": 182130 }, { "epoch": 0.74, "grad_norm": 3.486780881881714, "learning_rate": 0.0002, "loss": 1.3699, "step": 182140 }, { "epoch": 0.74, "grad_norm": 3.2495336532592773, "learning_rate": 0.0002, "loss": 1.5039, "step": 182150 }, { "epoch": 0.74, "grad_norm": 3.3586461544036865, "learning_rate": 0.0002, "loss": 1.4528, "step": 182160 }, { "epoch": 0.74, "grad_norm": 2.9339895248413086, "learning_rate": 0.0002, "loss": 1.4447, "step": 182170 }, { "epoch": 0.74, "grad_norm": 2.156174898147583, "learning_rate": 0.0002, "loss": 1.7472, "step": 182180 }, { "epoch": 0.74, "grad_norm": 2.2835919857025146, "learning_rate": 0.0002, "loss": 1.5639, "step": 182190 }, { "epoch": 0.74, "grad_norm": 2.4656083583831787, "learning_rate": 0.0002, "loss": 1.6424, "step": 182200 }, { "epoch": 0.74, "grad_norm": 2.678422212600708, "learning_rate": 0.0002, "loss": 1.5988, "step": 182210 }, { "epoch": 0.74, "grad_norm": 3.1864728927612305, "learning_rate": 0.0002, "loss": 1.5579, "step": 182220 }, { "epoch": 0.74, "grad_norm": 2.1078059673309326, "learning_rate": 0.0002, "loss": 1.8457, "step": 182230 }, { "epoch": 0.74, "grad_norm": 2.9943912029266357, "learning_rate": 0.0002, "loss": 1.6346, "step": 182240 }, { "epoch": 0.74, "grad_norm": 3.838791847229004, "learning_rate": 0.0002, "loss": 1.6767, "step": 182250 }, { "epoch": 0.74, "grad_norm": 3.0416648387908936, "learning_rate": 0.0002, "loss": 1.7094, "step": 182260 }, { "epoch": 0.74, "grad_norm": 3.717491865158081, "learning_rate": 0.0002, "loss": 1.8259, "step": 182270 }, { "epoch": 0.74, "grad_norm": 2.5825724601745605, "learning_rate": 0.0002, "loss": 1.4877, "step": 182280 }, { "epoch": 0.74, "grad_norm": 3.200003147125244, "learning_rate": 0.0002, "loss": 1.3437, "step": 182290 }, { "epoch": 0.74, "grad_norm": 4.358242988586426, "learning_rate": 0.0002, "loss": 1.2729, "step": 182300 }, { "epoch": 0.74, "grad_norm": 3.6525540351867676, "learning_rate": 0.0002, "loss": 1.5958, "step": 182310 }, { "epoch": 0.74, "grad_norm": 2.452082395553589, "learning_rate": 0.0002, "loss": 1.4562, "step": 182320 }, { "epoch": 0.74, "grad_norm": 2.903545618057251, "learning_rate": 0.0002, "loss": 1.4605, "step": 182330 }, { "epoch": 0.74, "grad_norm": 2.631380796432495, "learning_rate": 0.0002, "loss": 1.498, "step": 182340 }, { "epoch": 0.74, "grad_norm": 3.1014907360076904, "learning_rate": 0.0002, "loss": 1.4084, "step": 182350 }, { "epoch": 0.74, "grad_norm": 2.735002040863037, "learning_rate": 0.0002, "loss": 1.6307, "step": 182360 }, { "epoch": 0.74, "grad_norm": 2.174560785293579, "learning_rate": 0.0002, "loss": 1.4531, "step": 182370 }, { "epoch": 0.74, "grad_norm": 3.318737745285034, "learning_rate": 0.0002, "loss": 1.4528, "step": 182380 }, { "epoch": 0.74, "grad_norm": 4.323850631713867, "learning_rate": 0.0002, "loss": 1.6358, "step": 182390 }, { "epoch": 0.74, "grad_norm": 3.6206793785095215, "learning_rate": 0.0002, "loss": 1.748, "step": 182400 }, { "epoch": 0.74, "grad_norm": 2.5688021183013916, "learning_rate": 0.0002, "loss": 1.511, "step": 182410 }, { "epoch": 0.74, "grad_norm": 2.2165656089782715, "learning_rate": 0.0002, "loss": 1.8116, "step": 182420 }, { "epoch": 0.74, "grad_norm": 3.7819621562957764, "learning_rate": 0.0002, "loss": 1.7319, "step": 182430 }, { "epoch": 0.74, "grad_norm": 4.453058242797852, "learning_rate": 0.0002, "loss": 1.5906, "step": 182440 }, { "epoch": 0.74, "grad_norm": 6.263798713684082, "learning_rate": 0.0002, "loss": 1.6331, "step": 182450 }, { "epoch": 0.74, "grad_norm": 2.7554590702056885, "learning_rate": 0.0002, "loss": 1.8179, "step": 182460 }, { "epoch": 0.74, "grad_norm": 2.744418144226074, "learning_rate": 0.0002, "loss": 1.5917, "step": 182470 }, { "epoch": 0.74, "grad_norm": 1.925595760345459, "learning_rate": 0.0002, "loss": 1.5159, "step": 182480 }, { "epoch": 0.74, "grad_norm": 2.893130302429199, "learning_rate": 0.0002, "loss": 1.5078, "step": 182490 }, { "epoch": 0.74, "grad_norm": 4.459592819213867, "learning_rate": 0.0002, "loss": 1.8619, "step": 182500 }, { "epoch": 0.74, "grad_norm": 2.559171199798584, "learning_rate": 0.0002, "loss": 1.6974, "step": 182510 }, { "epoch": 0.74, "grad_norm": 5.469786167144775, "learning_rate": 0.0002, "loss": 1.2843, "step": 182520 }, { "epoch": 0.74, "grad_norm": 3.402695417404175, "learning_rate": 0.0002, "loss": 1.4496, "step": 182530 }, { "epoch": 0.74, "grad_norm": 3.1769251823425293, "learning_rate": 0.0002, "loss": 1.5105, "step": 182540 }, { "epoch": 0.74, "grad_norm": 2.5176126956939697, "learning_rate": 0.0002, "loss": 1.6425, "step": 182550 }, { "epoch": 0.74, "grad_norm": 2.8676199913024902, "learning_rate": 0.0002, "loss": 1.6776, "step": 182560 }, { "epoch": 0.74, "grad_norm": 3.2258498668670654, "learning_rate": 0.0002, "loss": 1.3467, "step": 182570 }, { "epoch": 0.74, "grad_norm": 3.5751140117645264, "learning_rate": 0.0002, "loss": 1.4922, "step": 182580 }, { "epoch": 0.74, "grad_norm": 3.7708325386047363, "learning_rate": 0.0002, "loss": 1.3615, "step": 182590 }, { "epoch": 0.74, "grad_norm": 4.216849327087402, "learning_rate": 0.0002, "loss": 1.4826, "step": 182600 }, { "epoch": 0.74, "grad_norm": 2.981086015701294, "learning_rate": 0.0002, "loss": 1.6143, "step": 182610 }, { "epoch": 0.74, "grad_norm": 2.845137119293213, "learning_rate": 0.0002, "loss": 1.3688, "step": 182620 }, { "epoch": 0.74, "grad_norm": 6.3011474609375, "learning_rate": 0.0002, "loss": 1.485, "step": 182630 }, { "epoch": 0.74, "grad_norm": 3.5048747062683105, "learning_rate": 0.0002, "loss": 1.6695, "step": 182640 }, { "epoch": 0.74, "grad_norm": 2.7536988258361816, "learning_rate": 0.0002, "loss": 1.5531, "step": 182650 }, { "epoch": 0.74, "grad_norm": 2.462808847427368, "learning_rate": 0.0002, "loss": 1.575, "step": 182660 }, { "epoch": 0.74, "grad_norm": 2.3631751537323, "learning_rate": 0.0002, "loss": 1.4615, "step": 182670 }, { "epoch": 0.74, "grad_norm": 3.5488972663879395, "learning_rate": 0.0002, "loss": 1.819, "step": 182680 }, { "epoch": 0.74, "grad_norm": 3.8938701152801514, "learning_rate": 0.0002, "loss": 1.5756, "step": 182690 }, { "epoch": 0.74, "grad_norm": 4.305524826049805, "learning_rate": 0.0002, "loss": 1.7643, "step": 182700 }, { "epoch": 0.74, "grad_norm": 4.354018211364746, "learning_rate": 0.0002, "loss": 1.5177, "step": 182710 }, { "epoch": 0.74, "grad_norm": 2.6687941551208496, "learning_rate": 0.0002, "loss": 1.7504, "step": 182720 }, { "epoch": 0.74, "grad_norm": 2.9286727905273438, "learning_rate": 0.0002, "loss": 1.6561, "step": 182730 }, { "epoch": 0.74, "grad_norm": 2.630058765411377, "learning_rate": 0.0002, "loss": 1.568, "step": 182740 }, { "epoch": 0.74, "grad_norm": 2.4221394062042236, "learning_rate": 0.0002, "loss": 1.4573, "step": 182750 }, { "epoch": 0.74, "grad_norm": 4.194565773010254, "learning_rate": 0.0002, "loss": 1.8534, "step": 182760 }, { "epoch": 0.74, "grad_norm": 2.8274247646331787, "learning_rate": 0.0002, "loss": 1.3919, "step": 182770 }, { "epoch": 0.74, "grad_norm": 3.1666364669799805, "learning_rate": 0.0002, "loss": 1.8214, "step": 182780 }, { "epoch": 0.74, "grad_norm": 2.9372122287750244, "learning_rate": 0.0002, "loss": 1.7723, "step": 182790 }, { "epoch": 0.74, "grad_norm": 3.352823257446289, "learning_rate": 0.0002, "loss": 1.492, "step": 182800 }, { "epoch": 0.74, "grad_norm": 2.931166410446167, "learning_rate": 0.0002, "loss": 1.3727, "step": 182810 }, { "epoch": 0.74, "grad_norm": 2.4225425720214844, "learning_rate": 0.0002, "loss": 1.5536, "step": 182820 }, { "epoch": 0.74, "grad_norm": 1.750872254371643, "learning_rate": 0.0002, "loss": 1.3671, "step": 182830 }, { "epoch": 0.74, "grad_norm": 4.041645050048828, "learning_rate": 0.0002, "loss": 1.6742, "step": 182840 }, { "epoch": 0.74, "grad_norm": 4.672826766967773, "learning_rate": 0.0002, "loss": 1.5409, "step": 182850 }, { "epoch": 0.74, "grad_norm": 1.6659406423568726, "learning_rate": 0.0002, "loss": 1.5834, "step": 182860 }, { "epoch": 0.74, "grad_norm": 3.9162323474884033, "learning_rate": 0.0002, "loss": 1.7068, "step": 182870 }, { "epoch": 0.74, "grad_norm": 2.8451693058013916, "learning_rate": 0.0002, "loss": 1.5714, "step": 182880 }, { "epoch": 0.74, "grad_norm": 3.313727617263794, "learning_rate": 0.0002, "loss": 1.4672, "step": 182890 }, { "epoch": 0.74, "grad_norm": 3.545419692993164, "learning_rate": 0.0002, "loss": 1.4045, "step": 182900 }, { "epoch": 0.74, "grad_norm": 2.915252447128296, "learning_rate": 0.0002, "loss": 1.5435, "step": 182910 }, { "epoch": 0.74, "grad_norm": 2.8241467475891113, "learning_rate": 0.0002, "loss": 1.5121, "step": 182920 }, { "epoch": 0.74, "grad_norm": 3.233325481414795, "learning_rate": 0.0002, "loss": 1.5018, "step": 182930 }, { "epoch": 0.74, "grad_norm": 2.4925031661987305, "learning_rate": 0.0002, "loss": 1.6628, "step": 182940 }, { "epoch": 0.74, "grad_norm": 2.3641183376312256, "learning_rate": 0.0002, "loss": 1.5492, "step": 182950 }, { "epoch": 0.74, "grad_norm": 2.699169397354126, "learning_rate": 0.0002, "loss": 1.5288, "step": 182960 }, { "epoch": 0.74, "grad_norm": 1.5457518100738525, "learning_rate": 0.0002, "loss": 1.5752, "step": 182970 }, { "epoch": 0.74, "grad_norm": 4.7231574058532715, "learning_rate": 0.0002, "loss": 1.6711, "step": 182980 }, { "epoch": 0.74, "grad_norm": 2.8419270515441895, "learning_rate": 0.0002, "loss": 1.4139, "step": 182990 }, { "epoch": 0.74, "grad_norm": 2.910555362701416, "learning_rate": 0.0002, "loss": 1.6486, "step": 183000 }, { "epoch": 0.75, "grad_norm": 3.5901601314544678, "learning_rate": 0.0002, "loss": 1.4221, "step": 183010 }, { "epoch": 0.75, "grad_norm": 3.4943549633026123, "learning_rate": 0.0002, "loss": 1.5882, "step": 183020 }, { "epoch": 0.75, "grad_norm": 5.367910385131836, "learning_rate": 0.0002, "loss": 1.6699, "step": 183030 }, { "epoch": 0.75, "grad_norm": 3.32371187210083, "learning_rate": 0.0002, "loss": 1.7219, "step": 183040 }, { "epoch": 0.75, "grad_norm": 1.9244412183761597, "learning_rate": 0.0002, "loss": 1.5256, "step": 183050 }, { "epoch": 0.75, "grad_norm": 2.6579527854919434, "learning_rate": 0.0002, "loss": 1.5727, "step": 183060 }, { "epoch": 0.75, "grad_norm": 4.252492904663086, "learning_rate": 0.0002, "loss": 1.4851, "step": 183070 }, { "epoch": 0.75, "grad_norm": 2.4672586917877197, "learning_rate": 0.0002, "loss": 1.5736, "step": 183080 }, { "epoch": 0.75, "grad_norm": 2.559811592102051, "learning_rate": 0.0002, "loss": 1.5666, "step": 183090 }, { "epoch": 0.75, "grad_norm": 3.586252212524414, "learning_rate": 0.0002, "loss": 1.5975, "step": 183100 }, { "epoch": 0.75, "grad_norm": 1.6653956174850464, "learning_rate": 0.0002, "loss": 1.7491, "step": 183110 }, { "epoch": 0.75, "grad_norm": 3.4020416736602783, "learning_rate": 0.0002, "loss": 1.469, "step": 183120 }, { "epoch": 0.75, "grad_norm": 1.7128899097442627, "learning_rate": 0.0002, "loss": 1.5101, "step": 183130 }, { "epoch": 0.75, "grad_norm": 2.3677845001220703, "learning_rate": 0.0002, "loss": 1.678, "step": 183140 }, { "epoch": 0.75, "grad_norm": 3.5350732803344727, "learning_rate": 0.0002, "loss": 1.5965, "step": 183150 }, { "epoch": 0.75, "grad_norm": 3.7642250061035156, "learning_rate": 0.0002, "loss": 1.7374, "step": 183160 }, { "epoch": 0.75, "grad_norm": 2.1601219177246094, "learning_rate": 0.0002, "loss": 1.3289, "step": 183170 }, { "epoch": 0.75, "grad_norm": 3.0152430534362793, "learning_rate": 0.0002, "loss": 1.4334, "step": 183180 }, { "epoch": 0.75, "grad_norm": 1.9029748439788818, "learning_rate": 0.0002, "loss": 1.4939, "step": 183190 }, { "epoch": 0.75, "grad_norm": 3.57609486579895, "learning_rate": 0.0002, "loss": 1.3573, "step": 183200 }, { "epoch": 0.75, "grad_norm": 2.013723373413086, "learning_rate": 0.0002, "loss": 1.6587, "step": 183210 }, { "epoch": 0.75, "grad_norm": 4.035229206085205, "learning_rate": 0.0002, "loss": 1.8572, "step": 183220 }, { "epoch": 0.75, "grad_norm": 3.651040554046631, "learning_rate": 0.0002, "loss": 1.5495, "step": 183230 }, { "epoch": 0.75, "grad_norm": 2.3010196685791016, "learning_rate": 0.0002, "loss": 1.6337, "step": 183240 }, { "epoch": 0.75, "grad_norm": 3.381226062774658, "learning_rate": 0.0002, "loss": 1.5472, "step": 183250 }, { "epoch": 0.75, "grad_norm": 1.9715543985366821, "learning_rate": 0.0002, "loss": 1.6022, "step": 183260 }, { "epoch": 0.75, "grad_norm": 5.426835536956787, "learning_rate": 0.0002, "loss": 1.3873, "step": 183270 }, { "epoch": 0.75, "grad_norm": 3.969712257385254, "learning_rate": 0.0002, "loss": 1.5309, "step": 183280 }, { "epoch": 0.75, "grad_norm": 3.2300217151641846, "learning_rate": 0.0002, "loss": 1.8958, "step": 183290 }, { "epoch": 0.75, "grad_norm": 4.191765308380127, "learning_rate": 0.0002, "loss": 1.7864, "step": 183300 }, { "epoch": 0.75, "grad_norm": 2.881523609161377, "learning_rate": 0.0002, "loss": 1.5164, "step": 183310 }, { "epoch": 0.75, "grad_norm": 7.145068168640137, "learning_rate": 0.0002, "loss": 1.5698, "step": 183320 }, { "epoch": 0.75, "grad_norm": 2.471337080001831, "learning_rate": 0.0002, "loss": 1.4023, "step": 183330 }, { "epoch": 0.75, "grad_norm": 2.6522622108459473, "learning_rate": 0.0002, "loss": 1.662, "step": 183340 }, { "epoch": 0.75, "grad_norm": 3.5809969902038574, "learning_rate": 0.0002, "loss": 1.7655, "step": 183350 }, { "epoch": 0.75, "grad_norm": 3.0291755199432373, "learning_rate": 0.0002, "loss": 1.6559, "step": 183360 }, { "epoch": 0.75, "grad_norm": 2.746368885040283, "learning_rate": 0.0002, "loss": 1.5759, "step": 183370 }, { "epoch": 0.75, "grad_norm": 1.7329959869384766, "learning_rate": 0.0002, "loss": 1.6027, "step": 183380 }, { "epoch": 0.75, "grad_norm": 2.512708902359009, "learning_rate": 0.0002, "loss": 1.5895, "step": 183390 }, { "epoch": 0.75, "grad_norm": 5.331660270690918, "learning_rate": 0.0002, "loss": 1.5646, "step": 183400 }, { "epoch": 0.75, "grad_norm": 2.2897276878356934, "learning_rate": 0.0002, "loss": 1.8195, "step": 183410 }, { "epoch": 0.75, "grad_norm": 3.2491331100463867, "learning_rate": 0.0002, "loss": 1.6986, "step": 183420 }, { "epoch": 0.75, "grad_norm": 4.246188163757324, "learning_rate": 0.0002, "loss": 1.5858, "step": 183430 }, { "epoch": 0.75, "grad_norm": 3.7238354682922363, "learning_rate": 0.0002, "loss": 1.5676, "step": 183440 }, { "epoch": 0.75, "grad_norm": 3.0894322395324707, "learning_rate": 0.0002, "loss": 1.7899, "step": 183450 }, { "epoch": 0.75, "grad_norm": 2.9783496856689453, "learning_rate": 0.0002, "loss": 1.3842, "step": 183460 }, { "epoch": 0.75, "grad_norm": 3.959632635116577, "learning_rate": 0.0002, "loss": 1.4249, "step": 183470 }, { "epoch": 0.75, "grad_norm": 3.5279908180236816, "learning_rate": 0.0002, "loss": 1.295, "step": 183480 }, { "epoch": 0.75, "grad_norm": 3.437978982925415, "learning_rate": 0.0002, "loss": 1.3673, "step": 183490 }, { "epoch": 0.75, "grad_norm": 3.227806568145752, "learning_rate": 0.0002, "loss": 1.6455, "step": 183500 }, { "epoch": 0.75, "grad_norm": 2.7780771255493164, "learning_rate": 0.0002, "loss": 1.594, "step": 183510 }, { "epoch": 0.75, "grad_norm": 3.6841070652008057, "learning_rate": 0.0002, "loss": 1.5883, "step": 183520 }, { "epoch": 0.75, "grad_norm": 2.814138889312744, "learning_rate": 0.0002, "loss": 1.5784, "step": 183530 }, { "epoch": 0.75, "grad_norm": 3.2532222270965576, "learning_rate": 0.0002, "loss": 1.3989, "step": 183540 }, { "epoch": 0.75, "grad_norm": 3.003891706466675, "learning_rate": 0.0002, "loss": 1.5568, "step": 183550 }, { "epoch": 0.75, "grad_norm": 2.8300678730010986, "learning_rate": 0.0002, "loss": 1.4199, "step": 183560 }, { "epoch": 0.75, "grad_norm": 3.7079761028289795, "learning_rate": 0.0002, "loss": 1.8071, "step": 183570 }, { "epoch": 0.75, "grad_norm": 3.9914839267730713, "learning_rate": 0.0002, "loss": 1.6238, "step": 183580 }, { "epoch": 0.75, "grad_norm": 3.3284051418304443, "learning_rate": 0.0002, "loss": 1.718, "step": 183590 }, { "epoch": 0.75, "grad_norm": 2.2172834873199463, "learning_rate": 0.0002, "loss": 1.6592, "step": 183600 }, { "epoch": 0.75, "grad_norm": 6.973814010620117, "learning_rate": 0.0002, "loss": 1.5694, "step": 183610 }, { "epoch": 0.75, "grad_norm": 3.332958698272705, "learning_rate": 0.0002, "loss": 1.5206, "step": 183620 }, { "epoch": 0.75, "grad_norm": 2.58955979347229, "learning_rate": 0.0002, "loss": 1.5713, "step": 183630 }, { "epoch": 0.75, "grad_norm": 2.748056173324585, "learning_rate": 0.0002, "loss": 1.6203, "step": 183640 }, { "epoch": 0.75, "grad_norm": 2.997844934463501, "learning_rate": 0.0002, "loss": 1.7634, "step": 183650 }, { "epoch": 0.75, "grad_norm": 3.373643159866333, "learning_rate": 0.0002, "loss": 1.3763, "step": 183660 }, { "epoch": 0.75, "grad_norm": 2.4317972660064697, "learning_rate": 0.0002, "loss": 1.5398, "step": 183670 }, { "epoch": 0.75, "grad_norm": 3.176396131515503, "learning_rate": 0.0002, "loss": 1.4795, "step": 183680 }, { "epoch": 0.75, "grad_norm": 3.6587274074554443, "learning_rate": 0.0002, "loss": 1.6439, "step": 183690 }, { "epoch": 0.75, "grad_norm": 2.3578336238861084, "learning_rate": 0.0002, "loss": 1.7706, "step": 183700 }, { "epoch": 0.75, "grad_norm": 2.633150339126587, "learning_rate": 0.0002, "loss": 1.542, "step": 183710 }, { "epoch": 0.75, "grad_norm": 4.139082908630371, "learning_rate": 0.0002, "loss": 1.4754, "step": 183720 }, { "epoch": 0.75, "grad_norm": 3.7248690128326416, "learning_rate": 0.0002, "loss": 1.5938, "step": 183730 }, { "epoch": 0.75, "grad_norm": 3.70932936668396, "learning_rate": 0.0002, "loss": 1.6323, "step": 183740 }, { "epoch": 0.75, "grad_norm": 2.6851890087127686, "learning_rate": 0.0002, "loss": 1.4237, "step": 183750 }, { "epoch": 0.75, "grad_norm": 2.7727198600769043, "learning_rate": 0.0002, "loss": 1.5435, "step": 183760 }, { "epoch": 0.75, "grad_norm": 2.45794677734375, "learning_rate": 0.0002, "loss": 1.8719, "step": 183770 }, { "epoch": 0.75, "grad_norm": 3.1270790100097656, "learning_rate": 0.0002, "loss": 1.6706, "step": 183780 }, { "epoch": 0.75, "grad_norm": 3.7374086380004883, "learning_rate": 0.0002, "loss": 1.433, "step": 183790 }, { "epoch": 0.75, "grad_norm": 3.091014862060547, "learning_rate": 0.0002, "loss": 1.8144, "step": 183800 }, { "epoch": 0.75, "grad_norm": 3.031804084777832, "learning_rate": 0.0002, "loss": 1.6623, "step": 183810 }, { "epoch": 0.75, "grad_norm": 2.8377602100372314, "learning_rate": 0.0002, "loss": 1.3417, "step": 183820 }, { "epoch": 0.75, "grad_norm": 1.4632453918457031, "learning_rate": 0.0002, "loss": 1.3482, "step": 183830 }, { "epoch": 0.75, "grad_norm": 1.7013124227523804, "learning_rate": 0.0002, "loss": 1.4432, "step": 183840 }, { "epoch": 0.75, "grad_norm": 4.307096004486084, "learning_rate": 0.0002, "loss": 1.262, "step": 183850 }, { "epoch": 0.75, "grad_norm": 3.845093011856079, "learning_rate": 0.0002, "loss": 1.4079, "step": 183860 }, { "epoch": 0.75, "grad_norm": 2.354961395263672, "learning_rate": 0.0002, "loss": 1.5811, "step": 183870 }, { "epoch": 0.75, "grad_norm": 2.308577537536621, "learning_rate": 0.0002, "loss": 1.354, "step": 183880 }, { "epoch": 0.75, "grad_norm": 2.1691157817840576, "learning_rate": 0.0002, "loss": 1.865, "step": 183890 }, { "epoch": 0.75, "grad_norm": 2.173189163208008, "learning_rate": 0.0002, "loss": 1.5627, "step": 183900 }, { "epoch": 0.75, "grad_norm": 2.1909992694854736, "learning_rate": 0.0002, "loss": 1.5685, "step": 183910 }, { "epoch": 0.75, "grad_norm": 3.3848483562469482, "learning_rate": 0.0002, "loss": 1.6341, "step": 183920 }, { "epoch": 0.75, "grad_norm": 2.510331630706787, "learning_rate": 0.0002, "loss": 1.6316, "step": 183930 }, { "epoch": 0.75, "grad_norm": 3.6464223861694336, "learning_rate": 0.0002, "loss": 1.6788, "step": 183940 }, { "epoch": 0.75, "grad_norm": 4.314770221710205, "learning_rate": 0.0002, "loss": 1.6837, "step": 183950 }, { "epoch": 0.75, "grad_norm": 4.8247246742248535, "learning_rate": 0.0002, "loss": 1.5999, "step": 183960 }, { "epoch": 0.75, "grad_norm": 3.2661454677581787, "learning_rate": 0.0002, "loss": 1.8213, "step": 183970 }, { "epoch": 0.75, "grad_norm": 3.962376594543457, "learning_rate": 0.0002, "loss": 1.4142, "step": 183980 }, { "epoch": 0.75, "grad_norm": 2.7422707080841064, "learning_rate": 0.0002, "loss": 1.636, "step": 183990 }, { "epoch": 0.75, "grad_norm": 2.976271629333496, "learning_rate": 0.0002, "loss": 1.8211, "step": 184000 }, { "epoch": 0.75, "grad_norm": 2.247528076171875, "learning_rate": 0.0002, "loss": 1.826, "step": 184010 }, { "epoch": 0.75, "grad_norm": 2.3213307857513428, "learning_rate": 0.0002, "loss": 1.5078, "step": 184020 }, { "epoch": 0.75, "grad_norm": 2.350051164627075, "learning_rate": 0.0002, "loss": 1.3292, "step": 184030 }, { "epoch": 0.75, "grad_norm": 3.0278799533843994, "learning_rate": 0.0002, "loss": 1.5955, "step": 184040 }, { "epoch": 0.75, "grad_norm": 5.714364528656006, "learning_rate": 0.0002, "loss": 1.491, "step": 184050 }, { "epoch": 0.75, "grad_norm": 3.2220492362976074, "learning_rate": 0.0002, "loss": 1.3815, "step": 184060 }, { "epoch": 0.75, "grad_norm": 3.4665701389312744, "learning_rate": 0.0002, "loss": 1.6259, "step": 184070 }, { "epoch": 0.75, "grad_norm": 2.4205660820007324, "learning_rate": 0.0002, "loss": 1.6614, "step": 184080 }, { "epoch": 0.75, "grad_norm": 2.534804344177246, "learning_rate": 0.0002, "loss": 1.6241, "step": 184090 }, { "epoch": 0.75, "grad_norm": 2.6759769916534424, "learning_rate": 0.0002, "loss": 1.6347, "step": 184100 }, { "epoch": 0.75, "grad_norm": 1.583312749862671, "learning_rate": 0.0002, "loss": 1.5981, "step": 184110 }, { "epoch": 0.75, "grad_norm": 3.0246753692626953, "learning_rate": 0.0002, "loss": 1.6032, "step": 184120 }, { "epoch": 0.75, "grad_norm": 4.482016563415527, "learning_rate": 0.0002, "loss": 1.6197, "step": 184130 }, { "epoch": 0.75, "grad_norm": 3.377370834350586, "learning_rate": 0.0002, "loss": 1.5937, "step": 184140 }, { "epoch": 0.75, "grad_norm": 2.8246119022369385, "learning_rate": 0.0002, "loss": 1.4703, "step": 184150 }, { "epoch": 0.75, "grad_norm": 2.2554879188537598, "learning_rate": 0.0002, "loss": 1.6461, "step": 184160 }, { "epoch": 0.75, "grad_norm": 2.8221378326416016, "learning_rate": 0.0002, "loss": 1.454, "step": 184170 }, { "epoch": 0.75, "grad_norm": 4.222980976104736, "learning_rate": 0.0002, "loss": 1.8119, "step": 184180 }, { "epoch": 0.75, "grad_norm": 1.9333934783935547, "learning_rate": 0.0002, "loss": 1.4636, "step": 184190 }, { "epoch": 0.75, "grad_norm": 2.3314294815063477, "learning_rate": 0.0002, "loss": 1.4702, "step": 184200 }, { "epoch": 0.75, "grad_norm": 2.456327199935913, "learning_rate": 0.0002, "loss": 1.6617, "step": 184210 }, { "epoch": 0.75, "grad_norm": 1.4130373001098633, "learning_rate": 0.0002, "loss": 1.4054, "step": 184220 }, { "epoch": 0.75, "grad_norm": 1.8961061239242554, "learning_rate": 0.0002, "loss": 1.6098, "step": 184230 }, { "epoch": 0.75, "grad_norm": 2.305976152420044, "learning_rate": 0.0002, "loss": 1.772, "step": 184240 }, { "epoch": 0.75, "grad_norm": 2.2278828620910645, "learning_rate": 0.0002, "loss": 1.386, "step": 184250 }, { "epoch": 0.75, "grad_norm": 2.487598180770874, "learning_rate": 0.0002, "loss": 1.5255, "step": 184260 }, { "epoch": 0.75, "grad_norm": 4.502711772918701, "learning_rate": 0.0002, "loss": 1.6917, "step": 184270 }, { "epoch": 0.75, "grad_norm": 3.2901785373687744, "learning_rate": 0.0002, "loss": 1.3083, "step": 184280 }, { "epoch": 0.75, "grad_norm": 5.42161226272583, "learning_rate": 0.0002, "loss": 1.5876, "step": 184290 }, { "epoch": 0.75, "grad_norm": 3.031187057495117, "learning_rate": 0.0002, "loss": 1.5404, "step": 184300 }, { "epoch": 0.75, "grad_norm": 4.035336017608643, "learning_rate": 0.0002, "loss": 1.3358, "step": 184310 }, { "epoch": 0.75, "grad_norm": 3.501095771789551, "learning_rate": 0.0002, "loss": 1.7394, "step": 184320 }, { "epoch": 0.75, "grad_norm": 4.265275001525879, "learning_rate": 0.0002, "loss": 1.6382, "step": 184330 }, { "epoch": 0.75, "grad_norm": 3.1672918796539307, "learning_rate": 0.0002, "loss": 1.6456, "step": 184340 }, { "epoch": 0.75, "grad_norm": 2.9118645191192627, "learning_rate": 0.0002, "loss": 1.6117, "step": 184350 }, { "epoch": 0.75, "grad_norm": 2.8006510734558105, "learning_rate": 0.0002, "loss": 1.6933, "step": 184360 }, { "epoch": 0.75, "grad_norm": 4.03465461730957, "learning_rate": 0.0002, "loss": 1.5462, "step": 184370 }, { "epoch": 0.75, "grad_norm": 1.3121836185455322, "learning_rate": 0.0002, "loss": 1.5108, "step": 184380 }, { "epoch": 0.75, "grad_norm": 2.743469476699829, "learning_rate": 0.0002, "loss": 1.8328, "step": 184390 }, { "epoch": 0.75, "grad_norm": 3.2634716033935547, "learning_rate": 0.0002, "loss": 1.4879, "step": 184400 }, { "epoch": 0.75, "grad_norm": 1.3274810314178467, "learning_rate": 0.0002, "loss": 1.7109, "step": 184410 }, { "epoch": 0.75, "grad_norm": 3.9531607627868652, "learning_rate": 0.0002, "loss": 1.4247, "step": 184420 }, { "epoch": 0.75, "grad_norm": 3.4674930572509766, "learning_rate": 0.0002, "loss": 1.6914, "step": 184430 }, { "epoch": 0.75, "grad_norm": 2.0803894996643066, "learning_rate": 0.0002, "loss": 1.4472, "step": 184440 }, { "epoch": 0.75, "grad_norm": 2.807914972305298, "learning_rate": 0.0002, "loss": 1.7675, "step": 184450 }, { "epoch": 0.75, "grad_norm": 2.3082292079925537, "learning_rate": 0.0002, "loss": 1.5783, "step": 184460 }, { "epoch": 0.75, "grad_norm": 4.674330234527588, "learning_rate": 0.0002, "loss": 1.573, "step": 184470 }, { "epoch": 0.75, "grad_norm": 2.8796169757843018, "learning_rate": 0.0002, "loss": 1.738, "step": 184480 }, { "epoch": 0.75, "grad_norm": 3.6501986980438232, "learning_rate": 0.0002, "loss": 1.5592, "step": 184490 }, { "epoch": 0.75, "grad_norm": 2.3766942024230957, "learning_rate": 0.0002, "loss": 1.4934, "step": 184500 }, { "epoch": 0.75, "grad_norm": 1.8760616779327393, "learning_rate": 0.0002, "loss": 1.5858, "step": 184510 }, { "epoch": 0.75, "grad_norm": 2.0120151042938232, "learning_rate": 0.0002, "loss": 1.6133, "step": 184520 }, { "epoch": 0.75, "grad_norm": 2.5715038776397705, "learning_rate": 0.0002, "loss": 1.8217, "step": 184530 }, { "epoch": 0.75, "grad_norm": 2.7471277713775635, "learning_rate": 0.0002, "loss": 1.5862, "step": 184540 }, { "epoch": 0.75, "grad_norm": 2.5680510997772217, "learning_rate": 0.0002, "loss": 1.7149, "step": 184550 }, { "epoch": 0.75, "grad_norm": 1.887593388557434, "learning_rate": 0.0002, "loss": 1.8943, "step": 184560 }, { "epoch": 0.75, "grad_norm": 3.38615083694458, "learning_rate": 0.0002, "loss": 1.5801, "step": 184570 }, { "epoch": 0.75, "grad_norm": 8.84097957611084, "learning_rate": 0.0002, "loss": 1.4976, "step": 184580 }, { "epoch": 0.75, "grad_norm": 4.0295281410217285, "learning_rate": 0.0002, "loss": 1.506, "step": 184590 }, { "epoch": 0.75, "grad_norm": 1.207777738571167, "learning_rate": 0.0002, "loss": 1.5374, "step": 184600 }, { "epoch": 0.75, "grad_norm": 3.6842920780181885, "learning_rate": 0.0002, "loss": 1.6299, "step": 184610 }, { "epoch": 0.75, "grad_norm": 3.1356325149536133, "learning_rate": 0.0002, "loss": 1.6496, "step": 184620 }, { "epoch": 0.75, "grad_norm": 4.281368732452393, "learning_rate": 0.0002, "loss": 1.4372, "step": 184630 }, { "epoch": 0.75, "grad_norm": 4.320443153381348, "learning_rate": 0.0002, "loss": 1.5739, "step": 184640 }, { "epoch": 0.75, "grad_norm": 3.519172191619873, "learning_rate": 0.0002, "loss": 1.7379, "step": 184650 }, { "epoch": 0.75, "grad_norm": 3.6912295818328857, "learning_rate": 0.0002, "loss": 1.5223, "step": 184660 }, { "epoch": 0.75, "grad_norm": 2.5675551891326904, "learning_rate": 0.0002, "loss": 1.5824, "step": 184670 }, { "epoch": 0.75, "grad_norm": 6.26387357711792, "learning_rate": 0.0002, "loss": 1.3765, "step": 184680 }, { "epoch": 0.75, "grad_norm": 3.028620481491089, "learning_rate": 0.0002, "loss": 1.3085, "step": 184690 }, { "epoch": 0.75, "grad_norm": 2.729067087173462, "learning_rate": 0.0002, "loss": 1.8844, "step": 184700 }, { "epoch": 0.75, "grad_norm": 2.803997755050659, "learning_rate": 0.0002, "loss": 1.6022, "step": 184710 }, { "epoch": 0.75, "grad_norm": 6.787098407745361, "learning_rate": 0.0002, "loss": 1.3835, "step": 184720 }, { "epoch": 0.75, "grad_norm": 2.9814822673797607, "learning_rate": 0.0002, "loss": 1.4386, "step": 184730 }, { "epoch": 0.75, "grad_norm": 2.1084883213043213, "learning_rate": 0.0002, "loss": 1.5832, "step": 184740 }, { "epoch": 0.75, "grad_norm": 5.147500991821289, "learning_rate": 0.0002, "loss": 1.6155, "step": 184750 }, { "epoch": 0.75, "grad_norm": 3.344353199005127, "learning_rate": 0.0002, "loss": 1.6027, "step": 184760 }, { "epoch": 0.75, "grad_norm": 4.246825218200684, "learning_rate": 0.0002, "loss": 1.3796, "step": 184770 }, { "epoch": 0.75, "grad_norm": 2.237949848175049, "learning_rate": 0.0002, "loss": 1.6254, "step": 184780 }, { "epoch": 0.75, "grad_norm": 2.277812957763672, "learning_rate": 0.0002, "loss": 1.5275, "step": 184790 }, { "epoch": 0.75, "grad_norm": 1.6579393148422241, "learning_rate": 0.0002, "loss": 1.5085, "step": 184800 }, { "epoch": 0.75, "grad_norm": 2.6835832595825195, "learning_rate": 0.0002, "loss": 1.6246, "step": 184810 }, { "epoch": 0.75, "grad_norm": 4.925387382507324, "learning_rate": 0.0002, "loss": 1.6953, "step": 184820 }, { "epoch": 0.75, "grad_norm": 2.8325002193450928, "learning_rate": 0.0002, "loss": 1.703, "step": 184830 }, { "epoch": 0.75, "grad_norm": 3.5953423976898193, "learning_rate": 0.0002, "loss": 1.7427, "step": 184840 }, { "epoch": 0.75, "grad_norm": 6.952725410461426, "learning_rate": 0.0002, "loss": 1.5406, "step": 184850 }, { "epoch": 0.75, "grad_norm": 2.986030340194702, "learning_rate": 0.0002, "loss": 1.3709, "step": 184860 }, { "epoch": 0.75, "grad_norm": 2.0907232761383057, "learning_rate": 0.0002, "loss": 1.6998, "step": 184870 }, { "epoch": 0.75, "grad_norm": 4.128434181213379, "learning_rate": 0.0002, "loss": 1.5814, "step": 184880 }, { "epoch": 0.75, "grad_norm": 3.259124994277954, "learning_rate": 0.0002, "loss": 1.5848, "step": 184890 }, { "epoch": 0.75, "grad_norm": 7.8616132736206055, "learning_rate": 0.0002, "loss": 1.7762, "step": 184900 }, { "epoch": 0.75, "grad_norm": 1.9220335483551025, "learning_rate": 0.0002, "loss": 1.7531, "step": 184910 }, { "epoch": 0.75, "grad_norm": 2.7515130043029785, "learning_rate": 0.0002, "loss": 1.7158, "step": 184920 }, { "epoch": 0.75, "grad_norm": 3.0079588890075684, "learning_rate": 0.0002, "loss": 1.3721, "step": 184930 }, { "epoch": 0.75, "grad_norm": 3.407034397125244, "learning_rate": 0.0002, "loss": 1.588, "step": 184940 }, { "epoch": 0.75, "grad_norm": 2.602465867996216, "learning_rate": 0.0002, "loss": 1.592, "step": 184950 }, { "epoch": 0.75, "grad_norm": 3.9548137187957764, "learning_rate": 0.0002, "loss": 1.5868, "step": 184960 }, { "epoch": 0.75, "grad_norm": 4.110199451446533, "learning_rate": 0.0002, "loss": 1.7883, "step": 184970 }, { "epoch": 0.75, "grad_norm": 1.9213361740112305, "learning_rate": 0.0002, "loss": 1.5522, "step": 184980 }, { "epoch": 0.75, "grad_norm": 3.3827664852142334, "learning_rate": 0.0002, "loss": 1.6909, "step": 184990 }, { "epoch": 0.75, "grad_norm": 3.070302724838257, "learning_rate": 0.0002, "loss": 1.365, "step": 185000 }, { "epoch": 0.75, "grad_norm": 2.9303977489471436, "learning_rate": 0.0002, "loss": 1.4771, "step": 185010 }, { "epoch": 0.75, "grad_norm": 3.2633297443389893, "learning_rate": 0.0002, "loss": 1.5, "step": 185020 }, { "epoch": 0.75, "grad_norm": 3.306267023086548, "learning_rate": 0.0002, "loss": 1.6234, "step": 185030 }, { "epoch": 0.75, "grad_norm": 2.568486213684082, "learning_rate": 0.0002, "loss": 1.3914, "step": 185040 }, { "epoch": 0.75, "grad_norm": 2.6803855895996094, "learning_rate": 0.0002, "loss": 1.5674, "step": 185050 }, { "epoch": 0.75, "grad_norm": 2.178514003753662, "learning_rate": 0.0002, "loss": 1.8284, "step": 185060 }, { "epoch": 0.75, "grad_norm": 5.100517749786377, "learning_rate": 0.0002, "loss": 1.5176, "step": 185070 }, { "epoch": 0.75, "grad_norm": 2.6305763721466064, "learning_rate": 0.0002, "loss": 1.6531, "step": 185080 }, { "epoch": 0.75, "grad_norm": 5.073988437652588, "learning_rate": 0.0002, "loss": 1.3247, "step": 185090 }, { "epoch": 0.75, "grad_norm": 1.5292474031448364, "learning_rate": 0.0002, "loss": 1.6364, "step": 185100 }, { "epoch": 0.75, "grad_norm": 2.443830966949463, "learning_rate": 0.0002, "loss": 1.701, "step": 185110 }, { "epoch": 0.75, "grad_norm": 3.3922250270843506, "learning_rate": 0.0002, "loss": 1.6471, "step": 185120 }, { "epoch": 0.75, "grad_norm": 1.9686270952224731, "learning_rate": 0.0002, "loss": 1.7874, "step": 185130 }, { "epoch": 0.75, "grad_norm": 1.9940481185913086, "learning_rate": 0.0002, "loss": 1.6675, "step": 185140 }, { "epoch": 0.75, "grad_norm": 3.3066587448120117, "learning_rate": 0.0002, "loss": 1.4884, "step": 185150 }, { "epoch": 0.75, "grad_norm": 1.9938775300979614, "learning_rate": 0.0002, "loss": 1.486, "step": 185160 }, { "epoch": 0.75, "grad_norm": 3.451228618621826, "learning_rate": 0.0002, "loss": 2.0992, "step": 185170 }, { "epoch": 0.75, "grad_norm": 2.9084677696228027, "learning_rate": 0.0002, "loss": 1.4041, "step": 185180 }, { "epoch": 0.75, "grad_norm": 3.553590774536133, "learning_rate": 0.0002, "loss": 1.702, "step": 185190 }, { "epoch": 0.75, "grad_norm": 2.2232742309570312, "learning_rate": 0.0002, "loss": 1.7457, "step": 185200 }, { "epoch": 0.75, "grad_norm": 2.377046585083008, "learning_rate": 0.0002, "loss": 1.7203, "step": 185210 }, { "epoch": 0.75, "grad_norm": 2.9236388206481934, "learning_rate": 0.0002, "loss": 1.6093, "step": 185220 }, { "epoch": 0.75, "grad_norm": 3.699549674987793, "learning_rate": 0.0002, "loss": 1.6358, "step": 185230 }, { "epoch": 0.75, "grad_norm": 3.1544301509857178, "learning_rate": 0.0002, "loss": 1.6268, "step": 185240 }, { "epoch": 0.75, "grad_norm": 5.085450649261475, "learning_rate": 0.0002, "loss": 1.4216, "step": 185250 }, { "epoch": 0.75, "grad_norm": 3.210477590560913, "learning_rate": 0.0002, "loss": 1.7961, "step": 185260 }, { "epoch": 0.75, "grad_norm": 3.749730348587036, "learning_rate": 0.0002, "loss": 1.4631, "step": 185270 }, { "epoch": 0.75, "grad_norm": 1.466002345085144, "learning_rate": 0.0002, "loss": 1.5043, "step": 185280 }, { "epoch": 0.75, "grad_norm": 3.5832316875457764, "learning_rate": 0.0002, "loss": 1.5978, "step": 185290 }, { "epoch": 0.75, "grad_norm": 2.8221545219421387, "learning_rate": 0.0002, "loss": 1.7201, "step": 185300 }, { "epoch": 0.75, "grad_norm": 3.248675584793091, "learning_rate": 0.0002, "loss": 1.3952, "step": 185310 }, { "epoch": 0.75, "grad_norm": 2.0446674823760986, "learning_rate": 0.0002, "loss": 1.4082, "step": 185320 }, { "epoch": 0.75, "grad_norm": 3.237532615661621, "learning_rate": 0.0002, "loss": 1.6173, "step": 185330 }, { "epoch": 0.75, "grad_norm": 2.909897565841675, "learning_rate": 0.0002, "loss": 1.6518, "step": 185340 }, { "epoch": 0.75, "grad_norm": 1.746364951133728, "learning_rate": 0.0002, "loss": 1.6429, "step": 185350 }, { "epoch": 0.75, "grad_norm": 2.302947998046875, "learning_rate": 0.0002, "loss": 1.5221, "step": 185360 }, { "epoch": 0.75, "grad_norm": 2.0036113262176514, "learning_rate": 0.0002, "loss": 1.4406, "step": 185370 }, { "epoch": 0.75, "grad_norm": 7.5852580070495605, "learning_rate": 0.0002, "loss": 1.6263, "step": 185380 }, { "epoch": 0.75, "grad_norm": 3.5328292846679688, "learning_rate": 0.0002, "loss": 1.6041, "step": 185390 }, { "epoch": 0.75, "grad_norm": 4.266069412231445, "learning_rate": 0.0002, "loss": 1.9258, "step": 185400 }, { "epoch": 0.75, "grad_norm": 3.402453899383545, "learning_rate": 0.0002, "loss": 1.3852, "step": 185410 }, { "epoch": 0.75, "grad_norm": 9.909791946411133, "learning_rate": 0.0002, "loss": 1.5674, "step": 185420 }, { "epoch": 0.75, "grad_norm": 3.1075897216796875, "learning_rate": 0.0002, "loss": 1.663, "step": 185430 }, { "epoch": 0.75, "grad_norm": 7.016313076019287, "learning_rate": 0.0002, "loss": 1.4019, "step": 185440 }, { "epoch": 0.75, "grad_norm": 3.278777837753296, "learning_rate": 0.0002, "loss": 1.469, "step": 185450 }, { "epoch": 0.75, "grad_norm": 3.3797881603240967, "learning_rate": 0.0002, "loss": 1.7232, "step": 185460 }, { "epoch": 0.76, "grad_norm": 2.4894094467163086, "learning_rate": 0.0002, "loss": 1.2769, "step": 185470 }, { "epoch": 0.76, "grad_norm": 3.8066887855529785, "learning_rate": 0.0002, "loss": 1.5174, "step": 185480 }, { "epoch": 0.76, "grad_norm": 2.472818374633789, "learning_rate": 0.0002, "loss": 1.3796, "step": 185490 }, { "epoch": 0.76, "grad_norm": 3.654249906539917, "learning_rate": 0.0002, "loss": 1.677, "step": 185500 }, { "epoch": 0.76, "grad_norm": 2.347679376602173, "learning_rate": 0.0002, "loss": 1.594, "step": 185510 }, { "epoch": 0.76, "grad_norm": 3.5202953815460205, "learning_rate": 0.0002, "loss": 1.5985, "step": 185520 }, { "epoch": 0.76, "grad_norm": 2.1903836727142334, "learning_rate": 0.0002, "loss": 1.5682, "step": 185530 }, { "epoch": 0.76, "grad_norm": 2.755859613418579, "learning_rate": 0.0002, "loss": 1.4318, "step": 185540 }, { "epoch": 0.76, "grad_norm": 2.741269826889038, "learning_rate": 0.0002, "loss": 1.7498, "step": 185550 }, { "epoch": 0.76, "grad_norm": 2.85388445854187, "learning_rate": 0.0002, "loss": 1.8997, "step": 185560 }, { "epoch": 0.76, "grad_norm": 3.5466220378875732, "learning_rate": 0.0002, "loss": 1.4628, "step": 185570 }, { "epoch": 0.76, "grad_norm": 3.0511744022369385, "learning_rate": 0.0002, "loss": 1.5969, "step": 185580 }, { "epoch": 0.76, "grad_norm": 2.6736650466918945, "learning_rate": 0.0002, "loss": 1.4908, "step": 185590 }, { "epoch": 0.76, "grad_norm": 6.446672439575195, "learning_rate": 0.0002, "loss": 1.4385, "step": 185600 }, { "epoch": 0.76, "grad_norm": 2.4977612495422363, "learning_rate": 0.0002, "loss": 1.413, "step": 185610 }, { "epoch": 0.76, "grad_norm": 3.033050537109375, "learning_rate": 0.0002, "loss": 1.6313, "step": 185620 }, { "epoch": 0.76, "grad_norm": 3.3042783737182617, "learning_rate": 0.0002, "loss": 1.2766, "step": 185630 }, { "epoch": 0.76, "grad_norm": 2.4598093032836914, "learning_rate": 0.0002, "loss": 1.3119, "step": 185640 }, { "epoch": 0.76, "grad_norm": 2.3347249031066895, "learning_rate": 0.0002, "loss": 1.4751, "step": 185650 }, { "epoch": 0.76, "grad_norm": 2.1262736320495605, "learning_rate": 0.0002, "loss": 1.5746, "step": 185660 }, { "epoch": 0.76, "grad_norm": 2.9905202388763428, "learning_rate": 0.0002, "loss": 1.6276, "step": 185670 }, { "epoch": 0.76, "grad_norm": 4.582488536834717, "learning_rate": 0.0002, "loss": 1.8112, "step": 185680 }, { "epoch": 0.76, "grad_norm": 2.822943925857544, "learning_rate": 0.0002, "loss": 1.5224, "step": 185690 }, { "epoch": 0.76, "grad_norm": 4.9194746017456055, "learning_rate": 0.0002, "loss": 1.7961, "step": 185700 }, { "epoch": 0.76, "grad_norm": 3.6976053714752197, "learning_rate": 0.0002, "loss": 1.5498, "step": 185710 }, { "epoch": 0.76, "grad_norm": 1.9929836988449097, "learning_rate": 0.0002, "loss": 1.4352, "step": 185720 }, { "epoch": 0.76, "grad_norm": 3.5207040309906006, "learning_rate": 0.0002, "loss": 1.5537, "step": 185730 }, { "epoch": 0.76, "grad_norm": 3.7522082328796387, "learning_rate": 0.0002, "loss": 1.492, "step": 185740 }, { "epoch": 0.76, "grad_norm": 2.4558961391448975, "learning_rate": 0.0002, "loss": 1.634, "step": 185750 }, { "epoch": 0.76, "grad_norm": 4.2574543952941895, "learning_rate": 0.0002, "loss": 1.7585, "step": 185760 }, { "epoch": 0.76, "grad_norm": 2.487700939178467, "learning_rate": 0.0002, "loss": 1.5228, "step": 185770 }, { "epoch": 0.76, "grad_norm": 2.846384048461914, "learning_rate": 0.0002, "loss": 1.6635, "step": 185780 }, { "epoch": 0.76, "grad_norm": 1.8758119344711304, "learning_rate": 0.0002, "loss": 1.6404, "step": 185790 }, { "epoch": 0.76, "grad_norm": 2.292965888977051, "learning_rate": 0.0002, "loss": 1.6218, "step": 185800 }, { "epoch": 0.76, "grad_norm": 4.061832904815674, "learning_rate": 0.0002, "loss": 1.5522, "step": 185810 }, { "epoch": 0.76, "grad_norm": 2.8425004482269287, "learning_rate": 0.0002, "loss": 1.4425, "step": 185820 }, { "epoch": 0.76, "grad_norm": 2.592101573944092, "learning_rate": 0.0002, "loss": 1.5213, "step": 185830 }, { "epoch": 0.76, "grad_norm": 4.020380973815918, "learning_rate": 0.0002, "loss": 1.6714, "step": 185840 }, { "epoch": 0.76, "grad_norm": 3.6377310752868652, "learning_rate": 0.0002, "loss": 1.6267, "step": 185850 }, { "epoch": 0.76, "grad_norm": 3.814032554626465, "learning_rate": 0.0002, "loss": 1.6797, "step": 185860 }, { "epoch": 0.76, "grad_norm": 2.4700820446014404, "learning_rate": 0.0002, "loss": 1.6152, "step": 185870 }, { "epoch": 0.76, "grad_norm": 2.5908937454223633, "learning_rate": 0.0002, "loss": 1.63, "step": 185880 }, { "epoch": 0.76, "grad_norm": 3.638638496398926, "learning_rate": 0.0002, "loss": 1.359, "step": 185890 }, { "epoch": 0.76, "grad_norm": 2.2006642818450928, "learning_rate": 0.0002, "loss": 1.8185, "step": 185900 }, { "epoch": 0.76, "grad_norm": 2.7699484825134277, "learning_rate": 0.0002, "loss": 1.4889, "step": 185910 }, { "epoch": 0.76, "grad_norm": 4.296577453613281, "learning_rate": 0.0002, "loss": 1.6213, "step": 185920 }, { "epoch": 0.76, "grad_norm": 3.0103237628936768, "learning_rate": 0.0002, "loss": 1.7586, "step": 185930 }, { "epoch": 0.76, "grad_norm": 3.0353775024414062, "learning_rate": 0.0002, "loss": 1.4183, "step": 185940 }, { "epoch": 0.76, "grad_norm": 1.880020260810852, "learning_rate": 0.0002, "loss": 1.6806, "step": 185950 }, { "epoch": 0.76, "grad_norm": 2.856334686279297, "learning_rate": 0.0002, "loss": 1.5938, "step": 185960 }, { "epoch": 0.76, "grad_norm": 6.315115451812744, "learning_rate": 0.0002, "loss": 1.6161, "step": 185970 }, { "epoch": 0.76, "grad_norm": 3.706713914871216, "learning_rate": 0.0002, "loss": 1.5828, "step": 185980 }, { "epoch": 0.76, "grad_norm": 2.9414546489715576, "learning_rate": 0.0002, "loss": 1.6173, "step": 185990 }, { "epoch": 0.76, "grad_norm": 6.253665447235107, "learning_rate": 0.0002, "loss": 1.609, "step": 186000 }, { "epoch": 0.76, "grad_norm": 3.432047128677368, "learning_rate": 0.0002, "loss": 1.5074, "step": 186010 }, { "epoch": 0.76, "grad_norm": 3.492365598678589, "learning_rate": 0.0002, "loss": 1.6846, "step": 186020 }, { "epoch": 0.76, "grad_norm": 3.567906141281128, "learning_rate": 0.0002, "loss": 1.6159, "step": 186030 }, { "epoch": 0.76, "grad_norm": 4.440598487854004, "learning_rate": 0.0002, "loss": 1.5312, "step": 186040 }, { "epoch": 0.76, "grad_norm": 3.196855068206787, "learning_rate": 0.0002, "loss": 1.4254, "step": 186050 }, { "epoch": 0.76, "grad_norm": 4.200582981109619, "learning_rate": 0.0002, "loss": 1.6092, "step": 186060 }, { "epoch": 0.76, "grad_norm": 2.7994887828826904, "learning_rate": 0.0002, "loss": 1.578, "step": 186070 }, { "epoch": 0.76, "grad_norm": 2.711534261703491, "learning_rate": 0.0002, "loss": 1.7384, "step": 186080 }, { "epoch": 0.76, "grad_norm": 4.095931053161621, "learning_rate": 0.0002, "loss": 1.5973, "step": 186090 }, { "epoch": 0.76, "grad_norm": 3.0878000259399414, "learning_rate": 0.0002, "loss": 1.9714, "step": 186100 }, { "epoch": 0.76, "grad_norm": 2.7082011699676514, "learning_rate": 0.0002, "loss": 1.5197, "step": 186110 }, { "epoch": 0.76, "grad_norm": 2.5984630584716797, "learning_rate": 0.0002, "loss": 1.7028, "step": 186120 }, { "epoch": 0.76, "grad_norm": 4.813511848449707, "learning_rate": 0.0002, "loss": 1.5494, "step": 186130 }, { "epoch": 0.76, "grad_norm": 3.474189043045044, "learning_rate": 0.0002, "loss": 1.7513, "step": 186140 }, { "epoch": 0.76, "grad_norm": 2.660456895828247, "learning_rate": 0.0002, "loss": 1.6279, "step": 186150 }, { "epoch": 0.76, "grad_norm": 4.660046100616455, "learning_rate": 0.0002, "loss": 1.8845, "step": 186160 }, { "epoch": 0.76, "grad_norm": 2.6237576007843018, "learning_rate": 0.0002, "loss": 1.7581, "step": 186170 }, { "epoch": 0.76, "grad_norm": 3.5109622478485107, "learning_rate": 0.0002, "loss": 1.523, "step": 186180 }, { "epoch": 0.76, "grad_norm": 2.7719614505767822, "learning_rate": 0.0002, "loss": 1.5943, "step": 186190 }, { "epoch": 0.76, "grad_norm": 6.4066572189331055, "learning_rate": 0.0002, "loss": 1.6842, "step": 186200 }, { "epoch": 0.76, "grad_norm": 2.7760679721832275, "learning_rate": 0.0002, "loss": 1.5932, "step": 186210 }, { "epoch": 0.76, "grad_norm": 2.788383960723877, "learning_rate": 0.0002, "loss": 1.5972, "step": 186220 }, { "epoch": 0.76, "grad_norm": 2.4779515266418457, "learning_rate": 0.0002, "loss": 1.4285, "step": 186230 }, { "epoch": 0.76, "grad_norm": 2.89359188079834, "learning_rate": 0.0002, "loss": 1.4899, "step": 186240 }, { "epoch": 0.76, "grad_norm": 2.1474947929382324, "learning_rate": 0.0002, "loss": 1.5547, "step": 186250 }, { "epoch": 0.76, "grad_norm": 2.547487258911133, "learning_rate": 0.0002, "loss": 1.3879, "step": 186260 }, { "epoch": 0.76, "grad_norm": 3.1360998153686523, "learning_rate": 0.0002, "loss": 1.7246, "step": 186270 }, { "epoch": 0.76, "grad_norm": 3.235586166381836, "learning_rate": 0.0002, "loss": 1.7172, "step": 186280 }, { "epoch": 0.76, "grad_norm": 2.7696444988250732, "learning_rate": 0.0002, "loss": 1.739, "step": 186290 }, { "epoch": 0.76, "grad_norm": 11.355778694152832, "learning_rate": 0.0002, "loss": 1.3779, "step": 186300 }, { "epoch": 0.76, "grad_norm": 2.8137683868408203, "learning_rate": 0.0002, "loss": 1.5314, "step": 186310 }, { "epoch": 0.76, "grad_norm": 1.7726023197174072, "learning_rate": 0.0002, "loss": 1.5204, "step": 186320 }, { "epoch": 0.76, "grad_norm": 3.37275767326355, "learning_rate": 0.0002, "loss": 1.6118, "step": 186330 }, { "epoch": 0.76, "grad_norm": 3.7290539741516113, "learning_rate": 0.0002, "loss": 1.5965, "step": 186340 }, { "epoch": 0.76, "grad_norm": 3.5275025367736816, "learning_rate": 0.0002, "loss": 1.8825, "step": 186350 }, { "epoch": 0.76, "grad_norm": 1.7752920389175415, "learning_rate": 0.0002, "loss": 1.6445, "step": 186360 }, { "epoch": 0.76, "grad_norm": 2.8862059116363525, "learning_rate": 0.0002, "loss": 1.4345, "step": 186370 }, { "epoch": 0.76, "grad_norm": 2.738525390625, "learning_rate": 0.0002, "loss": 1.6149, "step": 186380 }, { "epoch": 0.76, "grad_norm": 3.1467161178588867, "learning_rate": 0.0002, "loss": 1.3883, "step": 186390 }, { "epoch": 0.76, "grad_norm": 4.225863933563232, "learning_rate": 0.0002, "loss": 1.5295, "step": 186400 }, { "epoch": 0.76, "grad_norm": 2.99906849861145, "learning_rate": 0.0002, "loss": 1.5479, "step": 186410 }, { "epoch": 0.76, "grad_norm": 1.8648444414138794, "learning_rate": 0.0002, "loss": 1.8465, "step": 186420 }, { "epoch": 0.76, "grad_norm": 3.244844913482666, "learning_rate": 0.0002, "loss": 1.4107, "step": 186430 }, { "epoch": 0.76, "grad_norm": 4.780183792114258, "learning_rate": 0.0002, "loss": 1.8241, "step": 186440 }, { "epoch": 0.76, "grad_norm": 5.851568698883057, "learning_rate": 0.0002, "loss": 1.5578, "step": 186450 }, { "epoch": 0.76, "grad_norm": 2.949246644973755, "learning_rate": 0.0002, "loss": 1.8561, "step": 186460 }, { "epoch": 0.76, "grad_norm": 2.119004011154175, "learning_rate": 0.0002, "loss": 1.6301, "step": 186470 }, { "epoch": 0.76, "grad_norm": 2.1359915733337402, "learning_rate": 0.0002, "loss": 1.6356, "step": 186480 }, { "epoch": 0.76, "grad_norm": 6.6086015701293945, "learning_rate": 0.0002, "loss": 1.4761, "step": 186490 }, { "epoch": 0.76, "grad_norm": 3.3639562129974365, "learning_rate": 0.0002, "loss": 1.2486, "step": 186500 }, { "epoch": 0.76, "grad_norm": 1.467928409576416, "learning_rate": 0.0002, "loss": 1.5255, "step": 186510 }, { "epoch": 0.76, "grad_norm": 2.741250514984131, "learning_rate": 0.0002, "loss": 1.5974, "step": 186520 }, { "epoch": 0.76, "grad_norm": 2.5685675144195557, "learning_rate": 0.0002, "loss": 1.861, "step": 186530 }, { "epoch": 0.76, "grad_norm": 2.1286001205444336, "learning_rate": 0.0002, "loss": 1.4729, "step": 186540 }, { "epoch": 0.76, "grad_norm": 3.301827907562256, "learning_rate": 0.0002, "loss": 1.4677, "step": 186550 }, { "epoch": 0.76, "grad_norm": 3.3575851917266846, "learning_rate": 0.0002, "loss": 1.6232, "step": 186560 }, { "epoch": 0.76, "grad_norm": 2.116152048110962, "learning_rate": 0.0002, "loss": 1.3513, "step": 186570 }, { "epoch": 0.76, "grad_norm": 3.6917099952697754, "learning_rate": 0.0002, "loss": 1.7504, "step": 186580 }, { "epoch": 0.76, "grad_norm": 2.9869635105133057, "learning_rate": 0.0002, "loss": 1.5941, "step": 186590 }, { "epoch": 0.76, "grad_norm": 3.340202569961548, "learning_rate": 0.0002, "loss": 1.6083, "step": 186600 }, { "epoch": 0.76, "grad_norm": 4.531144142150879, "learning_rate": 0.0002, "loss": 1.6945, "step": 186610 }, { "epoch": 0.76, "grad_norm": 3.2144486904144287, "learning_rate": 0.0002, "loss": 1.6333, "step": 186620 }, { "epoch": 0.76, "grad_norm": 5.130369663238525, "learning_rate": 0.0002, "loss": 1.4701, "step": 186630 }, { "epoch": 0.76, "grad_norm": 2.607938289642334, "learning_rate": 0.0002, "loss": 1.5485, "step": 186640 }, { "epoch": 0.76, "grad_norm": 5.082418441772461, "learning_rate": 0.0002, "loss": 1.6973, "step": 186650 }, { "epoch": 0.76, "grad_norm": 3.559628963470459, "learning_rate": 0.0002, "loss": 1.6108, "step": 186660 }, { "epoch": 0.76, "grad_norm": 2.4163818359375, "learning_rate": 0.0002, "loss": 1.3272, "step": 186670 }, { "epoch": 0.76, "grad_norm": 2.6571035385131836, "learning_rate": 0.0002, "loss": 1.6811, "step": 186680 }, { "epoch": 0.76, "grad_norm": 2.400909185409546, "learning_rate": 0.0002, "loss": 1.4512, "step": 186690 }, { "epoch": 0.76, "grad_norm": 4.0981125831604, "learning_rate": 0.0002, "loss": 1.4583, "step": 186700 }, { "epoch": 0.76, "grad_norm": 3.4842000007629395, "learning_rate": 0.0002, "loss": 1.3051, "step": 186710 }, { "epoch": 0.76, "grad_norm": 1.7646172046661377, "learning_rate": 0.0002, "loss": 1.8939, "step": 186720 }, { "epoch": 0.76, "grad_norm": 2.0257086753845215, "learning_rate": 0.0002, "loss": 1.5873, "step": 186730 }, { "epoch": 0.76, "grad_norm": 2.7757129669189453, "learning_rate": 0.0002, "loss": 1.4662, "step": 186740 }, { "epoch": 0.76, "grad_norm": 1.9936048984527588, "learning_rate": 0.0002, "loss": 1.9711, "step": 186750 }, { "epoch": 0.76, "grad_norm": 3.6384823322296143, "learning_rate": 0.0002, "loss": 1.6972, "step": 186760 }, { "epoch": 0.76, "grad_norm": 4.7209930419921875, "learning_rate": 0.0002, "loss": 1.6891, "step": 186770 }, { "epoch": 0.76, "grad_norm": 2.351414203643799, "learning_rate": 0.0002, "loss": 1.4732, "step": 186780 }, { "epoch": 0.76, "grad_norm": 2.318912982940674, "learning_rate": 0.0002, "loss": 1.4469, "step": 186790 }, { "epoch": 0.76, "grad_norm": 3.1761090755462646, "learning_rate": 0.0002, "loss": 1.7498, "step": 186800 }, { "epoch": 0.76, "grad_norm": 3.7924046516418457, "learning_rate": 0.0002, "loss": 1.5948, "step": 186810 }, { "epoch": 0.76, "grad_norm": 2.387315273284912, "learning_rate": 0.0002, "loss": 1.8481, "step": 186820 }, { "epoch": 0.76, "grad_norm": 2.7702596187591553, "learning_rate": 0.0002, "loss": 1.6345, "step": 186830 }, { "epoch": 0.76, "grad_norm": 2.407644271850586, "learning_rate": 0.0002, "loss": 1.7223, "step": 186840 }, { "epoch": 0.76, "grad_norm": 1.7532545328140259, "learning_rate": 0.0002, "loss": 1.7746, "step": 186850 }, { "epoch": 0.76, "grad_norm": 5.619864463806152, "learning_rate": 0.0002, "loss": 1.6457, "step": 186860 }, { "epoch": 0.76, "grad_norm": 2.7323765754699707, "learning_rate": 0.0002, "loss": 1.7769, "step": 186870 }, { "epoch": 0.76, "grad_norm": 3.570594549179077, "learning_rate": 0.0002, "loss": 1.7688, "step": 186880 }, { "epoch": 0.76, "grad_norm": 1.7145828008651733, "learning_rate": 0.0002, "loss": 1.5993, "step": 186890 }, { "epoch": 0.76, "grad_norm": 3.2156898975372314, "learning_rate": 0.0002, "loss": 1.3673, "step": 186900 }, { "epoch": 0.76, "grad_norm": 1.886936068534851, "learning_rate": 0.0002, "loss": 1.5001, "step": 186910 }, { "epoch": 0.76, "grad_norm": 2.1371169090270996, "learning_rate": 0.0002, "loss": 1.7343, "step": 186920 }, { "epoch": 0.76, "grad_norm": 3.6109838485717773, "learning_rate": 0.0002, "loss": 1.4685, "step": 186930 }, { "epoch": 0.76, "grad_norm": 3.0955605506896973, "learning_rate": 0.0002, "loss": 1.419, "step": 186940 }, { "epoch": 0.76, "grad_norm": 3.338019847869873, "learning_rate": 0.0002, "loss": 1.9052, "step": 186950 }, { "epoch": 0.76, "grad_norm": 3.535576820373535, "learning_rate": 0.0002, "loss": 1.7159, "step": 186960 }, { "epoch": 0.76, "grad_norm": 2.4601051807403564, "learning_rate": 0.0002, "loss": 1.315, "step": 186970 }, { "epoch": 0.76, "grad_norm": 4.7871246337890625, "learning_rate": 0.0002, "loss": 1.7404, "step": 186980 }, { "epoch": 0.76, "grad_norm": 3.942884922027588, "learning_rate": 0.0002, "loss": 1.4834, "step": 186990 }, { "epoch": 0.76, "grad_norm": 2.456291675567627, "learning_rate": 0.0002, "loss": 1.5011, "step": 187000 }, { "epoch": 0.76, "grad_norm": 1.7126749753952026, "learning_rate": 0.0002, "loss": 1.4192, "step": 187010 }, { "epoch": 0.76, "grad_norm": 3.8107147216796875, "learning_rate": 0.0002, "loss": 1.4557, "step": 187020 }, { "epoch": 0.76, "grad_norm": 2.8937695026397705, "learning_rate": 0.0002, "loss": 1.6838, "step": 187030 }, { "epoch": 0.76, "grad_norm": 2.4287540912628174, "learning_rate": 0.0002, "loss": 1.3449, "step": 187040 }, { "epoch": 0.76, "grad_norm": 4.282341957092285, "learning_rate": 0.0002, "loss": 1.4044, "step": 187050 }, { "epoch": 0.76, "grad_norm": 2.3792576789855957, "learning_rate": 0.0002, "loss": 1.4982, "step": 187060 }, { "epoch": 0.76, "grad_norm": 2.554124355316162, "learning_rate": 0.0002, "loss": 1.7667, "step": 187070 }, { "epoch": 0.76, "grad_norm": 2.0035130977630615, "learning_rate": 0.0002, "loss": 1.4707, "step": 187080 }, { "epoch": 0.76, "grad_norm": 2.6317481994628906, "learning_rate": 0.0002, "loss": 1.6164, "step": 187090 }, { "epoch": 0.76, "grad_norm": 3.0643489360809326, "learning_rate": 0.0002, "loss": 1.5367, "step": 187100 }, { "epoch": 0.76, "grad_norm": 3.3597161769866943, "learning_rate": 0.0002, "loss": 1.4483, "step": 187110 }, { "epoch": 0.76, "grad_norm": 2.439358949661255, "learning_rate": 0.0002, "loss": 1.3555, "step": 187120 }, { "epoch": 0.76, "grad_norm": 4.065363883972168, "learning_rate": 0.0002, "loss": 1.7436, "step": 187130 }, { "epoch": 0.76, "grad_norm": 2.1764891147613525, "learning_rate": 0.0002, "loss": 1.5083, "step": 187140 }, { "epoch": 0.76, "grad_norm": 2.6553142070770264, "learning_rate": 0.0002, "loss": 1.554, "step": 187150 }, { "epoch": 0.76, "grad_norm": 2.1687686443328857, "learning_rate": 0.0002, "loss": 1.5233, "step": 187160 }, { "epoch": 0.76, "grad_norm": 2.9147896766662598, "learning_rate": 0.0002, "loss": 1.8378, "step": 187170 }, { "epoch": 0.76, "grad_norm": 5.558018207550049, "learning_rate": 0.0002, "loss": 1.4179, "step": 187180 }, { "epoch": 0.76, "grad_norm": 1.84568190574646, "learning_rate": 0.0002, "loss": 1.3728, "step": 187190 }, { "epoch": 0.76, "grad_norm": 3.583251714706421, "learning_rate": 0.0002, "loss": 1.8893, "step": 187200 }, { "epoch": 0.76, "grad_norm": 4.981374263763428, "learning_rate": 0.0002, "loss": 1.6473, "step": 187210 }, { "epoch": 0.76, "grad_norm": 2.5920071601867676, "learning_rate": 0.0002, "loss": 1.5858, "step": 187220 }, { "epoch": 0.76, "grad_norm": 2.399078369140625, "learning_rate": 0.0002, "loss": 1.6237, "step": 187230 }, { "epoch": 0.76, "grad_norm": 2.5298662185668945, "learning_rate": 0.0002, "loss": 1.3627, "step": 187240 }, { "epoch": 0.76, "grad_norm": 2.4332730770111084, "learning_rate": 0.0002, "loss": 1.7167, "step": 187250 }, { "epoch": 0.76, "grad_norm": 2.874260425567627, "learning_rate": 0.0002, "loss": 1.454, "step": 187260 }, { "epoch": 0.76, "grad_norm": 3.598451852798462, "learning_rate": 0.0002, "loss": 1.4693, "step": 187270 }, { "epoch": 0.76, "grad_norm": 4.026856899261475, "learning_rate": 0.0002, "loss": 1.5601, "step": 187280 }, { "epoch": 0.76, "grad_norm": 2.337008237838745, "learning_rate": 0.0002, "loss": 1.4625, "step": 187290 }, { "epoch": 0.76, "grad_norm": 3.0251786708831787, "learning_rate": 0.0002, "loss": 1.6989, "step": 187300 }, { "epoch": 0.76, "grad_norm": 2.017012119293213, "learning_rate": 0.0002, "loss": 1.555, "step": 187310 }, { "epoch": 0.76, "grad_norm": 4.399107933044434, "learning_rate": 0.0002, "loss": 1.655, "step": 187320 }, { "epoch": 0.76, "grad_norm": 3.066401958465576, "learning_rate": 0.0002, "loss": 1.3571, "step": 187330 }, { "epoch": 0.76, "grad_norm": 2.6284773349761963, "learning_rate": 0.0002, "loss": 1.4881, "step": 187340 }, { "epoch": 0.76, "grad_norm": 4.291021823883057, "learning_rate": 0.0002, "loss": 1.4431, "step": 187350 }, { "epoch": 0.76, "grad_norm": 7.1795525550842285, "learning_rate": 0.0002, "loss": 1.5561, "step": 187360 }, { "epoch": 0.76, "grad_norm": 2.846480131149292, "learning_rate": 0.0002, "loss": 1.5069, "step": 187370 }, { "epoch": 0.76, "grad_norm": 2.1685523986816406, "learning_rate": 0.0002, "loss": 1.7616, "step": 187380 }, { "epoch": 0.76, "grad_norm": 2.006887197494507, "learning_rate": 0.0002, "loss": 1.6078, "step": 187390 }, { "epoch": 0.76, "grad_norm": 2.211934804916382, "learning_rate": 0.0002, "loss": 1.6119, "step": 187400 }, { "epoch": 0.76, "grad_norm": 1.9838536977767944, "learning_rate": 0.0002, "loss": 1.6365, "step": 187410 }, { "epoch": 0.76, "grad_norm": 2.038243532180786, "learning_rate": 0.0002, "loss": 1.7752, "step": 187420 }, { "epoch": 0.76, "grad_norm": 4.52028226852417, "learning_rate": 0.0002, "loss": 1.5142, "step": 187430 }, { "epoch": 0.76, "grad_norm": 4.490276336669922, "learning_rate": 0.0002, "loss": 1.3018, "step": 187440 }, { "epoch": 0.76, "grad_norm": 2.5833687782287598, "learning_rate": 0.0002, "loss": 1.52, "step": 187450 }, { "epoch": 0.76, "grad_norm": 3.13950777053833, "learning_rate": 0.0002, "loss": 1.5391, "step": 187460 }, { "epoch": 0.76, "grad_norm": 5.515466213226318, "learning_rate": 0.0002, "loss": 1.6833, "step": 187470 }, { "epoch": 0.76, "grad_norm": 2.524500608444214, "learning_rate": 0.0002, "loss": 1.3936, "step": 187480 }, { "epoch": 0.76, "grad_norm": 4.185404300689697, "learning_rate": 0.0002, "loss": 1.4748, "step": 187490 }, { "epoch": 0.76, "grad_norm": 2.915869951248169, "learning_rate": 0.0002, "loss": 1.4647, "step": 187500 }, { "epoch": 0.76, "grad_norm": 3.137017011642456, "learning_rate": 0.0002, "loss": 1.36, "step": 187510 }, { "epoch": 0.76, "grad_norm": 2.1833364963531494, "learning_rate": 0.0002, "loss": 1.7518, "step": 187520 }, { "epoch": 0.76, "grad_norm": 2.4222168922424316, "learning_rate": 0.0002, "loss": 1.5083, "step": 187530 }, { "epoch": 0.76, "grad_norm": 2.9952664375305176, "learning_rate": 0.0002, "loss": 1.5066, "step": 187540 }, { "epoch": 0.76, "grad_norm": 3.3376920223236084, "learning_rate": 0.0002, "loss": 1.9348, "step": 187550 }, { "epoch": 0.76, "grad_norm": 1.9564287662506104, "learning_rate": 0.0002, "loss": 1.591, "step": 187560 }, { "epoch": 0.76, "grad_norm": 5.893379211425781, "learning_rate": 0.0002, "loss": 1.5462, "step": 187570 }, { "epoch": 0.76, "grad_norm": 2.2068169116973877, "learning_rate": 0.0002, "loss": 1.5413, "step": 187580 }, { "epoch": 0.76, "grad_norm": 1.6109602451324463, "learning_rate": 0.0002, "loss": 1.556, "step": 187590 }, { "epoch": 0.76, "grad_norm": 5.375267505645752, "learning_rate": 0.0002, "loss": 1.6884, "step": 187600 }, { "epoch": 0.76, "grad_norm": 2.7623116970062256, "learning_rate": 0.0002, "loss": 1.2944, "step": 187610 }, { "epoch": 0.76, "grad_norm": 2.8036065101623535, "learning_rate": 0.0002, "loss": 1.7095, "step": 187620 }, { "epoch": 0.76, "grad_norm": 4.740669250488281, "learning_rate": 0.0002, "loss": 1.3664, "step": 187630 }, { "epoch": 0.76, "grad_norm": 12.26758861541748, "learning_rate": 0.0002, "loss": 1.663, "step": 187640 }, { "epoch": 0.76, "grad_norm": 3.7896728515625, "learning_rate": 0.0002, "loss": 1.6939, "step": 187650 }, { "epoch": 0.76, "grad_norm": 2.4643211364746094, "learning_rate": 0.0002, "loss": 1.6117, "step": 187660 }, { "epoch": 0.76, "grad_norm": 3.067093849182129, "learning_rate": 0.0002, "loss": 1.4685, "step": 187670 }, { "epoch": 0.76, "grad_norm": 2.3063313961029053, "learning_rate": 0.0002, "loss": 1.3655, "step": 187680 }, { "epoch": 0.76, "grad_norm": 4.102051734924316, "learning_rate": 0.0002, "loss": 1.8853, "step": 187690 }, { "epoch": 0.76, "grad_norm": 3.6773428916931152, "learning_rate": 0.0002, "loss": 1.4249, "step": 187700 }, { "epoch": 0.76, "grad_norm": 3.4393107891082764, "learning_rate": 0.0002, "loss": 1.6188, "step": 187710 }, { "epoch": 0.76, "grad_norm": 2.7164509296417236, "learning_rate": 0.0002, "loss": 1.6251, "step": 187720 }, { "epoch": 0.76, "grad_norm": 2.402925729751587, "learning_rate": 0.0002, "loss": 1.3802, "step": 187730 }, { "epoch": 0.76, "grad_norm": 2.841181755065918, "learning_rate": 0.0002, "loss": 1.6593, "step": 187740 }, { "epoch": 0.76, "grad_norm": 1.8562217950820923, "learning_rate": 0.0002, "loss": 1.6967, "step": 187750 }, { "epoch": 0.76, "grad_norm": 2.514641046524048, "learning_rate": 0.0002, "loss": 1.6698, "step": 187760 }, { "epoch": 0.76, "grad_norm": 4.104755878448486, "learning_rate": 0.0002, "loss": 1.6285, "step": 187770 }, { "epoch": 0.76, "grad_norm": 3.5419750213623047, "learning_rate": 0.0002, "loss": 1.6572, "step": 187780 }, { "epoch": 0.76, "grad_norm": 1.6128679513931274, "learning_rate": 0.0002, "loss": 1.477, "step": 187790 }, { "epoch": 0.76, "grad_norm": 5.119298458099365, "learning_rate": 0.0002, "loss": 1.6136, "step": 187800 }, { "epoch": 0.76, "grad_norm": 2.598564386367798, "learning_rate": 0.0002, "loss": 1.4219, "step": 187810 }, { "epoch": 0.76, "grad_norm": 3.3476791381835938, "learning_rate": 0.0002, "loss": 1.6169, "step": 187820 }, { "epoch": 0.76, "grad_norm": 3.0794787406921387, "learning_rate": 0.0002, "loss": 1.7146, "step": 187830 }, { "epoch": 0.76, "grad_norm": 3.4599854946136475, "learning_rate": 0.0002, "loss": 1.6155, "step": 187840 }, { "epoch": 0.76, "grad_norm": 1.6489083766937256, "learning_rate": 0.0002, "loss": 1.3501, "step": 187850 }, { "epoch": 0.76, "grad_norm": 3.637141466140747, "learning_rate": 0.0002, "loss": 1.5929, "step": 187860 }, { "epoch": 0.76, "grad_norm": 2.0553581714630127, "learning_rate": 0.0002, "loss": 1.6044, "step": 187870 }, { "epoch": 0.76, "grad_norm": 2.2523021697998047, "learning_rate": 0.0002, "loss": 1.61, "step": 187880 }, { "epoch": 0.76, "grad_norm": 3.0768916606903076, "learning_rate": 0.0002, "loss": 1.5852, "step": 187890 }, { "epoch": 0.76, "grad_norm": 4.449456691741943, "learning_rate": 0.0002, "loss": 1.6659, "step": 187900 }, { "epoch": 0.76, "grad_norm": 3.1214237213134766, "learning_rate": 0.0002, "loss": 1.5415, "step": 187910 }, { "epoch": 0.77, "grad_norm": 2.512570381164551, "learning_rate": 0.0002, "loss": 1.3328, "step": 187920 }, { "epoch": 0.77, "grad_norm": 2.7123048305511475, "learning_rate": 0.0002, "loss": 1.7567, "step": 187930 }, { "epoch": 0.77, "grad_norm": 3.0503597259521484, "learning_rate": 0.0002, "loss": 1.6089, "step": 187940 }, { "epoch": 0.77, "grad_norm": 2.6076767444610596, "learning_rate": 0.0002, "loss": 1.675, "step": 187950 }, { "epoch": 0.77, "grad_norm": 2.976773738861084, "learning_rate": 0.0002, "loss": 1.3247, "step": 187960 }, { "epoch": 0.77, "grad_norm": 5.994812488555908, "learning_rate": 0.0002, "loss": 1.6026, "step": 187970 }, { "epoch": 0.77, "grad_norm": 2.633575677871704, "learning_rate": 0.0002, "loss": 1.6908, "step": 187980 }, { "epoch": 0.77, "grad_norm": 2.8268065452575684, "learning_rate": 0.0002, "loss": 1.691, "step": 187990 }, { "epoch": 0.77, "grad_norm": 3.381512403488159, "learning_rate": 0.0002, "loss": 1.5377, "step": 188000 }, { "epoch": 0.77, "grad_norm": 2.3634696006774902, "learning_rate": 0.0002, "loss": 1.3472, "step": 188010 }, { "epoch": 0.77, "grad_norm": 4.889230728149414, "learning_rate": 0.0002, "loss": 1.7516, "step": 188020 }, { "epoch": 0.77, "grad_norm": 2.3745381832122803, "learning_rate": 0.0002, "loss": 1.6056, "step": 188030 }, { "epoch": 0.77, "grad_norm": 3.736821413040161, "learning_rate": 0.0002, "loss": 1.461, "step": 188040 }, { "epoch": 0.77, "grad_norm": 4.788425445556641, "learning_rate": 0.0002, "loss": 1.6148, "step": 188050 }, { "epoch": 0.77, "grad_norm": 3.2587969303131104, "learning_rate": 0.0002, "loss": 1.5487, "step": 188060 }, { "epoch": 0.77, "grad_norm": 3.0131051540374756, "learning_rate": 0.0002, "loss": 1.6331, "step": 188070 }, { "epoch": 0.77, "grad_norm": 5.877961158752441, "learning_rate": 0.0002, "loss": 1.4842, "step": 188080 }, { "epoch": 0.77, "grad_norm": 2.5196380615234375, "learning_rate": 0.0002, "loss": 1.6724, "step": 188090 }, { "epoch": 0.77, "grad_norm": 2.127194881439209, "learning_rate": 0.0002, "loss": 1.5291, "step": 188100 }, { "epoch": 0.77, "grad_norm": 2.474087953567505, "learning_rate": 0.0002, "loss": 1.6622, "step": 188110 }, { "epoch": 0.77, "grad_norm": 3.5175914764404297, "learning_rate": 0.0002, "loss": 1.5753, "step": 188120 }, { "epoch": 0.77, "grad_norm": 3.2054834365844727, "learning_rate": 0.0002, "loss": 1.5468, "step": 188130 }, { "epoch": 0.77, "grad_norm": 2.5263452529907227, "learning_rate": 0.0002, "loss": 1.6722, "step": 188140 }, { "epoch": 0.77, "grad_norm": 2.821000099182129, "learning_rate": 0.0002, "loss": 1.6062, "step": 188150 }, { "epoch": 0.77, "grad_norm": 2.319017171859741, "learning_rate": 0.0002, "loss": 1.715, "step": 188160 }, { "epoch": 0.77, "grad_norm": 3.1849942207336426, "learning_rate": 0.0002, "loss": 1.5672, "step": 188170 }, { "epoch": 0.77, "grad_norm": 2.6172614097595215, "learning_rate": 0.0002, "loss": 1.5329, "step": 188180 }, { "epoch": 0.77, "grad_norm": 3.94594669342041, "learning_rate": 0.0002, "loss": 1.565, "step": 188190 }, { "epoch": 0.77, "grad_norm": 3.7254419326782227, "learning_rate": 0.0002, "loss": 1.5715, "step": 188200 }, { "epoch": 0.77, "grad_norm": 5.197427749633789, "learning_rate": 0.0002, "loss": 1.7127, "step": 188210 }, { "epoch": 0.77, "grad_norm": 3.1483638286590576, "learning_rate": 0.0002, "loss": 1.6404, "step": 188220 }, { "epoch": 0.77, "grad_norm": 2.7832555770874023, "learning_rate": 0.0002, "loss": 1.443, "step": 188230 }, { "epoch": 0.77, "grad_norm": 2.9954843521118164, "learning_rate": 0.0002, "loss": 1.7079, "step": 188240 }, { "epoch": 0.77, "grad_norm": 2.2703335285186768, "learning_rate": 0.0002, "loss": 1.6065, "step": 188250 }, { "epoch": 0.77, "grad_norm": 2.5144238471984863, "learning_rate": 0.0002, "loss": 1.4262, "step": 188260 }, { "epoch": 0.77, "grad_norm": 2.3010921478271484, "learning_rate": 0.0002, "loss": 1.5574, "step": 188270 }, { "epoch": 0.77, "grad_norm": 1.5427805185317993, "learning_rate": 0.0002, "loss": 1.5174, "step": 188280 }, { "epoch": 0.77, "grad_norm": 2.3612661361694336, "learning_rate": 0.0002, "loss": 1.4819, "step": 188290 }, { "epoch": 0.77, "grad_norm": 2.9688639640808105, "learning_rate": 0.0002, "loss": 1.7859, "step": 188300 }, { "epoch": 0.77, "grad_norm": 2.2869505882263184, "learning_rate": 0.0002, "loss": 1.6207, "step": 188310 }, { "epoch": 0.77, "grad_norm": 3.386385440826416, "learning_rate": 0.0002, "loss": 1.5333, "step": 188320 }, { "epoch": 0.77, "grad_norm": 2.312044143676758, "learning_rate": 0.0002, "loss": 1.5777, "step": 188330 }, { "epoch": 0.77, "grad_norm": 2.705275297164917, "learning_rate": 0.0002, "loss": 1.7202, "step": 188340 }, { "epoch": 0.77, "grad_norm": 2.1069319248199463, "learning_rate": 0.0002, "loss": 1.4846, "step": 188350 }, { "epoch": 0.77, "grad_norm": 5.231747627258301, "learning_rate": 0.0002, "loss": 1.9046, "step": 188360 }, { "epoch": 0.77, "grad_norm": 4.026188373565674, "learning_rate": 0.0002, "loss": 1.8438, "step": 188370 }, { "epoch": 0.77, "grad_norm": 7.55496072769165, "learning_rate": 0.0002, "loss": 1.2128, "step": 188380 }, { "epoch": 0.77, "grad_norm": 3.8010189533233643, "learning_rate": 0.0002, "loss": 1.63, "step": 188390 }, { "epoch": 0.77, "grad_norm": 3.12896728515625, "learning_rate": 0.0002, "loss": 1.7681, "step": 188400 }, { "epoch": 0.77, "grad_norm": 2.8185060024261475, "learning_rate": 0.0002, "loss": 1.7113, "step": 188410 }, { "epoch": 0.77, "grad_norm": 2.6974029541015625, "learning_rate": 0.0002, "loss": 1.6311, "step": 188420 }, { "epoch": 0.77, "grad_norm": 3.9740707874298096, "learning_rate": 0.0002, "loss": 1.6862, "step": 188430 }, { "epoch": 0.77, "grad_norm": 2.744966506958008, "learning_rate": 0.0002, "loss": 1.4114, "step": 188440 }, { "epoch": 0.77, "grad_norm": 3.0244269371032715, "learning_rate": 0.0002, "loss": 1.4845, "step": 188450 }, { "epoch": 0.77, "grad_norm": 2.312485933303833, "learning_rate": 0.0002, "loss": 1.7573, "step": 188460 }, { "epoch": 0.77, "grad_norm": 2.8012473583221436, "learning_rate": 0.0002, "loss": 1.6297, "step": 188470 }, { "epoch": 0.77, "grad_norm": 2.200997829437256, "learning_rate": 0.0002, "loss": 1.5654, "step": 188480 }, { "epoch": 0.77, "grad_norm": 3.38435435295105, "learning_rate": 0.0002, "loss": 1.7527, "step": 188490 }, { "epoch": 0.77, "grad_norm": 2.648974895477295, "learning_rate": 0.0002, "loss": 1.5442, "step": 188500 }, { "epoch": 0.77, "grad_norm": 2.569976568222046, "learning_rate": 0.0002, "loss": 1.5513, "step": 188510 }, { "epoch": 0.77, "grad_norm": 2.069459915161133, "learning_rate": 0.0002, "loss": 1.6351, "step": 188520 }, { "epoch": 0.77, "grad_norm": 2.2271549701690674, "learning_rate": 0.0002, "loss": 1.6729, "step": 188530 }, { "epoch": 0.77, "grad_norm": 3.1947596073150635, "learning_rate": 0.0002, "loss": 1.4065, "step": 188540 }, { "epoch": 0.77, "grad_norm": 2.3122401237487793, "learning_rate": 0.0002, "loss": 1.3361, "step": 188550 }, { "epoch": 0.77, "grad_norm": 3.0404393672943115, "learning_rate": 0.0002, "loss": 1.6516, "step": 188560 }, { "epoch": 0.77, "grad_norm": 1.720941185951233, "learning_rate": 0.0002, "loss": 1.4295, "step": 188570 }, { "epoch": 0.77, "grad_norm": 2.445988416671753, "learning_rate": 0.0002, "loss": 1.4688, "step": 188580 }, { "epoch": 0.77, "grad_norm": 5.27188777923584, "learning_rate": 0.0002, "loss": 1.766, "step": 188590 }, { "epoch": 0.77, "grad_norm": 2.5938093662261963, "learning_rate": 0.0002, "loss": 1.7028, "step": 188600 }, { "epoch": 0.77, "grad_norm": 3.5108675956726074, "learning_rate": 0.0002, "loss": 1.7336, "step": 188610 }, { "epoch": 0.77, "grad_norm": 4.631674289703369, "learning_rate": 0.0002, "loss": 1.9069, "step": 188620 }, { "epoch": 0.77, "grad_norm": 2.7259585857391357, "learning_rate": 0.0002, "loss": 1.7516, "step": 188630 }, { "epoch": 0.77, "grad_norm": 4.356855392456055, "learning_rate": 0.0002, "loss": 1.6491, "step": 188640 }, { "epoch": 0.77, "grad_norm": 1.8184406757354736, "learning_rate": 0.0002, "loss": 1.6489, "step": 188650 }, { "epoch": 0.77, "grad_norm": 2.786709785461426, "learning_rate": 0.0002, "loss": 1.8001, "step": 188660 }, { "epoch": 0.77, "grad_norm": 1.7552740573883057, "learning_rate": 0.0002, "loss": 1.6384, "step": 188670 }, { "epoch": 0.77, "grad_norm": 4.126555442810059, "learning_rate": 0.0002, "loss": 1.4189, "step": 188680 }, { "epoch": 0.77, "grad_norm": 3.107175827026367, "learning_rate": 0.0002, "loss": 1.4384, "step": 188690 }, { "epoch": 0.77, "grad_norm": 3.2432382106781006, "learning_rate": 0.0002, "loss": 1.6762, "step": 188700 }, { "epoch": 0.77, "grad_norm": 4.759862899780273, "learning_rate": 0.0002, "loss": 1.546, "step": 188710 }, { "epoch": 0.77, "grad_norm": 3.501814603805542, "learning_rate": 0.0002, "loss": 1.7323, "step": 188720 }, { "epoch": 0.77, "grad_norm": 2.802748918533325, "learning_rate": 0.0002, "loss": 1.6553, "step": 188730 }, { "epoch": 0.77, "grad_norm": 2.683795213699341, "learning_rate": 0.0002, "loss": 1.5523, "step": 188740 }, { "epoch": 0.77, "grad_norm": 5.402125835418701, "learning_rate": 0.0002, "loss": 1.436, "step": 188750 }, { "epoch": 0.77, "grad_norm": 3.2439169883728027, "learning_rate": 0.0002, "loss": 1.3814, "step": 188760 }, { "epoch": 0.77, "grad_norm": 3.830531358718872, "learning_rate": 0.0002, "loss": 1.6153, "step": 188770 }, { "epoch": 0.77, "grad_norm": 5.755814075469971, "learning_rate": 0.0002, "loss": 1.3977, "step": 188780 }, { "epoch": 0.77, "grad_norm": 1.7433464527130127, "learning_rate": 0.0002, "loss": 1.6408, "step": 188790 }, { "epoch": 0.77, "grad_norm": 5.300205230712891, "learning_rate": 0.0002, "loss": 1.644, "step": 188800 }, { "epoch": 0.77, "grad_norm": 3.479004144668579, "learning_rate": 0.0002, "loss": 1.5173, "step": 188810 }, { "epoch": 0.77, "grad_norm": 4.244363307952881, "learning_rate": 0.0002, "loss": 1.5348, "step": 188820 }, { "epoch": 0.77, "grad_norm": 3.2163565158843994, "learning_rate": 0.0002, "loss": 1.6603, "step": 188830 }, { "epoch": 0.77, "grad_norm": 6.5685577392578125, "learning_rate": 0.0002, "loss": 1.5048, "step": 188840 }, { "epoch": 0.77, "grad_norm": 2.6150388717651367, "learning_rate": 0.0002, "loss": 1.4472, "step": 188850 }, { "epoch": 0.77, "grad_norm": 3.5163888931274414, "learning_rate": 0.0002, "loss": 1.6043, "step": 188860 }, { "epoch": 0.77, "grad_norm": 2.7858662605285645, "learning_rate": 0.0002, "loss": 1.5187, "step": 188870 }, { "epoch": 0.77, "grad_norm": 1.9617825746536255, "learning_rate": 0.0002, "loss": 1.5507, "step": 188880 }, { "epoch": 0.77, "grad_norm": 2.211179494857788, "learning_rate": 0.0002, "loss": 1.6663, "step": 188890 }, { "epoch": 0.77, "grad_norm": 2.440040111541748, "learning_rate": 0.0002, "loss": 1.5232, "step": 188900 }, { "epoch": 0.77, "grad_norm": 2.141361713409424, "learning_rate": 0.0002, "loss": 1.5659, "step": 188910 }, { "epoch": 0.77, "grad_norm": 2.812073230743408, "learning_rate": 0.0002, "loss": 1.6897, "step": 188920 }, { "epoch": 0.77, "grad_norm": 3.374992847442627, "learning_rate": 0.0002, "loss": 1.4824, "step": 188930 }, { "epoch": 0.77, "grad_norm": 3.2308030128479004, "learning_rate": 0.0002, "loss": 1.4059, "step": 188940 }, { "epoch": 0.77, "grad_norm": 2.131049156188965, "learning_rate": 0.0002, "loss": 1.6749, "step": 188950 }, { "epoch": 0.77, "grad_norm": 2.795673370361328, "learning_rate": 0.0002, "loss": 1.502, "step": 188960 }, { "epoch": 0.77, "grad_norm": 2.214153528213501, "learning_rate": 0.0002, "loss": 1.6966, "step": 188970 }, { "epoch": 0.77, "grad_norm": 2.0655109882354736, "learning_rate": 0.0002, "loss": 1.6498, "step": 188980 }, { "epoch": 0.77, "grad_norm": 2.5474541187286377, "learning_rate": 0.0002, "loss": 1.443, "step": 188990 }, { "epoch": 0.77, "grad_norm": 6.668135643005371, "learning_rate": 0.0002, "loss": 1.7189, "step": 189000 }, { "epoch": 0.77, "grad_norm": 2.704315423965454, "learning_rate": 0.0002, "loss": 1.7243, "step": 189010 }, { "epoch": 0.77, "grad_norm": 4.6740827560424805, "learning_rate": 0.0002, "loss": 1.5924, "step": 189020 }, { "epoch": 0.77, "grad_norm": 1.8611547946929932, "learning_rate": 0.0002, "loss": 1.7007, "step": 189030 }, { "epoch": 0.77, "grad_norm": 2.774675130844116, "learning_rate": 0.0002, "loss": 1.5624, "step": 189040 }, { "epoch": 0.77, "grad_norm": 3.708099603652954, "learning_rate": 0.0002, "loss": 1.7042, "step": 189050 }, { "epoch": 0.77, "grad_norm": 2.9336090087890625, "learning_rate": 0.0002, "loss": 1.6551, "step": 189060 }, { "epoch": 0.77, "grad_norm": 3.6053998470306396, "learning_rate": 0.0002, "loss": 1.5155, "step": 189070 }, { "epoch": 0.77, "grad_norm": 2.364820718765259, "learning_rate": 0.0002, "loss": 1.457, "step": 189080 }, { "epoch": 0.77, "grad_norm": 2.067906141281128, "learning_rate": 0.0002, "loss": 1.1721, "step": 189090 }, { "epoch": 0.77, "grad_norm": 3.325761556625366, "learning_rate": 0.0002, "loss": 1.6904, "step": 189100 }, { "epoch": 0.77, "grad_norm": 2.748016834259033, "learning_rate": 0.0002, "loss": 1.6864, "step": 189110 }, { "epoch": 0.77, "grad_norm": 2.315539598464966, "learning_rate": 0.0002, "loss": 1.6615, "step": 189120 }, { "epoch": 0.77, "grad_norm": 2.9896132946014404, "learning_rate": 0.0002, "loss": 1.4491, "step": 189130 }, { "epoch": 0.77, "grad_norm": 2.8491296768188477, "learning_rate": 0.0002, "loss": 1.481, "step": 189140 }, { "epoch": 0.77, "grad_norm": 1.8199430704116821, "learning_rate": 0.0002, "loss": 1.4768, "step": 189150 }, { "epoch": 0.77, "grad_norm": 1.906578779220581, "learning_rate": 0.0002, "loss": 1.4436, "step": 189160 }, { "epoch": 0.77, "grad_norm": 1.4088215827941895, "learning_rate": 0.0002, "loss": 1.7098, "step": 189170 }, { "epoch": 0.77, "grad_norm": 2.3294284343719482, "learning_rate": 0.0002, "loss": 1.7306, "step": 189180 }, { "epoch": 0.77, "grad_norm": 5.849423408508301, "learning_rate": 0.0002, "loss": 1.5286, "step": 189190 }, { "epoch": 0.77, "grad_norm": 2.371084451675415, "learning_rate": 0.0002, "loss": 1.7157, "step": 189200 }, { "epoch": 0.77, "grad_norm": 5.415609359741211, "learning_rate": 0.0002, "loss": 1.6319, "step": 189210 }, { "epoch": 0.77, "grad_norm": 3.8625504970550537, "learning_rate": 0.0002, "loss": 1.4907, "step": 189220 }, { "epoch": 0.77, "grad_norm": 2.1115403175354004, "learning_rate": 0.0002, "loss": 1.435, "step": 189230 }, { "epoch": 0.77, "grad_norm": 2.4703001976013184, "learning_rate": 0.0002, "loss": 1.623, "step": 189240 }, { "epoch": 0.77, "grad_norm": 2.847677707672119, "learning_rate": 0.0002, "loss": 1.7622, "step": 189250 }, { "epoch": 0.77, "grad_norm": 2.353288173675537, "learning_rate": 0.0002, "loss": 1.7068, "step": 189260 }, { "epoch": 0.77, "grad_norm": 6.009357929229736, "learning_rate": 0.0002, "loss": 1.5821, "step": 189270 }, { "epoch": 0.77, "grad_norm": 3.54951810836792, "learning_rate": 0.0002, "loss": 1.8582, "step": 189280 }, { "epoch": 0.77, "grad_norm": 3.9284873008728027, "learning_rate": 0.0002, "loss": 1.9035, "step": 189290 }, { "epoch": 0.77, "grad_norm": 3.1164205074310303, "learning_rate": 0.0002, "loss": 1.7508, "step": 189300 }, { "epoch": 0.77, "grad_norm": 3.081791877746582, "learning_rate": 0.0002, "loss": 1.7222, "step": 189310 }, { "epoch": 0.77, "grad_norm": 2.983175754547119, "learning_rate": 0.0002, "loss": 1.4944, "step": 189320 }, { "epoch": 0.77, "grad_norm": 1.564553141593933, "learning_rate": 0.0002, "loss": 1.6307, "step": 189330 }, { "epoch": 0.77, "grad_norm": 4.535298824310303, "learning_rate": 0.0002, "loss": 1.6464, "step": 189340 }, { "epoch": 0.77, "grad_norm": 3.803635358810425, "learning_rate": 0.0002, "loss": 1.6081, "step": 189350 }, { "epoch": 0.77, "grad_norm": 2.8125319480895996, "learning_rate": 0.0002, "loss": 1.4899, "step": 189360 }, { "epoch": 0.77, "grad_norm": 3.2130181789398193, "learning_rate": 0.0002, "loss": 1.5562, "step": 189370 }, { "epoch": 0.77, "grad_norm": 3.977217435836792, "learning_rate": 0.0002, "loss": 1.6227, "step": 189380 }, { "epoch": 0.77, "grad_norm": 3.33988618850708, "learning_rate": 0.0002, "loss": 1.469, "step": 189390 }, { "epoch": 0.77, "grad_norm": 3.9701852798461914, "learning_rate": 0.0002, "loss": 1.6143, "step": 189400 }, { "epoch": 0.77, "grad_norm": 4.050278663635254, "learning_rate": 0.0002, "loss": 1.5738, "step": 189410 }, { "epoch": 0.77, "grad_norm": 5.594799518585205, "learning_rate": 0.0002, "loss": 1.3697, "step": 189420 }, { "epoch": 0.77, "grad_norm": 3.1928720474243164, "learning_rate": 0.0002, "loss": 1.5701, "step": 189430 }, { "epoch": 0.77, "grad_norm": 2.9354135990142822, "learning_rate": 0.0002, "loss": 1.577, "step": 189440 }, { "epoch": 0.77, "grad_norm": 3.162630081176758, "learning_rate": 0.0002, "loss": 1.4317, "step": 189450 }, { "epoch": 0.77, "grad_norm": 3.156709909439087, "learning_rate": 0.0002, "loss": 1.4082, "step": 189460 }, { "epoch": 0.77, "grad_norm": 2.4027063846588135, "learning_rate": 0.0002, "loss": 1.7427, "step": 189470 }, { "epoch": 0.77, "grad_norm": 2.8790948390960693, "learning_rate": 0.0002, "loss": 1.5084, "step": 189480 }, { "epoch": 0.77, "grad_norm": 4.2119975090026855, "learning_rate": 0.0002, "loss": 1.8318, "step": 189490 }, { "epoch": 0.77, "grad_norm": 2.923140525817871, "learning_rate": 0.0002, "loss": 1.635, "step": 189500 }, { "epoch": 0.77, "grad_norm": 4.7288336753845215, "learning_rate": 0.0002, "loss": 1.7032, "step": 189510 }, { "epoch": 0.77, "grad_norm": 3.3965935707092285, "learning_rate": 0.0002, "loss": 1.7924, "step": 189520 }, { "epoch": 0.77, "grad_norm": 2.5540401935577393, "learning_rate": 0.0002, "loss": 1.4686, "step": 189530 }, { "epoch": 0.77, "grad_norm": 5.6805853843688965, "learning_rate": 0.0002, "loss": 1.3815, "step": 189540 }, { "epoch": 0.77, "grad_norm": 2.2761380672454834, "learning_rate": 0.0002, "loss": 1.5127, "step": 189550 }, { "epoch": 0.77, "grad_norm": 1.9556702375411987, "learning_rate": 0.0002, "loss": 1.7319, "step": 189560 }, { "epoch": 0.77, "grad_norm": 3.1734418869018555, "learning_rate": 0.0002, "loss": 1.5366, "step": 189570 }, { "epoch": 0.77, "grad_norm": 2.6700711250305176, "learning_rate": 0.0002, "loss": 1.7411, "step": 189580 }, { "epoch": 0.77, "grad_norm": 3.3052098751068115, "learning_rate": 0.0002, "loss": 1.6904, "step": 189590 }, { "epoch": 0.77, "grad_norm": 2.5132663249969482, "learning_rate": 0.0002, "loss": 1.5274, "step": 189600 }, { "epoch": 0.77, "grad_norm": 3.3668575286865234, "learning_rate": 0.0002, "loss": 1.5813, "step": 189610 }, { "epoch": 0.77, "grad_norm": 1.8266874551773071, "learning_rate": 0.0002, "loss": 1.4628, "step": 189620 }, { "epoch": 0.77, "grad_norm": 2.7785611152648926, "learning_rate": 0.0002, "loss": 1.7957, "step": 189630 }, { "epoch": 0.77, "grad_norm": 1.9968370199203491, "learning_rate": 0.0002, "loss": 1.6742, "step": 189640 }, { "epoch": 0.77, "grad_norm": 3.6886167526245117, "learning_rate": 0.0002, "loss": 1.5033, "step": 189650 }, { "epoch": 0.77, "grad_norm": 2.1741795539855957, "learning_rate": 0.0002, "loss": 1.667, "step": 189660 }, { "epoch": 0.77, "grad_norm": 2.2730793952941895, "learning_rate": 0.0002, "loss": 1.5776, "step": 189670 }, { "epoch": 0.77, "grad_norm": 2.642110824584961, "learning_rate": 0.0002, "loss": 1.2852, "step": 189680 }, { "epoch": 0.77, "grad_norm": 3.178375720977783, "learning_rate": 0.0002, "loss": 1.6183, "step": 189690 }, { "epoch": 0.77, "grad_norm": 2.0274858474731445, "learning_rate": 0.0002, "loss": 1.7052, "step": 189700 }, { "epoch": 0.77, "grad_norm": 4.6180195808410645, "learning_rate": 0.0002, "loss": 1.7308, "step": 189710 }, { "epoch": 0.77, "grad_norm": 2.3564794063568115, "learning_rate": 0.0002, "loss": 1.4765, "step": 189720 }, { "epoch": 0.77, "grad_norm": 3.9163730144500732, "learning_rate": 0.0002, "loss": 1.8922, "step": 189730 }, { "epoch": 0.77, "grad_norm": 2.0561301708221436, "learning_rate": 0.0002, "loss": 1.6703, "step": 189740 }, { "epoch": 0.77, "grad_norm": 4.054860591888428, "learning_rate": 0.0002, "loss": 1.5702, "step": 189750 }, { "epoch": 0.77, "grad_norm": 2.856416702270508, "learning_rate": 0.0002, "loss": 1.6778, "step": 189760 }, { "epoch": 0.77, "grad_norm": 3.7968451976776123, "learning_rate": 0.0002, "loss": 1.7976, "step": 189770 }, { "epoch": 0.77, "grad_norm": 3.2756264209747314, "learning_rate": 0.0002, "loss": 1.606, "step": 189780 }, { "epoch": 0.77, "grad_norm": 3.3994362354278564, "learning_rate": 0.0002, "loss": 1.5671, "step": 189790 }, { "epoch": 0.77, "grad_norm": 2.658031940460205, "learning_rate": 0.0002, "loss": 1.7687, "step": 189800 }, { "epoch": 0.77, "grad_norm": 1.8363603353500366, "learning_rate": 0.0002, "loss": 1.4728, "step": 189810 }, { "epoch": 0.77, "grad_norm": 5.7548747062683105, "learning_rate": 0.0002, "loss": 1.5595, "step": 189820 }, { "epoch": 0.77, "grad_norm": 2.638806104660034, "learning_rate": 0.0002, "loss": 1.4556, "step": 189830 }, { "epoch": 0.77, "grad_norm": 1.6819078922271729, "learning_rate": 0.0002, "loss": 1.6161, "step": 189840 }, { "epoch": 0.77, "grad_norm": 3.4459993839263916, "learning_rate": 0.0002, "loss": 1.4601, "step": 189850 }, { "epoch": 0.77, "grad_norm": 2.8790905475616455, "learning_rate": 0.0002, "loss": 1.6366, "step": 189860 }, { "epoch": 0.77, "grad_norm": 4.006432056427002, "learning_rate": 0.0002, "loss": 1.7931, "step": 189870 }, { "epoch": 0.77, "grad_norm": 2.304659366607666, "learning_rate": 0.0002, "loss": 1.5907, "step": 189880 }, { "epoch": 0.77, "grad_norm": 2.1856446266174316, "learning_rate": 0.0002, "loss": 1.7461, "step": 189890 }, { "epoch": 0.77, "grad_norm": 3.1440868377685547, "learning_rate": 0.0002, "loss": 1.5694, "step": 189900 }, { "epoch": 0.77, "grad_norm": 2.8051300048828125, "learning_rate": 0.0002, "loss": 1.7981, "step": 189910 }, { "epoch": 0.77, "grad_norm": 4.506171226501465, "learning_rate": 0.0002, "loss": 1.6416, "step": 189920 }, { "epoch": 0.77, "grad_norm": 5.166402339935303, "learning_rate": 0.0002, "loss": 1.3905, "step": 189930 }, { "epoch": 0.77, "grad_norm": 2.998962879180908, "learning_rate": 0.0002, "loss": 1.7535, "step": 189940 }, { "epoch": 0.77, "grad_norm": 1.8699915409088135, "learning_rate": 0.0002, "loss": 1.4234, "step": 189950 }, { "epoch": 0.77, "grad_norm": 2.878545045852661, "learning_rate": 0.0002, "loss": 1.5667, "step": 189960 }, { "epoch": 0.77, "grad_norm": 7.01379919052124, "learning_rate": 0.0002, "loss": 1.707, "step": 189970 }, { "epoch": 0.77, "grad_norm": 1.607569932937622, "learning_rate": 0.0002, "loss": 1.4805, "step": 189980 }, { "epoch": 0.77, "grad_norm": 3.6221234798431396, "learning_rate": 0.0002, "loss": 1.7358, "step": 189990 }, { "epoch": 0.77, "grad_norm": 2.6212046146392822, "learning_rate": 0.0002, "loss": 1.4991, "step": 190000 }, { "epoch": 0.77, "grad_norm": 3.574786901473999, "learning_rate": 0.0002, "loss": 1.6955, "step": 190010 }, { "epoch": 0.77, "grad_norm": 1.9175102710723877, "learning_rate": 0.0002, "loss": 1.49, "step": 190020 }, { "epoch": 0.77, "grad_norm": 2.318051338195801, "learning_rate": 0.0002, "loss": 1.8225, "step": 190030 }, { "epoch": 0.77, "grad_norm": 2.182243585586548, "learning_rate": 0.0002, "loss": 1.6014, "step": 190040 }, { "epoch": 0.77, "grad_norm": 2.747061014175415, "learning_rate": 0.0002, "loss": 1.5336, "step": 190050 }, { "epoch": 0.77, "grad_norm": 5.179882526397705, "learning_rate": 0.0002, "loss": 1.3413, "step": 190060 }, { "epoch": 0.77, "grad_norm": 2.630277395248413, "learning_rate": 0.0002, "loss": 1.5256, "step": 190070 }, { "epoch": 0.77, "grad_norm": 3.814608335494995, "learning_rate": 0.0002, "loss": 1.4665, "step": 190080 }, { "epoch": 0.77, "grad_norm": 4.027436256408691, "learning_rate": 0.0002, "loss": 1.4798, "step": 190090 }, { "epoch": 0.77, "grad_norm": 2.5817127227783203, "learning_rate": 0.0002, "loss": 1.5455, "step": 190100 }, { "epoch": 0.77, "grad_norm": 1.8555395603179932, "learning_rate": 0.0002, "loss": 1.5969, "step": 190110 }, { "epoch": 0.77, "grad_norm": 2.3047571182250977, "learning_rate": 0.0002, "loss": 1.5241, "step": 190120 }, { "epoch": 0.77, "grad_norm": 4.670502185821533, "learning_rate": 0.0002, "loss": 1.7963, "step": 190130 }, { "epoch": 0.77, "grad_norm": 2.3003790378570557, "learning_rate": 0.0002, "loss": 1.5463, "step": 190140 }, { "epoch": 0.77, "grad_norm": 2.4117259979248047, "learning_rate": 0.0002, "loss": 1.5797, "step": 190150 }, { "epoch": 0.77, "grad_norm": 3.4370925426483154, "learning_rate": 0.0002, "loss": 1.6546, "step": 190160 }, { "epoch": 0.77, "grad_norm": 2.859266519546509, "learning_rate": 0.0002, "loss": 1.8758, "step": 190170 }, { "epoch": 0.77, "grad_norm": 2.768630266189575, "learning_rate": 0.0002, "loss": 1.3531, "step": 190180 }, { "epoch": 0.77, "grad_norm": 2.5922117233276367, "learning_rate": 0.0002, "loss": 1.5126, "step": 190190 }, { "epoch": 0.77, "grad_norm": 3.0568225383758545, "learning_rate": 0.0002, "loss": 1.4752, "step": 190200 }, { "epoch": 0.77, "grad_norm": 3.110105276107788, "learning_rate": 0.0002, "loss": 1.5146, "step": 190210 }, { "epoch": 0.77, "grad_norm": 2.945159673690796, "learning_rate": 0.0002, "loss": 1.6755, "step": 190220 }, { "epoch": 0.77, "grad_norm": 3.633004903793335, "learning_rate": 0.0002, "loss": 1.4871, "step": 190230 }, { "epoch": 0.77, "grad_norm": 1.671401023864746, "learning_rate": 0.0002, "loss": 1.5211, "step": 190240 }, { "epoch": 0.77, "grad_norm": 3.5706398487091064, "learning_rate": 0.0002, "loss": 1.4191, "step": 190250 }, { "epoch": 0.77, "grad_norm": 2.525754690170288, "learning_rate": 0.0002, "loss": 1.5899, "step": 190260 }, { "epoch": 0.77, "grad_norm": 2.6600329875946045, "learning_rate": 0.0002, "loss": 1.7965, "step": 190270 }, { "epoch": 0.77, "grad_norm": 2.484257698059082, "learning_rate": 0.0002, "loss": 1.5824, "step": 190280 }, { "epoch": 0.77, "grad_norm": 3.8481571674346924, "learning_rate": 0.0002, "loss": 1.7208, "step": 190290 }, { "epoch": 0.77, "grad_norm": 3.8283722400665283, "learning_rate": 0.0002, "loss": 1.6702, "step": 190300 }, { "epoch": 0.77, "grad_norm": 3.686497926712036, "learning_rate": 0.0002, "loss": 1.5664, "step": 190310 }, { "epoch": 0.77, "grad_norm": 2.9810073375701904, "learning_rate": 0.0002, "loss": 1.3968, "step": 190320 }, { "epoch": 0.77, "grad_norm": 2.6114563941955566, "learning_rate": 0.0002, "loss": 1.692, "step": 190330 }, { "epoch": 0.77, "grad_norm": 2.432756185531616, "learning_rate": 0.0002, "loss": 1.5204, "step": 190340 }, { "epoch": 0.77, "grad_norm": 2.591188430786133, "learning_rate": 0.0002, "loss": 1.8068, "step": 190350 }, { "epoch": 0.77, "grad_norm": 2.5283687114715576, "learning_rate": 0.0002, "loss": 1.596, "step": 190360 }, { "epoch": 0.77, "grad_norm": 3.0740537643432617, "learning_rate": 0.0002, "loss": 1.7976, "step": 190370 }, { "epoch": 0.78, "grad_norm": 3.794499397277832, "learning_rate": 0.0002, "loss": 1.5664, "step": 190380 }, { "epoch": 0.78, "grad_norm": 3.943603515625, "learning_rate": 0.0002, "loss": 1.8413, "step": 190390 }, { "epoch": 0.78, "grad_norm": 7.120370388031006, "learning_rate": 0.0002, "loss": 1.5561, "step": 190400 }, { "epoch": 0.78, "grad_norm": 2.440521478652954, "learning_rate": 0.0002, "loss": 1.7539, "step": 190410 }, { "epoch": 0.78, "grad_norm": 6.1320481300354, "learning_rate": 0.0002, "loss": 1.6278, "step": 190420 }, { "epoch": 0.78, "grad_norm": 2.5862133502960205, "learning_rate": 0.0002, "loss": 1.6277, "step": 190430 }, { "epoch": 0.78, "grad_norm": 3.2210752964019775, "learning_rate": 0.0002, "loss": 1.6397, "step": 190440 }, { "epoch": 0.78, "grad_norm": 2.239042043685913, "learning_rate": 0.0002, "loss": 1.6082, "step": 190450 }, { "epoch": 0.78, "grad_norm": 2.3525800704956055, "learning_rate": 0.0002, "loss": 1.847, "step": 190460 }, { "epoch": 0.78, "grad_norm": 1.7004131078720093, "learning_rate": 0.0002, "loss": 1.6546, "step": 190470 }, { "epoch": 0.78, "grad_norm": 3.4927265644073486, "learning_rate": 0.0002, "loss": 1.662, "step": 190480 }, { "epoch": 0.78, "grad_norm": 1.8889788389205933, "learning_rate": 0.0002, "loss": 1.6665, "step": 190490 }, { "epoch": 0.78, "grad_norm": 2.1942226886749268, "learning_rate": 0.0002, "loss": 1.8651, "step": 190500 }, { "epoch": 0.78, "grad_norm": 3.0103883743286133, "learning_rate": 0.0002, "loss": 1.4027, "step": 190510 }, { "epoch": 0.78, "grad_norm": 3.583789348602295, "learning_rate": 0.0002, "loss": 1.9224, "step": 190520 }, { "epoch": 0.78, "grad_norm": 2.8152084350585938, "learning_rate": 0.0002, "loss": 1.4597, "step": 190530 }, { "epoch": 0.78, "grad_norm": 1.879374623298645, "learning_rate": 0.0002, "loss": 1.6979, "step": 190540 }, { "epoch": 0.78, "grad_norm": 2.3620529174804688, "learning_rate": 0.0002, "loss": 1.5232, "step": 190550 }, { "epoch": 0.78, "grad_norm": 2.1982662677764893, "learning_rate": 0.0002, "loss": 1.5168, "step": 190560 }, { "epoch": 0.78, "grad_norm": 4.710212707519531, "learning_rate": 0.0002, "loss": 1.8714, "step": 190570 }, { "epoch": 0.78, "grad_norm": 2.881833076477051, "learning_rate": 0.0002, "loss": 1.4721, "step": 190580 }, { "epoch": 0.78, "grad_norm": 1.7869572639465332, "learning_rate": 0.0002, "loss": 1.7908, "step": 190590 }, { "epoch": 0.78, "grad_norm": 3.963130235671997, "learning_rate": 0.0002, "loss": 1.78, "step": 190600 }, { "epoch": 0.78, "grad_norm": 3.9833414554595947, "learning_rate": 0.0002, "loss": 1.7793, "step": 190610 }, { "epoch": 0.78, "grad_norm": 1.9929704666137695, "learning_rate": 0.0002, "loss": 1.607, "step": 190620 }, { "epoch": 0.78, "grad_norm": 2.3062679767608643, "learning_rate": 0.0002, "loss": 1.486, "step": 190630 }, { "epoch": 0.78, "grad_norm": 3.8511650562286377, "learning_rate": 0.0002, "loss": 1.6464, "step": 190640 }, { "epoch": 0.78, "grad_norm": 1.5801491737365723, "learning_rate": 0.0002, "loss": 1.5856, "step": 190650 }, { "epoch": 0.78, "grad_norm": 3.2195887565612793, "learning_rate": 0.0002, "loss": 1.5483, "step": 190660 }, { "epoch": 0.78, "grad_norm": 2.8133697509765625, "learning_rate": 0.0002, "loss": 1.6945, "step": 190670 }, { "epoch": 0.78, "grad_norm": 2.75469970703125, "learning_rate": 0.0002, "loss": 1.6658, "step": 190680 }, { "epoch": 0.78, "grad_norm": 3.110053777694702, "learning_rate": 0.0002, "loss": 1.7013, "step": 190690 }, { "epoch": 0.78, "grad_norm": 1.86016047000885, "learning_rate": 0.0002, "loss": 1.5137, "step": 190700 }, { "epoch": 0.78, "grad_norm": 3.7257845401763916, "learning_rate": 0.0002, "loss": 1.7888, "step": 190710 }, { "epoch": 0.78, "grad_norm": 3.0001494884490967, "learning_rate": 0.0002, "loss": 1.6897, "step": 190720 }, { "epoch": 0.78, "grad_norm": 2.6179893016815186, "learning_rate": 0.0002, "loss": 1.5826, "step": 190730 }, { "epoch": 0.78, "grad_norm": 6.4086594581604, "learning_rate": 0.0002, "loss": 1.5544, "step": 190740 }, { "epoch": 0.78, "grad_norm": 4.610642433166504, "learning_rate": 0.0002, "loss": 1.6617, "step": 190750 }, { "epoch": 0.78, "grad_norm": 3.587486982345581, "learning_rate": 0.0002, "loss": 1.5532, "step": 190760 }, { "epoch": 0.78, "grad_norm": 3.6565775871276855, "learning_rate": 0.0002, "loss": 1.7821, "step": 190770 }, { "epoch": 0.78, "grad_norm": 2.1668074131011963, "learning_rate": 0.0002, "loss": 1.7728, "step": 190780 }, { "epoch": 0.78, "grad_norm": 4.414488315582275, "learning_rate": 0.0002, "loss": 1.5659, "step": 190790 }, { "epoch": 0.78, "grad_norm": 3.5593454837799072, "learning_rate": 0.0002, "loss": 1.5543, "step": 190800 }, { "epoch": 0.78, "grad_norm": 2.5754239559173584, "learning_rate": 0.0002, "loss": 1.4939, "step": 190810 }, { "epoch": 0.78, "grad_norm": 3.480055809020996, "learning_rate": 0.0002, "loss": 1.5541, "step": 190820 }, { "epoch": 0.78, "grad_norm": 3.742558717727661, "learning_rate": 0.0002, "loss": 1.6215, "step": 190830 }, { "epoch": 0.78, "grad_norm": 3.0433647632598877, "learning_rate": 0.0002, "loss": 1.7974, "step": 190840 }, { "epoch": 0.78, "grad_norm": 3.178898572921753, "learning_rate": 0.0002, "loss": 1.6655, "step": 190850 }, { "epoch": 0.78, "grad_norm": 3.2759768962860107, "learning_rate": 0.0002, "loss": 1.4958, "step": 190860 }, { "epoch": 0.78, "grad_norm": 2.286761522293091, "learning_rate": 0.0002, "loss": 1.6852, "step": 190870 }, { "epoch": 0.78, "grad_norm": 3.103821277618408, "learning_rate": 0.0002, "loss": 1.7799, "step": 190880 }, { "epoch": 0.78, "grad_norm": 3.1769795417785645, "learning_rate": 0.0002, "loss": 1.7181, "step": 190890 }, { "epoch": 0.78, "grad_norm": 2.9934306144714355, "learning_rate": 0.0002, "loss": 1.6961, "step": 190900 }, { "epoch": 0.78, "grad_norm": 2.743330955505371, "learning_rate": 0.0002, "loss": 1.8582, "step": 190910 }, { "epoch": 0.78, "grad_norm": 3.216430425643921, "learning_rate": 0.0002, "loss": 1.3098, "step": 190920 }, { "epoch": 0.78, "grad_norm": 2.842367649078369, "learning_rate": 0.0002, "loss": 1.6947, "step": 190930 }, { "epoch": 0.78, "grad_norm": 3.481842279434204, "learning_rate": 0.0002, "loss": 1.5161, "step": 190940 }, { "epoch": 0.78, "grad_norm": 2.930445432662964, "learning_rate": 0.0002, "loss": 1.6181, "step": 190950 }, { "epoch": 0.78, "grad_norm": 3.473560094833374, "learning_rate": 0.0002, "loss": 1.6141, "step": 190960 }, { "epoch": 0.78, "grad_norm": 2.925767183303833, "learning_rate": 0.0002, "loss": 1.4848, "step": 190970 }, { "epoch": 0.78, "grad_norm": 2.654205322265625, "learning_rate": 0.0002, "loss": 1.617, "step": 190980 }, { "epoch": 0.78, "grad_norm": 2.93447208404541, "learning_rate": 0.0002, "loss": 1.5537, "step": 190990 }, { "epoch": 0.78, "grad_norm": 2.226621150970459, "learning_rate": 0.0002, "loss": 1.7471, "step": 191000 }, { "epoch": 0.78, "grad_norm": 4.641233444213867, "learning_rate": 0.0002, "loss": 1.6272, "step": 191010 }, { "epoch": 0.78, "grad_norm": 3.3245437145233154, "learning_rate": 0.0002, "loss": 1.7331, "step": 191020 }, { "epoch": 0.78, "grad_norm": 3.1580471992492676, "learning_rate": 0.0002, "loss": 1.5117, "step": 191030 }, { "epoch": 0.78, "grad_norm": 3.9476168155670166, "learning_rate": 0.0002, "loss": 1.6268, "step": 191040 }, { "epoch": 0.78, "grad_norm": 2.405186176300049, "learning_rate": 0.0002, "loss": 1.4364, "step": 191050 }, { "epoch": 0.78, "grad_norm": 4.494388580322266, "learning_rate": 0.0002, "loss": 1.7142, "step": 191060 }, { "epoch": 0.78, "grad_norm": 2.7373437881469727, "learning_rate": 0.0002, "loss": 1.7267, "step": 191070 }, { "epoch": 0.78, "grad_norm": 2.981710195541382, "learning_rate": 0.0002, "loss": 1.5751, "step": 191080 }, { "epoch": 0.78, "grad_norm": 3.264784097671509, "learning_rate": 0.0002, "loss": 1.5087, "step": 191090 }, { "epoch": 0.78, "grad_norm": 3.4847171306610107, "learning_rate": 0.0002, "loss": 1.6356, "step": 191100 }, { "epoch": 0.78, "grad_norm": 5.80771541595459, "learning_rate": 0.0002, "loss": 1.6249, "step": 191110 }, { "epoch": 0.78, "grad_norm": 2.468665838241577, "learning_rate": 0.0002, "loss": 1.5856, "step": 191120 }, { "epoch": 0.78, "grad_norm": 2.1766467094421387, "learning_rate": 0.0002, "loss": 1.7653, "step": 191130 }, { "epoch": 0.78, "grad_norm": 3.627678155899048, "learning_rate": 0.0002, "loss": 1.5323, "step": 191140 }, { "epoch": 0.78, "grad_norm": 2.7906079292297363, "learning_rate": 0.0002, "loss": 1.5488, "step": 191150 }, { "epoch": 0.78, "grad_norm": 1.1377122402191162, "learning_rate": 0.0002, "loss": 1.7244, "step": 191160 }, { "epoch": 0.78, "grad_norm": 2.521865129470825, "learning_rate": 0.0002, "loss": 1.5553, "step": 191170 }, { "epoch": 0.78, "grad_norm": 2.6721668243408203, "learning_rate": 0.0002, "loss": 1.365, "step": 191180 }, { "epoch": 0.78, "grad_norm": 3.9916446208953857, "learning_rate": 0.0002, "loss": 1.5756, "step": 191190 }, { "epoch": 0.78, "grad_norm": 3.022836923599243, "learning_rate": 0.0002, "loss": 1.5413, "step": 191200 }, { "epoch": 0.78, "grad_norm": 5.101054668426514, "learning_rate": 0.0002, "loss": 1.4908, "step": 191210 }, { "epoch": 0.78, "grad_norm": 3.3246142864227295, "learning_rate": 0.0002, "loss": 1.621, "step": 191220 }, { "epoch": 0.78, "grad_norm": 2.505314588546753, "learning_rate": 0.0002, "loss": 1.6354, "step": 191230 }, { "epoch": 0.78, "grad_norm": 4.000927925109863, "learning_rate": 0.0002, "loss": 1.4678, "step": 191240 }, { "epoch": 0.78, "grad_norm": 3.4678359031677246, "learning_rate": 0.0002, "loss": 1.4412, "step": 191250 }, { "epoch": 0.78, "grad_norm": 3.2612974643707275, "learning_rate": 0.0002, "loss": 1.8526, "step": 191260 }, { "epoch": 0.78, "grad_norm": 5.666616916656494, "learning_rate": 0.0002, "loss": 1.7276, "step": 191270 }, { "epoch": 0.78, "grad_norm": 1.6773864030838013, "learning_rate": 0.0002, "loss": 1.5573, "step": 191280 }, { "epoch": 0.78, "grad_norm": 2.6214356422424316, "learning_rate": 0.0002, "loss": 1.6671, "step": 191290 }, { "epoch": 0.78, "grad_norm": 2.618403911590576, "learning_rate": 0.0002, "loss": 1.6464, "step": 191300 }, { "epoch": 0.78, "grad_norm": 4.608771800994873, "learning_rate": 0.0002, "loss": 1.3235, "step": 191310 }, { "epoch": 0.78, "grad_norm": 2.9990808963775635, "learning_rate": 0.0002, "loss": 1.6731, "step": 191320 }, { "epoch": 0.78, "grad_norm": 1.6073167324066162, "learning_rate": 0.0002, "loss": 1.5947, "step": 191330 }, { "epoch": 0.78, "grad_norm": 3.0803229808807373, "learning_rate": 0.0002, "loss": 1.643, "step": 191340 }, { "epoch": 0.78, "grad_norm": 5.485774517059326, "learning_rate": 0.0002, "loss": 1.6321, "step": 191350 }, { "epoch": 0.78, "grad_norm": 2.195976972579956, "learning_rate": 0.0002, "loss": 1.3741, "step": 191360 }, { "epoch": 0.78, "grad_norm": 3.244480609893799, "learning_rate": 0.0002, "loss": 1.6543, "step": 191370 }, { "epoch": 0.78, "grad_norm": 3.0637667179107666, "learning_rate": 0.0002, "loss": 1.6147, "step": 191380 }, { "epoch": 0.78, "grad_norm": 3.6515376567840576, "learning_rate": 0.0002, "loss": 1.498, "step": 191390 }, { "epoch": 0.78, "grad_norm": 3.2901740074157715, "learning_rate": 0.0002, "loss": 1.4373, "step": 191400 }, { "epoch": 0.78, "grad_norm": 3.5892298221588135, "learning_rate": 0.0002, "loss": 1.6959, "step": 191410 }, { "epoch": 0.78, "grad_norm": 3.4598357677459717, "learning_rate": 0.0002, "loss": 1.7197, "step": 191420 }, { "epoch": 0.78, "grad_norm": 3.002936601638794, "learning_rate": 0.0002, "loss": 1.4612, "step": 191430 }, { "epoch": 0.78, "grad_norm": 2.5162909030914307, "learning_rate": 0.0002, "loss": 1.7248, "step": 191440 }, { "epoch": 0.78, "grad_norm": 2.2841596603393555, "learning_rate": 0.0002, "loss": 1.5781, "step": 191450 }, { "epoch": 0.78, "grad_norm": 2.983752965927124, "learning_rate": 0.0002, "loss": 1.5338, "step": 191460 }, { "epoch": 0.78, "grad_norm": 2.987781286239624, "learning_rate": 0.0002, "loss": 1.5506, "step": 191470 }, { "epoch": 0.78, "grad_norm": 2.7755351066589355, "learning_rate": 0.0002, "loss": 1.6286, "step": 191480 }, { "epoch": 0.78, "grad_norm": 1.8026055097579956, "learning_rate": 0.0002, "loss": 1.5826, "step": 191490 }, { "epoch": 0.78, "grad_norm": 2.6644599437713623, "learning_rate": 0.0002, "loss": 1.3683, "step": 191500 }, { "epoch": 0.78, "grad_norm": 2.4597346782684326, "learning_rate": 0.0002, "loss": 1.5937, "step": 191510 }, { "epoch": 0.78, "grad_norm": 2.8846073150634766, "learning_rate": 0.0002, "loss": 1.5393, "step": 191520 }, { "epoch": 0.78, "grad_norm": 2.3309478759765625, "learning_rate": 0.0002, "loss": 1.5932, "step": 191530 }, { "epoch": 0.78, "grad_norm": 2.3754148483276367, "learning_rate": 0.0002, "loss": 1.4649, "step": 191540 }, { "epoch": 0.78, "grad_norm": 2.866459608078003, "learning_rate": 0.0002, "loss": 1.6455, "step": 191550 }, { "epoch": 0.78, "grad_norm": 3.382382869720459, "learning_rate": 0.0002, "loss": 1.5683, "step": 191560 }, { "epoch": 0.78, "grad_norm": 2.7649261951446533, "learning_rate": 0.0002, "loss": 1.4394, "step": 191570 }, { "epoch": 0.78, "grad_norm": 1.9812963008880615, "learning_rate": 0.0002, "loss": 1.9231, "step": 191580 }, { "epoch": 0.78, "grad_norm": 3.076692819595337, "learning_rate": 0.0002, "loss": 1.5229, "step": 191590 }, { "epoch": 0.78, "grad_norm": 2.7611072063446045, "learning_rate": 0.0002, "loss": 1.5977, "step": 191600 }, { "epoch": 0.78, "grad_norm": 2.58837628364563, "learning_rate": 0.0002, "loss": 1.3522, "step": 191610 }, { "epoch": 0.78, "grad_norm": 2.4455459117889404, "learning_rate": 0.0002, "loss": 1.4464, "step": 191620 }, { "epoch": 0.78, "grad_norm": 3.144850492477417, "learning_rate": 0.0002, "loss": 1.6862, "step": 191630 }, { "epoch": 0.78, "grad_norm": 3.4667928218841553, "learning_rate": 0.0002, "loss": 1.6726, "step": 191640 }, { "epoch": 0.78, "grad_norm": 3.715055227279663, "learning_rate": 0.0002, "loss": 1.7149, "step": 191650 }, { "epoch": 0.78, "grad_norm": 2.3076586723327637, "learning_rate": 0.0002, "loss": 1.4946, "step": 191660 }, { "epoch": 0.78, "grad_norm": 2.4696850776672363, "learning_rate": 0.0002, "loss": 1.7688, "step": 191670 }, { "epoch": 0.78, "grad_norm": 1.4932142496109009, "learning_rate": 0.0002, "loss": 1.5341, "step": 191680 }, { "epoch": 0.78, "grad_norm": 3.1472253799438477, "learning_rate": 0.0002, "loss": 1.4147, "step": 191690 }, { "epoch": 0.78, "grad_norm": 3.2191362380981445, "learning_rate": 0.0002, "loss": 1.6137, "step": 191700 }, { "epoch": 0.78, "grad_norm": 1.2045608758926392, "learning_rate": 0.0002, "loss": 1.5857, "step": 191710 }, { "epoch": 0.78, "grad_norm": 2.46242356300354, "learning_rate": 0.0002, "loss": 1.78, "step": 191720 }, { "epoch": 0.78, "grad_norm": 1.4783616065979004, "learning_rate": 0.0002, "loss": 1.5092, "step": 191730 }, { "epoch": 0.78, "grad_norm": 2.3224921226501465, "learning_rate": 0.0002, "loss": 1.6002, "step": 191740 }, { "epoch": 0.78, "grad_norm": 2.807404041290283, "learning_rate": 0.0002, "loss": 1.6884, "step": 191750 }, { "epoch": 0.78, "grad_norm": 2.8976128101348877, "learning_rate": 0.0002, "loss": 1.8116, "step": 191760 }, { "epoch": 0.78, "grad_norm": 1.929179310798645, "learning_rate": 0.0002, "loss": 1.4839, "step": 191770 }, { "epoch": 0.78, "grad_norm": 2.107707977294922, "learning_rate": 0.0002, "loss": 1.5582, "step": 191780 }, { "epoch": 0.78, "grad_norm": 3.364549160003662, "learning_rate": 0.0002, "loss": 1.3057, "step": 191790 }, { "epoch": 0.78, "grad_norm": 2.2138209342956543, "learning_rate": 0.0002, "loss": 1.5048, "step": 191800 }, { "epoch": 0.78, "grad_norm": 2.0184030532836914, "learning_rate": 0.0002, "loss": 1.5342, "step": 191810 }, { "epoch": 0.78, "grad_norm": 3.6446919441223145, "learning_rate": 0.0002, "loss": 1.5223, "step": 191820 }, { "epoch": 0.78, "grad_norm": 2.196460723876953, "learning_rate": 0.0002, "loss": 1.6231, "step": 191830 }, { "epoch": 0.78, "grad_norm": 2.7079572677612305, "learning_rate": 0.0002, "loss": 1.5908, "step": 191840 }, { "epoch": 0.78, "grad_norm": 4.002665996551514, "learning_rate": 0.0002, "loss": 1.6189, "step": 191850 }, { "epoch": 0.78, "grad_norm": 3.5793368816375732, "learning_rate": 0.0002, "loss": 1.6633, "step": 191860 }, { "epoch": 0.78, "grad_norm": 4.27685546875, "learning_rate": 0.0002, "loss": 1.5323, "step": 191870 }, { "epoch": 0.78, "grad_norm": 2.309650182723999, "learning_rate": 0.0002, "loss": 1.6887, "step": 191880 }, { "epoch": 0.78, "grad_norm": 2.7562918663024902, "learning_rate": 0.0002, "loss": 1.4714, "step": 191890 }, { "epoch": 0.78, "grad_norm": 2.295870542526245, "learning_rate": 0.0002, "loss": 1.6185, "step": 191900 }, { "epoch": 0.78, "grad_norm": 4.3346757888793945, "learning_rate": 0.0002, "loss": 1.7125, "step": 191910 }, { "epoch": 0.78, "grad_norm": 2.749513626098633, "learning_rate": 0.0002, "loss": 1.6324, "step": 191920 }, { "epoch": 0.78, "grad_norm": 2.344994068145752, "learning_rate": 0.0002, "loss": 1.5513, "step": 191930 }, { "epoch": 0.78, "grad_norm": 3.7748124599456787, "learning_rate": 0.0002, "loss": 1.5135, "step": 191940 }, { "epoch": 0.78, "grad_norm": 1.9952373504638672, "learning_rate": 0.0002, "loss": 1.56, "step": 191950 }, { "epoch": 0.78, "grad_norm": 2.6649792194366455, "learning_rate": 0.0002, "loss": 1.448, "step": 191960 }, { "epoch": 0.78, "grad_norm": 2.5333757400512695, "learning_rate": 0.0002, "loss": 1.3899, "step": 191970 }, { "epoch": 0.78, "grad_norm": 2.6642487049102783, "learning_rate": 0.0002, "loss": 1.7327, "step": 191980 }, { "epoch": 0.78, "grad_norm": 2.01248836517334, "learning_rate": 0.0002, "loss": 1.565, "step": 191990 }, { "epoch": 0.78, "grad_norm": 2.8185360431671143, "learning_rate": 0.0002, "loss": 1.4877, "step": 192000 }, { "epoch": 0.78, "grad_norm": 3.42509126663208, "learning_rate": 0.0002, "loss": 1.4897, "step": 192010 }, { "epoch": 0.78, "grad_norm": 2.976656436920166, "learning_rate": 0.0002, "loss": 1.7197, "step": 192020 }, { "epoch": 0.78, "grad_norm": 4.1229658126831055, "learning_rate": 0.0002, "loss": 1.4855, "step": 192030 }, { "epoch": 0.78, "grad_norm": 3.794651985168457, "learning_rate": 0.0002, "loss": 1.5629, "step": 192040 }, { "epoch": 0.78, "grad_norm": 1.4747322797775269, "learning_rate": 0.0002, "loss": 1.6215, "step": 192050 }, { "epoch": 0.78, "grad_norm": 3.782804489135742, "learning_rate": 0.0002, "loss": 1.5639, "step": 192060 }, { "epoch": 0.78, "grad_norm": 3.2282891273498535, "learning_rate": 0.0002, "loss": 1.5588, "step": 192070 }, { "epoch": 0.78, "grad_norm": 2.33062744140625, "learning_rate": 0.0002, "loss": 1.618, "step": 192080 }, { "epoch": 0.78, "grad_norm": 2.407282590866089, "learning_rate": 0.0002, "loss": 1.4996, "step": 192090 }, { "epoch": 0.78, "grad_norm": 3.467040777206421, "learning_rate": 0.0002, "loss": 1.5925, "step": 192100 }, { "epoch": 0.78, "grad_norm": 3.148259162902832, "learning_rate": 0.0002, "loss": 1.7311, "step": 192110 }, { "epoch": 0.78, "grad_norm": 4.104574680328369, "learning_rate": 0.0002, "loss": 1.4552, "step": 192120 }, { "epoch": 0.78, "grad_norm": 3.8612382411956787, "learning_rate": 0.0002, "loss": 1.2074, "step": 192130 }, { "epoch": 0.78, "grad_norm": 2.778376817703247, "learning_rate": 0.0002, "loss": 1.2728, "step": 192140 }, { "epoch": 0.78, "grad_norm": 4.5976386070251465, "learning_rate": 0.0002, "loss": 1.6832, "step": 192150 }, { "epoch": 0.78, "grad_norm": 2.075453281402588, "learning_rate": 0.0002, "loss": 1.3392, "step": 192160 }, { "epoch": 0.78, "grad_norm": 2.4873502254486084, "learning_rate": 0.0002, "loss": 1.3054, "step": 192170 }, { "epoch": 0.78, "grad_norm": 1.737353801727295, "learning_rate": 0.0002, "loss": 1.5107, "step": 192180 }, { "epoch": 0.78, "grad_norm": 2.5592830181121826, "learning_rate": 0.0002, "loss": 1.6984, "step": 192190 }, { "epoch": 0.78, "grad_norm": 3.6930902004241943, "learning_rate": 0.0002, "loss": 1.5128, "step": 192200 }, { "epoch": 0.78, "grad_norm": 3.5771324634552, "learning_rate": 0.0002, "loss": 1.5513, "step": 192210 }, { "epoch": 0.78, "grad_norm": 3.6401479244232178, "learning_rate": 0.0002, "loss": 1.6431, "step": 192220 }, { "epoch": 0.78, "grad_norm": 2.0405049324035645, "learning_rate": 0.0002, "loss": 1.7499, "step": 192230 }, { "epoch": 0.78, "grad_norm": 3.1540470123291016, "learning_rate": 0.0002, "loss": 1.6889, "step": 192240 }, { "epoch": 0.78, "grad_norm": 2.4989521503448486, "learning_rate": 0.0002, "loss": 1.5959, "step": 192250 }, { "epoch": 0.78, "grad_norm": 2.4068803787231445, "learning_rate": 0.0002, "loss": 1.4934, "step": 192260 }, { "epoch": 0.78, "grad_norm": 2.174102783203125, "learning_rate": 0.0002, "loss": 1.4286, "step": 192270 }, { "epoch": 0.78, "grad_norm": 2.0232765674591064, "learning_rate": 0.0002, "loss": 1.598, "step": 192280 }, { "epoch": 0.78, "grad_norm": 3.120713233947754, "learning_rate": 0.0002, "loss": 1.4804, "step": 192290 }, { "epoch": 0.78, "grad_norm": 3.466010332107544, "learning_rate": 0.0002, "loss": 1.6221, "step": 192300 }, { "epoch": 0.78, "grad_norm": 5.318981170654297, "learning_rate": 0.0002, "loss": 1.5289, "step": 192310 }, { "epoch": 0.78, "grad_norm": 4.3992600440979, "learning_rate": 0.0002, "loss": 1.5737, "step": 192320 }, { "epoch": 0.78, "grad_norm": 3.843052864074707, "learning_rate": 0.0002, "loss": 1.4024, "step": 192330 }, { "epoch": 0.78, "grad_norm": 1.9619667530059814, "learning_rate": 0.0002, "loss": 1.5433, "step": 192340 }, { "epoch": 0.78, "grad_norm": 2.5538558959960938, "learning_rate": 0.0002, "loss": 1.7442, "step": 192350 }, { "epoch": 0.78, "grad_norm": 2.0065035820007324, "learning_rate": 0.0002, "loss": 1.7341, "step": 192360 }, { "epoch": 0.78, "grad_norm": 2.7307729721069336, "learning_rate": 0.0002, "loss": 1.9459, "step": 192370 }, { "epoch": 0.78, "grad_norm": 2.5330443382263184, "learning_rate": 0.0002, "loss": 1.6691, "step": 192380 }, { "epoch": 0.78, "grad_norm": 4.952829837799072, "learning_rate": 0.0002, "loss": 1.4879, "step": 192390 }, { "epoch": 0.78, "grad_norm": 5.195496559143066, "learning_rate": 0.0002, "loss": 1.6217, "step": 192400 }, { "epoch": 0.78, "grad_norm": 3.418114185333252, "learning_rate": 0.0002, "loss": 1.4365, "step": 192410 }, { "epoch": 0.78, "grad_norm": 2.8144729137420654, "learning_rate": 0.0002, "loss": 1.8299, "step": 192420 }, { "epoch": 0.78, "grad_norm": 3.262164831161499, "learning_rate": 0.0002, "loss": 1.7315, "step": 192430 }, { "epoch": 0.78, "grad_norm": 3.2124650478363037, "learning_rate": 0.0002, "loss": 1.6187, "step": 192440 }, { "epoch": 0.78, "grad_norm": 3.477903127670288, "learning_rate": 0.0002, "loss": 1.5673, "step": 192450 }, { "epoch": 0.78, "grad_norm": 5.155794620513916, "learning_rate": 0.0002, "loss": 1.6599, "step": 192460 }, { "epoch": 0.78, "grad_norm": 3.4318742752075195, "learning_rate": 0.0002, "loss": 1.6197, "step": 192470 }, { "epoch": 0.78, "grad_norm": 1.8003487586975098, "learning_rate": 0.0002, "loss": 1.3938, "step": 192480 }, { "epoch": 0.78, "grad_norm": 3.9337828159332275, "learning_rate": 0.0002, "loss": 1.9489, "step": 192490 }, { "epoch": 0.78, "grad_norm": 3.074737548828125, "learning_rate": 0.0002, "loss": 1.4448, "step": 192500 }, { "epoch": 0.78, "grad_norm": 2.7909834384918213, "learning_rate": 0.0002, "loss": 1.5781, "step": 192510 }, { "epoch": 0.78, "grad_norm": 2.8708910942077637, "learning_rate": 0.0002, "loss": 1.6699, "step": 192520 }, { "epoch": 0.78, "grad_norm": 3.4363343715667725, "learning_rate": 0.0002, "loss": 1.3553, "step": 192530 }, { "epoch": 0.78, "grad_norm": 3.016324043273926, "learning_rate": 0.0002, "loss": 1.4924, "step": 192540 }, { "epoch": 0.78, "grad_norm": 3.6716601848602295, "learning_rate": 0.0002, "loss": 1.5568, "step": 192550 }, { "epoch": 0.78, "grad_norm": 5.1224894523620605, "learning_rate": 0.0002, "loss": 1.2596, "step": 192560 }, { "epoch": 0.78, "grad_norm": 2.6647560596466064, "learning_rate": 0.0002, "loss": 1.3183, "step": 192570 }, { "epoch": 0.78, "grad_norm": 2.1840133666992188, "learning_rate": 0.0002, "loss": 1.4008, "step": 192580 }, { "epoch": 0.78, "grad_norm": 4.366098403930664, "learning_rate": 0.0002, "loss": 1.7287, "step": 192590 }, { "epoch": 0.78, "grad_norm": 2.4509165287017822, "learning_rate": 0.0002, "loss": 1.4058, "step": 192600 }, { "epoch": 0.78, "grad_norm": 2.8754942417144775, "learning_rate": 0.0002, "loss": 1.2756, "step": 192610 }, { "epoch": 0.78, "grad_norm": 4.182994365692139, "learning_rate": 0.0002, "loss": 1.7127, "step": 192620 }, { "epoch": 0.78, "grad_norm": 2.128180503845215, "learning_rate": 0.0002, "loss": 1.5675, "step": 192630 }, { "epoch": 0.78, "grad_norm": 2.356010913848877, "learning_rate": 0.0002, "loss": 1.563, "step": 192640 }, { "epoch": 0.78, "grad_norm": 5.133151531219482, "learning_rate": 0.0002, "loss": 1.6499, "step": 192650 }, { "epoch": 0.78, "grad_norm": 1.4583940505981445, "learning_rate": 0.0002, "loss": 1.5638, "step": 192660 }, { "epoch": 0.78, "grad_norm": 2.691087007522583, "learning_rate": 0.0002, "loss": 1.7962, "step": 192670 }, { "epoch": 0.78, "grad_norm": 2.24708890914917, "learning_rate": 0.0002, "loss": 1.4619, "step": 192680 }, { "epoch": 0.78, "grad_norm": 2.4734692573547363, "learning_rate": 0.0002, "loss": 1.2757, "step": 192690 }, { "epoch": 0.78, "grad_norm": 3.514432668685913, "learning_rate": 0.0002, "loss": 1.7273, "step": 192700 }, { "epoch": 0.78, "grad_norm": 3.492948293685913, "learning_rate": 0.0002, "loss": 1.563, "step": 192710 }, { "epoch": 0.78, "grad_norm": 3.462341070175171, "learning_rate": 0.0002, "loss": 1.5087, "step": 192720 }, { "epoch": 0.78, "grad_norm": 3.915090799331665, "learning_rate": 0.0002, "loss": 1.4977, "step": 192730 }, { "epoch": 0.78, "grad_norm": 3.1525793075561523, "learning_rate": 0.0002, "loss": 1.5418, "step": 192740 }, { "epoch": 0.78, "grad_norm": 2.902580976486206, "learning_rate": 0.0002, "loss": 1.6113, "step": 192750 }, { "epoch": 0.78, "grad_norm": 2.5971243381500244, "learning_rate": 0.0002, "loss": 1.8728, "step": 192760 }, { "epoch": 0.78, "grad_norm": 2.6191792488098145, "learning_rate": 0.0002, "loss": 1.3286, "step": 192770 }, { "epoch": 0.78, "grad_norm": 4.325506210327148, "learning_rate": 0.0002, "loss": 1.4614, "step": 192780 }, { "epoch": 0.78, "grad_norm": 2.4701907634735107, "learning_rate": 0.0002, "loss": 1.5152, "step": 192790 }, { "epoch": 0.78, "grad_norm": 4.294724464416504, "learning_rate": 0.0002, "loss": 1.4865, "step": 192800 }, { "epoch": 0.78, "grad_norm": 5.4799699783325195, "learning_rate": 0.0002, "loss": 1.5651, "step": 192810 }, { "epoch": 0.78, "grad_norm": 2.223154067993164, "learning_rate": 0.0002, "loss": 1.4795, "step": 192820 }, { "epoch": 0.78, "grad_norm": 3.729504108428955, "learning_rate": 0.0002, "loss": 1.4593, "step": 192830 }, { "epoch": 0.79, "grad_norm": 2.62548828125, "learning_rate": 0.0002, "loss": 1.5663, "step": 192840 }, { "epoch": 0.79, "grad_norm": 2.7991552352905273, "learning_rate": 0.0002, "loss": 1.5396, "step": 192850 }, { "epoch": 0.79, "grad_norm": 2.9385766983032227, "learning_rate": 0.0002, "loss": 1.8842, "step": 192860 }, { "epoch": 0.79, "grad_norm": 2.8834385871887207, "learning_rate": 0.0002, "loss": 1.4837, "step": 192870 }, { "epoch": 0.79, "grad_norm": 2.6357290744781494, "learning_rate": 0.0002, "loss": 1.6703, "step": 192880 }, { "epoch": 0.79, "grad_norm": 3.249833345413208, "learning_rate": 0.0002, "loss": 1.6547, "step": 192890 }, { "epoch": 0.79, "grad_norm": 2.1931962966918945, "learning_rate": 0.0002, "loss": 1.5578, "step": 192900 }, { "epoch": 0.79, "grad_norm": 2.5888993740081787, "learning_rate": 0.0002, "loss": 1.6172, "step": 192910 }, { "epoch": 0.79, "grad_norm": 3.22501802444458, "learning_rate": 0.0002, "loss": 1.5932, "step": 192920 }, { "epoch": 0.79, "grad_norm": 2.404118537902832, "learning_rate": 0.0002, "loss": 1.2842, "step": 192930 }, { "epoch": 0.79, "grad_norm": 4.088610649108887, "learning_rate": 0.0002, "loss": 1.4908, "step": 192940 }, { "epoch": 0.79, "grad_norm": 2.8035430908203125, "learning_rate": 0.0002, "loss": 1.5504, "step": 192950 }, { "epoch": 0.79, "grad_norm": 2.6504316329956055, "learning_rate": 0.0002, "loss": 1.4755, "step": 192960 }, { "epoch": 0.79, "grad_norm": 2.8936381340026855, "learning_rate": 0.0002, "loss": 1.51, "step": 192970 }, { "epoch": 0.79, "grad_norm": 3.3831303119659424, "learning_rate": 0.0002, "loss": 1.6084, "step": 192980 }, { "epoch": 0.79, "grad_norm": 2.910526752471924, "learning_rate": 0.0002, "loss": 1.4751, "step": 192990 }, { "epoch": 0.79, "grad_norm": 3.323342800140381, "learning_rate": 0.0002, "loss": 1.5325, "step": 193000 }, { "epoch": 0.79, "grad_norm": 6.171630859375, "learning_rate": 0.0002, "loss": 1.4369, "step": 193010 }, { "epoch": 0.79, "grad_norm": 3.375918388366699, "learning_rate": 0.0002, "loss": 1.48, "step": 193020 }, { "epoch": 0.79, "grad_norm": 2.968787670135498, "learning_rate": 0.0002, "loss": 1.6157, "step": 193030 }, { "epoch": 0.79, "grad_norm": 4.7924394607543945, "learning_rate": 0.0002, "loss": 1.7809, "step": 193040 }, { "epoch": 0.79, "grad_norm": 2.868000030517578, "learning_rate": 0.0002, "loss": 1.6387, "step": 193050 }, { "epoch": 0.79, "grad_norm": 2.7992544174194336, "learning_rate": 0.0002, "loss": 1.529, "step": 193060 }, { "epoch": 0.79, "grad_norm": 2.5807278156280518, "learning_rate": 0.0002, "loss": 1.8032, "step": 193070 }, { "epoch": 0.79, "grad_norm": 2.4217655658721924, "learning_rate": 0.0002, "loss": 1.6273, "step": 193080 }, { "epoch": 0.79, "grad_norm": 2.829439163208008, "learning_rate": 0.0002, "loss": 1.7213, "step": 193090 }, { "epoch": 0.79, "grad_norm": 2.8221654891967773, "learning_rate": 0.0002, "loss": 1.367, "step": 193100 }, { "epoch": 0.79, "grad_norm": 2.425747871398926, "learning_rate": 0.0002, "loss": 1.564, "step": 193110 }, { "epoch": 0.79, "grad_norm": 3.2800674438476562, "learning_rate": 0.0002, "loss": 1.6217, "step": 193120 }, { "epoch": 0.79, "grad_norm": 2.413397789001465, "learning_rate": 0.0002, "loss": 1.3595, "step": 193130 }, { "epoch": 0.79, "grad_norm": 3.800490140914917, "learning_rate": 0.0002, "loss": 1.6759, "step": 193140 }, { "epoch": 0.79, "grad_norm": 3.3245465755462646, "learning_rate": 0.0002, "loss": 1.7421, "step": 193150 }, { "epoch": 0.79, "grad_norm": 3.0420045852661133, "learning_rate": 0.0002, "loss": 1.4626, "step": 193160 }, { "epoch": 0.79, "grad_norm": 4.361213207244873, "learning_rate": 0.0002, "loss": 1.3825, "step": 193170 }, { "epoch": 0.79, "grad_norm": 3.002004623413086, "learning_rate": 0.0002, "loss": 1.5039, "step": 193180 }, { "epoch": 0.79, "grad_norm": 3.3749749660491943, "learning_rate": 0.0002, "loss": 1.6685, "step": 193190 }, { "epoch": 0.79, "grad_norm": 1.708435297012329, "learning_rate": 0.0002, "loss": 1.5995, "step": 193200 }, { "epoch": 0.79, "grad_norm": 2.11040997505188, "learning_rate": 0.0002, "loss": 1.4049, "step": 193210 }, { "epoch": 0.79, "grad_norm": 3.146214008331299, "learning_rate": 0.0002, "loss": 1.5054, "step": 193220 }, { "epoch": 0.79, "grad_norm": 3.7179489135742188, "learning_rate": 0.0002, "loss": 1.553, "step": 193230 }, { "epoch": 0.79, "grad_norm": 4.240278244018555, "learning_rate": 0.0002, "loss": 1.5506, "step": 193240 }, { "epoch": 0.79, "grad_norm": 4.092662334442139, "learning_rate": 0.0002, "loss": 1.6742, "step": 193250 }, { "epoch": 0.79, "grad_norm": 2.695733070373535, "learning_rate": 0.0002, "loss": 1.6823, "step": 193260 }, { "epoch": 0.79, "grad_norm": 2.633181095123291, "learning_rate": 0.0002, "loss": 1.5638, "step": 193270 }, { "epoch": 0.79, "grad_norm": 2.4191927909851074, "learning_rate": 0.0002, "loss": 1.4768, "step": 193280 }, { "epoch": 0.79, "grad_norm": 2.6576924324035645, "learning_rate": 0.0002, "loss": 1.5934, "step": 193290 }, { "epoch": 0.79, "grad_norm": 3.1491122245788574, "learning_rate": 0.0002, "loss": 1.5607, "step": 193300 }, { "epoch": 0.79, "grad_norm": 3.8011653423309326, "learning_rate": 0.0002, "loss": 1.7621, "step": 193310 }, { "epoch": 0.79, "grad_norm": 2.6745481491088867, "learning_rate": 0.0002, "loss": 1.7439, "step": 193320 }, { "epoch": 0.79, "grad_norm": 4.384544372558594, "learning_rate": 0.0002, "loss": 1.5624, "step": 193330 }, { "epoch": 0.79, "grad_norm": 1.6843761205673218, "learning_rate": 0.0002, "loss": 1.6899, "step": 193340 }, { "epoch": 0.79, "grad_norm": 3.4644124507904053, "learning_rate": 0.0002, "loss": 1.5185, "step": 193350 }, { "epoch": 0.79, "grad_norm": 2.9452552795410156, "learning_rate": 0.0002, "loss": 1.4668, "step": 193360 }, { "epoch": 0.79, "grad_norm": 2.3280091285705566, "learning_rate": 0.0002, "loss": 1.4372, "step": 193370 }, { "epoch": 0.79, "grad_norm": 2.876523017883301, "learning_rate": 0.0002, "loss": 1.6317, "step": 193380 }, { "epoch": 0.79, "grad_norm": 3.8835299015045166, "learning_rate": 0.0002, "loss": 1.3999, "step": 193390 }, { "epoch": 0.79, "grad_norm": 2.329850196838379, "learning_rate": 0.0002, "loss": 1.5896, "step": 193400 }, { "epoch": 0.79, "grad_norm": 5.462871074676514, "learning_rate": 0.0002, "loss": 1.6562, "step": 193410 }, { "epoch": 0.79, "grad_norm": 4.139917850494385, "learning_rate": 0.0002, "loss": 1.6265, "step": 193420 }, { "epoch": 0.79, "grad_norm": 3.1642496585845947, "learning_rate": 0.0002, "loss": 1.4886, "step": 193430 }, { "epoch": 0.79, "grad_norm": 1.9416062831878662, "learning_rate": 0.0002, "loss": 1.6023, "step": 193440 }, { "epoch": 0.79, "grad_norm": 3.4037699699401855, "learning_rate": 0.0002, "loss": 1.3174, "step": 193450 }, { "epoch": 0.79, "grad_norm": 4.612514495849609, "learning_rate": 0.0002, "loss": 1.5004, "step": 193460 }, { "epoch": 0.79, "grad_norm": 2.3328118324279785, "learning_rate": 0.0002, "loss": 1.5749, "step": 193470 }, { "epoch": 0.79, "grad_norm": 3.8618087768554688, "learning_rate": 0.0002, "loss": 1.6652, "step": 193480 }, { "epoch": 0.79, "grad_norm": 3.6194260120391846, "learning_rate": 0.0002, "loss": 1.6537, "step": 193490 }, { "epoch": 0.79, "grad_norm": 3.354611873626709, "learning_rate": 0.0002, "loss": 1.2507, "step": 193500 }, { "epoch": 0.79, "grad_norm": 3.639082908630371, "learning_rate": 0.0002, "loss": 1.7091, "step": 193510 }, { "epoch": 0.79, "grad_norm": 3.1248326301574707, "learning_rate": 0.0002, "loss": 1.7535, "step": 193520 }, { "epoch": 0.79, "grad_norm": 2.5702285766601562, "learning_rate": 0.0002, "loss": 1.7158, "step": 193530 }, { "epoch": 0.79, "grad_norm": 1.952742099761963, "learning_rate": 0.0002, "loss": 1.5144, "step": 193540 }, { "epoch": 0.79, "grad_norm": 2.637998342514038, "learning_rate": 0.0002, "loss": 1.4922, "step": 193550 }, { "epoch": 0.79, "grad_norm": 2.02756667137146, "learning_rate": 0.0002, "loss": 1.4722, "step": 193560 }, { "epoch": 0.79, "grad_norm": 4.139317035675049, "learning_rate": 0.0002, "loss": 1.6734, "step": 193570 }, { "epoch": 0.79, "grad_norm": 3.4609711170196533, "learning_rate": 0.0002, "loss": 1.8346, "step": 193580 }, { "epoch": 0.79, "grad_norm": 4.391393661499023, "learning_rate": 0.0002, "loss": 1.5947, "step": 193590 }, { "epoch": 0.79, "grad_norm": 4.256625175476074, "learning_rate": 0.0002, "loss": 1.6705, "step": 193600 }, { "epoch": 0.79, "grad_norm": 2.8613839149475098, "learning_rate": 0.0002, "loss": 1.9347, "step": 193610 }, { "epoch": 0.79, "grad_norm": 4.157734394073486, "learning_rate": 0.0002, "loss": 1.7599, "step": 193620 }, { "epoch": 0.79, "grad_norm": 3.805802822113037, "learning_rate": 0.0002, "loss": 1.5986, "step": 193630 }, { "epoch": 0.79, "grad_norm": 2.7791759967803955, "learning_rate": 0.0002, "loss": 1.5394, "step": 193640 }, { "epoch": 0.79, "grad_norm": 15.171465873718262, "learning_rate": 0.0002, "loss": 1.6039, "step": 193650 }, { "epoch": 0.79, "grad_norm": 10.092479705810547, "learning_rate": 0.0002, "loss": 1.5768, "step": 193660 }, { "epoch": 0.79, "grad_norm": 3.552522897720337, "learning_rate": 0.0002, "loss": 1.5162, "step": 193670 }, { "epoch": 0.79, "grad_norm": 10.215209007263184, "learning_rate": 0.0002, "loss": 1.4458, "step": 193680 }, { "epoch": 0.79, "grad_norm": 3.304447889328003, "learning_rate": 0.0002, "loss": 1.7073, "step": 193690 }, { "epoch": 0.79, "grad_norm": 5.1758317947387695, "learning_rate": 0.0002, "loss": 1.5056, "step": 193700 }, { "epoch": 0.79, "grad_norm": 2.9950344562530518, "learning_rate": 0.0002, "loss": 1.8587, "step": 193710 }, { "epoch": 0.79, "grad_norm": 3.3001508712768555, "learning_rate": 0.0002, "loss": 1.4212, "step": 193720 }, { "epoch": 0.79, "grad_norm": 3.951115846633911, "learning_rate": 0.0002, "loss": 1.5221, "step": 193730 }, { "epoch": 0.79, "grad_norm": 3.2459359169006348, "learning_rate": 0.0002, "loss": 1.6552, "step": 193740 }, { "epoch": 0.79, "grad_norm": 2.5196373462677, "learning_rate": 0.0002, "loss": 1.5533, "step": 193750 }, { "epoch": 0.79, "grad_norm": 3.1192827224731445, "learning_rate": 0.0002, "loss": 1.4122, "step": 193760 }, { "epoch": 0.79, "grad_norm": 2.0766992568969727, "learning_rate": 0.0002, "loss": 1.9538, "step": 193770 }, { "epoch": 0.79, "grad_norm": 5.191832542419434, "learning_rate": 0.0002, "loss": 1.7908, "step": 193780 }, { "epoch": 0.79, "grad_norm": 2.1421782970428467, "learning_rate": 0.0002, "loss": 1.6206, "step": 193790 }, { "epoch": 0.79, "grad_norm": 3.6347267627716064, "learning_rate": 0.0002, "loss": 1.5326, "step": 193800 }, { "epoch": 0.79, "grad_norm": 2.340388774871826, "learning_rate": 0.0002, "loss": 1.6056, "step": 193810 }, { "epoch": 0.79, "grad_norm": 2.630034923553467, "learning_rate": 0.0002, "loss": 1.7362, "step": 193820 }, { "epoch": 0.79, "grad_norm": 2.140617609024048, "learning_rate": 0.0002, "loss": 1.5764, "step": 193830 }, { "epoch": 0.79, "grad_norm": 2.393742561340332, "learning_rate": 0.0002, "loss": 1.7646, "step": 193840 }, { "epoch": 0.79, "grad_norm": 2.118962049484253, "learning_rate": 0.0002, "loss": 1.4964, "step": 193850 }, { "epoch": 0.79, "grad_norm": 4.3051323890686035, "learning_rate": 0.0002, "loss": 1.5481, "step": 193860 }, { "epoch": 0.79, "grad_norm": 2.3214964866638184, "learning_rate": 0.0002, "loss": 1.4581, "step": 193870 }, { "epoch": 0.79, "grad_norm": 2.3835208415985107, "learning_rate": 0.0002, "loss": 1.4872, "step": 193880 }, { "epoch": 0.79, "grad_norm": 2.598381996154785, "learning_rate": 0.0002, "loss": 1.5525, "step": 193890 }, { "epoch": 0.79, "grad_norm": 1.8727154731750488, "learning_rate": 0.0002, "loss": 1.4813, "step": 193900 }, { "epoch": 0.79, "grad_norm": 3.2119967937469482, "learning_rate": 0.0002, "loss": 1.4551, "step": 193910 }, { "epoch": 0.79, "grad_norm": 3.3954312801361084, "learning_rate": 0.0002, "loss": 1.6349, "step": 193920 }, { "epoch": 0.79, "grad_norm": 3.636685371398926, "learning_rate": 0.0002, "loss": 1.6005, "step": 193930 }, { "epoch": 0.79, "grad_norm": 3.4493887424468994, "learning_rate": 0.0002, "loss": 1.8645, "step": 193940 }, { "epoch": 0.79, "grad_norm": 1.7979930639266968, "learning_rate": 0.0002, "loss": 1.7483, "step": 193950 }, { "epoch": 0.79, "grad_norm": 3.0946524143218994, "learning_rate": 0.0002, "loss": 1.6163, "step": 193960 }, { "epoch": 0.79, "grad_norm": 2.5376555919647217, "learning_rate": 0.0002, "loss": 1.4069, "step": 193970 }, { "epoch": 0.79, "grad_norm": 5.65814733505249, "learning_rate": 0.0002, "loss": 1.7468, "step": 193980 }, { "epoch": 0.79, "grad_norm": 2.255197763442993, "learning_rate": 0.0002, "loss": 1.1185, "step": 193990 }, { "epoch": 0.79, "grad_norm": 3.7978427410125732, "learning_rate": 0.0002, "loss": 1.4644, "step": 194000 }, { "epoch": 0.79, "grad_norm": 3.986677408218384, "learning_rate": 0.0002, "loss": 1.4015, "step": 194010 }, { "epoch": 0.79, "grad_norm": 2.1534647941589355, "learning_rate": 0.0002, "loss": 1.6486, "step": 194020 }, { "epoch": 0.79, "grad_norm": 2.6716978549957275, "learning_rate": 0.0002, "loss": 1.5263, "step": 194030 }, { "epoch": 0.79, "grad_norm": 2.7715837955474854, "learning_rate": 0.0002, "loss": 1.5822, "step": 194040 }, { "epoch": 0.79, "grad_norm": 2.8272697925567627, "learning_rate": 0.0002, "loss": 1.5995, "step": 194050 }, { "epoch": 0.79, "grad_norm": 2.224867820739746, "learning_rate": 0.0002, "loss": 1.6163, "step": 194060 }, { "epoch": 0.79, "grad_norm": 2.981027364730835, "learning_rate": 0.0002, "loss": 1.803, "step": 194070 }, { "epoch": 0.79, "grad_norm": 2.542642593383789, "learning_rate": 0.0002, "loss": 1.5117, "step": 194080 }, { "epoch": 0.79, "grad_norm": 1.584719181060791, "learning_rate": 0.0002, "loss": 1.6281, "step": 194090 }, { "epoch": 0.79, "grad_norm": 1.5663992166519165, "learning_rate": 0.0002, "loss": 1.3251, "step": 194100 }, { "epoch": 0.79, "grad_norm": 2.7871079444885254, "learning_rate": 0.0002, "loss": 1.5327, "step": 194110 }, { "epoch": 0.79, "grad_norm": 2.9104299545288086, "learning_rate": 0.0002, "loss": 1.7329, "step": 194120 }, { "epoch": 0.79, "grad_norm": 3.5985872745513916, "learning_rate": 0.0002, "loss": 1.8148, "step": 194130 }, { "epoch": 0.79, "grad_norm": 4.081465721130371, "learning_rate": 0.0002, "loss": 1.5646, "step": 194140 }, { "epoch": 0.79, "grad_norm": 3.09430193901062, "learning_rate": 0.0002, "loss": 1.5734, "step": 194150 }, { "epoch": 0.79, "grad_norm": 2.9583911895751953, "learning_rate": 0.0002, "loss": 1.5301, "step": 194160 }, { "epoch": 0.79, "grad_norm": 2.611931324005127, "learning_rate": 0.0002, "loss": 1.7177, "step": 194170 }, { "epoch": 0.79, "grad_norm": 3.2719833850860596, "learning_rate": 0.0002, "loss": 1.4862, "step": 194180 }, { "epoch": 0.79, "grad_norm": 2.4327268600463867, "learning_rate": 0.0002, "loss": 1.4351, "step": 194190 }, { "epoch": 0.79, "grad_norm": 1.8528467416763306, "learning_rate": 0.0002, "loss": 1.4802, "step": 194200 }, { "epoch": 0.79, "grad_norm": 2.902186632156372, "learning_rate": 0.0002, "loss": 1.4253, "step": 194210 }, { "epoch": 0.79, "grad_norm": 2.445117950439453, "learning_rate": 0.0002, "loss": 1.6947, "step": 194220 }, { "epoch": 0.79, "grad_norm": 2.2893338203430176, "learning_rate": 0.0002, "loss": 1.7484, "step": 194230 }, { "epoch": 0.79, "grad_norm": 3.09061861038208, "learning_rate": 0.0002, "loss": 1.6972, "step": 194240 }, { "epoch": 0.79, "grad_norm": 2.885931968688965, "learning_rate": 0.0002, "loss": 1.4395, "step": 194250 }, { "epoch": 0.79, "grad_norm": 3.2289862632751465, "learning_rate": 0.0002, "loss": 1.5639, "step": 194260 }, { "epoch": 0.79, "grad_norm": 5.174649715423584, "learning_rate": 0.0002, "loss": 1.7093, "step": 194270 }, { "epoch": 0.79, "grad_norm": 3.330134630203247, "learning_rate": 0.0002, "loss": 1.5015, "step": 194280 }, { "epoch": 0.79, "grad_norm": 3.327442169189453, "learning_rate": 0.0002, "loss": 1.7291, "step": 194290 }, { "epoch": 0.79, "grad_norm": 2.660087823867798, "learning_rate": 0.0002, "loss": 1.7861, "step": 194300 }, { "epoch": 0.79, "grad_norm": 1.8602302074432373, "learning_rate": 0.0002, "loss": 1.508, "step": 194310 }, { "epoch": 0.79, "grad_norm": 5.457831382751465, "learning_rate": 0.0002, "loss": 1.6102, "step": 194320 }, { "epoch": 0.79, "grad_norm": 2.989315986633301, "learning_rate": 0.0002, "loss": 1.6236, "step": 194330 }, { "epoch": 0.79, "grad_norm": 5.108485221862793, "learning_rate": 0.0002, "loss": 1.7798, "step": 194340 }, { "epoch": 0.79, "grad_norm": 4.016399383544922, "learning_rate": 0.0002, "loss": 1.5873, "step": 194350 }, { "epoch": 0.79, "grad_norm": 3.5569920539855957, "learning_rate": 0.0002, "loss": 1.6964, "step": 194360 }, { "epoch": 0.79, "grad_norm": 2.18025803565979, "learning_rate": 0.0002, "loss": 1.5526, "step": 194370 }, { "epoch": 0.79, "grad_norm": 2.3484268188476562, "learning_rate": 0.0002, "loss": 1.5257, "step": 194380 }, { "epoch": 0.79, "grad_norm": 3.31992244720459, "learning_rate": 0.0002, "loss": 1.6088, "step": 194390 }, { "epoch": 0.79, "grad_norm": 1.835891604423523, "learning_rate": 0.0002, "loss": 1.3876, "step": 194400 }, { "epoch": 0.79, "grad_norm": 2.1732168197631836, "learning_rate": 0.0002, "loss": 1.4361, "step": 194410 }, { "epoch": 0.79, "grad_norm": 8.67979907989502, "learning_rate": 0.0002, "loss": 1.506, "step": 194420 }, { "epoch": 0.79, "grad_norm": 3.5251760482788086, "learning_rate": 0.0002, "loss": 1.5517, "step": 194430 }, { "epoch": 0.79, "grad_norm": 2.4646425247192383, "learning_rate": 0.0002, "loss": 1.6224, "step": 194440 }, { "epoch": 0.79, "grad_norm": 3.4753735065460205, "learning_rate": 0.0002, "loss": 1.676, "step": 194450 }, { "epoch": 0.79, "grad_norm": 3.4484407901763916, "learning_rate": 0.0002, "loss": 1.7004, "step": 194460 }, { "epoch": 0.79, "grad_norm": 3.286301374435425, "learning_rate": 0.0002, "loss": 1.5413, "step": 194470 }, { "epoch": 0.79, "grad_norm": 2.924382448196411, "learning_rate": 0.0002, "loss": 1.4646, "step": 194480 }, { "epoch": 0.79, "grad_norm": 4.357697010040283, "learning_rate": 0.0002, "loss": 1.4042, "step": 194490 }, { "epoch": 0.79, "grad_norm": 3.1881375312805176, "learning_rate": 0.0002, "loss": 1.6103, "step": 194500 }, { "epoch": 0.79, "grad_norm": 2.478759765625, "learning_rate": 0.0002, "loss": 1.6003, "step": 194510 }, { "epoch": 0.79, "grad_norm": 2.895768404006958, "learning_rate": 0.0002, "loss": 1.4733, "step": 194520 }, { "epoch": 0.79, "grad_norm": 6.36017370223999, "learning_rate": 0.0002, "loss": 1.5079, "step": 194530 }, { "epoch": 0.79, "grad_norm": 4.4071502685546875, "learning_rate": 0.0002, "loss": 1.5272, "step": 194540 }, { "epoch": 0.79, "grad_norm": 5.148589611053467, "learning_rate": 0.0002, "loss": 1.4034, "step": 194550 }, { "epoch": 0.79, "grad_norm": 4.082414150238037, "learning_rate": 0.0002, "loss": 1.4493, "step": 194560 }, { "epoch": 0.79, "grad_norm": 2.499608278274536, "learning_rate": 0.0002, "loss": 1.6694, "step": 194570 }, { "epoch": 0.79, "grad_norm": 2.581511974334717, "learning_rate": 0.0002, "loss": 1.9264, "step": 194580 }, { "epoch": 0.79, "grad_norm": 5.509479999542236, "learning_rate": 0.0002, "loss": 1.4153, "step": 194590 }, { "epoch": 0.79, "grad_norm": 1.2281192541122437, "learning_rate": 0.0002, "loss": 1.6005, "step": 194600 }, { "epoch": 0.79, "grad_norm": 4.671163558959961, "learning_rate": 0.0002, "loss": 1.2181, "step": 194610 }, { "epoch": 0.79, "grad_norm": 2.6821608543395996, "learning_rate": 0.0002, "loss": 1.6384, "step": 194620 }, { "epoch": 0.79, "grad_norm": 2.050997495651245, "learning_rate": 0.0002, "loss": 1.6395, "step": 194630 }, { "epoch": 0.79, "grad_norm": 2.3114876747131348, "learning_rate": 0.0002, "loss": 1.7827, "step": 194640 }, { "epoch": 0.79, "grad_norm": 2.2060370445251465, "learning_rate": 0.0002, "loss": 1.752, "step": 194650 }, { "epoch": 0.79, "grad_norm": 1.7908557653427124, "learning_rate": 0.0002, "loss": 1.3493, "step": 194660 }, { "epoch": 0.79, "grad_norm": 3.3543894290924072, "learning_rate": 0.0002, "loss": 1.7338, "step": 194670 }, { "epoch": 0.79, "grad_norm": 2.9860024452209473, "learning_rate": 0.0002, "loss": 1.4836, "step": 194680 }, { "epoch": 0.79, "grad_norm": 3.2863776683807373, "learning_rate": 0.0002, "loss": 1.7533, "step": 194690 }, { "epoch": 0.79, "grad_norm": 5.480184078216553, "learning_rate": 0.0002, "loss": 1.5291, "step": 194700 }, { "epoch": 0.79, "grad_norm": 3.9853663444519043, "learning_rate": 0.0002, "loss": 1.5114, "step": 194710 }, { "epoch": 0.79, "grad_norm": 2.3801679611206055, "learning_rate": 0.0002, "loss": 1.4289, "step": 194720 }, { "epoch": 0.79, "grad_norm": 2.4870307445526123, "learning_rate": 0.0002, "loss": 1.6527, "step": 194730 }, { "epoch": 0.79, "grad_norm": 2.5084316730499268, "learning_rate": 0.0002, "loss": 1.7025, "step": 194740 }, { "epoch": 0.79, "grad_norm": 2.4062914848327637, "learning_rate": 0.0002, "loss": 1.369, "step": 194750 }, { "epoch": 0.79, "grad_norm": 4.15812349319458, "learning_rate": 0.0002, "loss": 1.5621, "step": 194760 }, { "epoch": 0.79, "grad_norm": 4.26974630355835, "learning_rate": 0.0002, "loss": 1.6586, "step": 194770 }, { "epoch": 0.79, "grad_norm": 3.9306254386901855, "learning_rate": 0.0002, "loss": 1.3015, "step": 194780 }, { "epoch": 0.79, "grad_norm": 3.3021483421325684, "learning_rate": 0.0002, "loss": 1.7688, "step": 194790 }, { "epoch": 0.79, "grad_norm": 3.3711791038513184, "learning_rate": 0.0002, "loss": 1.5882, "step": 194800 }, { "epoch": 0.79, "grad_norm": 3.9550063610076904, "learning_rate": 0.0002, "loss": 1.509, "step": 194810 }, { "epoch": 0.79, "grad_norm": 2.764958381652832, "learning_rate": 0.0002, "loss": 1.5408, "step": 194820 }, { "epoch": 0.79, "grad_norm": 2.429542303085327, "learning_rate": 0.0002, "loss": 1.7253, "step": 194830 }, { "epoch": 0.79, "grad_norm": 5.558993339538574, "learning_rate": 0.0002, "loss": 1.4509, "step": 194840 }, { "epoch": 0.79, "grad_norm": 2.666504144668579, "learning_rate": 0.0002, "loss": 1.9077, "step": 194850 }, { "epoch": 0.79, "grad_norm": 3.2189249992370605, "learning_rate": 0.0002, "loss": 1.4651, "step": 194860 }, { "epoch": 0.79, "grad_norm": 3.0281472206115723, "learning_rate": 0.0002, "loss": 1.6697, "step": 194870 }, { "epoch": 0.79, "grad_norm": 3.016336679458618, "learning_rate": 0.0002, "loss": 1.696, "step": 194880 }, { "epoch": 0.79, "grad_norm": 4.346741676330566, "learning_rate": 0.0002, "loss": 1.7632, "step": 194890 }, { "epoch": 0.79, "grad_norm": 2.8682122230529785, "learning_rate": 0.0002, "loss": 1.73, "step": 194900 }, { "epoch": 0.79, "grad_norm": 3.548185348510742, "learning_rate": 0.0002, "loss": 1.6268, "step": 194910 }, { "epoch": 0.79, "grad_norm": 3.0044443607330322, "learning_rate": 0.0002, "loss": 1.4735, "step": 194920 }, { "epoch": 0.79, "grad_norm": 3.71539044380188, "learning_rate": 0.0002, "loss": 1.6613, "step": 194930 }, { "epoch": 0.79, "grad_norm": 6.336026191711426, "learning_rate": 0.0002, "loss": 1.5518, "step": 194940 }, { "epoch": 0.79, "grad_norm": 2.145559549331665, "learning_rate": 0.0002, "loss": 1.4949, "step": 194950 }, { "epoch": 0.79, "grad_norm": 2.674070119857788, "learning_rate": 0.0002, "loss": 1.7901, "step": 194960 }, { "epoch": 0.79, "grad_norm": 2.7061071395874023, "learning_rate": 0.0002, "loss": 1.6763, "step": 194970 }, { "epoch": 0.79, "grad_norm": 3.4552149772644043, "learning_rate": 0.0002, "loss": 1.7028, "step": 194980 }, { "epoch": 0.79, "grad_norm": 2.8480615615844727, "learning_rate": 0.0002, "loss": 1.6913, "step": 194990 }, { "epoch": 0.79, "grad_norm": 3.124962329864502, "learning_rate": 0.0002, "loss": 1.3978, "step": 195000 }, { "epoch": 0.79, "grad_norm": 2.4883928298950195, "learning_rate": 0.0002, "loss": 1.4539, "step": 195010 }, { "epoch": 0.79, "grad_norm": 4.021456241607666, "learning_rate": 0.0002, "loss": 1.5965, "step": 195020 }, { "epoch": 0.79, "grad_norm": 2.2947299480438232, "learning_rate": 0.0002, "loss": 1.5822, "step": 195030 }, { "epoch": 0.79, "grad_norm": 2.815419912338257, "learning_rate": 0.0002, "loss": 1.4589, "step": 195040 }, { "epoch": 0.79, "grad_norm": 2.6250855922698975, "learning_rate": 0.0002, "loss": 1.5898, "step": 195050 }, { "epoch": 0.79, "grad_norm": 1.6690043210983276, "learning_rate": 0.0002, "loss": 1.5632, "step": 195060 }, { "epoch": 0.79, "grad_norm": 1.4310628175735474, "learning_rate": 0.0002, "loss": 1.3649, "step": 195070 }, { "epoch": 0.79, "grad_norm": 3.669257402420044, "learning_rate": 0.0002, "loss": 1.5387, "step": 195080 }, { "epoch": 0.79, "grad_norm": 2.7064099311828613, "learning_rate": 0.0002, "loss": 1.3945, "step": 195090 }, { "epoch": 0.79, "grad_norm": 3.751249074935913, "learning_rate": 0.0002, "loss": 1.4818, "step": 195100 }, { "epoch": 0.79, "grad_norm": 3.457559823989868, "learning_rate": 0.0002, "loss": 1.4916, "step": 195110 }, { "epoch": 0.79, "grad_norm": 3.629772901535034, "learning_rate": 0.0002, "loss": 1.7315, "step": 195120 }, { "epoch": 0.79, "grad_norm": 2.814653158187866, "learning_rate": 0.0002, "loss": 1.6347, "step": 195130 }, { "epoch": 0.79, "grad_norm": 2.7857553958892822, "learning_rate": 0.0002, "loss": 1.4982, "step": 195140 }, { "epoch": 0.79, "grad_norm": 2.3727424144744873, "learning_rate": 0.0002, "loss": 1.6917, "step": 195150 }, { "epoch": 0.79, "grad_norm": 2.778949499130249, "learning_rate": 0.0002, "loss": 1.4853, "step": 195160 }, { "epoch": 0.79, "grad_norm": 3.0118420124053955, "learning_rate": 0.0002, "loss": 1.4515, "step": 195170 }, { "epoch": 0.79, "grad_norm": 2.765420436859131, "learning_rate": 0.0002, "loss": 1.5698, "step": 195180 }, { "epoch": 0.79, "grad_norm": 3.073397397994995, "learning_rate": 0.0002, "loss": 1.9943, "step": 195190 }, { "epoch": 0.79, "grad_norm": 2.4123446941375732, "learning_rate": 0.0002, "loss": 1.853, "step": 195200 }, { "epoch": 0.79, "grad_norm": 3.6391944885253906, "learning_rate": 0.0002, "loss": 1.6484, "step": 195210 }, { "epoch": 0.79, "grad_norm": 4.305320739746094, "learning_rate": 0.0002, "loss": 1.763, "step": 195220 }, { "epoch": 0.79, "grad_norm": 3.4425830841064453, "learning_rate": 0.0002, "loss": 1.7983, "step": 195230 }, { "epoch": 0.79, "grad_norm": 2.985375165939331, "learning_rate": 0.0002, "loss": 1.7543, "step": 195240 }, { "epoch": 0.79, "grad_norm": 2.8021204471588135, "learning_rate": 0.0002, "loss": 1.6159, "step": 195250 }, { "epoch": 0.79, "grad_norm": 3.611137866973877, "learning_rate": 0.0002, "loss": 1.5354, "step": 195260 }, { "epoch": 0.79, "grad_norm": 2.395129442214966, "learning_rate": 0.0002, "loss": 1.3378, "step": 195270 }, { "epoch": 0.79, "grad_norm": 3.381253242492676, "learning_rate": 0.0002, "loss": 1.3956, "step": 195280 }, { "epoch": 0.8, "grad_norm": 2.506819009780884, "learning_rate": 0.0002, "loss": 1.6692, "step": 195290 }, { "epoch": 0.8, "grad_norm": 2.935398817062378, "learning_rate": 0.0002, "loss": 1.5318, "step": 195300 }, { "epoch": 0.8, "grad_norm": 3.0529186725616455, "learning_rate": 0.0002, "loss": 1.6879, "step": 195310 }, { "epoch": 0.8, "grad_norm": 2.3608291149139404, "learning_rate": 0.0002, "loss": 1.7836, "step": 195320 }, { "epoch": 0.8, "grad_norm": 1.7244266271591187, "learning_rate": 0.0002, "loss": 1.4909, "step": 195330 }, { "epoch": 0.8, "grad_norm": 3.3372273445129395, "learning_rate": 0.0002, "loss": 1.3102, "step": 195340 }, { "epoch": 0.8, "grad_norm": 2.84023118019104, "learning_rate": 0.0002, "loss": 1.5149, "step": 195350 }, { "epoch": 0.8, "grad_norm": 3.1973371505737305, "learning_rate": 0.0002, "loss": 1.7482, "step": 195360 }, { "epoch": 0.8, "grad_norm": 1.7524974346160889, "learning_rate": 0.0002, "loss": 1.7326, "step": 195370 }, { "epoch": 0.8, "grad_norm": 3.1700940132141113, "learning_rate": 0.0002, "loss": 1.6943, "step": 195380 }, { "epoch": 0.8, "grad_norm": 2.1766676902770996, "learning_rate": 0.0002, "loss": 1.6637, "step": 195390 }, { "epoch": 0.8, "grad_norm": 2.9594876766204834, "learning_rate": 0.0002, "loss": 1.5334, "step": 195400 }, { "epoch": 0.8, "grad_norm": 3.2276604175567627, "learning_rate": 0.0002, "loss": 1.6117, "step": 195410 }, { "epoch": 0.8, "grad_norm": 4.896964073181152, "learning_rate": 0.0002, "loss": 1.5498, "step": 195420 }, { "epoch": 0.8, "grad_norm": 2.164212465286255, "learning_rate": 0.0002, "loss": 1.6292, "step": 195430 }, { "epoch": 0.8, "grad_norm": 2.0400657653808594, "learning_rate": 0.0002, "loss": 1.5728, "step": 195440 }, { "epoch": 0.8, "grad_norm": 3.1586921215057373, "learning_rate": 0.0002, "loss": 1.7529, "step": 195450 }, { "epoch": 0.8, "grad_norm": 3.170656442642212, "learning_rate": 0.0002, "loss": 1.6322, "step": 195460 }, { "epoch": 0.8, "grad_norm": 6.8489603996276855, "learning_rate": 0.0002, "loss": 1.6456, "step": 195470 }, { "epoch": 0.8, "grad_norm": 2.491391658782959, "learning_rate": 0.0002, "loss": 1.5361, "step": 195480 }, { "epoch": 0.8, "grad_norm": 2.8676645755767822, "learning_rate": 0.0002, "loss": 1.6932, "step": 195490 }, { "epoch": 0.8, "grad_norm": 1.7158292531967163, "learning_rate": 0.0002, "loss": 1.3597, "step": 195500 }, { "epoch": 0.8, "grad_norm": 2.097590923309326, "learning_rate": 0.0002, "loss": 1.6572, "step": 195510 }, { "epoch": 0.8, "grad_norm": 4.236222743988037, "learning_rate": 0.0002, "loss": 1.3914, "step": 195520 }, { "epoch": 0.8, "grad_norm": 3.454392194747925, "learning_rate": 0.0002, "loss": 1.476, "step": 195530 }, { "epoch": 0.8, "grad_norm": 2.6532998085021973, "learning_rate": 0.0002, "loss": 1.5976, "step": 195540 }, { "epoch": 0.8, "grad_norm": 2.4617483615875244, "learning_rate": 0.0002, "loss": 1.4234, "step": 195550 }, { "epoch": 0.8, "grad_norm": 5.819428443908691, "learning_rate": 0.0002, "loss": 1.6133, "step": 195560 }, { "epoch": 0.8, "grad_norm": 3.2801246643066406, "learning_rate": 0.0002, "loss": 1.6808, "step": 195570 }, { "epoch": 0.8, "grad_norm": 2.6998307704925537, "learning_rate": 0.0002, "loss": 1.6761, "step": 195580 }, { "epoch": 0.8, "grad_norm": 2.5116465091705322, "learning_rate": 0.0002, "loss": 1.6816, "step": 195590 }, { "epoch": 0.8, "grad_norm": 1.606163501739502, "learning_rate": 0.0002, "loss": 1.4922, "step": 195600 }, { "epoch": 0.8, "grad_norm": 3.5801734924316406, "learning_rate": 0.0002, "loss": 1.5697, "step": 195610 }, { "epoch": 0.8, "grad_norm": 3.8326292037963867, "learning_rate": 0.0002, "loss": 1.6595, "step": 195620 }, { "epoch": 0.8, "grad_norm": 4.146578788757324, "learning_rate": 0.0002, "loss": 1.401, "step": 195630 }, { "epoch": 0.8, "grad_norm": 2.710939645767212, "learning_rate": 0.0002, "loss": 1.4783, "step": 195640 }, { "epoch": 0.8, "grad_norm": 5.788120269775391, "learning_rate": 0.0002, "loss": 1.6012, "step": 195650 }, { "epoch": 0.8, "grad_norm": 3.0710134506225586, "learning_rate": 0.0002, "loss": 1.776, "step": 195660 }, { "epoch": 0.8, "grad_norm": 4.396709442138672, "learning_rate": 0.0002, "loss": 1.5831, "step": 195670 }, { "epoch": 0.8, "grad_norm": 2.5204432010650635, "learning_rate": 0.0002, "loss": 1.694, "step": 195680 }, { "epoch": 0.8, "grad_norm": 1.8842573165893555, "learning_rate": 0.0002, "loss": 1.5197, "step": 195690 }, { "epoch": 0.8, "grad_norm": 2.1378073692321777, "learning_rate": 0.0002, "loss": 1.942, "step": 195700 }, { "epoch": 0.8, "grad_norm": 2.734562635421753, "learning_rate": 0.0002, "loss": 1.6692, "step": 195710 }, { "epoch": 0.8, "grad_norm": 2.584613084793091, "learning_rate": 0.0002, "loss": 1.6878, "step": 195720 }, { "epoch": 0.8, "grad_norm": 3.1319594383239746, "learning_rate": 0.0002, "loss": 1.2979, "step": 195730 }, { "epoch": 0.8, "grad_norm": 2.639394998550415, "learning_rate": 0.0002, "loss": 1.6378, "step": 195740 }, { "epoch": 0.8, "grad_norm": 2.5411930084228516, "learning_rate": 0.0002, "loss": 1.4919, "step": 195750 }, { "epoch": 0.8, "grad_norm": 2.2841763496398926, "learning_rate": 0.0002, "loss": 1.6383, "step": 195760 }, { "epoch": 0.8, "grad_norm": 3.4868316650390625, "learning_rate": 0.0002, "loss": 1.4315, "step": 195770 }, { "epoch": 0.8, "grad_norm": 3.352198600769043, "learning_rate": 0.0002, "loss": 1.2727, "step": 195780 }, { "epoch": 0.8, "grad_norm": 2.1265623569488525, "learning_rate": 0.0002, "loss": 1.7077, "step": 195790 }, { "epoch": 0.8, "grad_norm": 3.646430253982544, "learning_rate": 0.0002, "loss": 1.6632, "step": 195800 }, { "epoch": 0.8, "grad_norm": 1.455028772354126, "learning_rate": 0.0002, "loss": 1.6975, "step": 195810 }, { "epoch": 0.8, "grad_norm": 3.4807989597320557, "learning_rate": 0.0002, "loss": 1.8541, "step": 195820 }, { "epoch": 0.8, "grad_norm": 4.311765670776367, "learning_rate": 0.0002, "loss": 1.5798, "step": 195830 }, { "epoch": 0.8, "grad_norm": 4.153775691986084, "learning_rate": 0.0002, "loss": 1.5866, "step": 195840 }, { "epoch": 0.8, "grad_norm": 3.492673873901367, "learning_rate": 0.0002, "loss": 1.639, "step": 195850 }, { "epoch": 0.8, "grad_norm": 2.003251791000366, "learning_rate": 0.0002, "loss": 1.3921, "step": 195860 }, { "epoch": 0.8, "grad_norm": 1.3245534896850586, "learning_rate": 0.0002, "loss": 1.657, "step": 195870 }, { "epoch": 0.8, "grad_norm": 3.2977747917175293, "learning_rate": 0.0002, "loss": 1.4318, "step": 195880 }, { "epoch": 0.8, "grad_norm": 2.2895803451538086, "learning_rate": 0.0002, "loss": 1.4363, "step": 195890 }, { "epoch": 0.8, "grad_norm": 2.176637649536133, "learning_rate": 0.0002, "loss": 1.6559, "step": 195900 }, { "epoch": 0.8, "grad_norm": 6.794626712799072, "learning_rate": 0.0002, "loss": 1.5787, "step": 195910 }, { "epoch": 0.8, "grad_norm": 2.0575151443481445, "learning_rate": 0.0002, "loss": 1.4844, "step": 195920 }, { "epoch": 0.8, "grad_norm": 3.89443039894104, "learning_rate": 0.0002, "loss": 1.5013, "step": 195930 }, { "epoch": 0.8, "grad_norm": 2.39149808883667, "learning_rate": 0.0002, "loss": 1.5642, "step": 195940 }, { "epoch": 0.8, "grad_norm": 4.722901821136475, "learning_rate": 0.0002, "loss": 1.6147, "step": 195950 }, { "epoch": 0.8, "grad_norm": 1.8713723421096802, "learning_rate": 0.0002, "loss": 1.6767, "step": 195960 }, { "epoch": 0.8, "grad_norm": 2.025360584259033, "learning_rate": 0.0002, "loss": 1.4285, "step": 195970 }, { "epoch": 0.8, "grad_norm": 3.6574277877807617, "learning_rate": 0.0002, "loss": 1.5799, "step": 195980 }, { "epoch": 0.8, "grad_norm": 3.899521827697754, "learning_rate": 0.0002, "loss": 1.4039, "step": 195990 }, { "epoch": 0.8, "grad_norm": 3.0190088748931885, "learning_rate": 0.0002, "loss": 1.4363, "step": 196000 }, { "epoch": 0.8, "grad_norm": 3.8379766941070557, "learning_rate": 0.0002, "loss": 1.4576, "step": 196010 }, { "epoch": 0.8, "grad_norm": 2.8190317153930664, "learning_rate": 0.0002, "loss": 1.7369, "step": 196020 }, { "epoch": 0.8, "grad_norm": 1.9342221021652222, "learning_rate": 0.0002, "loss": 1.5947, "step": 196030 }, { "epoch": 0.8, "grad_norm": 4.5480475425720215, "learning_rate": 0.0002, "loss": 1.5119, "step": 196040 }, { "epoch": 0.8, "grad_norm": 2.650153636932373, "learning_rate": 0.0002, "loss": 1.537, "step": 196050 }, { "epoch": 0.8, "grad_norm": 3.6529948711395264, "learning_rate": 0.0002, "loss": 1.6813, "step": 196060 }, { "epoch": 0.8, "grad_norm": 3.550548553466797, "learning_rate": 0.0002, "loss": 1.7047, "step": 196070 }, { "epoch": 0.8, "grad_norm": 3.353533983230591, "learning_rate": 0.0002, "loss": 1.6167, "step": 196080 }, { "epoch": 0.8, "grad_norm": 10.048738479614258, "learning_rate": 0.0002, "loss": 1.5779, "step": 196090 }, { "epoch": 0.8, "grad_norm": 3.556514024734497, "learning_rate": 0.0002, "loss": 1.6705, "step": 196100 }, { "epoch": 0.8, "grad_norm": 2.636460304260254, "learning_rate": 0.0002, "loss": 1.7631, "step": 196110 }, { "epoch": 0.8, "grad_norm": 3.0427188873291016, "learning_rate": 0.0002, "loss": 1.6468, "step": 196120 }, { "epoch": 0.8, "grad_norm": 2.055501937866211, "learning_rate": 0.0002, "loss": 1.5724, "step": 196130 }, { "epoch": 0.8, "grad_norm": 4.455606460571289, "learning_rate": 0.0002, "loss": 1.5583, "step": 196140 }, { "epoch": 0.8, "grad_norm": 4.196521282196045, "learning_rate": 0.0002, "loss": 1.5806, "step": 196150 }, { "epoch": 0.8, "grad_norm": 2.826390504837036, "learning_rate": 0.0002, "loss": 1.7104, "step": 196160 }, { "epoch": 0.8, "grad_norm": 2.06183123588562, "learning_rate": 0.0002, "loss": 1.5036, "step": 196170 }, { "epoch": 0.8, "grad_norm": 3.5859062671661377, "learning_rate": 0.0002, "loss": 1.765, "step": 196180 }, { "epoch": 0.8, "grad_norm": 3.5029428005218506, "learning_rate": 0.0002, "loss": 1.7668, "step": 196190 }, { "epoch": 0.8, "grad_norm": 3.0809624195098877, "learning_rate": 0.0002, "loss": 1.6349, "step": 196200 }, { "epoch": 0.8, "grad_norm": 3.5730113983154297, "learning_rate": 0.0002, "loss": 1.679, "step": 196210 }, { "epoch": 0.8, "grad_norm": 2.0426814556121826, "learning_rate": 0.0002, "loss": 1.4459, "step": 196220 }, { "epoch": 0.8, "grad_norm": 3.3860960006713867, "learning_rate": 0.0002, "loss": 1.594, "step": 196230 }, { "epoch": 0.8, "grad_norm": 1.6778769493103027, "learning_rate": 0.0002, "loss": 1.5319, "step": 196240 }, { "epoch": 0.8, "grad_norm": 2.7331655025482178, "learning_rate": 0.0002, "loss": 1.4647, "step": 196250 }, { "epoch": 0.8, "grad_norm": 4.749382495880127, "learning_rate": 0.0002, "loss": 1.3252, "step": 196260 }, { "epoch": 0.8, "grad_norm": 3.0421836376190186, "learning_rate": 0.0002, "loss": 1.8557, "step": 196270 }, { "epoch": 0.8, "grad_norm": 2.360852003097534, "learning_rate": 0.0002, "loss": 1.5739, "step": 196280 }, { "epoch": 0.8, "grad_norm": 2.703392744064331, "learning_rate": 0.0002, "loss": 1.5448, "step": 196290 }, { "epoch": 0.8, "grad_norm": 2.492520570755005, "learning_rate": 0.0002, "loss": 1.4702, "step": 196300 }, { "epoch": 0.8, "grad_norm": 3.0861997604370117, "learning_rate": 0.0002, "loss": 1.4616, "step": 196310 }, { "epoch": 0.8, "grad_norm": 5.652528762817383, "learning_rate": 0.0002, "loss": 1.5537, "step": 196320 }, { "epoch": 0.8, "grad_norm": 2.190596580505371, "learning_rate": 0.0002, "loss": 1.5113, "step": 196330 }, { "epoch": 0.8, "grad_norm": 2.3843114376068115, "learning_rate": 0.0002, "loss": 1.4467, "step": 196340 }, { "epoch": 0.8, "grad_norm": 2.7311384677886963, "learning_rate": 0.0002, "loss": 1.6478, "step": 196350 }, { "epoch": 0.8, "grad_norm": 3.243997573852539, "learning_rate": 0.0002, "loss": 1.28, "step": 196360 }, { "epoch": 0.8, "grad_norm": 2.538666009902954, "learning_rate": 0.0002, "loss": 1.637, "step": 196370 }, { "epoch": 0.8, "grad_norm": 2.9226949214935303, "learning_rate": 0.0002, "loss": 1.5478, "step": 196380 }, { "epoch": 0.8, "grad_norm": 2.5735743045806885, "learning_rate": 0.0002, "loss": 1.7246, "step": 196390 }, { "epoch": 0.8, "grad_norm": 2.1859190464019775, "learning_rate": 0.0002, "loss": 1.6175, "step": 196400 }, { "epoch": 0.8, "grad_norm": 2.8016693592071533, "learning_rate": 0.0002, "loss": 1.6481, "step": 196410 }, { "epoch": 0.8, "grad_norm": 4.450173377990723, "learning_rate": 0.0002, "loss": 1.5716, "step": 196420 }, { "epoch": 0.8, "grad_norm": 2.1587038040161133, "learning_rate": 0.0002, "loss": 1.5082, "step": 196430 }, { "epoch": 0.8, "grad_norm": 1.9172827005386353, "learning_rate": 0.0002, "loss": 1.2662, "step": 196440 }, { "epoch": 0.8, "grad_norm": 4.32075834274292, "learning_rate": 0.0002, "loss": 1.5047, "step": 196450 }, { "epoch": 0.8, "grad_norm": 2.629415988922119, "learning_rate": 0.0002, "loss": 1.703, "step": 196460 }, { "epoch": 0.8, "grad_norm": 3.3064637184143066, "learning_rate": 0.0002, "loss": 1.3807, "step": 196470 }, { "epoch": 0.8, "grad_norm": 2.8535056114196777, "learning_rate": 0.0002, "loss": 1.4634, "step": 196480 }, { "epoch": 0.8, "grad_norm": 2.5368993282318115, "learning_rate": 0.0002, "loss": 1.6034, "step": 196490 }, { "epoch": 0.8, "grad_norm": 3.061375856399536, "learning_rate": 0.0002, "loss": 1.8604, "step": 196500 }, { "epoch": 0.8, "grad_norm": 4.183290958404541, "learning_rate": 0.0002, "loss": 1.7343, "step": 196510 }, { "epoch": 0.8, "grad_norm": 2.9835469722747803, "learning_rate": 0.0002, "loss": 1.659, "step": 196520 }, { "epoch": 0.8, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.6268, "step": 196530 }, { "epoch": 0.8, "grad_norm": 1.6973336935043335, "learning_rate": 0.0002, "loss": 1.664, "step": 196540 }, { "epoch": 0.8, "grad_norm": 2.7897324562072754, "learning_rate": 0.0002, "loss": 1.5633, "step": 196550 }, { "epoch": 0.8, "grad_norm": 2.563788652420044, "learning_rate": 0.0002, "loss": 1.4834, "step": 196560 }, { "epoch": 0.8, "grad_norm": 3.8004093170166016, "learning_rate": 0.0002, "loss": 1.5585, "step": 196570 }, { "epoch": 0.8, "grad_norm": 2.077057361602783, "learning_rate": 0.0002, "loss": 1.5114, "step": 196580 }, { "epoch": 0.8, "grad_norm": 3.1191768646240234, "learning_rate": 0.0002, "loss": 1.5486, "step": 196590 }, { "epoch": 0.8, "grad_norm": 2.9380829334259033, "learning_rate": 0.0002, "loss": 1.3687, "step": 196600 }, { "epoch": 0.8, "grad_norm": 3.3276119232177734, "learning_rate": 0.0002, "loss": 1.6902, "step": 196610 }, { "epoch": 0.8, "grad_norm": 2.046966791152954, "learning_rate": 0.0002, "loss": 1.6409, "step": 196620 }, { "epoch": 0.8, "grad_norm": 2.7599947452545166, "learning_rate": 0.0002, "loss": 1.6294, "step": 196630 }, { "epoch": 0.8, "grad_norm": 2.7742109298706055, "learning_rate": 0.0002, "loss": 1.765, "step": 196640 }, { "epoch": 0.8, "grad_norm": 3.1363778114318848, "learning_rate": 0.0002, "loss": 1.6633, "step": 196650 }, { "epoch": 0.8, "grad_norm": 4.316096305847168, "learning_rate": 0.0002, "loss": 1.3178, "step": 196660 }, { "epoch": 0.8, "grad_norm": 3.003357410430908, "learning_rate": 0.0002, "loss": 1.6307, "step": 196670 }, { "epoch": 0.8, "grad_norm": 3.238152503967285, "learning_rate": 0.0002, "loss": 1.5104, "step": 196680 }, { "epoch": 0.8, "grad_norm": 3.8603246212005615, "learning_rate": 0.0002, "loss": 1.8219, "step": 196690 }, { "epoch": 0.8, "grad_norm": 2.307483434677124, "learning_rate": 0.0002, "loss": 1.6271, "step": 196700 }, { "epoch": 0.8, "grad_norm": 2.3452863693237305, "learning_rate": 0.0002, "loss": 1.6618, "step": 196710 }, { "epoch": 0.8, "grad_norm": 1.9928168058395386, "learning_rate": 0.0002, "loss": 1.6655, "step": 196720 }, { "epoch": 0.8, "grad_norm": 2.523576498031616, "learning_rate": 0.0002, "loss": 1.961, "step": 196730 }, { "epoch": 0.8, "grad_norm": 3.315112829208374, "learning_rate": 0.0002, "loss": 1.522, "step": 196740 }, { "epoch": 0.8, "grad_norm": 2.02993106842041, "learning_rate": 0.0002, "loss": 1.5297, "step": 196750 }, { "epoch": 0.8, "grad_norm": 2.774329423904419, "learning_rate": 0.0002, "loss": 1.2796, "step": 196760 }, { "epoch": 0.8, "grad_norm": 4.482242107391357, "learning_rate": 0.0002, "loss": 1.8104, "step": 196770 }, { "epoch": 0.8, "grad_norm": 3.2932116985321045, "learning_rate": 0.0002, "loss": 1.4478, "step": 196780 }, { "epoch": 0.8, "grad_norm": 2.382852554321289, "learning_rate": 0.0002, "loss": 1.5947, "step": 196790 }, { "epoch": 0.8, "grad_norm": 2.3524293899536133, "learning_rate": 0.0002, "loss": 1.4164, "step": 196800 }, { "epoch": 0.8, "grad_norm": 2.8940374851226807, "learning_rate": 0.0002, "loss": 1.677, "step": 196810 }, { "epoch": 0.8, "grad_norm": 2.697052240371704, "learning_rate": 0.0002, "loss": 1.7062, "step": 196820 }, { "epoch": 0.8, "grad_norm": 2.8591959476470947, "learning_rate": 0.0002, "loss": 1.5746, "step": 196830 }, { "epoch": 0.8, "grad_norm": 2.528271436691284, "learning_rate": 0.0002, "loss": 1.7415, "step": 196840 }, { "epoch": 0.8, "grad_norm": 3.1994826793670654, "learning_rate": 0.0002, "loss": 1.5186, "step": 196850 }, { "epoch": 0.8, "grad_norm": 2.469496726989746, "learning_rate": 0.0002, "loss": 1.5957, "step": 196860 }, { "epoch": 0.8, "grad_norm": 2.6467127799987793, "learning_rate": 0.0002, "loss": 1.5809, "step": 196870 }, { "epoch": 0.8, "grad_norm": 2.5487356185913086, "learning_rate": 0.0002, "loss": 1.8304, "step": 196880 }, { "epoch": 0.8, "grad_norm": 3.270561456680298, "learning_rate": 0.0002, "loss": 1.5982, "step": 196890 }, { "epoch": 0.8, "grad_norm": 3.5338447093963623, "learning_rate": 0.0002, "loss": 1.743, "step": 196900 }, { "epoch": 0.8, "grad_norm": 2.6063742637634277, "learning_rate": 0.0002, "loss": 1.5913, "step": 196910 }, { "epoch": 0.8, "grad_norm": 3.1491777896881104, "learning_rate": 0.0002, "loss": 1.6624, "step": 196920 }, { "epoch": 0.8, "grad_norm": 4.447266101837158, "learning_rate": 0.0002, "loss": 1.457, "step": 196930 }, { "epoch": 0.8, "grad_norm": 2.5988008975982666, "learning_rate": 0.0002, "loss": 1.2457, "step": 196940 }, { "epoch": 0.8, "grad_norm": 3.939547300338745, "learning_rate": 0.0002, "loss": 1.505, "step": 196950 }, { "epoch": 0.8, "grad_norm": 4.258374214172363, "learning_rate": 0.0002, "loss": 1.5863, "step": 196960 }, { "epoch": 0.8, "grad_norm": 2.5108275413513184, "learning_rate": 0.0002, "loss": 1.7316, "step": 196970 }, { "epoch": 0.8, "grad_norm": 2.596693992614746, "learning_rate": 0.0002, "loss": 1.537, "step": 196980 }, { "epoch": 0.8, "grad_norm": 3.7769250869750977, "learning_rate": 0.0002, "loss": 1.6085, "step": 196990 }, { "epoch": 0.8, "grad_norm": 3.550536870956421, "learning_rate": 0.0002, "loss": 1.5725, "step": 197000 }, { "epoch": 0.8, "grad_norm": 2.693331241607666, "learning_rate": 0.0002, "loss": 1.5642, "step": 197010 }, { "epoch": 0.8, "grad_norm": 1.6175382137298584, "learning_rate": 0.0002, "loss": 1.6564, "step": 197020 }, { "epoch": 0.8, "grad_norm": 2.2779297828674316, "learning_rate": 0.0002, "loss": 1.7586, "step": 197030 }, { "epoch": 0.8, "grad_norm": 1.0747714042663574, "learning_rate": 0.0002, "loss": 1.3994, "step": 197040 }, { "epoch": 0.8, "grad_norm": 2.4317150115966797, "learning_rate": 0.0002, "loss": 1.504, "step": 197050 }, { "epoch": 0.8, "grad_norm": 3.9738852977752686, "learning_rate": 0.0002, "loss": 1.4069, "step": 197060 }, { "epoch": 0.8, "grad_norm": 2.5372233390808105, "learning_rate": 0.0002, "loss": 1.6211, "step": 197070 }, { "epoch": 0.8, "grad_norm": 4.169395446777344, "learning_rate": 0.0002, "loss": 1.6874, "step": 197080 }, { "epoch": 0.8, "grad_norm": 3.725524663925171, "learning_rate": 0.0002, "loss": 1.3987, "step": 197090 }, { "epoch": 0.8, "grad_norm": 2.251979351043701, "learning_rate": 0.0002, "loss": 1.4311, "step": 197100 }, { "epoch": 0.8, "grad_norm": 2.942379951477051, "learning_rate": 0.0002, "loss": 1.4282, "step": 197110 }, { "epoch": 0.8, "grad_norm": 1.6761229038238525, "learning_rate": 0.0002, "loss": 1.4527, "step": 197120 }, { "epoch": 0.8, "grad_norm": 3.0256073474884033, "learning_rate": 0.0002, "loss": 1.8252, "step": 197130 }, { "epoch": 0.8, "grad_norm": 2.082538366317749, "learning_rate": 0.0002, "loss": 1.5815, "step": 197140 }, { "epoch": 0.8, "grad_norm": 2.6347360610961914, "learning_rate": 0.0002, "loss": 1.7154, "step": 197150 }, { "epoch": 0.8, "grad_norm": 3.3088884353637695, "learning_rate": 0.0002, "loss": 1.6096, "step": 197160 }, { "epoch": 0.8, "grad_norm": 4.515635013580322, "learning_rate": 0.0002, "loss": 1.5103, "step": 197170 }, { "epoch": 0.8, "grad_norm": 1.9292405843734741, "learning_rate": 0.0002, "loss": 1.5348, "step": 197180 }, { "epoch": 0.8, "grad_norm": 2.8222644329071045, "learning_rate": 0.0002, "loss": 1.8013, "step": 197190 }, { "epoch": 0.8, "grad_norm": 2.429500102996826, "learning_rate": 0.0002, "loss": 1.6146, "step": 197200 }, { "epoch": 0.8, "grad_norm": 2.1891462802886963, "learning_rate": 0.0002, "loss": 1.4268, "step": 197210 }, { "epoch": 0.8, "grad_norm": 2.729093074798584, "learning_rate": 0.0002, "loss": 1.5889, "step": 197220 }, { "epoch": 0.8, "grad_norm": 4.884980201721191, "learning_rate": 0.0002, "loss": 1.6762, "step": 197230 }, { "epoch": 0.8, "grad_norm": 4.222090721130371, "learning_rate": 0.0002, "loss": 1.6239, "step": 197240 }, { "epoch": 0.8, "grad_norm": 2.0541889667510986, "learning_rate": 0.0002, "loss": 1.458, "step": 197250 }, { "epoch": 0.8, "grad_norm": 5.581301212310791, "learning_rate": 0.0002, "loss": 1.5834, "step": 197260 }, { "epoch": 0.8, "grad_norm": 2.957167625427246, "learning_rate": 0.0002, "loss": 1.577, "step": 197270 }, { "epoch": 0.8, "grad_norm": 2.098240375518799, "learning_rate": 0.0002, "loss": 1.5834, "step": 197280 }, { "epoch": 0.8, "grad_norm": 2.3686845302581787, "learning_rate": 0.0002, "loss": 1.8088, "step": 197290 }, { "epoch": 0.8, "grad_norm": 1.8811103105545044, "learning_rate": 0.0002, "loss": 1.5172, "step": 197300 }, { "epoch": 0.8, "grad_norm": 1.8635141849517822, "learning_rate": 0.0002, "loss": 1.8588, "step": 197310 }, { "epoch": 0.8, "grad_norm": 2.377561569213867, "learning_rate": 0.0002, "loss": 1.526, "step": 197320 }, { "epoch": 0.8, "grad_norm": 2.1304216384887695, "learning_rate": 0.0002, "loss": 1.6419, "step": 197330 }, { "epoch": 0.8, "grad_norm": 3.6848275661468506, "learning_rate": 0.0002, "loss": 1.8641, "step": 197340 }, { "epoch": 0.8, "grad_norm": 2.4524221420288086, "learning_rate": 0.0002, "loss": 1.5599, "step": 197350 }, { "epoch": 0.8, "grad_norm": 3.756690740585327, "learning_rate": 0.0002, "loss": 1.3836, "step": 197360 }, { "epoch": 0.8, "grad_norm": 3.215076208114624, "learning_rate": 0.0002, "loss": 1.3345, "step": 197370 }, { "epoch": 0.8, "grad_norm": 3.4575228691101074, "learning_rate": 0.0002, "loss": 1.7797, "step": 197380 }, { "epoch": 0.8, "grad_norm": 3.535583734512329, "learning_rate": 0.0002, "loss": 1.7024, "step": 197390 }, { "epoch": 0.8, "grad_norm": 2.3693621158599854, "learning_rate": 0.0002, "loss": 1.5659, "step": 197400 }, { "epoch": 0.8, "grad_norm": 3.4843790531158447, "learning_rate": 0.0002, "loss": 1.614, "step": 197410 }, { "epoch": 0.8, "grad_norm": 3.018460750579834, "learning_rate": 0.0002, "loss": 1.5959, "step": 197420 }, { "epoch": 0.8, "grad_norm": 2.9125566482543945, "learning_rate": 0.0002, "loss": 1.5859, "step": 197430 }, { "epoch": 0.8, "grad_norm": 4.341710090637207, "learning_rate": 0.0002, "loss": 1.6411, "step": 197440 }, { "epoch": 0.8, "grad_norm": 2.494673013687134, "learning_rate": 0.0002, "loss": 1.4076, "step": 197450 }, { "epoch": 0.8, "grad_norm": 3.2831873893737793, "learning_rate": 0.0002, "loss": 1.4625, "step": 197460 }, { "epoch": 0.8, "grad_norm": 2.624610424041748, "learning_rate": 0.0002, "loss": 1.3713, "step": 197470 }, { "epoch": 0.8, "grad_norm": 4.484355926513672, "learning_rate": 0.0002, "loss": 1.4945, "step": 197480 }, { "epoch": 0.8, "grad_norm": 1.5087205171585083, "learning_rate": 0.0002, "loss": 1.6422, "step": 197490 }, { "epoch": 0.8, "grad_norm": 3.1999282836914062, "learning_rate": 0.0002, "loss": 1.6692, "step": 197500 }, { "epoch": 0.8, "grad_norm": 3.44966983795166, "learning_rate": 0.0002, "loss": 1.4538, "step": 197510 }, { "epoch": 0.8, "grad_norm": 2.0279946327209473, "learning_rate": 0.0002, "loss": 1.532, "step": 197520 }, { "epoch": 0.8, "grad_norm": 3.8277428150177, "learning_rate": 0.0002, "loss": 1.8883, "step": 197530 }, { "epoch": 0.8, "grad_norm": 3.1214759349823, "learning_rate": 0.0002, "loss": 1.6946, "step": 197540 }, { "epoch": 0.8, "grad_norm": 3.281829357147217, "learning_rate": 0.0002, "loss": 1.6633, "step": 197550 }, { "epoch": 0.8, "grad_norm": 5.259139060974121, "learning_rate": 0.0002, "loss": 1.8389, "step": 197560 }, { "epoch": 0.8, "grad_norm": 3.1464896202087402, "learning_rate": 0.0002, "loss": 1.5629, "step": 197570 }, { "epoch": 0.8, "grad_norm": 3.0177102088928223, "learning_rate": 0.0002, "loss": 1.5224, "step": 197580 }, { "epoch": 0.8, "grad_norm": 2.799034595489502, "learning_rate": 0.0002, "loss": 1.7133, "step": 197590 }, { "epoch": 0.8, "grad_norm": 2.6933212280273438, "learning_rate": 0.0002, "loss": 1.7351, "step": 197600 }, { "epoch": 0.8, "grad_norm": 2.696526288986206, "learning_rate": 0.0002, "loss": 1.4836, "step": 197610 }, { "epoch": 0.8, "grad_norm": 4.88810396194458, "learning_rate": 0.0002, "loss": 1.6067, "step": 197620 }, { "epoch": 0.8, "grad_norm": 3.600668430328369, "learning_rate": 0.0002, "loss": 1.6165, "step": 197630 }, { "epoch": 0.8, "grad_norm": 4.00566291809082, "learning_rate": 0.0002, "loss": 1.3432, "step": 197640 }, { "epoch": 0.8, "grad_norm": 1.662538766860962, "learning_rate": 0.0002, "loss": 1.5991, "step": 197650 }, { "epoch": 0.8, "grad_norm": 3.1781721115112305, "learning_rate": 0.0002, "loss": 1.5295, "step": 197660 }, { "epoch": 0.8, "grad_norm": 2.7534923553466797, "learning_rate": 0.0002, "loss": 1.601, "step": 197670 }, { "epoch": 0.8, "grad_norm": 2.2640182971954346, "learning_rate": 0.0002, "loss": 1.5045, "step": 197680 }, { "epoch": 0.8, "grad_norm": 3.176769971847534, "learning_rate": 0.0002, "loss": 1.7583, "step": 197690 }, { "epoch": 0.8, "grad_norm": 3.1086783409118652, "learning_rate": 0.0002, "loss": 1.8302, "step": 197700 }, { "epoch": 0.8, "grad_norm": 2.5699493885040283, "learning_rate": 0.0002, "loss": 1.5961, "step": 197710 }, { "epoch": 0.8, "grad_norm": 2.461421251296997, "learning_rate": 0.0002, "loss": 1.5595, "step": 197720 }, { "epoch": 0.8, "grad_norm": 4.319108009338379, "learning_rate": 0.0002, "loss": 1.5279, "step": 197730 }, { "epoch": 0.8, "grad_norm": 3.883240222930908, "learning_rate": 0.0002, "loss": 1.7509, "step": 197740 }, { "epoch": 0.81, "grad_norm": 3.206545114517212, "learning_rate": 0.0002, "loss": 1.9569, "step": 197750 }, { "epoch": 0.81, "grad_norm": 4.009945869445801, "learning_rate": 0.0002, "loss": 1.5387, "step": 197760 }, { "epoch": 0.81, "grad_norm": 7.0370073318481445, "learning_rate": 0.0002, "loss": 1.7257, "step": 197770 }, { "epoch": 0.81, "grad_norm": 3.1552817821502686, "learning_rate": 0.0002, "loss": 1.5945, "step": 197780 }, { "epoch": 0.81, "grad_norm": 2.8742926120758057, "learning_rate": 0.0002, "loss": 1.5025, "step": 197790 }, { "epoch": 0.81, "grad_norm": 3.678079843521118, "learning_rate": 0.0002, "loss": 1.5098, "step": 197800 }, { "epoch": 0.81, "grad_norm": 3.491783380508423, "learning_rate": 0.0002, "loss": 1.5616, "step": 197810 }, { "epoch": 0.81, "grad_norm": 3.8813228607177734, "learning_rate": 0.0002, "loss": 1.5218, "step": 197820 }, { "epoch": 0.81, "grad_norm": 3.637822389602661, "learning_rate": 0.0002, "loss": 1.6986, "step": 197830 }, { "epoch": 0.81, "grad_norm": 3.5256118774414062, "learning_rate": 0.0002, "loss": 1.4193, "step": 197840 }, { "epoch": 0.81, "grad_norm": 2.6694304943084717, "learning_rate": 0.0002, "loss": 1.6694, "step": 197850 }, { "epoch": 0.81, "grad_norm": 2.2848153114318848, "learning_rate": 0.0002, "loss": 1.6555, "step": 197860 }, { "epoch": 0.81, "grad_norm": 2.508481025695801, "learning_rate": 0.0002, "loss": 1.4507, "step": 197870 }, { "epoch": 0.81, "grad_norm": 4.048215866088867, "learning_rate": 0.0002, "loss": 1.553, "step": 197880 }, { "epoch": 0.81, "grad_norm": 2.619011878967285, "learning_rate": 0.0002, "loss": 1.7789, "step": 197890 }, { "epoch": 0.81, "grad_norm": 2.2891416549682617, "learning_rate": 0.0002, "loss": 1.4636, "step": 197900 }, { "epoch": 0.81, "grad_norm": 6.820378303527832, "learning_rate": 0.0002, "loss": 1.5967, "step": 197910 }, { "epoch": 0.81, "grad_norm": 3.7654783725738525, "learning_rate": 0.0002, "loss": 1.4911, "step": 197920 }, { "epoch": 0.81, "grad_norm": 2.614637851715088, "learning_rate": 0.0002, "loss": 1.7269, "step": 197930 }, { "epoch": 0.81, "grad_norm": 3.3186745643615723, "learning_rate": 0.0002, "loss": 1.5259, "step": 197940 }, { "epoch": 0.81, "grad_norm": 2.779203176498413, "learning_rate": 0.0002, "loss": 1.553, "step": 197950 }, { "epoch": 0.81, "grad_norm": 3.274456739425659, "learning_rate": 0.0002, "loss": 1.5154, "step": 197960 }, { "epoch": 0.81, "grad_norm": 2.174691915512085, "learning_rate": 0.0002, "loss": 1.3416, "step": 197970 }, { "epoch": 0.81, "grad_norm": 3.7940514087677, "learning_rate": 0.0002, "loss": 1.6764, "step": 197980 }, { "epoch": 0.81, "grad_norm": 5.524105072021484, "learning_rate": 0.0002, "loss": 1.5268, "step": 197990 }, { "epoch": 0.81, "grad_norm": 3.6529688835144043, "learning_rate": 0.0002, "loss": 1.7221, "step": 198000 }, { "epoch": 0.81, "grad_norm": 2.593517303466797, "learning_rate": 0.0002, "loss": 1.6195, "step": 198010 }, { "epoch": 0.81, "grad_norm": 2.288862943649292, "learning_rate": 0.0002, "loss": 1.5729, "step": 198020 }, { "epoch": 0.81, "grad_norm": 2.213083505630493, "learning_rate": 0.0002, "loss": 1.4713, "step": 198030 }, { "epoch": 0.81, "grad_norm": 3.7038865089416504, "learning_rate": 0.0002, "loss": 1.6298, "step": 198040 }, { "epoch": 0.81, "grad_norm": 2.130568742752075, "learning_rate": 0.0002, "loss": 1.4402, "step": 198050 }, { "epoch": 0.81, "grad_norm": 3.792990207672119, "learning_rate": 0.0002, "loss": 1.6336, "step": 198060 }, { "epoch": 0.81, "grad_norm": 3.261781692504883, "learning_rate": 0.0002, "loss": 1.6486, "step": 198070 }, { "epoch": 0.81, "grad_norm": 3.0517241954803467, "learning_rate": 0.0002, "loss": 1.4757, "step": 198080 }, { "epoch": 0.81, "grad_norm": 3.3755905628204346, "learning_rate": 0.0002, "loss": 1.7098, "step": 198090 }, { "epoch": 0.81, "grad_norm": 2.6435842514038086, "learning_rate": 0.0002, "loss": 1.5572, "step": 198100 }, { "epoch": 0.81, "grad_norm": 3.0309653282165527, "learning_rate": 0.0002, "loss": 1.8644, "step": 198110 }, { "epoch": 0.81, "grad_norm": 2.1054494380950928, "learning_rate": 0.0002, "loss": 1.6026, "step": 198120 }, { "epoch": 0.81, "grad_norm": 4.805838108062744, "learning_rate": 0.0002, "loss": 1.4387, "step": 198130 }, { "epoch": 0.81, "grad_norm": 2.6671149730682373, "learning_rate": 0.0002, "loss": 1.5583, "step": 198140 }, { "epoch": 0.81, "grad_norm": 4.440059185028076, "learning_rate": 0.0002, "loss": 1.2779, "step": 198150 }, { "epoch": 0.81, "grad_norm": 3.6956167221069336, "learning_rate": 0.0002, "loss": 1.6702, "step": 198160 }, { "epoch": 0.81, "grad_norm": 3.1777312755584717, "learning_rate": 0.0002, "loss": 1.4759, "step": 198170 }, { "epoch": 0.81, "grad_norm": 2.807983636856079, "learning_rate": 0.0002, "loss": 1.6521, "step": 198180 }, { "epoch": 0.81, "grad_norm": 3.6408843994140625, "learning_rate": 0.0002, "loss": 1.3591, "step": 198190 }, { "epoch": 0.81, "grad_norm": 3.088869094848633, "learning_rate": 0.0002, "loss": 1.8923, "step": 198200 }, { "epoch": 0.81, "grad_norm": 3.155975818634033, "learning_rate": 0.0002, "loss": 1.5408, "step": 198210 }, { "epoch": 0.81, "grad_norm": 2.0482451915740967, "learning_rate": 0.0002, "loss": 1.6146, "step": 198220 }, { "epoch": 0.81, "grad_norm": 2.9990572929382324, "learning_rate": 0.0002, "loss": 1.6117, "step": 198230 }, { "epoch": 0.81, "grad_norm": 2.6909310817718506, "learning_rate": 0.0002, "loss": 1.7864, "step": 198240 }, { "epoch": 0.81, "grad_norm": 2.1763522624969482, "learning_rate": 0.0002, "loss": 1.6283, "step": 198250 }, { "epoch": 0.81, "grad_norm": 3.2706856727600098, "learning_rate": 0.0002, "loss": 1.5664, "step": 198260 }, { "epoch": 0.81, "grad_norm": 2.4696545600891113, "learning_rate": 0.0002, "loss": 1.5662, "step": 198270 }, { "epoch": 0.81, "grad_norm": 2.701099157333374, "learning_rate": 0.0002, "loss": 1.5084, "step": 198280 }, { "epoch": 0.81, "grad_norm": 4.7909674644470215, "learning_rate": 0.0002, "loss": 1.6929, "step": 198290 }, { "epoch": 0.81, "grad_norm": 3.0497121810913086, "learning_rate": 0.0002, "loss": 1.6629, "step": 198300 }, { "epoch": 0.81, "grad_norm": 2.4236409664154053, "learning_rate": 0.0002, "loss": 1.6044, "step": 198310 }, { "epoch": 0.81, "grad_norm": 3.1452698707580566, "learning_rate": 0.0002, "loss": 1.4238, "step": 198320 }, { "epoch": 0.81, "grad_norm": 2.5158917903900146, "learning_rate": 0.0002, "loss": 1.2231, "step": 198330 }, { "epoch": 0.81, "grad_norm": 2.0340564250946045, "learning_rate": 0.0002, "loss": 1.5128, "step": 198340 }, { "epoch": 0.81, "grad_norm": 4.127931118011475, "learning_rate": 0.0002, "loss": 1.5301, "step": 198350 }, { "epoch": 0.81, "grad_norm": 2.5970749855041504, "learning_rate": 0.0002, "loss": 1.6657, "step": 198360 }, { "epoch": 0.81, "grad_norm": 2.0290896892547607, "learning_rate": 0.0002, "loss": 1.7001, "step": 198370 }, { "epoch": 0.81, "grad_norm": 2.188394546508789, "learning_rate": 0.0002, "loss": 1.5816, "step": 198380 }, { "epoch": 0.81, "grad_norm": 2.8346962928771973, "learning_rate": 0.0002, "loss": 1.5346, "step": 198390 }, { "epoch": 0.81, "grad_norm": 3.120652914047241, "learning_rate": 0.0002, "loss": 1.6673, "step": 198400 }, { "epoch": 0.81, "grad_norm": 2.8568029403686523, "learning_rate": 0.0002, "loss": 1.595, "step": 198410 }, { "epoch": 0.81, "grad_norm": 1.864978551864624, "learning_rate": 0.0002, "loss": 1.6111, "step": 198420 }, { "epoch": 0.81, "grad_norm": 3.2917582988739014, "learning_rate": 0.0002, "loss": 1.6609, "step": 198430 }, { "epoch": 0.81, "grad_norm": 2.5406243801116943, "learning_rate": 0.0002, "loss": 1.484, "step": 198440 }, { "epoch": 0.81, "grad_norm": 2.54069447517395, "learning_rate": 0.0002, "loss": 1.5944, "step": 198450 }, { "epoch": 0.81, "grad_norm": 4.379208087921143, "learning_rate": 0.0002, "loss": 1.5535, "step": 198460 }, { "epoch": 0.81, "grad_norm": 1.6938451528549194, "learning_rate": 0.0002, "loss": 1.6682, "step": 198470 }, { "epoch": 0.81, "grad_norm": 3.1597869396209717, "learning_rate": 0.0002, "loss": 1.4413, "step": 198480 }, { "epoch": 0.81, "grad_norm": 7.541648864746094, "learning_rate": 0.0002, "loss": 1.8196, "step": 198490 }, { "epoch": 0.81, "grad_norm": 1.8595424890518188, "learning_rate": 0.0002, "loss": 1.5764, "step": 198500 }, { "epoch": 0.81, "grad_norm": 4.156613349914551, "learning_rate": 0.0002, "loss": 1.5715, "step": 198510 }, { "epoch": 0.81, "grad_norm": 2.980738878250122, "learning_rate": 0.0002, "loss": 1.6561, "step": 198520 }, { "epoch": 0.81, "grad_norm": 2.4977173805236816, "learning_rate": 0.0002, "loss": 1.6257, "step": 198530 }, { "epoch": 0.81, "grad_norm": 3.8842973709106445, "learning_rate": 0.0002, "loss": 1.3999, "step": 198540 }, { "epoch": 0.81, "grad_norm": 3.4347851276397705, "learning_rate": 0.0002, "loss": 1.5142, "step": 198550 }, { "epoch": 0.81, "grad_norm": 2.937300443649292, "learning_rate": 0.0002, "loss": 1.7442, "step": 198560 }, { "epoch": 0.81, "grad_norm": 4.369318962097168, "learning_rate": 0.0002, "loss": 1.2837, "step": 198570 }, { "epoch": 0.81, "grad_norm": 3.347221851348877, "learning_rate": 0.0002, "loss": 1.4268, "step": 198580 }, { "epoch": 0.81, "grad_norm": 2.0783579349517822, "learning_rate": 0.0002, "loss": 1.479, "step": 198590 }, { "epoch": 0.81, "grad_norm": 3.151625156402588, "learning_rate": 0.0002, "loss": 1.8203, "step": 198600 }, { "epoch": 0.81, "grad_norm": 3.8941376209259033, "learning_rate": 0.0002, "loss": 1.6072, "step": 198610 }, { "epoch": 0.81, "grad_norm": 2.6871464252471924, "learning_rate": 0.0002, "loss": 1.4632, "step": 198620 }, { "epoch": 0.81, "grad_norm": 3.6395626068115234, "learning_rate": 0.0002, "loss": 1.4583, "step": 198630 }, { "epoch": 0.81, "grad_norm": 1.4505460262298584, "learning_rate": 0.0002, "loss": 1.4812, "step": 198640 }, { "epoch": 0.81, "grad_norm": 2.3845536708831787, "learning_rate": 0.0002, "loss": 1.8924, "step": 198650 }, { "epoch": 0.81, "grad_norm": 2.828157663345337, "learning_rate": 0.0002, "loss": 1.4376, "step": 198660 }, { "epoch": 0.81, "grad_norm": 2.3501029014587402, "learning_rate": 0.0002, "loss": 1.5949, "step": 198670 }, { "epoch": 0.81, "grad_norm": 5.75753116607666, "learning_rate": 0.0002, "loss": 1.536, "step": 198680 }, { "epoch": 0.81, "grad_norm": 3.466371536254883, "learning_rate": 0.0002, "loss": 1.4841, "step": 198690 }, { "epoch": 0.81, "grad_norm": 2.728893280029297, "learning_rate": 0.0002, "loss": 1.6803, "step": 198700 }, { "epoch": 0.81, "grad_norm": 2.810068368911743, "learning_rate": 0.0002, "loss": 1.7118, "step": 198710 }, { "epoch": 0.81, "grad_norm": 4.582742214202881, "learning_rate": 0.0002, "loss": 1.6393, "step": 198720 }, { "epoch": 0.81, "grad_norm": 3.6760141849517822, "learning_rate": 0.0002, "loss": 1.6018, "step": 198730 }, { "epoch": 0.81, "grad_norm": 4.690118312835693, "learning_rate": 0.0002, "loss": 1.6257, "step": 198740 }, { "epoch": 0.81, "grad_norm": 2.3109028339385986, "learning_rate": 0.0002, "loss": 1.521, "step": 198750 }, { "epoch": 0.81, "grad_norm": 2.213663101196289, "learning_rate": 0.0002, "loss": 1.4268, "step": 198760 }, { "epoch": 0.81, "grad_norm": 3.10526704788208, "learning_rate": 0.0002, "loss": 1.4464, "step": 198770 }, { "epoch": 0.81, "grad_norm": 3.2732510566711426, "learning_rate": 0.0002, "loss": 1.5606, "step": 198780 }, { "epoch": 0.81, "grad_norm": 2.3214242458343506, "learning_rate": 0.0002, "loss": 1.5767, "step": 198790 }, { "epoch": 0.81, "grad_norm": 2.8192620277404785, "learning_rate": 0.0002, "loss": 1.695, "step": 198800 }, { "epoch": 0.81, "grad_norm": 3.848005533218384, "learning_rate": 0.0002, "loss": 1.4495, "step": 198810 }, { "epoch": 0.81, "grad_norm": 2.249953269958496, "learning_rate": 0.0002, "loss": 1.7164, "step": 198820 }, { "epoch": 0.81, "grad_norm": 1.8982304334640503, "learning_rate": 0.0002, "loss": 1.3295, "step": 198830 }, { "epoch": 0.81, "grad_norm": 2.9020681381225586, "learning_rate": 0.0002, "loss": 1.4645, "step": 198840 }, { "epoch": 0.81, "grad_norm": 2.688143014907837, "learning_rate": 0.0002, "loss": 1.3521, "step": 198850 }, { "epoch": 0.81, "grad_norm": 3.581364154815674, "learning_rate": 0.0002, "loss": 1.6666, "step": 198860 }, { "epoch": 0.81, "grad_norm": 3.338402271270752, "learning_rate": 0.0002, "loss": 1.4687, "step": 198870 }, { "epoch": 0.81, "grad_norm": 2.1993086338043213, "learning_rate": 0.0002, "loss": 1.5367, "step": 198880 }, { "epoch": 0.81, "grad_norm": 3.030923366546631, "learning_rate": 0.0002, "loss": 1.7722, "step": 198890 }, { "epoch": 0.81, "grad_norm": 1.2812780141830444, "learning_rate": 0.0002, "loss": 1.6387, "step": 198900 }, { "epoch": 0.81, "grad_norm": 3.0515620708465576, "learning_rate": 0.0002, "loss": 1.5154, "step": 198910 }, { "epoch": 0.81, "grad_norm": 2.9566516876220703, "learning_rate": 0.0002, "loss": 1.5283, "step": 198920 }, { "epoch": 0.81, "grad_norm": 3.90116286277771, "learning_rate": 0.0002, "loss": 1.6211, "step": 198930 }, { "epoch": 0.81, "grad_norm": 2.0789976119995117, "learning_rate": 0.0002, "loss": 1.5848, "step": 198940 }, { "epoch": 0.81, "grad_norm": 2.853330373764038, "learning_rate": 0.0002, "loss": 1.5896, "step": 198950 }, { "epoch": 0.81, "grad_norm": 3.034485101699829, "learning_rate": 0.0002, "loss": 1.6062, "step": 198960 }, { "epoch": 0.81, "grad_norm": 2.1766958236694336, "learning_rate": 0.0002, "loss": 1.7117, "step": 198970 }, { "epoch": 0.81, "grad_norm": 3.5878713130950928, "learning_rate": 0.0002, "loss": 1.5486, "step": 198980 }, { "epoch": 0.81, "grad_norm": 4.217390060424805, "learning_rate": 0.0002, "loss": 1.8434, "step": 198990 }, { "epoch": 0.81, "grad_norm": 3.521916627883911, "learning_rate": 0.0002, "loss": 1.7441, "step": 199000 }, { "epoch": 0.81, "grad_norm": 2.5345299243927, "learning_rate": 0.0002, "loss": 1.7274, "step": 199010 }, { "epoch": 0.81, "grad_norm": 2.545396327972412, "learning_rate": 0.0002, "loss": 1.6648, "step": 199020 }, { "epoch": 0.81, "grad_norm": 2.676976442337036, "learning_rate": 0.0002, "loss": 1.5592, "step": 199030 }, { "epoch": 0.81, "grad_norm": 2.8784310817718506, "learning_rate": 0.0002, "loss": 1.5515, "step": 199040 }, { "epoch": 0.81, "grad_norm": 3.5452778339385986, "learning_rate": 0.0002, "loss": 1.5839, "step": 199050 }, { "epoch": 0.81, "grad_norm": 1.7765710353851318, "learning_rate": 0.0002, "loss": 1.2674, "step": 199060 }, { "epoch": 0.81, "grad_norm": 3.348938226699829, "learning_rate": 0.0002, "loss": 1.6909, "step": 199070 }, { "epoch": 0.81, "grad_norm": 2.91751766204834, "learning_rate": 0.0002, "loss": 1.8109, "step": 199080 }, { "epoch": 0.81, "grad_norm": 2.6432557106018066, "learning_rate": 0.0002, "loss": 1.4806, "step": 199090 }, { "epoch": 0.81, "grad_norm": 4.537422180175781, "learning_rate": 0.0002, "loss": 1.5121, "step": 199100 }, { "epoch": 0.81, "grad_norm": 2.31036376953125, "learning_rate": 0.0002, "loss": 1.4105, "step": 199110 }, { "epoch": 0.81, "grad_norm": 3.1527822017669678, "learning_rate": 0.0002, "loss": 1.6801, "step": 199120 }, { "epoch": 0.81, "grad_norm": 5.59832763671875, "learning_rate": 0.0002, "loss": 1.512, "step": 199130 }, { "epoch": 0.81, "grad_norm": 2.6010444164276123, "learning_rate": 0.0002, "loss": 1.7572, "step": 199140 }, { "epoch": 0.81, "grad_norm": 2.4655611515045166, "learning_rate": 0.0002, "loss": 1.8939, "step": 199150 }, { "epoch": 0.81, "grad_norm": 2.8226635456085205, "learning_rate": 0.0002, "loss": 1.6287, "step": 199160 }, { "epoch": 0.81, "grad_norm": 2.432755470275879, "learning_rate": 0.0002, "loss": 1.6428, "step": 199170 }, { "epoch": 0.81, "grad_norm": 2.700192451477051, "learning_rate": 0.0002, "loss": 1.6853, "step": 199180 }, { "epoch": 0.81, "grad_norm": 2.5139801502227783, "learning_rate": 0.0002, "loss": 1.6696, "step": 199190 }, { "epoch": 0.81, "grad_norm": 2.4689314365386963, "learning_rate": 0.0002, "loss": 1.5947, "step": 199200 }, { "epoch": 0.81, "grad_norm": 2.6000306606292725, "learning_rate": 0.0002, "loss": 1.5658, "step": 199210 }, { "epoch": 0.81, "grad_norm": 2.098719596862793, "learning_rate": 0.0002, "loss": 1.7638, "step": 199220 }, { "epoch": 0.81, "grad_norm": 3.729494571685791, "learning_rate": 0.0002, "loss": 1.7012, "step": 199230 }, { "epoch": 0.81, "grad_norm": 3.337285041809082, "learning_rate": 0.0002, "loss": 1.6154, "step": 199240 }, { "epoch": 0.81, "grad_norm": 2.454146146774292, "learning_rate": 0.0002, "loss": 1.7525, "step": 199250 }, { "epoch": 0.81, "grad_norm": 2.9489500522613525, "learning_rate": 0.0002, "loss": 1.5747, "step": 199260 }, { "epoch": 0.81, "grad_norm": 3.686555862426758, "learning_rate": 0.0002, "loss": 1.6132, "step": 199270 }, { "epoch": 0.81, "grad_norm": 4.218347072601318, "learning_rate": 0.0002, "loss": 1.7491, "step": 199280 }, { "epoch": 0.81, "grad_norm": 2.2102646827697754, "learning_rate": 0.0002, "loss": 1.6244, "step": 199290 }, { "epoch": 0.81, "grad_norm": 4.010354995727539, "learning_rate": 0.0002, "loss": 1.6938, "step": 199300 }, { "epoch": 0.81, "grad_norm": 1.8503336906433105, "learning_rate": 0.0002, "loss": 1.5042, "step": 199310 }, { "epoch": 0.81, "grad_norm": 2.74157977104187, "learning_rate": 0.0002, "loss": 1.7525, "step": 199320 }, { "epoch": 0.81, "grad_norm": 3.496337890625, "learning_rate": 0.0002, "loss": 1.5634, "step": 199330 }, { "epoch": 0.81, "grad_norm": 2.6803011894226074, "learning_rate": 0.0002, "loss": 1.5195, "step": 199340 }, { "epoch": 0.81, "grad_norm": 8.986811637878418, "learning_rate": 0.0002, "loss": 1.7083, "step": 199350 }, { "epoch": 0.81, "grad_norm": 3.403268814086914, "learning_rate": 0.0002, "loss": 1.7036, "step": 199360 }, { "epoch": 0.81, "grad_norm": 2.1683669090270996, "learning_rate": 0.0002, "loss": 1.5693, "step": 199370 }, { "epoch": 0.81, "grad_norm": 3.1485955715179443, "learning_rate": 0.0002, "loss": 1.4899, "step": 199380 }, { "epoch": 0.81, "grad_norm": 3.9074032306671143, "learning_rate": 0.0002, "loss": 1.5813, "step": 199390 }, { "epoch": 0.81, "grad_norm": 3.4039053916931152, "learning_rate": 0.0002, "loss": 1.3593, "step": 199400 }, { "epoch": 0.81, "grad_norm": 3.2423155307769775, "learning_rate": 0.0002, "loss": 1.3975, "step": 199410 }, { "epoch": 0.81, "grad_norm": 3.139218330383301, "learning_rate": 0.0002, "loss": 1.7525, "step": 199420 }, { "epoch": 0.81, "grad_norm": 3.341916084289551, "learning_rate": 0.0002, "loss": 1.375, "step": 199430 }, { "epoch": 0.81, "grad_norm": 2.844588279724121, "learning_rate": 0.0002, "loss": 1.623, "step": 199440 }, { "epoch": 0.81, "grad_norm": 3.026329517364502, "learning_rate": 0.0002, "loss": 1.4913, "step": 199450 }, { "epoch": 0.81, "grad_norm": 2.9574368000030518, "learning_rate": 0.0002, "loss": 1.405, "step": 199460 }, { "epoch": 0.81, "grad_norm": 2.681419849395752, "learning_rate": 0.0002, "loss": 1.1555, "step": 199470 }, { "epoch": 0.81, "grad_norm": 4.587699890136719, "learning_rate": 0.0002, "loss": 1.4897, "step": 199480 }, { "epoch": 0.81, "grad_norm": 2.022343873977661, "learning_rate": 0.0002, "loss": 1.601, "step": 199490 }, { "epoch": 0.81, "grad_norm": 1.8079707622528076, "learning_rate": 0.0002, "loss": 1.6518, "step": 199500 }, { "epoch": 0.81, "grad_norm": 3.109898567199707, "learning_rate": 0.0002, "loss": 1.5683, "step": 199510 }, { "epoch": 0.81, "grad_norm": 2.863908290863037, "learning_rate": 0.0002, "loss": 1.6146, "step": 199520 }, { "epoch": 0.81, "grad_norm": 3.7972402572631836, "learning_rate": 0.0002, "loss": 1.7403, "step": 199530 }, { "epoch": 0.81, "grad_norm": 1.8239532709121704, "learning_rate": 0.0002, "loss": 1.8111, "step": 199540 }, { "epoch": 0.81, "grad_norm": 1.1998991966247559, "learning_rate": 0.0002, "loss": 1.5145, "step": 199550 }, { "epoch": 0.81, "grad_norm": 3.495802164077759, "learning_rate": 0.0002, "loss": 1.4633, "step": 199560 }, { "epoch": 0.81, "grad_norm": 5.7601776123046875, "learning_rate": 0.0002, "loss": 1.6345, "step": 199570 }, { "epoch": 0.81, "grad_norm": 3.493725299835205, "learning_rate": 0.0002, "loss": 1.4777, "step": 199580 }, { "epoch": 0.81, "grad_norm": 2.4825439453125, "learning_rate": 0.0002, "loss": 1.7192, "step": 199590 }, { "epoch": 0.81, "grad_norm": 2.5724663734436035, "learning_rate": 0.0002, "loss": 1.4978, "step": 199600 }, { "epoch": 0.81, "grad_norm": 3.12129807472229, "learning_rate": 0.0002, "loss": 1.5262, "step": 199610 }, { "epoch": 0.81, "grad_norm": 3.563392400741577, "learning_rate": 0.0002, "loss": 1.6337, "step": 199620 }, { "epoch": 0.81, "grad_norm": 2.3282206058502197, "learning_rate": 0.0002, "loss": 1.5303, "step": 199630 }, { "epoch": 0.81, "grad_norm": 3.8560285568237305, "learning_rate": 0.0002, "loss": 1.7633, "step": 199640 }, { "epoch": 0.81, "grad_norm": 3.225759983062744, "learning_rate": 0.0002, "loss": 1.4763, "step": 199650 }, { "epoch": 0.81, "grad_norm": 3.8963420391082764, "learning_rate": 0.0002, "loss": 1.6536, "step": 199660 }, { "epoch": 0.81, "grad_norm": 2.1737306118011475, "learning_rate": 0.0002, "loss": 1.5469, "step": 199670 }, { "epoch": 0.81, "grad_norm": 2.1937358379364014, "learning_rate": 0.0002, "loss": 1.4357, "step": 199680 }, { "epoch": 0.81, "grad_norm": 3.4392144680023193, "learning_rate": 0.0002, "loss": 1.7385, "step": 199690 }, { "epoch": 0.81, "grad_norm": 4.053357124328613, "learning_rate": 0.0002, "loss": 1.7836, "step": 199700 }, { "epoch": 0.81, "grad_norm": 2.712050199508667, "learning_rate": 0.0002, "loss": 1.5291, "step": 199710 }, { "epoch": 0.81, "grad_norm": 3.7998859882354736, "learning_rate": 0.0002, "loss": 1.5096, "step": 199720 }, { "epoch": 0.81, "grad_norm": 2.625666379928589, "learning_rate": 0.0002, "loss": 1.4831, "step": 199730 }, { "epoch": 0.81, "grad_norm": 3.190709114074707, "learning_rate": 0.0002, "loss": 1.4988, "step": 199740 }, { "epoch": 0.81, "grad_norm": 2.027726650238037, "learning_rate": 0.0002, "loss": 1.5319, "step": 199750 }, { "epoch": 0.81, "grad_norm": 3.577970504760742, "learning_rate": 0.0002, "loss": 1.4996, "step": 199760 }, { "epoch": 0.81, "grad_norm": 2.2690014839172363, "learning_rate": 0.0002, "loss": 1.5183, "step": 199770 }, { "epoch": 0.81, "grad_norm": 3.2814993858337402, "learning_rate": 0.0002, "loss": 1.5536, "step": 199780 }, { "epoch": 0.81, "grad_norm": 2.065983772277832, "learning_rate": 0.0002, "loss": 1.6662, "step": 199790 }, { "epoch": 0.81, "grad_norm": 2.8962013721466064, "learning_rate": 0.0002, "loss": 1.7605, "step": 199800 }, { "epoch": 0.81, "grad_norm": 4.368984222412109, "learning_rate": 0.0002, "loss": 1.5537, "step": 199810 }, { "epoch": 0.81, "grad_norm": 2.3386218547821045, "learning_rate": 0.0002, "loss": 1.5506, "step": 199820 }, { "epoch": 0.81, "grad_norm": 2.9711761474609375, "learning_rate": 0.0002, "loss": 1.669, "step": 199830 }, { "epoch": 0.81, "grad_norm": 3.2966701984405518, "learning_rate": 0.0002, "loss": 1.5518, "step": 199840 }, { "epoch": 0.81, "grad_norm": 3.5007946491241455, "learning_rate": 0.0002, "loss": 1.5938, "step": 199850 }, { "epoch": 0.81, "grad_norm": 3.0108845233917236, "learning_rate": 0.0002, "loss": 1.6525, "step": 199860 }, { "epoch": 0.81, "grad_norm": 4.330310821533203, "learning_rate": 0.0002, "loss": 1.5303, "step": 199870 }, { "epoch": 0.81, "grad_norm": 2.8701305389404297, "learning_rate": 0.0002, "loss": 1.4447, "step": 199880 }, { "epoch": 0.81, "grad_norm": 3.7539284229278564, "learning_rate": 0.0002, "loss": 1.5745, "step": 199890 }, { "epoch": 0.81, "grad_norm": 2.7792718410491943, "learning_rate": 0.0002, "loss": 1.6668, "step": 199900 }, { "epoch": 0.81, "grad_norm": 6.626028060913086, "learning_rate": 0.0002, "loss": 1.7142, "step": 199910 }, { "epoch": 0.81, "grad_norm": 3.829958438873291, "learning_rate": 0.0002, "loss": 1.7792, "step": 199920 }, { "epoch": 0.81, "grad_norm": 2.644817352294922, "learning_rate": 0.0002, "loss": 1.2685, "step": 199930 }, { "epoch": 0.81, "grad_norm": 2.8265819549560547, "learning_rate": 0.0002, "loss": 1.4413, "step": 199940 }, { "epoch": 0.81, "grad_norm": 2.7783169746398926, "learning_rate": 0.0002, "loss": 1.3301, "step": 199950 }, { "epoch": 0.81, "grad_norm": 2.1297006607055664, "learning_rate": 0.0002, "loss": 1.6522, "step": 199960 }, { "epoch": 0.81, "grad_norm": 2.4509224891662598, "learning_rate": 0.0002, "loss": 1.7049, "step": 199970 }, { "epoch": 0.81, "grad_norm": 3.2197470664978027, "learning_rate": 0.0002, "loss": 1.423, "step": 199980 }, { "epoch": 0.81, "grad_norm": 2.221473455429077, "learning_rate": 0.0002, "loss": 1.4192, "step": 199990 }, { "epoch": 0.81, "grad_norm": 4.3059587478637695, "learning_rate": 0.0002, "loss": 1.6838, "step": 200000 }, { "epoch": 0.81, "grad_norm": 3.4671692848205566, "learning_rate": 0.0002, "loss": 1.6629, "step": 200010 }, { "epoch": 0.81, "grad_norm": 3.2592716217041016, "learning_rate": 0.0002, "loss": 1.5668, "step": 200020 }, { "epoch": 0.81, "grad_norm": 3.184394359588623, "learning_rate": 0.0002, "loss": 1.5879, "step": 200030 }, { "epoch": 0.81, "grad_norm": 3.8386220932006836, "learning_rate": 0.0002, "loss": 1.4008, "step": 200040 }, { "epoch": 0.81, "grad_norm": 3.43827748298645, "learning_rate": 0.0002, "loss": 1.861, "step": 200050 }, { "epoch": 0.81, "grad_norm": 2.902050733566284, "learning_rate": 0.0002, "loss": 1.4886, "step": 200060 }, { "epoch": 0.81, "grad_norm": 3.339237928390503, "learning_rate": 0.0002, "loss": 1.5717, "step": 200070 }, { "epoch": 0.81, "grad_norm": 5.050685405731201, "learning_rate": 0.0002, "loss": 1.5851, "step": 200080 }, { "epoch": 0.81, "grad_norm": 2.7856388092041016, "learning_rate": 0.0002, "loss": 1.5045, "step": 200090 }, { "epoch": 0.81, "grad_norm": 2.7902982234954834, "learning_rate": 0.0002, "loss": 1.4538, "step": 200100 }, { "epoch": 0.81, "grad_norm": 3.4954419136047363, "learning_rate": 0.0002, "loss": 1.4691, "step": 200110 }, { "epoch": 0.81, "grad_norm": 4.943021297454834, "learning_rate": 0.0002, "loss": 1.5411, "step": 200120 }, { "epoch": 0.81, "grad_norm": 3.745222806930542, "learning_rate": 0.0002, "loss": 1.6433, "step": 200130 }, { "epoch": 0.81, "grad_norm": 3.335540294647217, "learning_rate": 0.0002, "loss": 1.6662, "step": 200140 }, { "epoch": 0.81, "grad_norm": 2.3386735916137695, "learning_rate": 0.0002, "loss": 1.6649, "step": 200150 }, { "epoch": 0.81, "grad_norm": 2.8956093788146973, "learning_rate": 0.0002, "loss": 1.6805, "step": 200160 }, { "epoch": 0.81, "grad_norm": 2.929435968399048, "learning_rate": 0.0002, "loss": 1.6684, "step": 200170 }, { "epoch": 0.81, "grad_norm": 4.370945453643799, "learning_rate": 0.0002, "loss": 1.8105, "step": 200180 }, { "epoch": 0.81, "grad_norm": 2.8828299045562744, "learning_rate": 0.0002, "loss": 1.4714, "step": 200190 }, { "epoch": 0.82, "grad_norm": 3.2255537509918213, "learning_rate": 0.0002, "loss": 1.6359, "step": 200200 }, { "epoch": 0.82, "grad_norm": 2.7465667724609375, "learning_rate": 0.0002, "loss": 1.7376, "step": 200210 }, { "epoch": 0.82, "grad_norm": 2.635866641998291, "learning_rate": 0.0002, "loss": 1.3841, "step": 200220 }, { "epoch": 0.82, "grad_norm": 5.374242305755615, "learning_rate": 0.0002, "loss": 1.4878, "step": 200230 }, { "epoch": 0.82, "grad_norm": 3.0629420280456543, "learning_rate": 0.0002, "loss": 1.5696, "step": 200240 }, { "epoch": 0.82, "grad_norm": 2.6320807933807373, "learning_rate": 0.0002, "loss": 1.4708, "step": 200250 }, { "epoch": 0.82, "grad_norm": 4.192784309387207, "learning_rate": 0.0002, "loss": 1.5818, "step": 200260 }, { "epoch": 0.82, "grad_norm": 2.314927577972412, "learning_rate": 0.0002, "loss": 1.7132, "step": 200270 }, { "epoch": 0.82, "grad_norm": 4.422706127166748, "learning_rate": 0.0002, "loss": 1.7799, "step": 200280 }, { "epoch": 0.82, "grad_norm": 2.3990321159362793, "learning_rate": 0.0002, "loss": 1.7133, "step": 200290 }, { "epoch": 0.82, "grad_norm": 4.149808883666992, "learning_rate": 0.0002, "loss": 1.5379, "step": 200300 }, { "epoch": 0.82, "grad_norm": 2.4991159439086914, "learning_rate": 0.0002, "loss": 1.5627, "step": 200310 }, { "epoch": 0.82, "grad_norm": 4.0000128746032715, "learning_rate": 0.0002, "loss": 1.9368, "step": 200320 }, { "epoch": 0.82, "grad_norm": 2.5969576835632324, "learning_rate": 0.0002, "loss": 1.3611, "step": 200330 }, { "epoch": 0.82, "grad_norm": 2.266751527786255, "learning_rate": 0.0002, "loss": 1.7042, "step": 200340 }, { "epoch": 0.82, "grad_norm": 2.123310089111328, "learning_rate": 0.0002, "loss": 1.871, "step": 200350 }, { "epoch": 0.82, "grad_norm": 3.2434284687042236, "learning_rate": 0.0002, "loss": 1.6443, "step": 200360 }, { "epoch": 0.82, "grad_norm": 3.108367681503296, "learning_rate": 0.0002, "loss": 1.767, "step": 200370 }, { "epoch": 0.82, "grad_norm": 2.6282691955566406, "learning_rate": 0.0002, "loss": 1.5226, "step": 200380 }, { "epoch": 0.82, "grad_norm": 2.020131826400757, "learning_rate": 0.0002, "loss": 1.5113, "step": 200390 }, { "epoch": 0.82, "grad_norm": 2.780042886734009, "learning_rate": 0.0002, "loss": 1.6634, "step": 200400 }, { "epoch": 0.82, "grad_norm": 2.147592067718506, "learning_rate": 0.0002, "loss": 1.5156, "step": 200410 }, { "epoch": 0.82, "grad_norm": 4.095967769622803, "learning_rate": 0.0002, "loss": 1.5708, "step": 200420 }, { "epoch": 0.82, "grad_norm": 3.3782386779785156, "learning_rate": 0.0002, "loss": 1.6831, "step": 200430 }, { "epoch": 0.82, "grad_norm": 3.390638828277588, "learning_rate": 0.0002, "loss": 1.6467, "step": 200440 }, { "epoch": 0.82, "grad_norm": 2.8843982219696045, "learning_rate": 0.0002, "loss": 1.3624, "step": 200450 }, { "epoch": 0.82, "grad_norm": 3.5144944190979004, "learning_rate": 0.0002, "loss": 1.478, "step": 200460 }, { "epoch": 0.82, "grad_norm": 2.637707233428955, "learning_rate": 0.0002, "loss": 1.484, "step": 200470 }, { "epoch": 0.82, "grad_norm": 1.8780664205551147, "learning_rate": 0.0002, "loss": 1.8108, "step": 200480 }, { "epoch": 0.82, "grad_norm": 5.2995076179504395, "learning_rate": 0.0002, "loss": 1.6281, "step": 200490 }, { "epoch": 0.82, "grad_norm": 3.3943519592285156, "learning_rate": 0.0002, "loss": 1.7635, "step": 200500 }, { "epoch": 0.82, "grad_norm": 2.0610344409942627, "learning_rate": 0.0002, "loss": 1.6213, "step": 200510 }, { "epoch": 0.82, "grad_norm": 3.50530743598938, "learning_rate": 0.0002, "loss": 1.3455, "step": 200520 }, { "epoch": 0.82, "grad_norm": 4.048388481140137, "learning_rate": 0.0002, "loss": 1.6321, "step": 200530 }, { "epoch": 0.82, "grad_norm": 2.9073188304901123, "learning_rate": 0.0002, "loss": 1.6762, "step": 200540 }, { "epoch": 0.82, "grad_norm": 4.367613315582275, "learning_rate": 0.0002, "loss": 1.5176, "step": 200550 }, { "epoch": 0.82, "grad_norm": 6.774343967437744, "learning_rate": 0.0002, "loss": 1.7908, "step": 200560 }, { "epoch": 0.82, "grad_norm": 2.5172770023345947, "learning_rate": 0.0002, "loss": 1.6769, "step": 200570 }, { "epoch": 0.82, "grad_norm": 3.0198657512664795, "learning_rate": 0.0002, "loss": 1.7332, "step": 200580 }, { "epoch": 0.82, "grad_norm": 2.5823450088500977, "learning_rate": 0.0002, "loss": 1.6252, "step": 200590 }, { "epoch": 0.82, "grad_norm": 2.3747715950012207, "learning_rate": 0.0002, "loss": 1.5933, "step": 200600 }, { "epoch": 0.82, "grad_norm": 4.435959339141846, "learning_rate": 0.0002, "loss": 1.5925, "step": 200610 }, { "epoch": 0.82, "grad_norm": 4.761816501617432, "learning_rate": 0.0002, "loss": 1.5672, "step": 200620 }, { "epoch": 0.82, "grad_norm": 2.883448839187622, "learning_rate": 0.0002, "loss": 1.4094, "step": 200630 }, { "epoch": 0.82, "grad_norm": 2.9341580867767334, "learning_rate": 0.0002, "loss": 1.7623, "step": 200640 }, { "epoch": 0.82, "grad_norm": 3.159633159637451, "learning_rate": 0.0002, "loss": 1.6047, "step": 200650 }, { "epoch": 0.82, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.7296, "step": 200660 }, { "epoch": 0.82, "grad_norm": 2.8240137100219727, "learning_rate": 0.0002, "loss": 1.6276, "step": 200670 }, { "epoch": 0.82, "grad_norm": 3.3703346252441406, "learning_rate": 0.0002, "loss": 1.7559, "step": 200680 }, { "epoch": 0.82, "grad_norm": 4.736722946166992, "learning_rate": 0.0002, "loss": 1.5599, "step": 200690 }, { "epoch": 0.82, "grad_norm": 1.6173474788665771, "learning_rate": 0.0002, "loss": 1.5732, "step": 200700 }, { "epoch": 0.82, "grad_norm": 2.1660053730010986, "learning_rate": 0.0002, "loss": 1.677, "step": 200710 }, { "epoch": 0.82, "grad_norm": 3.3157553672790527, "learning_rate": 0.0002, "loss": 1.4362, "step": 200720 }, { "epoch": 0.82, "grad_norm": 3.719892978668213, "learning_rate": 0.0002, "loss": 1.7065, "step": 200730 }, { "epoch": 0.82, "grad_norm": 1.721588134765625, "learning_rate": 0.0002, "loss": 1.6471, "step": 200740 }, { "epoch": 0.82, "grad_norm": 2.0158040523529053, "learning_rate": 0.0002, "loss": 1.6132, "step": 200750 }, { "epoch": 0.82, "grad_norm": 3.791214942932129, "learning_rate": 0.0002, "loss": 1.5431, "step": 200760 }, { "epoch": 0.82, "grad_norm": 2.359407424926758, "learning_rate": 0.0002, "loss": 1.7152, "step": 200770 }, { "epoch": 0.82, "grad_norm": 2.534160614013672, "learning_rate": 0.0002, "loss": 1.4607, "step": 200780 }, { "epoch": 0.82, "grad_norm": 3.445028781890869, "learning_rate": 0.0002, "loss": 1.558, "step": 200790 }, { "epoch": 0.82, "grad_norm": 1.6804299354553223, "learning_rate": 0.0002, "loss": 1.7071, "step": 200800 }, { "epoch": 0.82, "grad_norm": 5.126003742218018, "learning_rate": 0.0002, "loss": 1.2572, "step": 200810 }, { "epoch": 0.82, "grad_norm": 2.418910026550293, "learning_rate": 0.0002, "loss": 1.6557, "step": 200820 }, { "epoch": 0.82, "grad_norm": 3.425185203552246, "learning_rate": 0.0002, "loss": 1.3109, "step": 200830 }, { "epoch": 0.82, "grad_norm": 5.59514856338501, "learning_rate": 0.0002, "loss": 1.4482, "step": 200840 }, { "epoch": 0.82, "grad_norm": 5.242031097412109, "learning_rate": 0.0002, "loss": 1.5824, "step": 200850 }, { "epoch": 0.82, "grad_norm": 2.5406980514526367, "learning_rate": 0.0002, "loss": 1.5494, "step": 200860 }, { "epoch": 0.82, "grad_norm": 1.583446979522705, "learning_rate": 0.0002, "loss": 1.883, "step": 200870 }, { "epoch": 0.82, "grad_norm": 2.223994016647339, "learning_rate": 0.0002, "loss": 1.6178, "step": 200880 }, { "epoch": 0.82, "grad_norm": 2.4662954807281494, "learning_rate": 0.0002, "loss": 1.494, "step": 200890 }, { "epoch": 0.82, "grad_norm": 4.514795780181885, "learning_rate": 0.0002, "loss": 1.6527, "step": 200900 }, { "epoch": 0.82, "grad_norm": 1.4554818868637085, "learning_rate": 0.0002, "loss": 1.5693, "step": 200910 }, { "epoch": 0.82, "grad_norm": 2.068096399307251, "learning_rate": 0.0002, "loss": 1.573, "step": 200920 }, { "epoch": 0.82, "grad_norm": 4.067203521728516, "learning_rate": 0.0002, "loss": 1.6663, "step": 200930 }, { "epoch": 0.82, "grad_norm": 2.9450294971466064, "learning_rate": 0.0002, "loss": 1.4667, "step": 200940 }, { "epoch": 0.82, "grad_norm": 4.8575520515441895, "learning_rate": 0.0002, "loss": 1.5732, "step": 200950 }, { "epoch": 0.82, "grad_norm": 1.307734727859497, "learning_rate": 0.0002, "loss": 1.5748, "step": 200960 }, { "epoch": 0.82, "grad_norm": 2.6085922718048096, "learning_rate": 0.0002, "loss": 1.5695, "step": 200970 }, { "epoch": 0.82, "grad_norm": 2.1066513061523438, "learning_rate": 0.0002, "loss": 1.6413, "step": 200980 }, { "epoch": 0.82, "grad_norm": 2.607764720916748, "learning_rate": 0.0002, "loss": 1.6765, "step": 200990 }, { "epoch": 0.82, "grad_norm": 9.638734817504883, "learning_rate": 0.0002, "loss": 1.777, "step": 201000 }, { "epoch": 0.82, "grad_norm": 4.925416469573975, "learning_rate": 0.0002, "loss": 1.6973, "step": 201010 }, { "epoch": 0.82, "grad_norm": 3.7181456089019775, "learning_rate": 0.0002, "loss": 1.4334, "step": 201020 }, { "epoch": 0.82, "grad_norm": 3.450096845626831, "learning_rate": 0.0002, "loss": 1.5347, "step": 201030 }, { "epoch": 0.82, "grad_norm": 5.154470443725586, "learning_rate": 0.0002, "loss": 1.2386, "step": 201040 }, { "epoch": 0.82, "grad_norm": 4.74406623840332, "learning_rate": 0.0002, "loss": 1.7446, "step": 201050 }, { "epoch": 0.82, "grad_norm": 4.528318405151367, "learning_rate": 0.0002, "loss": 1.6243, "step": 201060 }, { "epoch": 0.82, "grad_norm": 2.9135138988494873, "learning_rate": 0.0002, "loss": 1.4826, "step": 201070 }, { "epoch": 0.82, "grad_norm": 2.5727357864379883, "learning_rate": 0.0002, "loss": 1.6073, "step": 201080 }, { "epoch": 0.82, "grad_norm": 1.9228267669677734, "learning_rate": 0.0002, "loss": 1.513, "step": 201090 }, { "epoch": 0.82, "grad_norm": 2.01326847076416, "learning_rate": 0.0002, "loss": 1.4946, "step": 201100 }, { "epoch": 0.82, "grad_norm": 2.1866884231567383, "learning_rate": 0.0002, "loss": 1.9435, "step": 201110 }, { "epoch": 0.82, "grad_norm": 2.5210840702056885, "learning_rate": 0.0002, "loss": 1.6563, "step": 201120 }, { "epoch": 0.82, "grad_norm": 2.0726544857025146, "learning_rate": 0.0002, "loss": 1.2977, "step": 201130 }, { "epoch": 0.82, "grad_norm": 2.2875759601593018, "learning_rate": 0.0002, "loss": 1.4692, "step": 201140 }, { "epoch": 0.82, "grad_norm": 3.6848573684692383, "learning_rate": 0.0002, "loss": 1.391, "step": 201150 }, { "epoch": 0.82, "grad_norm": 2.59397554397583, "learning_rate": 0.0002, "loss": 1.6291, "step": 201160 }, { "epoch": 0.82, "grad_norm": 3.272726535797119, "learning_rate": 0.0002, "loss": 1.6655, "step": 201170 }, { "epoch": 0.82, "grad_norm": 2.6227188110351562, "learning_rate": 0.0002, "loss": 1.7791, "step": 201180 }, { "epoch": 0.82, "grad_norm": 2.485945463180542, "learning_rate": 0.0002, "loss": 1.8631, "step": 201190 }, { "epoch": 0.82, "grad_norm": 5.373334884643555, "learning_rate": 0.0002, "loss": 1.6486, "step": 201200 }, { "epoch": 0.82, "grad_norm": 2.884080648422241, "learning_rate": 0.0002, "loss": 1.62, "step": 201210 }, { "epoch": 0.82, "grad_norm": 1.7945412397384644, "learning_rate": 0.0002, "loss": 1.8085, "step": 201220 }, { "epoch": 0.82, "grad_norm": 3.66652250289917, "learning_rate": 0.0002, "loss": 1.4827, "step": 201230 }, { "epoch": 0.82, "grad_norm": 3.500161647796631, "learning_rate": 0.0002, "loss": 1.8726, "step": 201240 }, { "epoch": 0.82, "grad_norm": 2.871929407119751, "learning_rate": 0.0002, "loss": 1.6451, "step": 201250 }, { "epoch": 0.82, "grad_norm": 2.597531795501709, "learning_rate": 0.0002, "loss": 1.5916, "step": 201260 }, { "epoch": 0.82, "grad_norm": 3.799511432647705, "learning_rate": 0.0002, "loss": 1.7469, "step": 201270 }, { "epoch": 0.82, "grad_norm": 3.251377582550049, "learning_rate": 0.0002, "loss": 1.6612, "step": 201280 }, { "epoch": 0.82, "grad_norm": 7.165322303771973, "learning_rate": 0.0002, "loss": 1.4811, "step": 201290 }, { "epoch": 0.82, "grad_norm": 3.385197401046753, "learning_rate": 0.0002, "loss": 1.4793, "step": 201300 }, { "epoch": 0.82, "grad_norm": 2.79166316986084, "learning_rate": 0.0002, "loss": 1.4019, "step": 201310 }, { "epoch": 0.82, "grad_norm": 2.480288505554199, "learning_rate": 0.0002, "loss": 1.3927, "step": 201320 }, { "epoch": 0.82, "grad_norm": 3.3899364471435547, "learning_rate": 0.0002, "loss": 1.5004, "step": 201330 }, { "epoch": 0.82, "grad_norm": 2.284485101699829, "learning_rate": 0.0002, "loss": 1.4416, "step": 201340 }, { "epoch": 0.82, "grad_norm": 2.4895315170288086, "learning_rate": 0.0002, "loss": 1.3847, "step": 201350 }, { "epoch": 0.82, "grad_norm": 2.7812089920043945, "learning_rate": 0.0002, "loss": 1.6505, "step": 201360 }, { "epoch": 0.82, "grad_norm": 3.4659128189086914, "learning_rate": 0.0002, "loss": 1.3715, "step": 201370 }, { "epoch": 0.82, "grad_norm": 2.5596818923950195, "learning_rate": 0.0002, "loss": 1.5853, "step": 201380 }, { "epoch": 0.82, "grad_norm": 2.497516632080078, "learning_rate": 0.0002, "loss": 1.7123, "step": 201390 }, { "epoch": 0.82, "grad_norm": 2.7943646907806396, "learning_rate": 0.0002, "loss": 1.6322, "step": 201400 }, { "epoch": 0.82, "grad_norm": 2.7778708934783936, "learning_rate": 0.0002, "loss": 1.7673, "step": 201410 }, { "epoch": 0.82, "grad_norm": 2.179807662963867, "learning_rate": 0.0002, "loss": 1.5116, "step": 201420 }, { "epoch": 0.82, "grad_norm": 2.6284663677215576, "learning_rate": 0.0002, "loss": 1.7037, "step": 201430 }, { "epoch": 0.82, "grad_norm": 3.0155036449432373, "learning_rate": 0.0002, "loss": 1.5384, "step": 201440 }, { "epoch": 0.82, "grad_norm": 2.970895528793335, "learning_rate": 0.0002, "loss": 1.6227, "step": 201450 }, { "epoch": 0.82, "grad_norm": 1.933533787727356, "learning_rate": 0.0002, "loss": 1.5014, "step": 201460 }, { "epoch": 0.82, "grad_norm": 3.5804452896118164, "learning_rate": 0.0002, "loss": 1.5772, "step": 201470 }, { "epoch": 0.82, "grad_norm": 4.915350437164307, "learning_rate": 0.0002, "loss": 1.6302, "step": 201480 }, { "epoch": 0.82, "grad_norm": 3.0634610652923584, "learning_rate": 0.0002, "loss": 1.5639, "step": 201490 }, { "epoch": 0.82, "grad_norm": 2.0893561840057373, "learning_rate": 0.0002, "loss": 1.6384, "step": 201500 }, { "epoch": 0.82, "grad_norm": 4.950010299682617, "learning_rate": 0.0002, "loss": 1.5991, "step": 201510 }, { "epoch": 0.82, "grad_norm": 5.981450080871582, "learning_rate": 0.0002, "loss": 1.3705, "step": 201520 }, { "epoch": 0.82, "grad_norm": 1.998018741607666, "learning_rate": 0.0002, "loss": 1.7662, "step": 201530 }, { "epoch": 0.82, "grad_norm": 5.917320728302002, "learning_rate": 0.0002, "loss": 1.6182, "step": 201540 }, { "epoch": 0.82, "grad_norm": 1.798321008682251, "learning_rate": 0.0002, "loss": 1.7729, "step": 201550 }, { "epoch": 0.82, "grad_norm": 3.8193907737731934, "learning_rate": 0.0002, "loss": 1.7284, "step": 201560 }, { "epoch": 0.82, "grad_norm": 2.8187050819396973, "learning_rate": 0.0002, "loss": 1.6303, "step": 201570 }, { "epoch": 0.82, "grad_norm": 3.2357852458953857, "learning_rate": 0.0002, "loss": 1.6673, "step": 201580 }, { "epoch": 0.82, "grad_norm": 3.995157480239868, "learning_rate": 0.0002, "loss": 1.6537, "step": 201590 }, { "epoch": 0.82, "grad_norm": 5.135768413543701, "learning_rate": 0.0002, "loss": 1.7342, "step": 201600 }, { "epoch": 0.82, "grad_norm": 2.216642141342163, "learning_rate": 0.0002, "loss": 1.5746, "step": 201610 }, { "epoch": 0.82, "grad_norm": 3.3014049530029297, "learning_rate": 0.0002, "loss": 1.5033, "step": 201620 }, { "epoch": 0.82, "grad_norm": 2.272437334060669, "learning_rate": 0.0002, "loss": 1.5551, "step": 201630 }, { "epoch": 0.82, "grad_norm": 4.098140239715576, "learning_rate": 0.0002, "loss": 1.7592, "step": 201640 }, { "epoch": 0.82, "grad_norm": 2.4232547283172607, "learning_rate": 0.0002, "loss": 1.6467, "step": 201650 }, { "epoch": 0.82, "grad_norm": 4.224586486816406, "learning_rate": 0.0002, "loss": 1.743, "step": 201660 }, { "epoch": 0.82, "grad_norm": 2.8813724517822266, "learning_rate": 0.0002, "loss": 1.5043, "step": 201670 }, { "epoch": 0.82, "grad_norm": 3.5151405334472656, "learning_rate": 0.0002, "loss": 1.7254, "step": 201680 }, { "epoch": 0.82, "grad_norm": 4.702524662017822, "learning_rate": 0.0002, "loss": 1.4798, "step": 201690 }, { "epoch": 0.82, "grad_norm": 6.1580586433410645, "learning_rate": 0.0002, "loss": 1.5568, "step": 201700 }, { "epoch": 0.82, "grad_norm": 2.1539559364318848, "learning_rate": 0.0002, "loss": 1.5311, "step": 201710 }, { "epoch": 0.82, "grad_norm": 2.5277748107910156, "learning_rate": 0.0002, "loss": 1.6532, "step": 201720 }, { "epoch": 0.82, "grad_norm": 3.2411224842071533, "learning_rate": 0.0002, "loss": 1.5159, "step": 201730 }, { "epoch": 0.82, "grad_norm": 3.2808756828308105, "learning_rate": 0.0002, "loss": 1.8958, "step": 201740 }, { "epoch": 0.82, "grad_norm": 2.815955877304077, "learning_rate": 0.0002, "loss": 1.5926, "step": 201750 }, { "epoch": 0.82, "grad_norm": 3.025454044342041, "learning_rate": 0.0002, "loss": 1.6109, "step": 201760 }, { "epoch": 0.82, "grad_norm": 4.748942852020264, "learning_rate": 0.0002, "loss": 1.548, "step": 201770 }, { "epoch": 0.82, "grad_norm": 2.4056763648986816, "learning_rate": 0.0002, "loss": 1.8937, "step": 201780 }, { "epoch": 0.82, "grad_norm": 3.276728630065918, "learning_rate": 0.0002, "loss": 1.4757, "step": 201790 }, { "epoch": 0.82, "grad_norm": 2.9609713554382324, "learning_rate": 0.0002, "loss": 1.5582, "step": 201800 }, { "epoch": 0.82, "grad_norm": 2.9100494384765625, "learning_rate": 0.0002, "loss": 1.4772, "step": 201810 }, { "epoch": 0.82, "grad_norm": 3.832686185836792, "learning_rate": 0.0002, "loss": 1.6256, "step": 201820 }, { "epoch": 0.82, "grad_norm": 2.149780511856079, "learning_rate": 0.0002, "loss": 1.4806, "step": 201830 }, { "epoch": 0.82, "grad_norm": 4.271036148071289, "learning_rate": 0.0002, "loss": 1.6662, "step": 201840 }, { "epoch": 0.82, "grad_norm": 2.756892681121826, "learning_rate": 0.0002, "loss": 1.3463, "step": 201850 }, { "epoch": 0.82, "grad_norm": 3.319411516189575, "learning_rate": 0.0002, "loss": 1.6184, "step": 201860 }, { "epoch": 0.82, "grad_norm": 2.4944233894348145, "learning_rate": 0.0002, "loss": 1.3646, "step": 201870 }, { "epoch": 0.82, "grad_norm": 3.081569194793701, "learning_rate": 0.0002, "loss": 1.9851, "step": 201880 }, { "epoch": 0.82, "grad_norm": 2.4410150051116943, "learning_rate": 0.0002, "loss": 1.5522, "step": 201890 }, { "epoch": 0.82, "grad_norm": 1.4040437936782837, "learning_rate": 0.0002, "loss": 1.3656, "step": 201900 }, { "epoch": 0.82, "grad_norm": 3.3715357780456543, "learning_rate": 0.0002, "loss": 1.5983, "step": 201910 }, { "epoch": 0.82, "grad_norm": 3.6116437911987305, "learning_rate": 0.0002, "loss": 1.5311, "step": 201920 }, { "epoch": 0.82, "grad_norm": 2.66628098487854, "learning_rate": 0.0002, "loss": 1.65, "step": 201930 }, { "epoch": 0.82, "grad_norm": 3.357422113418579, "learning_rate": 0.0002, "loss": 1.6972, "step": 201940 }, { "epoch": 0.82, "grad_norm": 1.7286266088485718, "learning_rate": 0.0002, "loss": 1.6172, "step": 201950 }, { "epoch": 0.82, "grad_norm": 3.4580273628234863, "learning_rate": 0.0002, "loss": 1.5969, "step": 201960 }, { "epoch": 0.82, "grad_norm": 2.9912593364715576, "learning_rate": 0.0002, "loss": 1.7374, "step": 201970 }, { "epoch": 0.82, "grad_norm": 1.7053132057189941, "learning_rate": 0.0002, "loss": 1.3168, "step": 201980 }, { "epoch": 0.82, "grad_norm": 2.14367413520813, "learning_rate": 0.0002, "loss": 1.6422, "step": 201990 }, { "epoch": 0.82, "grad_norm": 3.5544376373291016, "learning_rate": 0.0002, "loss": 1.3456, "step": 202000 }, { "epoch": 0.82, "grad_norm": 2.7532289028167725, "learning_rate": 0.0002, "loss": 1.3181, "step": 202010 }, { "epoch": 0.82, "grad_norm": 2.1873714923858643, "learning_rate": 0.0002, "loss": 1.7261, "step": 202020 }, { "epoch": 0.82, "grad_norm": 2.884730100631714, "learning_rate": 0.0002, "loss": 1.4986, "step": 202030 }, { "epoch": 0.82, "grad_norm": 2.9142160415649414, "learning_rate": 0.0002, "loss": 1.3202, "step": 202040 }, { "epoch": 0.82, "grad_norm": 3.2296252250671387, "learning_rate": 0.0002, "loss": 1.8442, "step": 202050 }, { "epoch": 0.82, "grad_norm": 2.938581943511963, "learning_rate": 0.0002, "loss": 1.7515, "step": 202060 }, { "epoch": 0.82, "grad_norm": 2.554640054702759, "learning_rate": 0.0002, "loss": 1.4773, "step": 202070 }, { "epoch": 0.82, "grad_norm": 3.154110908508301, "learning_rate": 0.0002, "loss": 1.2444, "step": 202080 }, { "epoch": 0.82, "grad_norm": 2.535529136657715, "learning_rate": 0.0002, "loss": 1.55, "step": 202090 }, { "epoch": 0.82, "grad_norm": 2.0547268390655518, "learning_rate": 0.0002, "loss": 1.5247, "step": 202100 }, { "epoch": 0.82, "grad_norm": 1.9132713079452515, "learning_rate": 0.0002, "loss": 1.5412, "step": 202110 }, { "epoch": 0.82, "grad_norm": 2.4001142978668213, "learning_rate": 0.0002, "loss": 1.5614, "step": 202120 }, { "epoch": 0.82, "grad_norm": 3.7516229152679443, "learning_rate": 0.0002, "loss": 1.7276, "step": 202130 }, { "epoch": 0.82, "grad_norm": 2.4920296669006348, "learning_rate": 0.0002, "loss": 1.3398, "step": 202140 }, { "epoch": 0.82, "grad_norm": 4.138619422912598, "learning_rate": 0.0002, "loss": 1.7632, "step": 202150 }, { "epoch": 0.82, "grad_norm": 2.045508861541748, "learning_rate": 0.0002, "loss": 1.7256, "step": 202160 }, { "epoch": 0.82, "grad_norm": 2.372060775756836, "learning_rate": 0.0002, "loss": 1.6376, "step": 202170 }, { "epoch": 0.82, "grad_norm": 2.5777411460876465, "learning_rate": 0.0002, "loss": 1.6205, "step": 202180 }, { "epoch": 0.82, "grad_norm": 2.5343613624572754, "learning_rate": 0.0002, "loss": 1.6647, "step": 202190 }, { "epoch": 0.82, "grad_norm": 3.105290412902832, "learning_rate": 0.0002, "loss": 1.8317, "step": 202200 }, { "epoch": 0.82, "grad_norm": 3.8586814403533936, "learning_rate": 0.0002, "loss": 1.492, "step": 202210 }, { "epoch": 0.82, "grad_norm": 2.895517110824585, "learning_rate": 0.0002, "loss": 1.6338, "step": 202220 }, { "epoch": 0.82, "grad_norm": 3.6202526092529297, "learning_rate": 0.0002, "loss": 1.6178, "step": 202230 }, { "epoch": 0.82, "grad_norm": 1.6649901866912842, "learning_rate": 0.0002, "loss": 1.5187, "step": 202240 }, { "epoch": 0.82, "grad_norm": 3.611795425415039, "learning_rate": 0.0002, "loss": 1.4165, "step": 202250 }, { "epoch": 0.82, "grad_norm": 3.0767595767974854, "learning_rate": 0.0002, "loss": 1.4848, "step": 202260 }, { "epoch": 0.82, "grad_norm": 4.967329502105713, "learning_rate": 0.0002, "loss": 1.3485, "step": 202270 }, { "epoch": 0.82, "grad_norm": 3.906885862350464, "learning_rate": 0.0002, "loss": 1.4188, "step": 202280 }, { "epoch": 0.82, "grad_norm": 2.156377077102661, "learning_rate": 0.0002, "loss": 1.4501, "step": 202290 }, { "epoch": 0.82, "grad_norm": 2.3667221069335938, "learning_rate": 0.0002, "loss": 1.5589, "step": 202300 }, { "epoch": 0.82, "grad_norm": 2.2338991165161133, "learning_rate": 0.0002, "loss": 1.5529, "step": 202310 }, { "epoch": 0.82, "grad_norm": 6.507227897644043, "learning_rate": 0.0002, "loss": 1.515, "step": 202320 }, { "epoch": 0.82, "grad_norm": 2.92794132232666, "learning_rate": 0.0002, "loss": 1.7789, "step": 202330 }, { "epoch": 0.82, "grad_norm": 2.442352056503296, "learning_rate": 0.0002, "loss": 1.4763, "step": 202340 }, { "epoch": 0.82, "grad_norm": 3.5920212268829346, "learning_rate": 0.0002, "loss": 2.0081, "step": 202350 }, { "epoch": 0.82, "grad_norm": 2.5495054721832275, "learning_rate": 0.0002, "loss": 1.7987, "step": 202360 }, { "epoch": 0.82, "grad_norm": 4.515008926391602, "learning_rate": 0.0002, "loss": 1.7667, "step": 202370 }, { "epoch": 0.82, "grad_norm": 2.106654405593872, "learning_rate": 0.0002, "loss": 1.2894, "step": 202380 }, { "epoch": 0.82, "grad_norm": 2.6603620052337646, "learning_rate": 0.0002, "loss": 1.6631, "step": 202390 }, { "epoch": 0.82, "grad_norm": 2.6121151447296143, "learning_rate": 0.0002, "loss": 1.434, "step": 202400 }, { "epoch": 0.82, "grad_norm": 2.462538719177246, "learning_rate": 0.0002, "loss": 1.7004, "step": 202410 }, { "epoch": 0.82, "grad_norm": 3.992396593093872, "learning_rate": 0.0002, "loss": 1.6129, "step": 202420 }, { "epoch": 0.82, "grad_norm": 4.181812286376953, "learning_rate": 0.0002, "loss": 1.829, "step": 202430 }, { "epoch": 0.82, "grad_norm": 3.3964734077453613, "learning_rate": 0.0002, "loss": 1.6591, "step": 202440 }, { "epoch": 0.82, "grad_norm": 2.413055419921875, "learning_rate": 0.0002, "loss": 1.6293, "step": 202450 }, { "epoch": 0.82, "grad_norm": 2.6981146335601807, "learning_rate": 0.0002, "loss": 1.5784, "step": 202460 }, { "epoch": 0.82, "grad_norm": 3.2705533504486084, "learning_rate": 0.0002, "loss": 1.7725, "step": 202470 }, { "epoch": 0.82, "grad_norm": 3.8447225093841553, "learning_rate": 0.0002, "loss": 1.6909, "step": 202480 }, { "epoch": 0.82, "grad_norm": 2.4147040843963623, "learning_rate": 0.0002, "loss": 1.5105, "step": 202490 }, { "epoch": 0.82, "grad_norm": 3.438101053237915, "learning_rate": 0.0002, "loss": 1.4887, "step": 202500 }, { "epoch": 0.82, "grad_norm": 2.7712888717651367, "learning_rate": 0.0002, "loss": 1.4378, "step": 202510 }, { "epoch": 0.82, "grad_norm": 3.3744442462921143, "learning_rate": 0.0002, "loss": 1.5712, "step": 202520 }, { "epoch": 0.82, "grad_norm": 2.8154945373535156, "learning_rate": 0.0002, "loss": 1.7184, "step": 202530 }, { "epoch": 0.82, "grad_norm": 3.7666351795196533, "learning_rate": 0.0002, "loss": 1.515, "step": 202540 }, { "epoch": 0.82, "grad_norm": 2.3365087509155273, "learning_rate": 0.0002, "loss": 1.6078, "step": 202550 }, { "epoch": 0.82, "grad_norm": 4.526130199432373, "learning_rate": 0.0002, "loss": 1.5395, "step": 202560 }, { "epoch": 0.82, "grad_norm": 2.8635332584381104, "learning_rate": 0.0002, "loss": 1.6837, "step": 202570 }, { "epoch": 0.82, "grad_norm": 4.45156192779541, "learning_rate": 0.0002, "loss": 1.4809, "step": 202580 }, { "epoch": 0.82, "grad_norm": 3.600257396697998, "learning_rate": 0.0002, "loss": 1.6347, "step": 202590 }, { "epoch": 0.82, "grad_norm": 2.4963629245758057, "learning_rate": 0.0002, "loss": 1.5991, "step": 202600 }, { "epoch": 0.82, "grad_norm": 3.3464298248291016, "learning_rate": 0.0002, "loss": 1.671, "step": 202610 }, { "epoch": 0.82, "grad_norm": 2.058180809020996, "learning_rate": 0.0002, "loss": 1.6281, "step": 202620 }, { "epoch": 0.82, "grad_norm": 2.3859846591949463, "learning_rate": 0.0002, "loss": 1.3772, "step": 202630 }, { "epoch": 0.82, "grad_norm": 2.682647705078125, "learning_rate": 0.0002, "loss": 1.494, "step": 202640 }, { "epoch": 0.82, "grad_norm": 3.210529327392578, "learning_rate": 0.0002, "loss": 1.6833, "step": 202650 }, { "epoch": 0.83, "grad_norm": 3.657027244567871, "learning_rate": 0.0002, "loss": 1.6264, "step": 202660 }, { "epoch": 0.83, "grad_norm": 4.028883934020996, "learning_rate": 0.0002, "loss": 1.4605, "step": 202670 }, { "epoch": 0.83, "grad_norm": 3.752472400665283, "learning_rate": 0.0002, "loss": 1.6413, "step": 202680 }, { "epoch": 0.83, "grad_norm": 3.8747456073760986, "learning_rate": 0.0002, "loss": 1.7367, "step": 202690 }, { "epoch": 0.83, "grad_norm": 3.1707351207733154, "learning_rate": 0.0002, "loss": 1.6422, "step": 202700 }, { "epoch": 0.83, "grad_norm": 3.1283178329467773, "learning_rate": 0.0002, "loss": 1.6283, "step": 202710 }, { "epoch": 0.83, "grad_norm": 4.88255500793457, "learning_rate": 0.0002, "loss": 1.6762, "step": 202720 }, { "epoch": 0.83, "grad_norm": 2.3010878562927246, "learning_rate": 0.0002, "loss": 1.4136, "step": 202730 }, { "epoch": 0.83, "grad_norm": 2.278970956802368, "learning_rate": 0.0002, "loss": 1.555, "step": 202740 }, { "epoch": 0.83, "grad_norm": 2.750117063522339, "learning_rate": 0.0002, "loss": 1.6018, "step": 202750 }, { "epoch": 0.83, "grad_norm": 2.6164791584014893, "learning_rate": 0.0002, "loss": 1.5303, "step": 202760 }, { "epoch": 0.83, "grad_norm": 3.036940336227417, "learning_rate": 0.0002, "loss": 1.4448, "step": 202770 }, { "epoch": 0.83, "grad_norm": 2.7833354473114014, "learning_rate": 0.0002, "loss": 1.2091, "step": 202780 }, { "epoch": 0.83, "grad_norm": 3.2129018306732178, "learning_rate": 0.0002, "loss": 1.4835, "step": 202790 }, { "epoch": 0.83, "grad_norm": 2.665091037750244, "learning_rate": 0.0002, "loss": 1.4639, "step": 202800 }, { "epoch": 0.83, "grad_norm": 3.2417356967926025, "learning_rate": 0.0002, "loss": 1.4466, "step": 202810 }, { "epoch": 0.83, "grad_norm": 3.520063638687134, "learning_rate": 0.0002, "loss": 1.5508, "step": 202820 }, { "epoch": 0.83, "grad_norm": 5.043698787689209, "learning_rate": 0.0002, "loss": 1.6243, "step": 202830 }, { "epoch": 0.83, "grad_norm": 4.604275703430176, "learning_rate": 0.0002, "loss": 1.8937, "step": 202840 }, { "epoch": 0.83, "grad_norm": 2.6140754222869873, "learning_rate": 0.0002, "loss": 1.6511, "step": 202850 }, { "epoch": 0.83, "grad_norm": 4.000005722045898, "learning_rate": 0.0002, "loss": 1.636, "step": 202860 }, { "epoch": 0.83, "grad_norm": 2.618435859680176, "learning_rate": 0.0002, "loss": 1.5534, "step": 202870 }, { "epoch": 0.83, "grad_norm": 8.076700210571289, "learning_rate": 0.0002, "loss": 1.5254, "step": 202880 }, { "epoch": 0.83, "grad_norm": 1.6001077890396118, "learning_rate": 0.0002, "loss": 1.6839, "step": 202890 }, { "epoch": 0.83, "grad_norm": 3.22790789604187, "learning_rate": 0.0002, "loss": 1.4307, "step": 202900 }, { "epoch": 0.83, "grad_norm": 2.5643503665924072, "learning_rate": 0.0002, "loss": 1.4686, "step": 202910 }, { "epoch": 0.83, "grad_norm": 5.5071797370910645, "learning_rate": 0.0002, "loss": 1.5405, "step": 202920 }, { "epoch": 0.83, "grad_norm": 5.447440147399902, "learning_rate": 0.0002, "loss": 1.8058, "step": 202930 }, { "epoch": 0.83, "grad_norm": 4.002035140991211, "learning_rate": 0.0002, "loss": 1.6134, "step": 202940 }, { "epoch": 0.83, "grad_norm": 1.9945546388626099, "learning_rate": 0.0002, "loss": 1.5615, "step": 202950 }, { "epoch": 0.83, "grad_norm": 2.021836757659912, "learning_rate": 0.0002, "loss": 1.4355, "step": 202960 }, { "epoch": 0.83, "grad_norm": 2.320538282394409, "learning_rate": 0.0002, "loss": 1.4312, "step": 202970 }, { "epoch": 0.83, "grad_norm": 2.361997127532959, "learning_rate": 0.0002, "loss": 1.37, "step": 202980 }, { "epoch": 0.83, "grad_norm": 3.1511728763580322, "learning_rate": 0.0002, "loss": 1.6771, "step": 202990 }, { "epoch": 0.83, "grad_norm": 2.8462018966674805, "learning_rate": 0.0002, "loss": 1.3136, "step": 203000 }, { "epoch": 0.83, "grad_norm": 4.976779460906982, "learning_rate": 0.0002, "loss": 1.7631, "step": 203010 }, { "epoch": 0.83, "grad_norm": 3.623782157897949, "learning_rate": 0.0002, "loss": 1.6238, "step": 203020 }, { "epoch": 0.83, "grad_norm": 3.9382166862487793, "learning_rate": 0.0002, "loss": 1.6146, "step": 203030 }, { "epoch": 0.83, "grad_norm": 3.069135904312134, "learning_rate": 0.0002, "loss": 1.3983, "step": 203040 }, { "epoch": 0.83, "grad_norm": 3.0713725090026855, "learning_rate": 0.0002, "loss": 1.5714, "step": 203050 }, { "epoch": 0.83, "grad_norm": 2.772455930709839, "learning_rate": 0.0002, "loss": 1.4284, "step": 203060 }, { "epoch": 0.83, "grad_norm": 4.694379806518555, "learning_rate": 0.0002, "loss": 1.5145, "step": 203070 }, { "epoch": 0.83, "grad_norm": 2.866323471069336, "learning_rate": 0.0002, "loss": 1.6233, "step": 203080 }, { "epoch": 0.83, "grad_norm": 1.5874102115631104, "learning_rate": 0.0002, "loss": 1.4964, "step": 203090 }, { "epoch": 0.83, "grad_norm": 2.862424850463867, "learning_rate": 0.0002, "loss": 1.5302, "step": 203100 }, { "epoch": 0.83, "grad_norm": 4.646700859069824, "learning_rate": 0.0002, "loss": 1.587, "step": 203110 }, { "epoch": 0.83, "grad_norm": 3.9552454948425293, "learning_rate": 0.0002, "loss": 1.6828, "step": 203120 }, { "epoch": 0.83, "grad_norm": 3.2867329120635986, "learning_rate": 0.0002, "loss": 1.8054, "step": 203130 }, { "epoch": 0.83, "grad_norm": 2.3568220138549805, "learning_rate": 0.0002, "loss": 1.668, "step": 203140 }, { "epoch": 0.83, "grad_norm": 4.83786153793335, "learning_rate": 0.0002, "loss": 1.4972, "step": 203150 }, { "epoch": 0.83, "grad_norm": 4.520172595977783, "learning_rate": 0.0002, "loss": 1.5522, "step": 203160 }, { "epoch": 0.83, "grad_norm": 3.775904655456543, "learning_rate": 0.0002, "loss": 1.2786, "step": 203170 }, { "epoch": 0.83, "grad_norm": 3.1926701068878174, "learning_rate": 0.0002, "loss": 1.3201, "step": 203180 }, { "epoch": 0.83, "grad_norm": 2.8111536502838135, "learning_rate": 0.0002, "loss": 1.6049, "step": 203190 }, { "epoch": 0.83, "grad_norm": 4.709024429321289, "learning_rate": 0.0002, "loss": 1.3556, "step": 203200 }, { "epoch": 0.83, "grad_norm": 4.116768836975098, "learning_rate": 0.0002, "loss": 1.6222, "step": 203210 }, { "epoch": 0.83, "grad_norm": 2.3365957736968994, "learning_rate": 0.0002, "loss": 1.7322, "step": 203220 }, { "epoch": 0.83, "grad_norm": 3.0055179595947266, "learning_rate": 0.0002, "loss": 1.7756, "step": 203230 }, { "epoch": 0.83, "grad_norm": 3.0620646476745605, "learning_rate": 0.0002, "loss": 1.462, "step": 203240 }, { "epoch": 0.83, "grad_norm": 3.136845588684082, "learning_rate": 0.0002, "loss": 1.5859, "step": 203250 }, { "epoch": 0.83, "grad_norm": 2.456442356109619, "learning_rate": 0.0002, "loss": 1.5984, "step": 203260 }, { "epoch": 0.83, "grad_norm": 2.3574600219726562, "learning_rate": 0.0002, "loss": 1.6303, "step": 203270 }, { "epoch": 0.83, "grad_norm": 3.2612149715423584, "learning_rate": 0.0002, "loss": 1.5964, "step": 203280 }, { "epoch": 0.83, "grad_norm": 2.0307998657226562, "learning_rate": 0.0002, "loss": 1.5779, "step": 203290 }, { "epoch": 0.83, "grad_norm": 2.470634698867798, "learning_rate": 0.0002, "loss": 1.6429, "step": 203300 }, { "epoch": 0.83, "grad_norm": 3.439964771270752, "learning_rate": 0.0002, "loss": 1.2807, "step": 203310 }, { "epoch": 0.83, "grad_norm": 3.1350619792938232, "learning_rate": 0.0002, "loss": 1.6625, "step": 203320 }, { "epoch": 0.83, "grad_norm": 2.687880516052246, "learning_rate": 0.0002, "loss": 1.5858, "step": 203330 }, { "epoch": 0.83, "grad_norm": 3.570391893386841, "learning_rate": 0.0002, "loss": 1.7847, "step": 203340 }, { "epoch": 0.83, "grad_norm": 2.3152225017547607, "learning_rate": 0.0002, "loss": 1.6515, "step": 203350 }, { "epoch": 0.83, "grad_norm": 2.4676225185394287, "learning_rate": 0.0002, "loss": 1.405, "step": 203360 }, { "epoch": 0.83, "grad_norm": 3.9722349643707275, "learning_rate": 0.0002, "loss": 1.7462, "step": 203370 }, { "epoch": 0.83, "grad_norm": 4.101015090942383, "learning_rate": 0.0002, "loss": 1.476, "step": 203380 }, { "epoch": 0.83, "grad_norm": 3.0450992584228516, "learning_rate": 0.0002, "loss": 1.7079, "step": 203390 }, { "epoch": 0.83, "grad_norm": 3.0686662197113037, "learning_rate": 0.0002, "loss": 1.5759, "step": 203400 }, { "epoch": 0.83, "grad_norm": 2.7316527366638184, "learning_rate": 0.0002, "loss": 1.4629, "step": 203410 }, { "epoch": 0.83, "grad_norm": 3.3936331272125244, "learning_rate": 0.0002, "loss": 1.5889, "step": 203420 }, { "epoch": 0.83, "grad_norm": 3.237874746322632, "learning_rate": 0.0002, "loss": 1.6521, "step": 203430 }, { "epoch": 0.83, "grad_norm": 5.571216583251953, "learning_rate": 0.0002, "loss": 1.5818, "step": 203440 }, { "epoch": 0.83, "grad_norm": 4.567506313323975, "learning_rate": 0.0002, "loss": 1.2981, "step": 203450 }, { "epoch": 0.83, "grad_norm": 3.5283260345458984, "learning_rate": 0.0002, "loss": 1.6975, "step": 203460 }, { "epoch": 0.83, "grad_norm": 3.653435468673706, "learning_rate": 0.0002, "loss": 1.3503, "step": 203470 }, { "epoch": 0.83, "grad_norm": 5.347657680511475, "learning_rate": 0.0002, "loss": 1.718, "step": 203480 }, { "epoch": 0.83, "grad_norm": 3.1422107219696045, "learning_rate": 0.0002, "loss": 1.7812, "step": 203490 }, { "epoch": 0.83, "grad_norm": 4.728053569793701, "learning_rate": 0.0002, "loss": 1.7711, "step": 203500 }, { "epoch": 0.83, "grad_norm": 3.1112096309661865, "learning_rate": 0.0002, "loss": 1.6739, "step": 203510 }, { "epoch": 0.83, "grad_norm": 2.6396493911743164, "learning_rate": 0.0002, "loss": 1.5191, "step": 203520 }, { "epoch": 0.83, "grad_norm": 3.0077016353607178, "learning_rate": 0.0002, "loss": 1.7743, "step": 203530 }, { "epoch": 0.83, "grad_norm": 3.233771562576294, "learning_rate": 0.0002, "loss": 1.7032, "step": 203540 }, { "epoch": 0.83, "grad_norm": 3.1473348140716553, "learning_rate": 0.0002, "loss": 1.5086, "step": 203550 }, { "epoch": 0.83, "grad_norm": 2.7842721939086914, "learning_rate": 0.0002, "loss": 1.657, "step": 203560 }, { "epoch": 0.83, "grad_norm": 2.6561148166656494, "learning_rate": 0.0002, "loss": 1.601, "step": 203570 }, { "epoch": 0.83, "grad_norm": 1.4937562942504883, "learning_rate": 0.0002, "loss": 1.3494, "step": 203580 }, { "epoch": 0.83, "grad_norm": 4.2579450607299805, "learning_rate": 0.0002, "loss": 1.4297, "step": 203590 }, { "epoch": 0.83, "grad_norm": 4.340793609619141, "learning_rate": 0.0002, "loss": 1.6667, "step": 203600 }, { "epoch": 0.83, "grad_norm": 1.8465044498443604, "learning_rate": 0.0002, "loss": 1.6235, "step": 203610 }, { "epoch": 0.83, "grad_norm": 2.169309139251709, "learning_rate": 0.0002, "loss": 1.6287, "step": 203620 }, { "epoch": 0.83, "grad_norm": 3.626425266265869, "learning_rate": 0.0002, "loss": 1.6923, "step": 203630 }, { "epoch": 0.83, "grad_norm": 2.685636520385742, "learning_rate": 0.0002, "loss": 1.632, "step": 203640 }, { "epoch": 0.83, "grad_norm": 3.5362801551818848, "learning_rate": 0.0002, "loss": 1.5226, "step": 203650 }, { "epoch": 0.83, "grad_norm": 4.477468490600586, "learning_rate": 0.0002, "loss": 1.3728, "step": 203660 }, { "epoch": 0.83, "grad_norm": 4.541819095611572, "learning_rate": 0.0002, "loss": 1.6281, "step": 203670 }, { "epoch": 0.83, "grad_norm": 3.975559711456299, "learning_rate": 0.0002, "loss": 1.3479, "step": 203680 }, { "epoch": 0.83, "grad_norm": 2.1796860694885254, "learning_rate": 0.0002, "loss": 1.4552, "step": 203690 }, { "epoch": 0.83, "grad_norm": 3.3697969913482666, "learning_rate": 0.0002, "loss": 1.5416, "step": 203700 }, { "epoch": 0.83, "grad_norm": 3.3466758728027344, "learning_rate": 0.0002, "loss": 1.5492, "step": 203710 }, { "epoch": 0.83, "grad_norm": 2.803532361984253, "learning_rate": 0.0002, "loss": 1.2269, "step": 203720 }, { "epoch": 0.83, "grad_norm": 3.5205271244049072, "learning_rate": 0.0002, "loss": 1.3315, "step": 203730 }, { "epoch": 0.83, "grad_norm": 9.811635971069336, "learning_rate": 0.0002, "loss": 1.47, "step": 203740 }, { "epoch": 0.83, "grad_norm": 2.303056240081787, "learning_rate": 0.0002, "loss": 1.5633, "step": 203750 }, { "epoch": 0.83, "grad_norm": 2.6988470554351807, "learning_rate": 0.0002, "loss": 1.4325, "step": 203760 }, { "epoch": 0.83, "grad_norm": 2.7801737785339355, "learning_rate": 0.0002, "loss": 1.7455, "step": 203770 }, { "epoch": 0.83, "grad_norm": 2.7504916191101074, "learning_rate": 0.0002, "loss": 1.3163, "step": 203780 }, { "epoch": 0.83, "grad_norm": 2.080017328262329, "learning_rate": 0.0002, "loss": 1.6221, "step": 203790 }, { "epoch": 0.83, "grad_norm": 2.418226480484009, "learning_rate": 0.0002, "loss": 1.7079, "step": 203800 }, { "epoch": 0.83, "grad_norm": 3.147047519683838, "learning_rate": 0.0002, "loss": 1.5521, "step": 203810 }, { "epoch": 0.83, "grad_norm": 3.4428229331970215, "learning_rate": 0.0002, "loss": 1.4916, "step": 203820 }, { "epoch": 0.83, "grad_norm": 3.030194044113159, "learning_rate": 0.0002, "loss": 1.5516, "step": 203830 }, { "epoch": 0.83, "grad_norm": 2.5809268951416016, "learning_rate": 0.0002, "loss": 1.3294, "step": 203840 }, { "epoch": 0.83, "grad_norm": 2.958609104156494, "learning_rate": 0.0002, "loss": 1.5209, "step": 203850 }, { "epoch": 0.83, "grad_norm": 5.042663097381592, "learning_rate": 0.0002, "loss": 1.5787, "step": 203860 }, { "epoch": 0.83, "grad_norm": 2.1463534832000732, "learning_rate": 0.0002, "loss": 1.466, "step": 203870 }, { "epoch": 0.83, "grad_norm": 2.767463207244873, "learning_rate": 0.0002, "loss": 1.5935, "step": 203880 }, { "epoch": 0.83, "grad_norm": 2.332033395767212, "learning_rate": 0.0002, "loss": 1.468, "step": 203890 }, { "epoch": 0.83, "grad_norm": 3.4014170169830322, "learning_rate": 0.0002, "loss": 1.4721, "step": 203900 }, { "epoch": 0.83, "grad_norm": 1.6158546209335327, "learning_rate": 0.0002, "loss": 1.5741, "step": 203910 }, { "epoch": 0.83, "grad_norm": 4.817910671234131, "learning_rate": 0.0002, "loss": 1.8037, "step": 203920 }, { "epoch": 0.83, "grad_norm": 3.109747886657715, "learning_rate": 0.0002, "loss": 1.3982, "step": 203930 }, { "epoch": 0.83, "grad_norm": 3.3491854667663574, "learning_rate": 0.0002, "loss": 1.697, "step": 203940 }, { "epoch": 0.83, "grad_norm": 2.8452842235565186, "learning_rate": 0.0002, "loss": 1.5284, "step": 203950 }, { "epoch": 0.83, "grad_norm": 4.0861735343933105, "learning_rate": 0.0002, "loss": 1.6849, "step": 203960 }, { "epoch": 0.83, "grad_norm": 3.9899356365203857, "learning_rate": 0.0002, "loss": 1.7171, "step": 203970 }, { "epoch": 0.83, "grad_norm": 2.2292771339416504, "learning_rate": 0.0002, "loss": 1.3384, "step": 203980 }, { "epoch": 0.83, "grad_norm": 3.2320902347564697, "learning_rate": 0.0002, "loss": 1.7962, "step": 203990 }, { "epoch": 0.83, "grad_norm": 3.9207241535186768, "learning_rate": 0.0002, "loss": 1.6103, "step": 204000 }, { "epoch": 0.83, "grad_norm": 2.780299186706543, "learning_rate": 0.0002, "loss": 1.8169, "step": 204010 }, { "epoch": 0.83, "grad_norm": 3.1382110118865967, "learning_rate": 0.0002, "loss": 1.6226, "step": 204020 }, { "epoch": 0.83, "grad_norm": 1.7307215929031372, "learning_rate": 0.0002, "loss": 1.2772, "step": 204030 }, { "epoch": 0.83, "grad_norm": 2.0227770805358887, "learning_rate": 0.0002, "loss": 1.7107, "step": 204040 }, { "epoch": 0.83, "grad_norm": 4.341747760772705, "learning_rate": 0.0002, "loss": 1.3919, "step": 204050 }, { "epoch": 0.83, "grad_norm": 2.8919012546539307, "learning_rate": 0.0002, "loss": 1.5797, "step": 204060 }, { "epoch": 0.83, "grad_norm": 4.4553446769714355, "learning_rate": 0.0002, "loss": 1.6157, "step": 204070 }, { "epoch": 0.83, "grad_norm": 3.216892719268799, "learning_rate": 0.0002, "loss": 1.3666, "step": 204080 }, { "epoch": 0.83, "grad_norm": 2.5958075523376465, "learning_rate": 0.0002, "loss": 1.5726, "step": 204090 }, { "epoch": 0.83, "grad_norm": 4.530405521392822, "learning_rate": 0.0002, "loss": 1.5111, "step": 204100 }, { "epoch": 0.83, "grad_norm": 4.985377311706543, "learning_rate": 0.0002, "loss": 1.8281, "step": 204110 }, { "epoch": 0.83, "grad_norm": 3.403989553451538, "learning_rate": 0.0002, "loss": 1.7146, "step": 204120 }, { "epoch": 0.83, "grad_norm": 4.383645534515381, "learning_rate": 0.0002, "loss": 1.6971, "step": 204130 }, { "epoch": 0.83, "grad_norm": 2.5151538848876953, "learning_rate": 0.0002, "loss": 1.6873, "step": 204140 }, { "epoch": 0.83, "grad_norm": 2.3453640937805176, "learning_rate": 0.0002, "loss": 1.8016, "step": 204150 }, { "epoch": 0.83, "grad_norm": 3.4078309535980225, "learning_rate": 0.0002, "loss": 1.4355, "step": 204160 }, { "epoch": 0.83, "grad_norm": 5.099096298217773, "learning_rate": 0.0002, "loss": 1.2089, "step": 204170 }, { "epoch": 0.83, "grad_norm": 3.3015613555908203, "learning_rate": 0.0002, "loss": 1.7212, "step": 204180 }, { "epoch": 0.83, "grad_norm": 2.9736220836639404, "learning_rate": 0.0002, "loss": 1.3141, "step": 204190 }, { "epoch": 0.83, "grad_norm": 2.9241373538970947, "learning_rate": 0.0002, "loss": 1.5796, "step": 204200 }, { "epoch": 0.83, "grad_norm": 3.5044262409210205, "learning_rate": 0.0002, "loss": 1.4962, "step": 204210 }, { "epoch": 0.83, "grad_norm": 1.858734369277954, "learning_rate": 0.0002, "loss": 1.6042, "step": 204220 }, { "epoch": 0.83, "grad_norm": 3.734421491622925, "learning_rate": 0.0002, "loss": 1.5398, "step": 204230 }, { "epoch": 0.83, "grad_norm": 2.872498035430908, "learning_rate": 0.0002, "loss": 1.7114, "step": 204240 }, { "epoch": 0.83, "grad_norm": 6.038716793060303, "learning_rate": 0.0002, "loss": 1.6444, "step": 204250 }, { "epoch": 0.83, "grad_norm": 2.9143033027648926, "learning_rate": 0.0002, "loss": 1.7503, "step": 204260 }, { "epoch": 0.83, "grad_norm": 1.4241175651550293, "learning_rate": 0.0002, "loss": 1.3029, "step": 204270 }, { "epoch": 0.83, "grad_norm": 3.05021595954895, "learning_rate": 0.0002, "loss": 1.5465, "step": 204280 }, { "epoch": 0.83, "grad_norm": 3.5422375202178955, "learning_rate": 0.0002, "loss": 1.7756, "step": 204290 }, { "epoch": 0.83, "grad_norm": 2.0964088439941406, "learning_rate": 0.0002, "loss": 1.6306, "step": 204300 }, { "epoch": 0.83, "grad_norm": 3.6357126235961914, "learning_rate": 0.0002, "loss": 1.3457, "step": 204310 }, { "epoch": 0.83, "grad_norm": 2.0645906925201416, "learning_rate": 0.0002, "loss": 1.4513, "step": 204320 }, { "epoch": 0.83, "grad_norm": 2.4881865978240967, "learning_rate": 0.0002, "loss": 1.7869, "step": 204330 }, { "epoch": 0.83, "grad_norm": 2.8468353748321533, "learning_rate": 0.0002, "loss": 1.5377, "step": 204340 }, { "epoch": 0.83, "grad_norm": 2.2740566730499268, "learning_rate": 0.0002, "loss": 1.5598, "step": 204350 }, { "epoch": 0.83, "grad_norm": 3.321204900741577, "learning_rate": 0.0002, "loss": 1.5113, "step": 204360 }, { "epoch": 0.83, "grad_norm": 1.334659218788147, "learning_rate": 0.0002, "loss": 1.6769, "step": 204370 }, { "epoch": 0.83, "grad_norm": 2.099760055541992, "learning_rate": 0.0002, "loss": 1.6476, "step": 204380 }, { "epoch": 0.83, "grad_norm": 3.166895627975464, "learning_rate": 0.0002, "loss": 1.5457, "step": 204390 }, { "epoch": 0.83, "grad_norm": 4.145196914672852, "learning_rate": 0.0002, "loss": 1.5477, "step": 204400 }, { "epoch": 0.83, "grad_norm": 3.1180341243743896, "learning_rate": 0.0002, "loss": 1.4199, "step": 204410 }, { "epoch": 0.83, "grad_norm": 3.357184410095215, "learning_rate": 0.0002, "loss": 1.4797, "step": 204420 }, { "epoch": 0.83, "grad_norm": 3.6045982837677, "learning_rate": 0.0002, "loss": 1.7067, "step": 204430 }, { "epoch": 0.83, "grad_norm": 3.4246625900268555, "learning_rate": 0.0002, "loss": 1.3236, "step": 204440 }, { "epoch": 0.83, "grad_norm": 2.4425270557403564, "learning_rate": 0.0002, "loss": 1.421, "step": 204450 }, { "epoch": 0.83, "grad_norm": 4.453653335571289, "learning_rate": 0.0002, "loss": 1.7466, "step": 204460 }, { "epoch": 0.83, "grad_norm": 3.7235324382781982, "learning_rate": 0.0002, "loss": 1.484, "step": 204470 }, { "epoch": 0.83, "grad_norm": 5.960526943206787, "learning_rate": 0.0002, "loss": 1.5297, "step": 204480 }, { "epoch": 0.83, "grad_norm": 5.355547904968262, "learning_rate": 0.0002, "loss": 1.5943, "step": 204490 }, { "epoch": 0.83, "grad_norm": 5.130080699920654, "learning_rate": 0.0002, "loss": 1.6971, "step": 204500 }, { "epoch": 0.83, "grad_norm": 2.5934581756591797, "learning_rate": 0.0002, "loss": 1.6392, "step": 204510 }, { "epoch": 0.83, "grad_norm": 2.2230424880981445, "learning_rate": 0.0002, "loss": 1.6565, "step": 204520 }, { "epoch": 0.83, "grad_norm": 1.883658528327942, "learning_rate": 0.0002, "loss": 1.4096, "step": 204530 }, { "epoch": 0.83, "grad_norm": 2.4434263706207275, "learning_rate": 0.0002, "loss": 1.4999, "step": 204540 }, { "epoch": 0.83, "grad_norm": 2.388007879257202, "learning_rate": 0.0002, "loss": 1.7089, "step": 204550 }, { "epoch": 0.83, "grad_norm": 2.908233880996704, "learning_rate": 0.0002, "loss": 1.7351, "step": 204560 }, { "epoch": 0.83, "grad_norm": 2.0009052753448486, "learning_rate": 0.0002, "loss": 1.3421, "step": 204570 }, { "epoch": 0.83, "grad_norm": 2.6668524742126465, "learning_rate": 0.0002, "loss": 1.5962, "step": 204580 }, { "epoch": 0.83, "grad_norm": 3.100553274154663, "learning_rate": 0.0002, "loss": 1.7177, "step": 204590 }, { "epoch": 0.83, "grad_norm": 5.497629642486572, "learning_rate": 0.0002, "loss": 1.3317, "step": 204600 }, { "epoch": 0.83, "grad_norm": 4.263491153717041, "learning_rate": 0.0002, "loss": 1.4492, "step": 204610 }, { "epoch": 0.83, "grad_norm": 3.89791202545166, "learning_rate": 0.0002, "loss": 1.4386, "step": 204620 }, { "epoch": 0.83, "grad_norm": 2.352585554122925, "learning_rate": 0.0002, "loss": 1.7044, "step": 204630 }, { "epoch": 0.83, "grad_norm": 2.4655261039733887, "learning_rate": 0.0002, "loss": 1.6353, "step": 204640 }, { "epoch": 0.83, "grad_norm": 2.6464297771453857, "learning_rate": 0.0002, "loss": 1.5969, "step": 204650 }, { "epoch": 0.83, "grad_norm": 3.0775959491729736, "learning_rate": 0.0002, "loss": 1.6113, "step": 204660 }, { "epoch": 0.83, "grad_norm": 3.2886312007904053, "learning_rate": 0.0002, "loss": 1.4462, "step": 204670 }, { "epoch": 0.83, "grad_norm": 4.816145420074463, "learning_rate": 0.0002, "loss": 1.4902, "step": 204680 }, { "epoch": 0.83, "grad_norm": 2.872403621673584, "learning_rate": 0.0002, "loss": 1.3638, "step": 204690 }, { "epoch": 0.83, "grad_norm": 3.9358861446380615, "learning_rate": 0.0002, "loss": 1.533, "step": 204700 }, { "epoch": 0.83, "grad_norm": 5.190571308135986, "learning_rate": 0.0002, "loss": 1.5943, "step": 204710 }, { "epoch": 0.83, "grad_norm": 2.0010428428649902, "learning_rate": 0.0002, "loss": 1.6489, "step": 204720 }, { "epoch": 0.83, "grad_norm": 19.88298797607422, "learning_rate": 0.0002, "loss": 1.5337, "step": 204730 }, { "epoch": 0.83, "grad_norm": 5.668027877807617, "learning_rate": 0.0002, "loss": 1.6222, "step": 204740 }, { "epoch": 0.83, "grad_norm": 2.181636333465576, "learning_rate": 0.0002, "loss": 1.383, "step": 204750 }, { "epoch": 0.83, "grad_norm": 2.5563368797302246, "learning_rate": 0.0002, "loss": 1.5565, "step": 204760 }, { "epoch": 0.83, "grad_norm": 2.7994132041931152, "learning_rate": 0.0002, "loss": 1.7089, "step": 204770 }, { "epoch": 0.83, "grad_norm": 2.7083945274353027, "learning_rate": 0.0002, "loss": 1.6856, "step": 204780 }, { "epoch": 0.83, "grad_norm": 1.9976446628570557, "learning_rate": 0.0002, "loss": 1.5693, "step": 204790 }, { "epoch": 0.83, "grad_norm": 3.26383113861084, "learning_rate": 0.0002, "loss": 1.6143, "step": 204800 }, { "epoch": 0.83, "grad_norm": 2.5711114406585693, "learning_rate": 0.0002, "loss": 1.5899, "step": 204810 }, { "epoch": 0.83, "grad_norm": 2.359083652496338, "learning_rate": 0.0002, "loss": 1.5663, "step": 204820 }, { "epoch": 0.83, "grad_norm": 3.2899863719940186, "learning_rate": 0.0002, "loss": 1.5582, "step": 204830 }, { "epoch": 0.83, "grad_norm": 3.390388250350952, "learning_rate": 0.0002, "loss": 1.4818, "step": 204840 }, { "epoch": 0.83, "grad_norm": 4.1023077964782715, "learning_rate": 0.0002, "loss": 1.6412, "step": 204850 }, { "epoch": 0.83, "grad_norm": 5.443305969238281, "learning_rate": 0.0002, "loss": 1.4905, "step": 204860 }, { "epoch": 0.83, "grad_norm": 2.5871191024780273, "learning_rate": 0.0002, "loss": 1.4468, "step": 204870 }, { "epoch": 0.83, "grad_norm": 2.5229082107543945, "learning_rate": 0.0002, "loss": 1.8349, "step": 204880 }, { "epoch": 0.83, "grad_norm": 7.731244087219238, "learning_rate": 0.0002, "loss": 1.6024, "step": 204890 }, { "epoch": 0.83, "grad_norm": 2.8281121253967285, "learning_rate": 0.0002, "loss": 1.6532, "step": 204900 }, { "epoch": 0.83, "grad_norm": 1.7893437147140503, "learning_rate": 0.0002, "loss": 1.598, "step": 204910 }, { "epoch": 0.83, "grad_norm": 3.709984540939331, "learning_rate": 0.0002, "loss": 1.6783, "step": 204920 }, { "epoch": 0.83, "grad_norm": 4.897369861602783, "learning_rate": 0.0002, "loss": 1.4174, "step": 204930 }, { "epoch": 0.83, "grad_norm": 2.41925048828125, "learning_rate": 0.0002, "loss": 1.468, "step": 204940 }, { "epoch": 0.83, "grad_norm": 6.266191005706787, "learning_rate": 0.0002, "loss": 1.7158, "step": 204950 }, { "epoch": 0.83, "grad_norm": 2.003478527069092, "learning_rate": 0.0002, "loss": 1.5185, "step": 204960 }, { "epoch": 0.83, "grad_norm": 2.753300428390503, "learning_rate": 0.0002, "loss": 1.8106, "step": 204970 }, { "epoch": 0.83, "grad_norm": 2.1011970043182373, "learning_rate": 0.0002, "loss": 1.4909, "step": 204980 }, { "epoch": 0.83, "grad_norm": 2.501411199569702, "learning_rate": 0.0002, "loss": 1.732, "step": 204990 }, { "epoch": 0.83, "grad_norm": 3.5865838527679443, "learning_rate": 0.0002, "loss": 1.4962, "step": 205000 }, { "epoch": 0.83, "grad_norm": 3.4916698932647705, "learning_rate": 0.0002, "loss": 1.7642, "step": 205010 }, { "epoch": 0.83, "grad_norm": 1.906611442565918, "learning_rate": 0.0002, "loss": 1.5603, "step": 205020 }, { "epoch": 0.83, "grad_norm": 2.00856614112854, "learning_rate": 0.0002, "loss": 1.3226, "step": 205030 }, { "epoch": 0.83, "grad_norm": 2.622276544570923, "learning_rate": 0.0002, "loss": 1.5392, "step": 205040 }, { "epoch": 0.83, "grad_norm": 2.279995918273926, "learning_rate": 0.0002, "loss": 1.7032, "step": 205050 }, { "epoch": 0.83, "grad_norm": 2.763432741165161, "learning_rate": 0.0002, "loss": 1.5757, "step": 205060 }, { "epoch": 0.83, "grad_norm": 3.7848360538482666, "learning_rate": 0.0002, "loss": 1.546, "step": 205070 }, { "epoch": 0.83, "grad_norm": 2.8395445346832275, "learning_rate": 0.0002, "loss": 1.5496, "step": 205080 }, { "epoch": 0.83, "grad_norm": 1.798892617225647, "learning_rate": 0.0002, "loss": 1.5258, "step": 205090 }, { "epoch": 0.83, "grad_norm": 3.6856982707977295, "learning_rate": 0.0002, "loss": 1.7747, "step": 205100 }, { "epoch": 0.83, "grad_norm": 4.692550182342529, "learning_rate": 0.0002, "loss": 1.5443, "step": 205110 }, { "epoch": 0.84, "grad_norm": 3.5933642387390137, "learning_rate": 0.0002, "loss": 1.7499, "step": 205120 }, { "epoch": 0.84, "grad_norm": 3.9325525760650635, "learning_rate": 0.0002, "loss": 1.6783, "step": 205130 }, { "epoch": 0.84, "grad_norm": 3.012375831604004, "learning_rate": 0.0002, "loss": 1.473, "step": 205140 }, { "epoch": 0.84, "grad_norm": 1.8903234004974365, "learning_rate": 0.0002, "loss": 1.3437, "step": 205150 }, { "epoch": 0.84, "grad_norm": 2.934732675552368, "learning_rate": 0.0002, "loss": 1.6688, "step": 205160 }, { "epoch": 0.84, "grad_norm": 2.3500874042510986, "learning_rate": 0.0002, "loss": 1.5214, "step": 205170 }, { "epoch": 0.84, "grad_norm": 2.1336679458618164, "learning_rate": 0.0002, "loss": 1.6639, "step": 205180 }, { "epoch": 0.84, "grad_norm": 2.7587335109710693, "learning_rate": 0.0002, "loss": 1.3292, "step": 205190 }, { "epoch": 0.84, "grad_norm": 2.3142764568328857, "learning_rate": 0.0002, "loss": 1.4441, "step": 205200 }, { "epoch": 0.84, "grad_norm": 2.617310047149658, "learning_rate": 0.0002, "loss": 1.484, "step": 205210 }, { "epoch": 0.84, "grad_norm": 3.915996551513672, "learning_rate": 0.0002, "loss": 1.4957, "step": 205220 }, { "epoch": 0.84, "grad_norm": 1.8836573362350464, "learning_rate": 0.0002, "loss": 1.5464, "step": 205230 }, { "epoch": 0.84, "grad_norm": 2.803138256072998, "learning_rate": 0.0002, "loss": 1.6154, "step": 205240 }, { "epoch": 0.84, "grad_norm": 2.587254762649536, "learning_rate": 0.0002, "loss": 1.6059, "step": 205250 }, { "epoch": 0.84, "grad_norm": 1.6477315425872803, "learning_rate": 0.0002, "loss": 1.5214, "step": 205260 }, { "epoch": 0.84, "grad_norm": 2.701573610305786, "learning_rate": 0.0002, "loss": 1.4012, "step": 205270 }, { "epoch": 0.84, "grad_norm": 2.616727590560913, "learning_rate": 0.0002, "loss": 1.4566, "step": 205280 }, { "epoch": 0.84, "grad_norm": 3.1739306449890137, "learning_rate": 0.0002, "loss": 1.3789, "step": 205290 }, { "epoch": 0.84, "grad_norm": 2.9782559871673584, "learning_rate": 0.0002, "loss": 1.4274, "step": 205300 }, { "epoch": 0.84, "grad_norm": 3.284714460372925, "learning_rate": 0.0002, "loss": 1.2906, "step": 205310 }, { "epoch": 0.84, "grad_norm": 3.5245280265808105, "learning_rate": 0.0002, "loss": 1.5034, "step": 205320 }, { "epoch": 0.84, "grad_norm": 3.118817090988159, "learning_rate": 0.0002, "loss": 1.6692, "step": 205330 }, { "epoch": 0.84, "grad_norm": 2.9609768390655518, "learning_rate": 0.0002, "loss": 1.7681, "step": 205340 }, { "epoch": 0.84, "grad_norm": 2.9871771335601807, "learning_rate": 0.0002, "loss": 1.2955, "step": 205350 }, { "epoch": 0.84, "grad_norm": 2.8949310779571533, "learning_rate": 0.0002, "loss": 1.5485, "step": 205360 }, { "epoch": 0.84, "grad_norm": 2.720616340637207, "learning_rate": 0.0002, "loss": 1.5937, "step": 205370 }, { "epoch": 0.84, "grad_norm": 5.495345592498779, "learning_rate": 0.0002, "loss": 1.753, "step": 205380 }, { "epoch": 0.84, "grad_norm": 2.816988706588745, "learning_rate": 0.0002, "loss": 1.5399, "step": 205390 }, { "epoch": 0.84, "grad_norm": 2.4663851261138916, "learning_rate": 0.0002, "loss": 1.6777, "step": 205400 }, { "epoch": 0.84, "grad_norm": 2.9492876529693604, "learning_rate": 0.0002, "loss": 1.4576, "step": 205410 }, { "epoch": 0.84, "grad_norm": 2.320481538772583, "learning_rate": 0.0002, "loss": 1.7141, "step": 205420 }, { "epoch": 0.84, "grad_norm": 2.6073830127716064, "learning_rate": 0.0002, "loss": 1.6453, "step": 205430 }, { "epoch": 0.84, "grad_norm": 4.544122695922852, "learning_rate": 0.0002, "loss": 1.6179, "step": 205440 }, { "epoch": 0.84, "grad_norm": 2.3660824298858643, "learning_rate": 0.0002, "loss": 1.4503, "step": 205450 }, { "epoch": 0.84, "grad_norm": 1.9984104633331299, "learning_rate": 0.0002, "loss": 1.9409, "step": 205460 }, { "epoch": 0.84, "grad_norm": 2.0554468631744385, "learning_rate": 0.0002, "loss": 1.3902, "step": 205470 }, { "epoch": 0.84, "grad_norm": 1.769284725189209, "learning_rate": 0.0002, "loss": 1.6579, "step": 205480 }, { "epoch": 0.84, "grad_norm": 2.787357807159424, "learning_rate": 0.0002, "loss": 1.6506, "step": 205490 }, { "epoch": 0.84, "grad_norm": 3.347184181213379, "learning_rate": 0.0002, "loss": 1.6357, "step": 205500 }, { "epoch": 0.84, "grad_norm": 1.9815678596496582, "learning_rate": 0.0002, "loss": 1.6148, "step": 205510 }, { "epoch": 0.84, "grad_norm": 3.9756617546081543, "learning_rate": 0.0002, "loss": 1.5308, "step": 205520 }, { "epoch": 0.84, "grad_norm": 4.431049346923828, "learning_rate": 0.0002, "loss": 1.3668, "step": 205530 }, { "epoch": 0.84, "grad_norm": 3.6788558959960938, "learning_rate": 0.0002, "loss": 1.5872, "step": 205540 }, { "epoch": 0.84, "grad_norm": 2.545945644378662, "learning_rate": 0.0002, "loss": 1.5216, "step": 205550 }, { "epoch": 0.84, "grad_norm": 3.487032175064087, "learning_rate": 0.0002, "loss": 1.7987, "step": 205560 }, { "epoch": 0.84, "grad_norm": 2.196028470993042, "learning_rate": 0.0002, "loss": 1.6993, "step": 205570 }, { "epoch": 0.84, "grad_norm": 3.49145245552063, "learning_rate": 0.0002, "loss": 1.5316, "step": 205580 }, { "epoch": 0.84, "grad_norm": 4.1503825187683105, "learning_rate": 0.0002, "loss": 1.6118, "step": 205590 }, { "epoch": 0.84, "grad_norm": 3.438493013381958, "learning_rate": 0.0002, "loss": 1.5481, "step": 205600 }, { "epoch": 0.84, "grad_norm": 3.431649684906006, "learning_rate": 0.0002, "loss": 1.4202, "step": 205610 }, { "epoch": 0.84, "grad_norm": 4.165268421173096, "learning_rate": 0.0002, "loss": 1.2501, "step": 205620 }, { "epoch": 0.84, "grad_norm": 2.5403764247894287, "learning_rate": 0.0002, "loss": 1.6687, "step": 205630 }, { "epoch": 0.84, "grad_norm": 2.4990415573120117, "learning_rate": 0.0002, "loss": 1.5602, "step": 205640 }, { "epoch": 0.84, "grad_norm": 2.351504325866699, "learning_rate": 0.0002, "loss": 1.5941, "step": 205650 }, { "epoch": 0.84, "grad_norm": 2.9837605953216553, "learning_rate": 0.0002, "loss": 1.6825, "step": 205660 }, { "epoch": 0.84, "grad_norm": 3.6765220165252686, "learning_rate": 0.0002, "loss": 1.3767, "step": 205670 }, { "epoch": 0.84, "grad_norm": 3.0591626167297363, "learning_rate": 0.0002, "loss": 1.5413, "step": 205680 }, { "epoch": 0.84, "grad_norm": 2.4570388793945312, "learning_rate": 0.0002, "loss": 1.7644, "step": 205690 }, { "epoch": 0.84, "grad_norm": 2.864485740661621, "learning_rate": 0.0002, "loss": 1.57, "step": 205700 }, { "epoch": 0.84, "grad_norm": 3.0717549324035645, "learning_rate": 0.0002, "loss": 1.7561, "step": 205710 }, { "epoch": 0.84, "grad_norm": 2.3263497352600098, "learning_rate": 0.0002, "loss": 1.5525, "step": 205720 }, { "epoch": 0.84, "grad_norm": 3.0065925121307373, "learning_rate": 0.0002, "loss": 1.9769, "step": 205730 }, { "epoch": 0.84, "grad_norm": 4.284135818481445, "learning_rate": 0.0002, "loss": 1.4905, "step": 205740 }, { "epoch": 0.84, "grad_norm": 2.2254765033721924, "learning_rate": 0.0002, "loss": 1.5762, "step": 205750 }, { "epoch": 0.84, "grad_norm": 2.5573205947875977, "learning_rate": 0.0002, "loss": 1.5006, "step": 205760 }, { "epoch": 0.84, "grad_norm": 2.313783645629883, "learning_rate": 0.0002, "loss": 1.7143, "step": 205770 }, { "epoch": 0.84, "grad_norm": 2.3186962604522705, "learning_rate": 0.0002, "loss": 1.4403, "step": 205780 }, { "epoch": 0.84, "grad_norm": 3.3412206172943115, "learning_rate": 0.0002, "loss": 1.6886, "step": 205790 }, { "epoch": 0.84, "grad_norm": 3.3665053844451904, "learning_rate": 0.0002, "loss": 1.6166, "step": 205800 }, { "epoch": 0.84, "grad_norm": 2.956979513168335, "learning_rate": 0.0002, "loss": 1.5735, "step": 205810 }, { "epoch": 0.84, "grad_norm": 1.4869694709777832, "learning_rate": 0.0002, "loss": 1.5149, "step": 205820 }, { "epoch": 0.84, "grad_norm": 2.566486358642578, "learning_rate": 0.0002, "loss": 1.382, "step": 205830 }, { "epoch": 0.84, "grad_norm": 2.7806572914123535, "learning_rate": 0.0002, "loss": 1.5166, "step": 205840 }, { "epoch": 0.84, "grad_norm": 2.8215370178222656, "learning_rate": 0.0002, "loss": 1.5891, "step": 205850 }, { "epoch": 0.84, "grad_norm": 2.019653558731079, "learning_rate": 0.0002, "loss": 1.5847, "step": 205860 }, { "epoch": 0.84, "grad_norm": 2.7947473526000977, "learning_rate": 0.0002, "loss": 1.8018, "step": 205870 }, { "epoch": 0.84, "grad_norm": 2.9246463775634766, "learning_rate": 0.0002, "loss": 1.6631, "step": 205880 }, { "epoch": 0.84, "grad_norm": 2.137640953063965, "learning_rate": 0.0002, "loss": 1.6917, "step": 205890 }, { "epoch": 0.84, "grad_norm": 1.5258867740631104, "learning_rate": 0.0002, "loss": 1.602, "step": 205900 }, { "epoch": 0.84, "grad_norm": 2.53501296043396, "learning_rate": 0.0002, "loss": 1.7697, "step": 205910 }, { "epoch": 0.84, "grad_norm": 3.3638858795166016, "learning_rate": 0.0002, "loss": 1.637, "step": 205920 }, { "epoch": 0.84, "grad_norm": 4.036651611328125, "learning_rate": 0.0002, "loss": 1.5599, "step": 205930 }, { "epoch": 0.84, "grad_norm": 2.5276029109954834, "learning_rate": 0.0002, "loss": 1.4414, "step": 205940 }, { "epoch": 0.84, "grad_norm": 2.7461602687835693, "learning_rate": 0.0002, "loss": 1.4904, "step": 205950 }, { "epoch": 0.84, "grad_norm": 2.0657896995544434, "learning_rate": 0.0002, "loss": 1.582, "step": 205960 }, { "epoch": 0.84, "grad_norm": 3.5911221504211426, "learning_rate": 0.0002, "loss": 1.5901, "step": 205970 }, { "epoch": 0.84, "grad_norm": 2.8388328552246094, "learning_rate": 0.0002, "loss": 1.668, "step": 205980 }, { "epoch": 0.84, "grad_norm": 2.088757276535034, "learning_rate": 0.0002, "loss": 1.8675, "step": 205990 }, { "epoch": 0.84, "grad_norm": 3.6296284198760986, "learning_rate": 0.0002, "loss": 1.4847, "step": 206000 }, { "epoch": 0.84, "grad_norm": 2.512209415435791, "learning_rate": 0.0002, "loss": 1.6922, "step": 206010 }, { "epoch": 0.84, "grad_norm": 2.19183611869812, "learning_rate": 0.0002, "loss": 1.4592, "step": 206020 }, { "epoch": 0.84, "grad_norm": 2.2798774242401123, "learning_rate": 0.0002, "loss": 1.3141, "step": 206030 }, { "epoch": 0.84, "grad_norm": 1.9649289846420288, "learning_rate": 0.0002, "loss": 1.4365, "step": 206040 }, { "epoch": 0.84, "grad_norm": 3.2927379608154297, "learning_rate": 0.0002, "loss": 1.7345, "step": 206050 }, { "epoch": 0.84, "grad_norm": 2.8877339363098145, "learning_rate": 0.0002, "loss": 1.8471, "step": 206060 }, { "epoch": 0.84, "grad_norm": 2.7200276851654053, "learning_rate": 0.0002, "loss": 1.6419, "step": 206070 }, { "epoch": 0.84, "grad_norm": 2.601189374923706, "learning_rate": 0.0002, "loss": 1.7013, "step": 206080 }, { "epoch": 0.84, "grad_norm": 3.1283605098724365, "learning_rate": 0.0002, "loss": 1.6337, "step": 206090 }, { "epoch": 0.84, "grad_norm": 2.2068803310394287, "learning_rate": 0.0002, "loss": 1.5085, "step": 206100 }, { "epoch": 0.84, "grad_norm": 3.6281580924987793, "learning_rate": 0.0002, "loss": 1.4836, "step": 206110 }, { "epoch": 0.84, "grad_norm": 2.0820841789245605, "learning_rate": 0.0002, "loss": 1.7386, "step": 206120 }, { "epoch": 0.84, "grad_norm": 2.6563971042633057, "learning_rate": 0.0002, "loss": 1.5411, "step": 206130 }, { "epoch": 0.84, "grad_norm": 3.1520843505859375, "learning_rate": 0.0002, "loss": 1.6745, "step": 206140 }, { "epoch": 0.84, "grad_norm": 3.1361989974975586, "learning_rate": 0.0002, "loss": 1.5788, "step": 206150 }, { "epoch": 0.84, "grad_norm": 3.082632303237915, "learning_rate": 0.0002, "loss": 1.562, "step": 206160 }, { "epoch": 0.84, "grad_norm": 5.101792812347412, "learning_rate": 0.0002, "loss": 1.709, "step": 206170 }, { "epoch": 0.84, "grad_norm": 3.095241069793701, "learning_rate": 0.0002, "loss": 1.6027, "step": 206180 }, { "epoch": 0.84, "grad_norm": 2.4512643814086914, "learning_rate": 0.0002, "loss": 1.21, "step": 206190 }, { "epoch": 0.84, "grad_norm": 4.010707855224609, "learning_rate": 0.0002, "loss": 1.7108, "step": 206200 }, { "epoch": 0.84, "grad_norm": 3.4110701084136963, "learning_rate": 0.0002, "loss": 1.5617, "step": 206210 }, { "epoch": 0.84, "grad_norm": 2.6986637115478516, "learning_rate": 0.0002, "loss": 1.5371, "step": 206220 }, { "epoch": 0.84, "grad_norm": 3.748459815979004, "learning_rate": 0.0002, "loss": 1.7077, "step": 206230 }, { "epoch": 0.84, "grad_norm": 3.918931722640991, "learning_rate": 0.0002, "loss": 1.8798, "step": 206240 }, { "epoch": 0.84, "grad_norm": 4.532106876373291, "learning_rate": 0.0002, "loss": 1.4558, "step": 206250 }, { "epoch": 0.84, "grad_norm": 2.0159523487091064, "learning_rate": 0.0002, "loss": 1.695, "step": 206260 }, { "epoch": 0.84, "grad_norm": 2.1512205600738525, "learning_rate": 0.0002, "loss": 1.5659, "step": 206270 }, { "epoch": 0.84, "grad_norm": 1.6895819902420044, "learning_rate": 0.0002, "loss": 1.5275, "step": 206280 }, { "epoch": 0.84, "grad_norm": 2.3975446224212646, "learning_rate": 0.0002, "loss": 1.8372, "step": 206290 }, { "epoch": 0.84, "grad_norm": 1.7721511125564575, "learning_rate": 0.0002, "loss": 1.6787, "step": 206300 }, { "epoch": 0.84, "grad_norm": 2.984039783477783, "learning_rate": 0.0002, "loss": 1.6942, "step": 206310 }, { "epoch": 0.84, "grad_norm": 3.2454190254211426, "learning_rate": 0.0002, "loss": 1.6721, "step": 206320 }, { "epoch": 0.84, "grad_norm": 2.202462673187256, "learning_rate": 0.0002, "loss": 1.6112, "step": 206330 }, { "epoch": 0.84, "grad_norm": 3.6161181926727295, "learning_rate": 0.0002, "loss": 1.5842, "step": 206340 }, { "epoch": 0.84, "grad_norm": 4.126869201660156, "learning_rate": 0.0002, "loss": 1.4618, "step": 206350 }, { "epoch": 0.84, "grad_norm": 2.0463056564331055, "learning_rate": 0.0002, "loss": 1.581, "step": 206360 }, { "epoch": 0.84, "grad_norm": 1.8232465982437134, "learning_rate": 0.0002, "loss": 1.4841, "step": 206370 }, { "epoch": 0.84, "grad_norm": 3.174868583679199, "learning_rate": 0.0002, "loss": 1.5622, "step": 206380 }, { "epoch": 0.84, "grad_norm": 2.656231164932251, "learning_rate": 0.0002, "loss": 1.7289, "step": 206390 }, { "epoch": 0.84, "grad_norm": 3.5109853744506836, "learning_rate": 0.0002, "loss": 1.4738, "step": 206400 }, { "epoch": 0.84, "grad_norm": 3.0551984310150146, "learning_rate": 0.0002, "loss": 1.4131, "step": 206410 }, { "epoch": 0.84, "grad_norm": 1.890810251235962, "learning_rate": 0.0002, "loss": 1.3452, "step": 206420 }, { "epoch": 0.84, "grad_norm": 3.232762575149536, "learning_rate": 0.0002, "loss": 1.9085, "step": 206430 }, { "epoch": 0.84, "grad_norm": 4.629990577697754, "learning_rate": 0.0002, "loss": 1.5466, "step": 206440 }, { "epoch": 0.84, "grad_norm": 3.2341136932373047, "learning_rate": 0.0002, "loss": 1.6922, "step": 206450 }, { "epoch": 0.84, "grad_norm": 2.8554575443267822, "learning_rate": 0.0002, "loss": 1.7167, "step": 206460 }, { "epoch": 0.84, "grad_norm": 2.961996078491211, "learning_rate": 0.0002, "loss": 1.3125, "step": 206470 }, { "epoch": 0.84, "grad_norm": 2.7406251430511475, "learning_rate": 0.0002, "loss": 1.6184, "step": 206480 }, { "epoch": 0.84, "grad_norm": 2.2392208576202393, "learning_rate": 0.0002, "loss": 1.5653, "step": 206490 }, { "epoch": 0.84, "grad_norm": 4.405025005340576, "learning_rate": 0.0002, "loss": 1.173, "step": 206500 }, { "epoch": 0.84, "grad_norm": 2.2541239261627197, "learning_rate": 0.0002, "loss": 1.6536, "step": 206510 }, { "epoch": 0.84, "grad_norm": 1.9158087968826294, "learning_rate": 0.0002, "loss": 1.4834, "step": 206520 }, { "epoch": 0.84, "grad_norm": 2.0173938274383545, "learning_rate": 0.0002, "loss": 1.6431, "step": 206530 }, { "epoch": 0.84, "grad_norm": 1.8877111673355103, "learning_rate": 0.0002, "loss": 1.598, "step": 206540 }, { "epoch": 0.84, "grad_norm": 3.5142877101898193, "learning_rate": 0.0002, "loss": 1.7486, "step": 206550 }, { "epoch": 0.84, "grad_norm": 2.6151301860809326, "learning_rate": 0.0002, "loss": 1.6768, "step": 206560 }, { "epoch": 0.84, "grad_norm": 3.769238233566284, "learning_rate": 0.0002, "loss": 1.6125, "step": 206570 }, { "epoch": 0.84, "grad_norm": 1.8161895275115967, "learning_rate": 0.0002, "loss": 1.8439, "step": 206580 }, { "epoch": 0.84, "grad_norm": 3.0959413051605225, "learning_rate": 0.0002, "loss": 1.3249, "step": 206590 }, { "epoch": 0.84, "grad_norm": 1.5736865997314453, "learning_rate": 0.0002, "loss": 1.6336, "step": 206600 }, { "epoch": 0.84, "grad_norm": 3.4602181911468506, "learning_rate": 0.0002, "loss": 1.5101, "step": 206610 }, { "epoch": 0.84, "grad_norm": 2.6826212406158447, "learning_rate": 0.0002, "loss": 1.5354, "step": 206620 }, { "epoch": 0.84, "grad_norm": 2.80350661277771, "learning_rate": 0.0002, "loss": 1.3862, "step": 206630 }, { "epoch": 0.84, "grad_norm": 3.2328896522521973, "learning_rate": 0.0002, "loss": 1.8499, "step": 206640 }, { "epoch": 0.84, "grad_norm": 3.1053309440612793, "learning_rate": 0.0002, "loss": 1.668, "step": 206650 }, { "epoch": 0.84, "grad_norm": 3.0845069885253906, "learning_rate": 0.0002, "loss": 1.6252, "step": 206660 }, { "epoch": 0.84, "grad_norm": 2.652449131011963, "learning_rate": 0.0002, "loss": 1.5407, "step": 206670 }, { "epoch": 0.84, "grad_norm": 2.3125109672546387, "learning_rate": 0.0002, "loss": 1.4888, "step": 206680 }, { "epoch": 0.84, "grad_norm": 1.4209696054458618, "learning_rate": 0.0002, "loss": 1.4375, "step": 206690 }, { "epoch": 0.84, "grad_norm": 3.827484130859375, "learning_rate": 0.0002, "loss": 1.4804, "step": 206700 }, { "epoch": 0.84, "grad_norm": 2.637174606323242, "learning_rate": 0.0002, "loss": 1.676, "step": 206710 }, { "epoch": 0.84, "grad_norm": 2.714040756225586, "learning_rate": 0.0002, "loss": 1.6172, "step": 206720 }, { "epoch": 0.84, "grad_norm": 3.2809696197509766, "learning_rate": 0.0002, "loss": 1.6342, "step": 206730 }, { "epoch": 0.84, "grad_norm": 3.971226215362549, "learning_rate": 0.0002, "loss": 1.8924, "step": 206740 }, { "epoch": 0.84, "grad_norm": 2.9685311317443848, "learning_rate": 0.0002, "loss": 1.5338, "step": 206750 }, { "epoch": 0.84, "grad_norm": 2.530061721801758, "learning_rate": 0.0002, "loss": 1.6549, "step": 206760 }, { "epoch": 0.84, "grad_norm": 2.468808889389038, "learning_rate": 0.0002, "loss": 1.6785, "step": 206770 }, { "epoch": 0.84, "grad_norm": 4.092580318450928, "learning_rate": 0.0002, "loss": 1.552, "step": 206780 }, { "epoch": 0.84, "grad_norm": 3.426682472229004, "learning_rate": 0.0002, "loss": 1.4731, "step": 206790 }, { "epoch": 0.84, "grad_norm": 2.9697763919830322, "learning_rate": 0.0002, "loss": 1.5023, "step": 206800 }, { "epoch": 0.84, "grad_norm": 3.0308101177215576, "learning_rate": 0.0002, "loss": 1.4245, "step": 206810 }, { "epoch": 0.84, "grad_norm": 3.0911147594451904, "learning_rate": 0.0002, "loss": 1.6041, "step": 206820 }, { "epoch": 0.84, "grad_norm": 3.0646800994873047, "learning_rate": 0.0002, "loss": 1.6566, "step": 206830 }, { "epoch": 0.84, "grad_norm": 2.791999578475952, "learning_rate": 0.0002, "loss": 1.6091, "step": 206840 }, { "epoch": 0.84, "grad_norm": 3.9385480880737305, "learning_rate": 0.0002, "loss": 1.4017, "step": 206850 }, { "epoch": 0.84, "grad_norm": 2.329867362976074, "learning_rate": 0.0002, "loss": 1.3952, "step": 206860 }, { "epoch": 0.84, "grad_norm": 4.118420600891113, "learning_rate": 0.0002, "loss": 1.7133, "step": 206870 }, { "epoch": 0.84, "grad_norm": 2.6321487426757812, "learning_rate": 0.0002, "loss": 1.6946, "step": 206880 }, { "epoch": 0.84, "grad_norm": 2.7870442867279053, "learning_rate": 0.0002, "loss": 1.4246, "step": 206890 }, { "epoch": 0.84, "grad_norm": 3.132282018661499, "learning_rate": 0.0002, "loss": 1.5363, "step": 206900 }, { "epoch": 0.84, "grad_norm": 3.7173242568969727, "learning_rate": 0.0002, "loss": 1.6995, "step": 206910 }, { "epoch": 0.84, "grad_norm": 2.607382297515869, "learning_rate": 0.0002, "loss": 1.6153, "step": 206920 }, { "epoch": 0.84, "grad_norm": 2.301837205886841, "learning_rate": 0.0002, "loss": 1.44, "step": 206930 }, { "epoch": 0.84, "grad_norm": 2.8082275390625, "learning_rate": 0.0002, "loss": 1.4673, "step": 206940 }, { "epoch": 0.84, "grad_norm": 3.1049256324768066, "learning_rate": 0.0002, "loss": 1.5475, "step": 206950 }, { "epoch": 0.84, "grad_norm": 1.6831088066101074, "learning_rate": 0.0002, "loss": 1.5478, "step": 206960 }, { "epoch": 0.84, "grad_norm": 2.8465166091918945, "learning_rate": 0.0002, "loss": 1.473, "step": 206970 }, { "epoch": 0.84, "grad_norm": 3.1501498222351074, "learning_rate": 0.0002, "loss": 1.4085, "step": 206980 }, { "epoch": 0.84, "grad_norm": 3.8760368824005127, "learning_rate": 0.0002, "loss": 1.6714, "step": 206990 }, { "epoch": 0.84, "grad_norm": 2.15238356590271, "learning_rate": 0.0002, "loss": 1.4277, "step": 207000 }, { "epoch": 0.84, "grad_norm": 3.731261968612671, "learning_rate": 0.0002, "loss": 1.3961, "step": 207010 }, { "epoch": 0.84, "grad_norm": 2.544318675994873, "learning_rate": 0.0002, "loss": 1.3322, "step": 207020 }, { "epoch": 0.84, "grad_norm": 2.5304715633392334, "learning_rate": 0.0002, "loss": 1.4781, "step": 207030 }, { "epoch": 0.84, "grad_norm": 4.5525102615356445, "learning_rate": 0.0002, "loss": 1.5071, "step": 207040 }, { "epoch": 0.84, "grad_norm": 2.3801515102386475, "learning_rate": 0.0002, "loss": 1.7723, "step": 207050 }, { "epoch": 0.84, "grad_norm": 3.7089035511016846, "learning_rate": 0.0002, "loss": 1.7106, "step": 207060 }, { "epoch": 0.84, "grad_norm": 3.789834499359131, "learning_rate": 0.0002, "loss": 1.4585, "step": 207070 }, { "epoch": 0.84, "grad_norm": 3.3492660522460938, "learning_rate": 0.0002, "loss": 1.4928, "step": 207080 }, { "epoch": 0.84, "grad_norm": 2.0063636302948, "learning_rate": 0.0002, "loss": 1.5568, "step": 207090 }, { "epoch": 0.84, "grad_norm": 2.819457769393921, "learning_rate": 0.0002, "loss": 1.5598, "step": 207100 }, { "epoch": 0.84, "grad_norm": 3.046217679977417, "learning_rate": 0.0002, "loss": 1.5171, "step": 207110 }, { "epoch": 0.84, "grad_norm": 3.8634982109069824, "learning_rate": 0.0002, "loss": 1.7387, "step": 207120 }, { "epoch": 0.84, "grad_norm": 1.8903712034225464, "learning_rate": 0.0002, "loss": 1.4775, "step": 207130 }, { "epoch": 0.84, "grad_norm": 2.112961769104004, "learning_rate": 0.0002, "loss": 1.3817, "step": 207140 }, { "epoch": 0.84, "grad_norm": 2.507460832595825, "learning_rate": 0.0002, "loss": 1.4707, "step": 207150 }, { "epoch": 0.84, "grad_norm": 2.0728421211242676, "learning_rate": 0.0002, "loss": 1.5538, "step": 207160 }, { "epoch": 0.84, "grad_norm": 3.3933143615722656, "learning_rate": 0.0002, "loss": 1.7033, "step": 207170 }, { "epoch": 0.84, "grad_norm": 3.268033266067505, "learning_rate": 0.0002, "loss": 1.5804, "step": 207180 }, { "epoch": 0.84, "grad_norm": 2.316209554672241, "learning_rate": 0.0002, "loss": 1.7054, "step": 207190 }, { "epoch": 0.84, "grad_norm": 3.7669644355773926, "learning_rate": 0.0002, "loss": 1.6919, "step": 207200 }, { "epoch": 0.84, "grad_norm": 3.508523464202881, "learning_rate": 0.0002, "loss": 1.6848, "step": 207210 }, { "epoch": 0.84, "grad_norm": 2.695746660232544, "learning_rate": 0.0002, "loss": 1.574, "step": 207220 }, { "epoch": 0.84, "grad_norm": 2.5314040184020996, "learning_rate": 0.0002, "loss": 1.6248, "step": 207230 }, { "epoch": 0.84, "grad_norm": 1.8419182300567627, "learning_rate": 0.0002, "loss": 1.4646, "step": 207240 }, { "epoch": 0.84, "grad_norm": 6.979194641113281, "learning_rate": 0.0002, "loss": 1.7436, "step": 207250 }, { "epoch": 0.84, "grad_norm": 3.353250741958618, "learning_rate": 0.0002, "loss": 1.734, "step": 207260 }, { "epoch": 0.84, "grad_norm": 2.4275062084198, "learning_rate": 0.0002, "loss": 1.4465, "step": 207270 }, { "epoch": 0.84, "grad_norm": 4.59022331237793, "learning_rate": 0.0002, "loss": 1.4694, "step": 207280 }, { "epoch": 0.84, "grad_norm": 3.358238458633423, "learning_rate": 0.0002, "loss": 1.4652, "step": 207290 }, { "epoch": 0.84, "grad_norm": 3.983274459838867, "learning_rate": 0.0002, "loss": 1.649, "step": 207300 }, { "epoch": 0.84, "grad_norm": 2.820568084716797, "learning_rate": 0.0002, "loss": 1.5078, "step": 207310 }, { "epoch": 0.84, "grad_norm": 1.8368850946426392, "learning_rate": 0.0002, "loss": 1.7123, "step": 207320 }, { "epoch": 0.84, "grad_norm": 4.784850597381592, "learning_rate": 0.0002, "loss": 1.623, "step": 207330 }, { "epoch": 0.84, "grad_norm": 3.281278371810913, "learning_rate": 0.0002, "loss": 1.4586, "step": 207340 }, { "epoch": 0.84, "grad_norm": 3.6221554279327393, "learning_rate": 0.0002, "loss": 1.8103, "step": 207350 }, { "epoch": 0.84, "grad_norm": 3.510434150695801, "learning_rate": 0.0002, "loss": 1.5687, "step": 207360 }, { "epoch": 0.84, "grad_norm": 2.4151718616485596, "learning_rate": 0.0002, "loss": 1.8595, "step": 207370 }, { "epoch": 0.84, "grad_norm": 3.3924591541290283, "learning_rate": 0.0002, "loss": 1.7265, "step": 207380 }, { "epoch": 0.84, "grad_norm": 4.145901679992676, "learning_rate": 0.0002, "loss": 1.8446, "step": 207390 }, { "epoch": 0.84, "grad_norm": 3.5376455783843994, "learning_rate": 0.0002, "loss": 1.4734, "step": 207400 }, { "epoch": 0.84, "grad_norm": 2.7672195434570312, "learning_rate": 0.0002, "loss": 1.5988, "step": 207410 }, { "epoch": 0.84, "grad_norm": 2.8006880283355713, "learning_rate": 0.0002, "loss": 1.6093, "step": 207420 }, { "epoch": 0.84, "grad_norm": 5.343867301940918, "learning_rate": 0.0002, "loss": 1.3182, "step": 207430 }, { "epoch": 0.84, "grad_norm": 2.161994218826294, "learning_rate": 0.0002, "loss": 1.5477, "step": 207440 }, { "epoch": 0.84, "grad_norm": 9.006136894226074, "learning_rate": 0.0002, "loss": 1.5058, "step": 207450 }, { "epoch": 0.84, "grad_norm": 3.2399160861968994, "learning_rate": 0.0002, "loss": 1.7127, "step": 207460 }, { "epoch": 0.84, "grad_norm": 2.4138054847717285, "learning_rate": 0.0002, "loss": 1.5406, "step": 207470 }, { "epoch": 0.84, "grad_norm": 2.940279245376587, "learning_rate": 0.0002, "loss": 1.6735, "step": 207480 }, { "epoch": 0.84, "grad_norm": 3.8007242679595947, "learning_rate": 0.0002, "loss": 1.4105, "step": 207490 }, { "epoch": 0.84, "grad_norm": 3.604604482650757, "learning_rate": 0.0002, "loss": 1.627, "step": 207500 }, { "epoch": 0.84, "grad_norm": 4.430672645568848, "learning_rate": 0.0002, "loss": 1.544, "step": 207510 }, { "epoch": 0.84, "grad_norm": 2.8506126403808594, "learning_rate": 0.0002, "loss": 1.5788, "step": 207520 }, { "epoch": 0.84, "grad_norm": 4.815647602081299, "learning_rate": 0.0002, "loss": 1.4485, "step": 207530 }, { "epoch": 0.84, "grad_norm": 3.2598958015441895, "learning_rate": 0.0002, "loss": 1.5956, "step": 207540 }, { "epoch": 0.84, "grad_norm": 2.690321445465088, "learning_rate": 0.0002, "loss": 1.4674, "step": 207550 }, { "epoch": 0.84, "grad_norm": 2.7211806774139404, "learning_rate": 0.0002, "loss": 1.5998, "step": 207560 }, { "epoch": 0.85, "grad_norm": 3.3948469161987305, "learning_rate": 0.0002, "loss": 1.7247, "step": 207570 }, { "epoch": 0.85, "grad_norm": 1.599256992340088, "learning_rate": 0.0002, "loss": 1.5464, "step": 207580 }, { "epoch": 0.85, "grad_norm": 2.4236958026885986, "learning_rate": 0.0002, "loss": 1.3345, "step": 207590 }, { "epoch": 0.85, "grad_norm": 3.4938526153564453, "learning_rate": 0.0002, "loss": 1.4115, "step": 207600 }, { "epoch": 0.85, "grad_norm": 2.8755784034729004, "learning_rate": 0.0002, "loss": 1.5224, "step": 207610 }, { "epoch": 0.85, "grad_norm": 4.51790189743042, "learning_rate": 0.0002, "loss": 1.6441, "step": 207620 }, { "epoch": 0.85, "grad_norm": 2.7339813709259033, "learning_rate": 0.0002, "loss": 1.7232, "step": 207630 }, { "epoch": 0.85, "grad_norm": 1.7640162706375122, "learning_rate": 0.0002, "loss": 1.5863, "step": 207640 }, { "epoch": 0.85, "grad_norm": 2.902609348297119, "learning_rate": 0.0002, "loss": 1.6993, "step": 207650 }, { "epoch": 0.85, "grad_norm": 2.9989070892333984, "learning_rate": 0.0002, "loss": 1.6942, "step": 207660 }, { "epoch": 0.85, "grad_norm": 3.0983057022094727, "learning_rate": 0.0002, "loss": 1.674, "step": 207670 }, { "epoch": 0.85, "grad_norm": 2.7207701206207275, "learning_rate": 0.0002, "loss": 1.5946, "step": 207680 }, { "epoch": 0.85, "grad_norm": 3.45536470413208, "learning_rate": 0.0002, "loss": 1.7477, "step": 207690 }, { "epoch": 0.85, "grad_norm": 2.4316210746765137, "learning_rate": 0.0002, "loss": 1.3271, "step": 207700 }, { "epoch": 0.85, "grad_norm": 2.31260085105896, "learning_rate": 0.0002, "loss": 1.5782, "step": 207710 }, { "epoch": 0.85, "grad_norm": 3.447927951812744, "learning_rate": 0.0002, "loss": 1.6684, "step": 207720 }, { "epoch": 0.85, "grad_norm": 2.315120220184326, "learning_rate": 0.0002, "loss": 1.531, "step": 207730 }, { "epoch": 0.85, "grad_norm": 2.465566635131836, "learning_rate": 0.0002, "loss": 1.5509, "step": 207740 }, { "epoch": 0.85, "grad_norm": 3.158085823059082, "learning_rate": 0.0002, "loss": 1.7056, "step": 207750 }, { "epoch": 0.85, "grad_norm": 2.4017751216888428, "learning_rate": 0.0002, "loss": 1.6585, "step": 207760 }, { "epoch": 0.85, "grad_norm": 1.857764482498169, "learning_rate": 0.0002, "loss": 1.4722, "step": 207770 }, { "epoch": 0.85, "grad_norm": 2.8940494060516357, "learning_rate": 0.0002, "loss": 1.6729, "step": 207780 }, { "epoch": 0.85, "grad_norm": 3.283818244934082, "learning_rate": 0.0002, "loss": 1.4577, "step": 207790 }, { "epoch": 0.85, "grad_norm": 6.204026699066162, "learning_rate": 0.0002, "loss": 1.5561, "step": 207800 }, { "epoch": 0.85, "grad_norm": 3.734630584716797, "learning_rate": 0.0002, "loss": 1.6498, "step": 207810 }, { "epoch": 0.85, "grad_norm": 2.2667315006256104, "learning_rate": 0.0002, "loss": 1.4326, "step": 207820 }, { "epoch": 0.85, "grad_norm": 3.7238781452178955, "learning_rate": 0.0002, "loss": 1.4629, "step": 207830 }, { "epoch": 0.85, "grad_norm": 3.775818347930908, "learning_rate": 0.0002, "loss": 1.5259, "step": 207840 }, { "epoch": 0.85, "grad_norm": 5.573601245880127, "learning_rate": 0.0002, "loss": 1.4846, "step": 207850 }, { "epoch": 0.85, "grad_norm": 3.469187021255493, "learning_rate": 0.0002, "loss": 1.6203, "step": 207860 }, { "epoch": 0.85, "grad_norm": 2.26190447807312, "learning_rate": 0.0002, "loss": 1.6378, "step": 207870 }, { "epoch": 0.85, "grad_norm": 2.2195417881011963, "learning_rate": 0.0002, "loss": 1.7958, "step": 207880 }, { "epoch": 0.85, "grad_norm": 5.7230377197265625, "learning_rate": 0.0002, "loss": 1.8361, "step": 207890 }, { "epoch": 0.85, "grad_norm": 2.975257635116577, "learning_rate": 0.0002, "loss": 1.6386, "step": 207900 }, { "epoch": 0.85, "grad_norm": 1.8444901704788208, "learning_rate": 0.0002, "loss": 1.5132, "step": 207910 }, { "epoch": 0.85, "grad_norm": 3.5497777462005615, "learning_rate": 0.0002, "loss": 1.6558, "step": 207920 }, { "epoch": 0.85, "grad_norm": 2.866943120956421, "learning_rate": 0.0002, "loss": 1.5908, "step": 207930 }, { "epoch": 0.85, "grad_norm": 1.4831526279449463, "learning_rate": 0.0002, "loss": 1.6071, "step": 207940 }, { "epoch": 0.85, "grad_norm": 2.5244226455688477, "learning_rate": 0.0002, "loss": 1.671, "step": 207950 }, { "epoch": 0.85, "grad_norm": 4.535414218902588, "learning_rate": 0.0002, "loss": 1.7525, "step": 207960 }, { "epoch": 0.85, "grad_norm": 3.0088653564453125, "learning_rate": 0.0002, "loss": 1.4947, "step": 207970 }, { "epoch": 0.85, "grad_norm": 2.467454195022583, "learning_rate": 0.0002, "loss": 1.5422, "step": 207980 }, { "epoch": 0.85, "grad_norm": 3.9522666931152344, "learning_rate": 0.0002, "loss": 1.7455, "step": 207990 }, { "epoch": 0.85, "grad_norm": 2.3651223182678223, "learning_rate": 0.0002, "loss": 1.8416, "step": 208000 }, { "epoch": 0.85, "grad_norm": 2.3926377296447754, "learning_rate": 0.0002, "loss": 1.4436, "step": 208010 }, { "epoch": 0.85, "grad_norm": 2.878917932510376, "learning_rate": 0.0002, "loss": 1.658, "step": 208020 }, { "epoch": 0.85, "grad_norm": 4.661046504974365, "learning_rate": 0.0002, "loss": 1.6827, "step": 208030 }, { "epoch": 0.85, "grad_norm": 2.714237689971924, "learning_rate": 0.0002, "loss": 1.5549, "step": 208040 }, { "epoch": 0.85, "grad_norm": 2.431119680404663, "learning_rate": 0.0002, "loss": 1.7909, "step": 208050 }, { "epoch": 0.85, "grad_norm": 2.8985869884490967, "learning_rate": 0.0002, "loss": 1.8069, "step": 208060 }, { "epoch": 0.85, "grad_norm": 2.121190071105957, "learning_rate": 0.0002, "loss": 1.56, "step": 208070 }, { "epoch": 0.85, "grad_norm": 2.2901992797851562, "learning_rate": 0.0002, "loss": 1.5731, "step": 208080 }, { "epoch": 0.85, "grad_norm": 3.565791130065918, "learning_rate": 0.0002, "loss": 1.5268, "step": 208090 }, { "epoch": 0.85, "grad_norm": 3.453803062438965, "learning_rate": 0.0002, "loss": 1.6289, "step": 208100 }, { "epoch": 0.85, "grad_norm": 1.736811637878418, "learning_rate": 0.0002, "loss": 1.7864, "step": 208110 }, { "epoch": 0.85, "grad_norm": 3.2572267055511475, "learning_rate": 0.0002, "loss": 1.8175, "step": 208120 }, { "epoch": 0.85, "grad_norm": 2.633704423904419, "learning_rate": 0.0002, "loss": 1.6014, "step": 208130 }, { "epoch": 0.85, "grad_norm": 2.9842939376831055, "learning_rate": 0.0002, "loss": 1.6492, "step": 208140 }, { "epoch": 0.85, "grad_norm": 3.10677170753479, "learning_rate": 0.0002, "loss": 1.4611, "step": 208150 }, { "epoch": 0.85, "grad_norm": 1.8480643033981323, "learning_rate": 0.0002, "loss": 1.5924, "step": 208160 }, { "epoch": 0.85, "grad_norm": 2.4889235496520996, "learning_rate": 0.0002, "loss": 2.0358, "step": 208170 }, { "epoch": 0.85, "grad_norm": 4.583351135253906, "learning_rate": 0.0002, "loss": 1.8658, "step": 208180 }, { "epoch": 0.85, "grad_norm": 4.715743541717529, "learning_rate": 0.0002, "loss": 1.6199, "step": 208190 }, { "epoch": 0.85, "grad_norm": 1.885938048362732, "learning_rate": 0.0002, "loss": 1.5052, "step": 208200 }, { "epoch": 0.85, "grad_norm": 2.1070587635040283, "learning_rate": 0.0002, "loss": 1.5409, "step": 208210 }, { "epoch": 0.85, "grad_norm": 3.9944159984588623, "learning_rate": 0.0002, "loss": 1.5228, "step": 208220 }, { "epoch": 0.85, "grad_norm": 3.5453178882598877, "learning_rate": 0.0002, "loss": 1.738, "step": 208230 }, { "epoch": 0.85, "grad_norm": 2.5043177604675293, "learning_rate": 0.0002, "loss": 1.8464, "step": 208240 }, { "epoch": 0.85, "grad_norm": 1.377253532409668, "learning_rate": 0.0002, "loss": 1.7131, "step": 208250 }, { "epoch": 0.85, "grad_norm": 3.063634157180786, "learning_rate": 0.0002, "loss": 1.4012, "step": 208260 }, { "epoch": 0.85, "grad_norm": 3.156954050064087, "learning_rate": 0.0002, "loss": 1.6188, "step": 208270 }, { "epoch": 0.85, "grad_norm": 3.397470712661743, "learning_rate": 0.0002, "loss": 1.7463, "step": 208280 }, { "epoch": 0.85, "grad_norm": 2.924276828765869, "learning_rate": 0.0002, "loss": 1.2472, "step": 208290 }, { "epoch": 0.85, "grad_norm": 2.3873350620269775, "learning_rate": 0.0002, "loss": 1.5145, "step": 208300 }, { "epoch": 0.85, "grad_norm": 2.4900217056274414, "learning_rate": 0.0002, "loss": 1.5526, "step": 208310 }, { "epoch": 0.85, "grad_norm": 2.4186103343963623, "learning_rate": 0.0002, "loss": 1.6726, "step": 208320 }, { "epoch": 0.85, "grad_norm": 3.126603603363037, "learning_rate": 0.0002, "loss": 1.5026, "step": 208330 }, { "epoch": 0.85, "grad_norm": 2.1566362380981445, "learning_rate": 0.0002, "loss": 1.448, "step": 208340 }, { "epoch": 0.85, "grad_norm": 2.2072887420654297, "learning_rate": 0.0002, "loss": 1.3187, "step": 208350 }, { "epoch": 0.85, "grad_norm": 1.7375415563583374, "learning_rate": 0.0002, "loss": 1.4931, "step": 208360 }, { "epoch": 0.85, "grad_norm": 2.6971969604492188, "learning_rate": 0.0002, "loss": 1.4139, "step": 208370 }, { "epoch": 0.85, "grad_norm": 4.712599277496338, "learning_rate": 0.0002, "loss": 1.861, "step": 208380 }, { "epoch": 0.85, "grad_norm": 3.1181728839874268, "learning_rate": 0.0002, "loss": 1.5801, "step": 208390 }, { "epoch": 0.85, "grad_norm": 2.56856632232666, "learning_rate": 0.0002, "loss": 1.6722, "step": 208400 }, { "epoch": 0.85, "grad_norm": 2.9162893295288086, "learning_rate": 0.0002, "loss": 1.3799, "step": 208410 }, { "epoch": 0.85, "grad_norm": 4.235556125640869, "learning_rate": 0.0002, "loss": 1.8496, "step": 208420 }, { "epoch": 0.85, "grad_norm": 2.966287612915039, "learning_rate": 0.0002, "loss": 1.5463, "step": 208430 }, { "epoch": 0.85, "grad_norm": 2.577813148498535, "learning_rate": 0.0002, "loss": 1.5804, "step": 208440 }, { "epoch": 0.85, "grad_norm": 5.652543544769287, "learning_rate": 0.0002, "loss": 1.5192, "step": 208450 }, { "epoch": 0.85, "grad_norm": 2.390247106552124, "learning_rate": 0.0002, "loss": 1.5607, "step": 208460 }, { "epoch": 0.85, "grad_norm": 3.7661476135253906, "learning_rate": 0.0002, "loss": 1.7276, "step": 208470 }, { "epoch": 0.85, "grad_norm": 7.217964172363281, "learning_rate": 0.0002, "loss": 1.6232, "step": 208480 }, { "epoch": 0.85, "grad_norm": 4.575564861297607, "learning_rate": 0.0002, "loss": 1.4872, "step": 208490 }, { "epoch": 0.85, "grad_norm": 4.868381977081299, "learning_rate": 0.0002, "loss": 1.5127, "step": 208500 }, { "epoch": 0.85, "grad_norm": 3.4237608909606934, "learning_rate": 0.0002, "loss": 1.549, "step": 208510 }, { "epoch": 0.85, "grad_norm": 3.0793774127960205, "learning_rate": 0.0002, "loss": 1.7299, "step": 208520 }, { "epoch": 0.85, "grad_norm": 3.24228835105896, "learning_rate": 0.0002, "loss": 1.6492, "step": 208530 }, { "epoch": 0.85, "grad_norm": 1.7034555673599243, "learning_rate": 0.0002, "loss": 1.6119, "step": 208540 }, { "epoch": 0.85, "grad_norm": 3.559486150741577, "learning_rate": 0.0002, "loss": 1.696, "step": 208550 }, { "epoch": 0.85, "grad_norm": 2.7866504192352295, "learning_rate": 0.0002, "loss": 1.7393, "step": 208560 }, { "epoch": 0.85, "grad_norm": 2.7210962772369385, "learning_rate": 0.0002, "loss": 1.698, "step": 208570 }, { "epoch": 0.85, "grad_norm": 3.3136637210845947, "learning_rate": 0.0002, "loss": 1.6774, "step": 208580 }, { "epoch": 0.85, "grad_norm": 2.582970142364502, "learning_rate": 0.0002, "loss": 1.4381, "step": 208590 }, { "epoch": 0.85, "grad_norm": 3.1728103160858154, "learning_rate": 0.0002, "loss": 1.3959, "step": 208600 }, { "epoch": 0.85, "grad_norm": 2.6446666717529297, "learning_rate": 0.0002, "loss": 1.4105, "step": 208610 }, { "epoch": 0.85, "grad_norm": 3.806699514389038, "learning_rate": 0.0002, "loss": 1.4912, "step": 208620 }, { "epoch": 0.85, "grad_norm": 3.215362548828125, "learning_rate": 0.0002, "loss": 1.7129, "step": 208630 }, { "epoch": 0.85, "grad_norm": 2.9013452529907227, "learning_rate": 0.0002, "loss": 1.6555, "step": 208640 }, { "epoch": 0.85, "grad_norm": 2.3364529609680176, "learning_rate": 0.0002, "loss": 1.5086, "step": 208650 }, { "epoch": 0.85, "grad_norm": 1.8218588829040527, "learning_rate": 0.0002, "loss": 1.53, "step": 208660 }, { "epoch": 0.85, "grad_norm": 2.1705989837646484, "learning_rate": 0.0002, "loss": 1.5338, "step": 208670 }, { "epoch": 0.85, "grad_norm": 2.4918315410614014, "learning_rate": 0.0002, "loss": 1.4658, "step": 208680 }, { "epoch": 0.85, "grad_norm": 2.668109178543091, "learning_rate": 0.0002, "loss": 1.8413, "step": 208690 }, { "epoch": 0.85, "grad_norm": 4.0265936851501465, "learning_rate": 0.0002, "loss": 1.5971, "step": 208700 }, { "epoch": 0.85, "grad_norm": 3.25972580909729, "learning_rate": 0.0002, "loss": 1.6478, "step": 208710 }, { "epoch": 0.85, "grad_norm": 3.84493350982666, "learning_rate": 0.0002, "loss": 1.7509, "step": 208720 }, { "epoch": 0.85, "grad_norm": 3.3665833473205566, "learning_rate": 0.0002, "loss": 1.3264, "step": 208730 }, { "epoch": 0.85, "grad_norm": 3.579725980758667, "learning_rate": 0.0002, "loss": 1.2875, "step": 208740 }, { "epoch": 0.85, "grad_norm": 1.7739343643188477, "learning_rate": 0.0002, "loss": 1.5078, "step": 208750 }, { "epoch": 0.85, "grad_norm": 3.8338234424591064, "learning_rate": 0.0002, "loss": 1.5992, "step": 208760 }, { "epoch": 0.85, "grad_norm": 2.8919193744659424, "learning_rate": 0.0002, "loss": 1.5949, "step": 208770 }, { "epoch": 0.85, "grad_norm": 2.4819273948669434, "learning_rate": 0.0002, "loss": 1.4833, "step": 208780 }, { "epoch": 0.85, "grad_norm": 2.1439616680145264, "learning_rate": 0.0002, "loss": 1.7659, "step": 208790 }, { "epoch": 0.85, "grad_norm": 2.589329481124878, "learning_rate": 0.0002, "loss": 1.6994, "step": 208800 }, { "epoch": 0.85, "grad_norm": 4.96801233291626, "learning_rate": 0.0002, "loss": 1.6347, "step": 208810 }, { "epoch": 0.85, "grad_norm": 1.9245320558547974, "learning_rate": 0.0002, "loss": 1.6019, "step": 208820 }, { "epoch": 0.85, "grad_norm": 2.1050002574920654, "learning_rate": 0.0002, "loss": 1.6739, "step": 208830 }, { "epoch": 0.85, "grad_norm": 2.3358588218688965, "learning_rate": 0.0002, "loss": 1.726, "step": 208840 }, { "epoch": 0.85, "grad_norm": 2.60659122467041, "learning_rate": 0.0002, "loss": 1.5746, "step": 208850 }, { "epoch": 0.85, "grad_norm": 2.117488384246826, "learning_rate": 0.0002, "loss": 1.5461, "step": 208860 }, { "epoch": 0.85, "grad_norm": 2.414647340774536, "learning_rate": 0.0002, "loss": 1.5472, "step": 208870 }, { "epoch": 0.85, "grad_norm": 3.300320863723755, "learning_rate": 0.0002, "loss": 1.6198, "step": 208880 }, { "epoch": 0.85, "grad_norm": 4.008643627166748, "learning_rate": 0.0002, "loss": 1.562, "step": 208890 }, { "epoch": 0.85, "grad_norm": 2.7175300121307373, "learning_rate": 0.0002, "loss": 1.5152, "step": 208900 }, { "epoch": 0.85, "grad_norm": 2.0311198234558105, "learning_rate": 0.0002, "loss": 1.435, "step": 208910 }, { "epoch": 0.85, "grad_norm": 2.676349401473999, "learning_rate": 0.0002, "loss": 1.5844, "step": 208920 }, { "epoch": 0.85, "grad_norm": 2.5197815895080566, "learning_rate": 0.0002, "loss": 1.4405, "step": 208930 }, { "epoch": 0.85, "grad_norm": 2.8622469902038574, "learning_rate": 0.0002, "loss": 1.4658, "step": 208940 }, { "epoch": 0.85, "grad_norm": 3.6586453914642334, "learning_rate": 0.0002, "loss": 1.807, "step": 208950 }, { "epoch": 0.85, "grad_norm": 2.2904651165008545, "learning_rate": 0.0002, "loss": 1.6122, "step": 208960 }, { "epoch": 0.85, "grad_norm": 2.255277156829834, "learning_rate": 0.0002, "loss": 1.6803, "step": 208970 }, { "epoch": 0.85, "grad_norm": 3.125786066055298, "learning_rate": 0.0002, "loss": 1.3955, "step": 208980 }, { "epoch": 0.85, "grad_norm": 2.506053924560547, "learning_rate": 0.0002, "loss": 1.6169, "step": 208990 }, { "epoch": 0.85, "grad_norm": 2.5715532302856445, "learning_rate": 0.0002, "loss": 1.5707, "step": 209000 }, { "epoch": 0.85, "grad_norm": 2.4516777992248535, "learning_rate": 0.0002, "loss": 1.5071, "step": 209010 }, { "epoch": 0.85, "grad_norm": 1.9928741455078125, "learning_rate": 0.0002, "loss": 1.5924, "step": 209020 }, { "epoch": 0.85, "grad_norm": 2.933529853820801, "learning_rate": 0.0002, "loss": 1.7226, "step": 209030 }, { "epoch": 0.85, "grad_norm": 4.405672073364258, "learning_rate": 0.0002, "loss": 1.6401, "step": 209040 }, { "epoch": 0.85, "grad_norm": 3.1734819412231445, "learning_rate": 0.0002, "loss": 1.4496, "step": 209050 }, { "epoch": 0.85, "grad_norm": 1.8023903369903564, "learning_rate": 0.0002, "loss": 1.6313, "step": 209060 }, { "epoch": 0.85, "grad_norm": 1.9063745737075806, "learning_rate": 0.0002, "loss": 1.4673, "step": 209070 }, { "epoch": 0.85, "grad_norm": 4.4734063148498535, "learning_rate": 0.0002, "loss": 1.5484, "step": 209080 }, { "epoch": 0.85, "grad_norm": 3.169743537902832, "learning_rate": 0.0002, "loss": 1.4222, "step": 209090 }, { "epoch": 0.85, "grad_norm": 3.1575372219085693, "learning_rate": 0.0002, "loss": 1.7895, "step": 209100 }, { "epoch": 0.85, "grad_norm": 3.4718682765960693, "learning_rate": 0.0002, "loss": 1.6329, "step": 209110 }, { "epoch": 0.85, "grad_norm": 2.9035754203796387, "learning_rate": 0.0002, "loss": 1.5504, "step": 209120 }, { "epoch": 0.85, "grad_norm": 4.0570502281188965, "learning_rate": 0.0002, "loss": 1.4577, "step": 209130 }, { "epoch": 0.85, "grad_norm": 3.5914125442504883, "learning_rate": 0.0002, "loss": 1.5328, "step": 209140 }, { "epoch": 0.85, "grad_norm": 3.364865303039551, "learning_rate": 0.0002, "loss": 1.5589, "step": 209150 }, { "epoch": 0.85, "grad_norm": 2.514310121536255, "learning_rate": 0.0002, "loss": 1.5727, "step": 209160 }, { "epoch": 0.85, "grad_norm": 3.9243297576904297, "learning_rate": 0.0002, "loss": 1.421, "step": 209170 }, { "epoch": 0.85, "grad_norm": 2.719602584838867, "learning_rate": 0.0002, "loss": 1.6503, "step": 209180 }, { "epoch": 0.85, "grad_norm": 3.748660087585449, "learning_rate": 0.0002, "loss": 1.5168, "step": 209190 }, { "epoch": 0.85, "grad_norm": 2.3228096961975098, "learning_rate": 0.0002, "loss": 1.519, "step": 209200 }, { "epoch": 0.85, "grad_norm": 1.4173115491867065, "learning_rate": 0.0002, "loss": 1.5974, "step": 209210 }, { "epoch": 0.85, "grad_norm": 5.323390960693359, "learning_rate": 0.0002, "loss": 1.6866, "step": 209220 }, { "epoch": 0.85, "grad_norm": 1.7164193391799927, "learning_rate": 0.0002, "loss": 1.6014, "step": 209230 }, { "epoch": 0.85, "grad_norm": 3.5965633392333984, "learning_rate": 0.0002, "loss": 1.9384, "step": 209240 }, { "epoch": 0.85, "grad_norm": 3.5518476963043213, "learning_rate": 0.0002, "loss": 1.6461, "step": 209250 }, { "epoch": 0.85, "grad_norm": 2.8013453483581543, "learning_rate": 0.0002, "loss": 1.7235, "step": 209260 }, { "epoch": 0.85, "grad_norm": 3.247478723526001, "learning_rate": 0.0002, "loss": 1.5814, "step": 209270 }, { "epoch": 0.85, "grad_norm": 3.404041290283203, "learning_rate": 0.0002, "loss": 1.7088, "step": 209280 }, { "epoch": 0.85, "grad_norm": 2.3103880882263184, "learning_rate": 0.0002, "loss": 1.5759, "step": 209290 }, { "epoch": 0.85, "grad_norm": 3.700747489929199, "learning_rate": 0.0002, "loss": 1.5673, "step": 209300 }, { "epoch": 0.85, "grad_norm": 3.633657693862915, "learning_rate": 0.0002, "loss": 1.5953, "step": 209310 }, { "epoch": 0.85, "grad_norm": 4.210865020751953, "learning_rate": 0.0002, "loss": 1.6377, "step": 209320 }, { "epoch": 0.85, "grad_norm": 2.5915441513061523, "learning_rate": 0.0002, "loss": 1.432, "step": 209330 }, { "epoch": 0.85, "grad_norm": 3.5853190422058105, "learning_rate": 0.0002, "loss": 1.7182, "step": 209340 }, { "epoch": 0.85, "grad_norm": 4.147492408752441, "learning_rate": 0.0002, "loss": 1.688, "step": 209350 }, { "epoch": 0.85, "grad_norm": 2.2232162952423096, "learning_rate": 0.0002, "loss": 1.4536, "step": 209360 }, { "epoch": 0.85, "grad_norm": 5.109209060668945, "learning_rate": 0.0002, "loss": 1.6783, "step": 209370 }, { "epoch": 0.85, "grad_norm": 1.4404340982437134, "learning_rate": 0.0002, "loss": 1.65, "step": 209380 }, { "epoch": 0.85, "grad_norm": 2.6247398853302, "learning_rate": 0.0002, "loss": 1.5697, "step": 209390 }, { "epoch": 0.85, "grad_norm": 2.0233805179595947, "learning_rate": 0.0002, "loss": 1.4879, "step": 209400 }, { "epoch": 0.85, "grad_norm": 6.463952541351318, "learning_rate": 0.0002, "loss": 1.3746, "step": 209410 }, { "epoch": 0.85, "grad_norm": 2.956080675125122, "learning_rate": 0.0002, "loss": 1.6273, "step": 209420 }, { "epoch": 0.85, "grad_norm": 2.0965635776519775, "learning_rate": 0.0002, "loss": 1.6054, "step": 209430 }, { "epoch": 0.85, "grad_norm": 2.6867828369140625, "learning_rate": 0.0002, "loss": 1.3388, "step": 209440 }, { "epoch": 0.85, "grad_norm": 1.777157187461853, "learning_rate": 0.0002, "loss": 1.4629, "step": 209450 }, { "epoch": 0.85, "grad_norm": 2.2038891315460205, "learning_rate": 0.0002, "loss": 1.7401, "step": 209460 }, { "epoch": 0.85, "grad_norm": 4.068746089935303, "learning_rate": 0.0002, "loss": 1.6775, "step": 209470 }, { "epoch": 0.85, "grad_norm": 3.830411434173584, "learning_rate": 0.0002, "loss": 1.7388, "step": 209480 }, { "epoch": 0.85, "grad_norm": 2.908142566680908, "learning_rate": 0.0002, "loss": 1.5329, "step": 209490 }, { "epoch": 0.85, "grad_norm": 2.8796212673187256, "learning_rate": 0.0002, "loss": 1.7677, "step": 209500 }, { "epoch": 0.85, "grad_norm": 1.9246989488601685, "learning_rate": 0.0002, "loss": 1.5389, "step": 209510 }, { "epoch": 0.85, "grad_norm": 7.705164432525635, "learning_rate": 0.0002, "loss": 1.5164, "step": 209520 }, { "epoch": 0.85, "grad_norm": 3.9211785793304443, "learning_rate": 0.0002, "loss": 1.6311, "step": 209530 }, { "epoch": 0.85, "grad_norm": 4.228458404541016, "learning_rate": 0.0002, "loss": 1.5141, "step": 209540 }, { "epoch": 0.85, "grad_norm": 3.0488247871398926, "learning_rate": 0.0002, "loss": 1.7357, "step": 209550 }, { "epoch": 0.85, "grad_norm": 2.863215684890747, "learning_rate": 0.0002, "loss": 1.4611, "step": 209560 }, { "epoch": 0.85, "grad_norm": 4.044342041015625, "learning_rate": 0.0002, "loss": 1.4387, "step": 209570 }, { "epoch": 0.85, "grad_norm": 1.7409846782684326, "learning_rate": 0.0002, "loss": 1.5848, "step": 209580 }, { "epoch": 0.85, "grad_norm": 1.8261362314224243, "learning_rate": 0.0002, "loss": 1.5661, "step": 209590 }, { "epoch": 0.85, "grad_norm": 4.037189960479736, "learning_rate": 0.0002, "loss": 1.6519, "step": 209600 }, { "epoch": 0.85, "grad_norm": 4.381288528442383, "learning_rate": 0.0002, "loss": 1.6271, "step": 209610 }, { "epoch": 0.85, "grad_norm": 4.616697788238525, "learning_rate": 0.0002, "loss": 1.5313, "step": 209620 }, { "epoch": 0.85, "grad_norm": 2.2520217895507812, "learning_rate": 0.0002, "loss": 1.5825, "step": 209630 }, { "epoch": 0.85, "grad_norm": 3.078080654144287, "learning_rate": 0.0002, "loss": 1.7199, "step": 209640 }, { "epoch": 0.85, "grad_norm": 2.7764225006103516, "learning_rate": 0.0002, "loss": 1.5728, "step": 209650 }, { "epoch": 0.85, "grad_norm": 2.6626408100128174, "learning_rate": 0.0002, "loss": 1.3347, "step": 209660 }, { "epoch": 0.85, "grad_norm": 3.99731707572937, "learning_rate": 0.0002, "loss": 1.8664, "step": 209670 }, { "epoch": 0.85, "grad_norm": 3.6131181716918945, "learning_rate": 0.0002, "loss": 1.5783, "step": 209680 }, { "epoch": 0.85, "grad_norm": 3.1816420555114746, "learning_rate": 0.0002, "loss": 1.3873, "step": 209690 }, { "epoch": 0.85, "grad_norm": 2.1406614780426025, "learning_rate": 0.0002, "loss": 1.6118, "step": 209700 }, { "epoch": 0.85, "grad_norm": 2.048675298690796, "learning_rate": 0.0002, "loss": 1.5647, "step": 209710 }, { "epoch": 0.85, "grad_norm": 3.2111523151397705, "learning_rate": 0.0002, "loss": 1.4569, "step": 209720 }, { "epoch": 0.85, "grad_norm": 4.340296268463135, "learning_rate": 0.0002, "loss": 1.7564, "step": 209730 }, { "epoch": 0.85, "grad_norm": 1.6228429079055786, "learning_rate": 0.0002, "loss": 1.5385, "step": 209740 }, { "epoch": 0.85, "grad_norm": 2.339364767074585, "learning_rate": 0.0002, "loss": 1.7466, "step": 209750 }, { "epoch": 0.85, "grad_norm": 3.3462350368499756, "learning_rate": 0.0002, "loss": 1.5594, "step": 209760 }, { "epoch": 0.85, "grad_norm": 2.5138278007507324, "learning_rate": 0.0002, "loss": 1.4344, "step": 209770 }, { "epoch": 0.85, "grad_norm": 2.0481204986572266, "learning_rate": 0.0002, "loss": 1.6363, "step": 209780 }, { "epoch": 0.85, "grad_norm": 2.5842738151550293, "learning_rate": 0.0002, "loss": 1.5025, "step": 209790 }, { "epoch": 0.85, "grad_norm": 3.4564626216888428, "learning_rate": 0.0002, "loss": 1.7397, "step": 209800 }, { "epoch": 0.85, "grad_norm": 2.1374783515930176, "learning_rate": 0.0002, "loss": 1.5344, "step": 209810 }, { "epoch": 0.85, "grad_norm": 2.841364622116089, "learning_rate": 0.0002, "loss": 1.705, "step": 209820 }, { "epoch": 0.85, "grad_norm": 2.6332595348358154, "learning_rate": 0.0002, "loss": 1.6935, "step": 209830 }, { "epoch": 0.85, "grad_norm": 3.801244020462036, "learning_rate": 0.0002, "loss": 1.6932, "step": 209840 }, { "epoch": 0.85, "grad_norm": 3.00129771232605, "learning_rate": 0.0002, "loss": 1.4853, "step": 209850 }, { "epoch": 0.85, "grad_norm": 2.807743787765503, "learning_rate": 0.0002, "loss": 1.591, "step": 209860 }, { "epoch": 0.85, "grad_norm": 2.0456788539886475, "learning_rate": 0.0002, "loss": 1.4632, "step": 209870 }, { "epoch": 0.85, "grad_norm": 2.1001486778259277, "learning_rate": 0.0002, "loss": 1.7486, "step": 209880 }, { "epoch": 0.85, "grad_norm": 2.2684786319732666, "learning_rate": 0.0002, "loss": 1.4379, "step": 209890 }, { "epoch": 0.85, "grad_norm": 1.1842992305755615, "learning_rate": 0.0002, "loss": 1.5542, "step": 209900 }, { "epoch": 0.85, "grad_norm": 2.700063705444336, "learning_rate": 0.0002, "loss": 1.727, "step": 209910 }, { "epoch": 0.85, "grad_norm": 3.619034767150879, "learning_rate": 0.0002, "loss": 1.2623, "step": 209920 }, { "epoch": 0.85, "grad_norm": 2.4546167850494385, "learning_rate": 0.0002, "loss": 1.4885, "step": 209930 }, { "epoch": 0.85, "grad_norm": 2.3433823585510254, "learning_rate": 0.0002, "loss": 1.8418, "step": 209940 }, { "epoch": 0.85, "grad_norm": 3.7214531898498535, "learning_rate": 0.0002, "loss": 1.7124, "step": 209950 }, { "epoch": 0.85, "grad_norm": 2.8597514629364014, "learning_rate": 0.0002, "loss": 1.5158, "step": 209960 }, { "epoch": 0.85, "grad_norm": 2.5515739917755127, "learning_rate": 0.0002, "loss": 1.4588, "step": 209970 }, { "epoch": 0.85, "grad_norm": 2.0806684494018555, "learning_rate": 0.0002, "loss": 1.4727, "step": 209980 }, { "epoch": 0.85, "grad_norm": 2.718196392059326, "learning_rate": 0.0002, "loss": 1.5535, "step": 209990 }, { "epoch": 0.85, "grad_norm": 3.6199865341186523, "learning_rate": 0.0002, "loss": 1.5764, "step": 210000 }, { "epoch": 0.85, "grad_norm": 2.265986204147339, "learning_rate": 0.0002, "loss": 1.3722, "step": 210010 }, { "epoch": 0.85, "grad_norm": 1.5765032768249512, "learning_rate": 0.0002, "loss": 1.7934, "step": 210020 }, { "epoch": 0.86, "grad_norm": 2.9357082843780518, "learning_rate": 0.0002, "loss": 1.5811, "step": 210030 }, { "epoch": 0.86, "grad_norm": 1.9904372692108154, "learning_rate": 0.0002, "loss": 1.3308, "step": 210040 }, { "epoch": 0.86, "grad_norm": 3.342590808868408, "learning_rate": 0.0002, "loss": 1.6214, "step": 210050 }, { "epoch": 0.86, "grad_norm": 4.905925750732422, "learning_rate": 0.0002, "loss": 1.5975, "step": 210060 }, { "epoch": 0.86, "grad_norm": 3.9641716480255127, "learning_rate": 0.0002, "loss": 1.5621, "step": 210070 }, { "epoch": 0.86, "grad_norm": 2.6174161434173584, "learning_rate": 0.0002, "loss": 1.9506, "step": 210080 }, { "epoch": 0.86, "grad_norm": 3.5777928829193115, "learning_rate": 0.0002, "loss": 1.61, "step": 210090 }, { "epoch": 0.86, "grad_norm": 3.7698965072631836, "learning_rate": 0.0002, "loss": 1.6752, "step": 210100 }, { "epoch": 0.86, "grad_norm": 1.6024497747421265, "learning_rate": 0.0002, "loss": 1.5874, "step": 210110 }, { "epoch": 0.86, "grad_norm": 2.952843427658081, "learning_rate": 0.0002, "loss": 1.5367, "step": 210120 }, { "epoch": 0.86, "grad_norm": 2.237417459487915, "learning_rate": 0.0002, "loss": 1.6053, "step": 210130 }, { "epoch": 0.86, "grad_norm": 3.0897390842437744, "learning_rate": 0.0002, "loss": 1.6433, "step": 210140 }, { "epoch": 0.86, "grad_norm": 3.1711394786834717, "learning_rate": 0.0002, "loss": 1.5976, "step": 210150 }, { "epoch": 0.86, "grad_norm": 3.209080934524536, "learning_rate": 0.0002, "loss": 1.5028, "step": 210160 }, { "epoch": 0.86, "grad_norm": 3.4259369373321533, "learning_rate": 0.0002, "loss": 1.5898, "step": 210170 }, { "epoch": 0.86, "grad_norm": 3.4281680583953857, "learning_rate": 0.0002, "loss": 1.7515, "step": 210180 }, { "epoch": 0.86, "grad_norm": 3.626901388168335, "learning_rate": 0.0002, "loss": 1.4173, "step": 210190 }, { "epoch": 0.86, "grad_norm": 4.110527992248535, "learning_rate": 0.0002, "loss": 1.2976, "step": 210200 }, { "epoch": 0.86, "grad_norm": 2.6726229190826416, "learning_rate": 0.0002, "loss": 1.6333, "step": 210210 }, { "epoch": 0.86, "grad_norm": 10.309652328491211, "learning_rate": 0.0002, "loss": 1.5403, "step": 210220 }, { "epoch": 0.86, "grad_norm": 3.139362335205078, "learning_rate": 0.0002, "loss": 1.9072, "step": 210230 }, { "epoch": 0.86, "grad_norm": 1.788384199142456, "learning_rate": 0.0002, "loss": 1.4656, "step": 210240 }, { "epoch": 0.86, "grad_norm": 1.6815861463546753, "learning_rate": 0.0002, "loss": 1.4322, "step": 210250 }, { "epoch": 0.86, "grad_norm": 2.740037202835083, "learning_rate": 0.0002, "loss": 1.6109, "step": 210260 }, { "epoch": 0.86, "grad_norm": 2.790922164916992, "learning_rate": 0.0002, "loss": 1.4534, "step": 210270 }, { "epoch": 0.86, "grad_norm": 3.004032850265503, "learning_rate": 0.0002, "loss": 1.6447, "step": 210280 }, { "epoch": 0.86, "grad_norm": 2.9086086750030518, "learning_rate": 0.0002, "loss": 1.3106, "step": 210290 }, { "epoch": 0.86, "grad_norm": 3.5685489177703857, "learning_rate": 0.0002, "loss": 1.4633, "step": 210300 }, { "epoch": 0.86, "grad_norm": 3.1385438442230225, "learning_rate": 0.0002, "loss": 1.5574, "step": 210310 }, { "epoch": 0.86, "grad_norm": 3.7730040550231934, "learning_rate": 0.0002, "loss": 1.5657, "step": 210320 }, { "epoch": 0.86, "grad_norm": 2.5511837005615234, "learning_rate": 0.0002, "loss": 1.488, "step": 210330 }, { "epoch": 0.86, "grad_norm": 2.3282172679901123, "learning_rate": 0.0002, "loss": 1.572, "step": 210340 }, { "epoch": 0.86, "grad_norm": 1.6955856084823608, "learning_rate": 0.0002, "loss": 1.5097, "step": 210350 }, { "epoch": 0.86, "grad_norm": 2.965843915939331, "learning_rate": 0.0002, "loss": 1.3864, "step": 210360 }, { "epoch": 0.86, "grad_norm": 2.7907283306121826, "learning_rate": 0.0002, "loss": 1.421, "step": 210370 }, { "epoch": 0.86, "grad_norm": 3.3323709964752197, "learning_rate": 0.0002, "loss": 1.549, "step": 210380 }, { "epoch": 0.86, "grad_norm": 3.9951839447021484, "learning_rate": 0.0002, "loss": 1.605, "step": 210390 }, { "epoch": 0.86, "grad_norm": 4.358804225921631, "learning_rate": 0.0002, "loss": 1.6171, "step": 210400 }, { "epoch": 0.86, "grad_norm": 1.8683253526687622, "learning_rate": 0.0002, "loss": 1.6132, "step": 210410 }, { "epoch": 0.86, "grad_norm": 2.396594285964966, "learning_rate": 0.0002, "loss": 1.6621, "step": 210420 }, { "epoch": 0.86, "grad_norm": 2.2426090240478516, "learning_rate": 0.0002, "loss": 1.6528, "step": 210430 }, { "epoch": 0.86, "grad_norm": 3.4487802982330322, "learning_rate": 0.0002, "loss": 1.7559, "step": 210440 }, { "epoch": 0.86, "grad_norm": 4.083419322967529, "learning_rate": 0.0002, "loss": 1.551, "step": 210450 }, { "epoch": 0.86, "grad_norm": 3.4032185077667236, "learning_rate": 0.0002, "loss": 1.752, "step": 210460 }, { "epoch": 0.86, "grad_norm": 4.147660732269287, "learning_rate": 0.0002, "loss": 1.8115, "step": 210470 }, { "epoch": 0.86, "grad_norm": 1.6335556507110596, "learning_rate": 0.0002, "loss": 1.7306, "step": 210480 }, { "epoch": 0.86, "grad_norm": 1.5192533731460571, "learning_rate": 0.0002, "loss": 1.4634, "step": 210490 }, { "epoch": 0.86, "grad_norm": 2.6843764781951904, "learning_rate": 0.0002, "loss": 1.5196, "step": 210500 }, { "epoch": 0.86, "grad_norm": 4.073224067687988, "learning_rate": 0.0002, "loss": 1.4541, "step": 210510 }, { "epoch": 0.86, "grad_norm": 4.295690536499023, "learning_rate": 0.0002, "loss": 1.5256, "step": 210520 }, { "epoch": 0.86, "grad_norm": 2.0117061138153076, "learning_rate": 0.0002, "loss": 1.5892, "step": 210530 }, { "epoch": 0.86, "grad_norm": 3.4836714267730713, "learning_rate": 0.0002, "loss": 1.5654, "step": 210540 }, { "epoch": 0.86, "grad_norm": 6.129263877868652, "learning_rate": 0.0002, "loss": 1.7291, "step": 210550 }, { "epoch": 0.86, "grad_norm": 2.2904157638549805, "learning_rate": 0.0002, "loss": 1.6217, "step": 210560 }, { "epoch": 0.86, "grad_norm": 3.2122721672058105, "learning_rate": 0.0002, "loss": 1.9271, "step": 210570 }, { "epoch": 0.86, "grad_norm": 1.982820987701416, "learning_rate": 0.0002, "loss": 1.5281, "step": 210580 }, { "epoch": 0.86, "grad_norm": 3.7168102264404297, "learning_rate": 0.0002, "loss": 1.8212, "step": 210590 }, { "epoch": 0.86, "grad_norm": 2.6329054832458496, "learning_rate": 0.0002, "loss": 1.5351, "step": 210600 }, { "epoch": 0.86, "grad_norm": 2.39607834815979, "learning_rate": 0.0002, "loss": 1.481, "step": 210610 }, { "epoch": 0.86, "grad_norm": 3.127483367919922, "learning_rate": 0.0002, "loss": 1.4578, "step": 210620 }, { "epoch": 0.86, "grad_norm": 7.61099100112915, "learning_rate": 0.0002, "loss": 1.5372, "step": 210630 }, { "epoch": 0.86, "grad_norm": 3.1540682315826416, "learning_rate": 0.0002, "loss": 1.4931, "step": 210640 }, { "epoch": 0.86, "grad_norm": 2.5795276165008545, "learning_rate": 0.0002, "loss": 1.4877, "step": 210650 }, { "epoch": 0.86, "grad_norm": 2.7175135612487793, "learning_rate": 0.0002, "loss": 1.632, "step": 210660 }, { "epoch": 0.86, "grad_norm": 1.8936508893966675, "learning_rate": 0.0002, "loss": 1.5128, "step": 210670 }, { "epoch": 0.86, "grad_norm": 2.601120948791504, "learning_rate": 0.0002, "loss": 1.7157, "step": 210680 }, { "epoch": 0.86, "grad_norm": 1.7041081190109253, "learning_rate": 0.0002, "loss": 1.6634, "step": 210690 }, { "epoch": 0.86, "grad_norm": 7.499724864959717, "learning_rate": 0.0002, "loss": 1.6195, "step": 210700 }, { "epoch": 0.86, "grad_norm": 3.4148640632629395, "learning_rate": 0.0002, "loss": 1.6608, "step": 210710 }, { "epoch": 0.86, "grad_norm": 2.4649734497070312, "learning_rate": 0.0002, "loss": 1.2863, "step": 210720 }, { "epoch": 0.86, "grad_norm": 3.057185411453247, "learning_rate": 0.0002, "loss": 1.6474, "step": 210730 }, { "epoch": 0.86, "grad_norm": 3.2763874530792236, "learning_rate": 0.0002, "loss": 1.5121, "step": 210740 }, { "epoch": 0.86, "grad_norm": 2.659543991088867, "learning_rate": 0.0002, "loss": 1.6588, "step": 210750 }, { "epoch": 0.86, "grad_norm": 1.937850832939148, "learning_rate": 0.0002, "loss": 1.325, "step": 210760 }, { "epoch": 0.86, "grad_norm": 2.967573404312134, "learning_rate": 0.0002, "loss": 1.4826, "step": 210770 }, { "epoch": 0.86, "grad_norm": 2.5855038166046143, "learning_rate": 0.0002, "loss": 1.6257, "step": 210780 }, { "epoch": 0.86, "grad_norm": 3.126129150390625, "learning_rate": 0.0002, "loss": 1.5655, "step": 210790 }, { "epoch": 0.86, "grad_norm": 3.368272542953491, "learning_rate": 0.0002, "loss": 1.638, "step": 210800 }, { "epoch": 0.86, "grad_norm": 4.128715515136719, "learning_rate": 0.0002, "loss": 1.5291, "step": 210810 }, { "epoch": 0.86, "grad_norm": 2.873821496963501, "learning_rate": 0.0002, "loss": 1.5802, "step": 210820 }, { "epoch": 0.86, "grad_norm": 3.0034871101379395, "learning_rate": 0.0002, "loss": 1.54, "step": 210830 }, { "epoch": 0.86, "grad_norm": 2.800809383392334, "learning_rate": 0.0002, "loss": 1.642, "step": 210840 }, { "epoch": 0.86, "grad_norm": 1.9536672830581665, "learning_rate": 0.0002, "loss": 1.6378, "step": 210850 }, { "epoch": 0.86, "grad_norm": 1.320669412612915, "learning_rate": 0.0002, "loss": 1.5379, "step": 210860 }, { "epoch": 0.86, "grad_norm": 3.68676495552063, "learning_rate": 0.0002, "loss": 1.8133, "step": 210870 }, { "epoch": 0.86, "grad_norm": 2.4093425273895264, "learning_rate": 0.0002, "loss": 1.4988, "step": 210880 }, { "epoch": 0.86, "grad_norm": 2.639803171157837, "learning_rate": 0.0002, "loss": 1.545, "step": 210890 }, { "epoch": 0.86, "grad_norm": 2.3027548789978027, "learning_rate": 0.0002, "loss": 1.6108, "step": 210900 }, { "epoch": 0.86, "grad_norm": 3.387406826019287, "learning_rate": 0.0002, "loss": 1.5608, "step": 210910 }, { "epoch": 0.86, "grad_norm": 2.340707778930664, "learning_rate": 0.0002, "loss": 1.2047, "step": 210920 }, { "epoch": 0.86, "grad_norm": 2.691594362258911, "learning_rate": 0.0002, "loss": 1.4841, "step": 210930 }, { "epoch": 0.86, "grad_norm": 4.255695819854736, "learning_rate": 0.0002, "loss": 1.7347, "step": 210940 }, { "epoch": 0.86, "grad_norm": 3.4879801273345947, "learning_rate": 0.0002, "loss": 1.667, "step": 210950 }, { "epoch": 0.86, "grad_norm": 5.335525989532471, "learning_rate": 0.0002, "loss": 1.626, "step": 210960 }, { "epoch": 0.86, "grad_norm": 1.8246930837631226, "learning_rate": 0.0002, "loss": 1.6783, "step": 210970 }, { "epoch": 0.86, "grad_norm": 3.4933927059173584, "learning_rate": 0.0002, "loss": 1.6937, "step": 210980 }, { "epoch": 0.86, "grad_norm": 3.5869290828704834, "learning_rate": 0.0002, "loss": 1.8194, "step": 210990 }, { "epoch": 0.86, "grad_norm": 4.262285232543945, "learning_rate": 0.0002, "loss": 1.5304, "step": 211000 }, { "epoch": 0.86, "grad_norm": 2.4777965545654297, "learning_rate": 0.0002, "loss": 1.5985, "step": 211010 }, { "epoch": 0.86, "grad_norm": 3.673551082611084, "learning_rate": 0.0002, "loss": 2.0132, "step": 211020 }, { "epoch": 0.86, "grad_norm": 3.5113158226013184, "learning_rate": 0.0002, "loss": 1.6974, "step": 211030 }, { "epoch": 0.86, "grad_norm": 6.299999237060547, "learning_rate": 0.0002, "loss": 1.5929, "step": 211040 }, { "epoch": 0.86, "grad_norm": 4.658613681793213, "learning_rate": 0.0002, "loss": 1.5963, "step": 211050 }, { "epoch": 0.86, "grad_norm": 5.328351974487305, "learning_rate": 0.0002, "loss": 1.6951, "step": 211060 }, { "epoch": 0.86, "grad_norm": 3.78829026222229, "learning_rate": 0.0002, "loss": 1.6621, "step": 211070 }, { "epoch": 0.86, "grad_norm": 2.360565185546875, "learning_rate": 0.0002, "loss": 1.2446, "step": 211080 }, { "epoch": 0.86, "grad_norm": 2.413999080657959, "learning_rate": 0.0002, "loss": 1.666, "step": 211090 }, { "epoch": 0.86, "grad_norm": 2.382363796234131, "learning_rate": 0.0002, "loss": 1.5749, "step": 211100 }, { "epoch": 0.86, "grad_norm": 2.7454581260681152, "learning_rate": 0.0002, "loss": 1.7547, "step": 211110 }, { "epoch": 0.86, "grad_norm": 4.259437561035156, "learning_rate": 0.0002, "loss": 1.8166, "step": 211120 }, { "epoch": 0.86, "grad_norm": 2.498544692993164, "learning_rate": 0.0002, "loss": 1.7298, "step": 211130 }, { "epoch": 0.86, "grad_norm": 2.2824292182922363, "learning_rate": 0.0002, "loss": 1.6296, "step": 211140 }, { "epoch": 0.86, "grad_norm": 2.0833096504211426, "learning_rate": 0.0002, "loss": 1.3525, "step": 211150 }, { "epoch": 0.86, "grad_norm": 2.2133240699768066, "learning_rate": 0.0002, "loss": 1.7551, "step": 211160 }, { "epoch": 0.86, "grad_norm": 2.23173189163208, "learning_rate": 0.0002, "loss": 1.6479, "step": 211170 }, { "epoch": 0.86, "grad_norm": 1.3318835496902466, "learning_rate": 0.0002, "loss": 1.4439, "step": 211180 }, { "epoch": 0.86, "grad_norm": 4.1257004737854, "learning_rate": 0.0002, "loss": 1.6291, "step": 211190 }, { "epoch": 0.86, "grad_norm": 2.087041139602661, "learning_rate": 0.0002, "loss": 1.5779, "step": 211200 }, { "epoch": 0.86, "grad_norm": 2.420759916305542, "learning_rate": 0.0002, "loss": 1.5527, "step": 211210 }, { "epoch": 0.86, "grad_norm": 2.7694265842437744, "learning_rate": 0.0002, "loss": 1.6662, "step": 211220 }, { "epoch": 0.86, "grad_norm": 3.139315128326416, "learning_rate": 0.0002, "loss": 1.4902, "step": 211230 }, { "epoch": 0.86, "grad_norm": 2.671591281890869, "learning_rate": 0.0002, "loss": 1.4508, "step": 211240 }, { "epoch": 0.86, "grad_norm": 3.264883518218994, "learning_rate": 0.0002, "loss": 1.731, "step": 211250 }, { "epoch": 0.86, "grad_norm": 3.7613277435302734, "learning_rate": 0.0002, "loss": 1.7852, "step": 211260 }, { "epoch": 0.86, "grad_norm": 2.5079119205474854, "learning_rate": 0.0002, "loss": 1.6031, "step": 211270 }, { "epoch": 0.86, "grad_norm": 1.9351098537445068, "learning_rate": 0.0002, "loss": 1.6486, "step": 211280 }, { "epoch": 0.86, "grad_norm": 2.302131175994873, "learning_rate": 0.0002, "loss": 1.4882, "step": 211290 }, { "epoch": 0.86, "grad_norm": 2.430422067642212, "learning_rate": 0.0002, "loss": 1.3743, "step": 211300 }, { "epoch": 0.86, "grad_norm": 2.5850627422332764, "learning_rate": 0.0002, "loss": 1.6808, "step": 211310 }, { "epoch": 0.86, "grad_norm": 5.142190933227539, "learning_rate": 0.0002, "loss": 1.5525, "step": 211320 }, { "epoch": 0.86, "grad_norm": 2.7383110523223877, "learning_rate": 0.0002, "loss": 1.3883, "step": 211330 }, { "epoch": 0.86, "grad_norm": 3.829172134399414, "learning_rate": 0.0002, "loss": 1.5241, "step": 211340 }, { "epoch": 0.86, "grad_norm": 3.6958799362182617, "learning_rate": 0.0002, "loss": 1.7014, "step": 211350 }, { "epoch": 0.86, "grad_norm": 2.7851388454437256, "learning_rate": 0.0002, "loss": 1.445, "step": 211360 }, { "epoch": 0.86, "grad_norm": 4.172987937927246, "learning_rate": 0.0002, "loss": 1.8238, "step": 211370 }, { "epoch": 0.86, "grad_norm": 3.018871307373047, "learning_rate": 0.0002, "loss": 1.606, "step": 211380 }, { "epoch": 0.86, "grad_norm": 8.493696212768555, "learning_rate": 0.0002, "loss": 1.4513, "step": 211390 }, { "epoch": 0.86, "grad_norm": 3.0723373889923096, "learning_rate": 0.0002, "loss": 1.662, "step": 211400 }, { "epoch": 0.86, "grad_norm": 4.256318092346191, "learning_rate": 0.0002, "loss": 1.3995, "step": 211410 }, { "epoch": 0.86, "grad_norm": 3.3395960330963135, "learning_rate": 0.0002, "loss": 1.4924, "step": 211420 }, { "epoch": 0.86, "grad_norm": 2.8328897953033447, "learning_rate": 0.0002, "loss": 1.5582, "step": 211430 }, { "epoch": 0.86, "grad_norm": 5.769701957702637, "learning_rate": 0.0002, "loss": 1.7954, "step": 211440 }, { "epoch": 0.86, "grad_norm": 14.952872276306152, "learning_rate": 0.0002, "loss": 1.6771, "step": 211450 }, { "epoch": 0.86, "grad_norm": 4.156365871429443, "learning_rate": 0.0002, "loss": 1.4813, "step": 211460 }, { "epoch": 0.86, "grad_norm": 3.6974997520446777, "learning_rate": 0.0002, "loss": 1.5346, "step": 211470 }, { "epoch": 0.86, "grad_norm": 1.880295753479004, "learning_rate": 0.0002, "loss": 1.5413, "step": 211480 }, { "epoch": 0.86, "grad_norm": 2.317235231399536, "learning_rate": 0.0002, "loss": 1.3567, "step": 211490 }, { "epoch": 0.86, "grad_norm": 4.130239009857178, "learning_rate": 0.0002, "loss": 1.6219, "step": 211500 }, { "epoch": 0.86, "grad_norm": 3.7467663288116455, "learning_rate": 0.0002, "loss": 1.6196, "step": 211510 }, { "epoch": 0.86, "grad_norm": 3.2151193618774414, "learning_rate": 0.0002, "loss": 1.6561, "step": 211520 }, { "epoch": 0.86, "grad_norm": 3.650822877883911, "learning_rate": 0.0002, "loss": 1.6752, "step": 211530 }, { "epoch": 0.86, "grad_norm": 4.487693786621094, "learning_rate": 0.0002, "loss": 1.5002, "step": 211540 }, { "epoch": 0.86, "grad_norm": 3.412475109100342, "learning_rate": 0.0002, "loss": 1.6743, "step": 211550 }, { "epoch": 0.86, "grad_norm": 1.8671149015426636, "learning_rate": 0.0002, "loss": 1.5871, "step": 211560 }, { "epoch": 0.86, "grad_norm": 2.7597126960754395, "learning_rate": 0.0002, "loss": 1.6466, "step": 211570 }, { "epoch": 0.86, "grad_norm": 4.4229326248168945, "learning_rate": 0.0002, "loss": 1.4289, "step": 211580 }, { "epoch": 0.86, "grad_norm": 3.191462993621826, "learning_rate": 0.0002, "loss": 1.5751, "step": 211590 }, { "epoch": 0.86, "grad_norm": 2.5324859619140625, "learning_rate": 0.0002, "loss": 1.3997, "step": 211600 }, { "epoch": 0.86, "grad_norm": 3.54014253616333, "learning_rate": 0.0002, "loss": 1.5906, "step": 211610 }, { "epoch": 0.86, "grad_norm": 3.438951015472412, "learning_rate": 0.0002, "loss": 1.652, "step": 211620 }, { "epoch": 0.86, "grad_norm": 2.590362071990967, "learning_rate": 0.0002, "loss": 1.7816, "step": 211630 }, { "epoch": 0.86, "grad_norm": 2.805102586746216, "learning_rate": 0.0002, "loss": 1.3823, "step": 211640 }, { "epoch": 0.86, "grad_norm": 2.9526219367980957, "learning_rate": 0.0002, "loss": 1.5963, "step": 211650 }, { "epoch": 0.86, "grad_norm": 3.086421251296997, "learning_rate": 0.0002, "loss": 1.3077, "step": 211660 }, { "epoch": 0.86, "grad_norm": 3.264944076538086, "learning_rate": 0.0002, "loss": 1.7276, "step": 211670 }, { "epoch": 0.86, "grad_norm": 3.8400232791900635, "learning_rate": 0.0002, "loss": 1.5955, "step": 211680 }, { "epoch": 0.86, "grad_norm": 3.3416030406951904, "learning_rate": 0.0002, "loss": 1.7773, "step": 211690 }, { "epoch": 0.86, "grad_norm": 3.270211696624756, "learning_rate": 0.0002, "loss": 1.6336, "step": 211700 }, { "epoch": 0.86, "grad_norm": 3.796560525894165, "learning_rate": 0.0002, "loss": 1.7287, "step": 211710 }, { "epoch": 0.86, "grad_norm": 1.8160136938095093, "learning_rate": 0.0002, "loss": 1.6385, "step": 211720 }, { "epoch": 0.86, "grad_norm": 3.188049077987671, "learning_rate": 0.0002, "loss": 1.273, "step": 211730 }, { "epoch": 0.86, "grad_norm": 2.363145589828491, "learning_rate": 0.0002, "loss": 1.6255, "step": 211740 }, { "epoch": 0.86, "grad_norm": 3.5867724418640137, "learning_rate": 0.0002, "loss": 1.744, "step": 211750 }, { "epoch": 0.86, "grad_norm": 2.408874034881592, "learning_rate": 0.0002, "loss": 1.3327, "step": 211760 }, { "epoch": 0.86, "grad_norm": 3.8554799556732178, "learning_rate": 0.0002, "loss": 1.445, "step": 211770 }, { "epoch": 0.86, "grad_norm": 4.503793239593506, "learning_rate": 0.0002, "loss": 1.5363, "step": 211780 }, { "epoch": 0.86, "grad_norm": 2.066066265106201, "learning_rate": 0.0002, "loss": 1.5563, "step": 211790 }, { "epoch": 0.86, "grad_norm": 3.2807669639587402, "learning_rate": 0.0002, "loss": 1.6369, "step": 211800 }, { "epoch": 0.86, "grad_norm": 4.063564300537109, "learning_rate": 0.0002, "loss": 1.6325, "step": 211810 }, { "epoch": 0.86, "grad_norm": 3.025038719177246, "learning_rate": 0.0002, "loss": 1.626, "step": 211820 }, { "epoch": 0.86, "grad_norm": 3.266305685043335, "learning_rate": 0.0002, "loss": 1.4819, "step": 211830 }, { "epoch": 0.86, "grad_norm": 2.0210471153259277, "learning_rate": 0.0002, "loss": 1.5574, "step": 211840 }, { "epoch": 0.86, "grad_norm": 2.4047951698303223, "learning_rate": 0.0002, "loss": 1.677, "step": 211850 }, { "epoch": 0.86, "grad_norm": 4.72614860534668, "learning_rate": 0.0002, "loss": 1.3009, "step": 211860 }, { "epoch": 0.86, "grad_norm": 2.791609048843384, "learning_rate": 0.0002, "loss": 1.6121, "step": 211870 }, { "epoch": 0.86, "grad_norm": 1.6300941705703735, "learning_rate": 0.0002, "loss": 1.3978, "step": 211880 }, { "epoch": 0.86, "grad_norm": 3.896740674972534, "learning_rate": 0.0002, "loss": 1.7515, "step": 211890 }, { "epoch": 0.86, "grad_norm": 5.4402008056640625, "learning_rate": 0.0002, "loss": 1.5273, "step": 211900 }, { "epoch": 0.86, "grad_norm": 3.3164680004119873, "learning_rate": 0.0002, "loss": 1.4599, "step": 211910 }, { "epoch": 0.86, "grad_norm": 2.60496187210083, "learning_rate": 0.0002, "loss": 1.783, "step": 211920 }, { "epoch": 0.86, "grad_norm": 3.879725933074951, "learning_rate": 0.0002, "loss": 1.614, "step": 211930 }, { "epoch": 0.86, "grad_norm": 2.075228691101074, "learning_rate": 0.0002, "loss": 1.3959, "step": 211940 }, { "epoch": 0.86, "grad_norm": 1.5874991416931152, "learning_rate": 0.0002, "loss": 1.706, "step": 211950 }, { "epoch": 0.86, "grad_norm": 3.778256416320801, "learning_rate": 0.0002, "loss": 1.7266, "step": 211960 }, { "epoch": 0.86, "grad_norm": 3.1500372886657715, "learning_rate": 0.0002, "loss": 1.4111, "step": 211970 }, { "epoch": 0.86, "grad_norm": 1.6149067878723145, "learning_rate": 0.0002, "loss": 1.4445, "step": 211980 }, { "epoch": 0.86, "grad_norm": 3.525136709213257, "learning_rate": 0.0002, "loss": 1.5312, "step": 211990 }, { "epoch": 0.86, "grad_norm": 1.6771090030670166, "learning_rate": 0.0002, "loss": 1.4338, "step": 212000 }, { "epoch": 0.86, "grad_norm": 2.3990395069122314, "learning_rate": 0.0002, "loss": 1.4764, "step": 212010 }, { "epoch": 0.86, "grad_norm": 2.80077862739563, "learning_rate": 0.0002, "loss": 1.6091, "step": 212020 }, { "epoch": 0.86, "grad_norm": 3.5462775230407715, "learning_rate": 0.0002, "loss": 1.5846, "step": 212030 }, { "epoch": 0.86, "grad_norm": 3.193877696990967, "learning_rate": 0.0002, "loss": 1.6853, "step": 212040 }, { "epoch": 0.86, "grad_norm": 2.020509719848633, "learning_rate": 0.0002, "loss": 1.6156, "step": 212050 }, { "epoch": 0.86, "grad_norm": 3.694467306137085, "learning_rate": 0.0002, "loss": 1.3883, "step": 212060 }, { "epoch": 0.86, "grad_norm": 2.5064549446105957, "learning_rate": 0.0002, "loss": 1.4476, "step": 212070 }, { "epoch": 0.86, "grad_norm": 2.6364262104034424, "learning_rate": 0.0002, "loss": 1.6465, "step": 212080 }, { "epoch": 0.86, "grad_norm": 3.230442523956299, "learning_rate": 0.0002, "loss": 1.7594, "step": 212090 }, { "epoch": 0.86, "grad_norm": 2.5305066108703613, "learning_rate": 0.0002, "loss": 1.5113, "step": 212100 }, { "epoch": 0.86, "grad_norm": 3.5004522800445557, "learning_rate": 0.0002, "loss": 1.4181, "step": 212110 }, { "epoch": 0.86, "grad_norm": 1.7317471504211426, "learning_rate": 0.0002, "loss": 1.5477, "step": 212120 }, { "epoch": 0.86, "grad_norm": 2.7140321731567383, "learning_rate": 0.0002, "loss": 1.6095, "step": 212130 }, { "epoch": 0.86, "grad_norm": 3.6648752689361572, "learning_rate": 0.0002, "loss": 1.9632, "step": 212140 }, { "epoch": 0.86, "grad_norm": 3.639519453048706, "learning_rate": 0.0002, "loss": 1.5106, "step": 212150 }, { "epoch": 0.86, "grad_norm": 2.944078207015991, "learning_rate": 0.0002, "loss": 1.2654, "step": 212160 }, { "epoch": 0.86, "grad_norm": 2.735379219055176, "learning_rate": 0.0002, "loss": 1.4224, "step": 212170 }, { "epoch": 0.86, "grad_norm": 2.8604252338409424, "learning_rate": 0.0002, "loss": 1.4907, "step": 212180 }, { "epoch": 0.86, "grad_norm": 3.1589415073394775, "learning_rate": 0.0002, "loss": 1.7673, "step": 212190 }, { "epoch": 0.86, "grad_norm": 4.482802867889404, "learning_rate": 0.0002, "loss": 1.6329, "step": 212200 }, { "epoch": 0.86, "grad_norm": 3.2519421577453613, "learning_rate": 0.0002, "loss": 1.6483, "step": 212210 }, { "epoch": 0.86, "grad_norm": 4.889910697937012, "learning_rate": 0.0002, "loss": 1.6313, "step": 212220 }, { "epoch": 0.86, "grad_norm": 3.1572325229644775, "learning_rate": 0.0002, "loss": 1.8088, "step": 212230 }, { "epoch": 0.86, "grad_norm": 2.191937208175659, "learning_rate": 0.0002, "loss": 1.449, "step": 212240 }, { "epoch": 0.86, "grad_norm": 2.121729612350464, "learning_rate": 0.0002, "loss": 1.7065, "step": 212250 }, { "epoch": 0.86, "grad_norm": 2.3683669567108154, "learning_rate": 0.0002, "loss": 1.2759, "step": 212260 }, { "epoch": 0.86, "grad_norm": 2.97607159614563, "learning_rate": 0.0002, "loss": 1.7143, "step": 212270 }, { "epoch": 0.86, "grad_norm": 4.044508934020996, "learning_rate": 0.0002, "loss": 1.7224, "step": 212280 }, { "epoch": 0.86, "grad_norm": 2.152559518814087, "learning_rate": 0.0002, "loss": 1.6386, "step": 212290 }, { "epoch": 0.86, "grad_norm": 3.8022894859313965, "learning_rate": 0.0002, "loss": 1.6882, "step": 212300 }, { "epoch": 0.86, "grad_norm": 5.38280725479126, "learning_rate": 0.0002, "loss": 1.7987, "step": 212310 }, { "epoch": 0.86, "grad_norm": 2.9865169525146484, "learning_rate": 0.0002, "loss": 1.4979, "step": 212320 }, { "epoch": 0.86, "grad_norm": 4.7628583908081055, "learning_rate": 0.0002, "loss": 1.6419, "step": 212330 }, { "epoch": 0.86, "grad_norm": 2.4045655727386475, "learning_rate": 0.0002, "loss": 1.1304, "step": 212340 }, { "epoch": 0.86, "grad_norm": 3.7171854972839355, "learning_rate": 0.0002, "loss": 1.6968, "step": 212350 }, { "epoch": 0.86, "grad_norm": 2.283510208129883, "learning_rate": 0.0002, "loss": 1.6156, "step": 212360 }, { "epoch": 0.86, "grad_norm": 3.3002963066101074, "learning_rate": 0.0002, "loss": 1.5442, "step": 212370 }, { "epoch": 0.86, "grad_norm": 2.977006435394287, "learning_rate": 0.0002, "loss": 1.5033, "step": 212380 }, { "epoch": 0.86, "grad_norm": 4.018154621124268, "learning_rate": 0.0002, "loss": 1.777, "step": 212390 }, { "epoch": 0.86, "grad_norm": 2.903618335723877, "learning_rate": 0.0002, "loss": 1.7842, "step": 212400 }, { "epoch": 0.86, "grad_norm": 3.628965139389038, "learning_rate": 0.0002, "loss": 1.5775, "step": 212410 }, { "epoch": 0.86, "grad_norm": 2.7969560623168945, "learning_rate": 0.0002, "loss": 1.6161, "step": 212420 }, { "epoch": 0.86, "grad_norm": 2.9903106689453125, "learning_rate": 0.0002, "loss": 1.6162, "step": 212430 }, { "epoch": 0.86, "grad_norm": 2.775766372680664, "learning_rate": 0.0002, "loss": 1.8165, "step": 212440 }, { "epoch": 0.86, "grad_norm": 2.915005922317505, "learning_rate": 0.0002, "loss": 1.5658, "step": 212450 }, { "epoch": 0.86, "grad_norm": 3.815312623977661, "learning_rate": 0.0002, "loss": 1.5162, "step": 212460 }, { "epoch": 0.86, "grad_norm": 2.016362428665161, "learning_rate": 0.0002, "loss": 1.609, "step": 212470 }, { "epoch": 0.86, "grad_norm": 3.3935179710388184, "learning_rate": 0.0002, "loss": 1.6704, "step": 212480 }, { "epoch": 0.87, "grad_norm": 2.101421356201172, "learning_rate": 0.0002, "loss": 1.5279, "step": 212490 }, { "epoch": 0.87, "grad_norm": 4.262604236602783, "learning_rate": 0.0002, "loss": 1.6215, "step": 212500 }, { "epoch": 0.87, "grad_norm": 2.530515670776367, "learning_rate": 0.0002, "loss": 1.6365, "step": 212510 }, { "epoch": 0.87, "grad_norm": 2.507047414779663, "learning_rate": 0.0002, "loss": 1.7348, "step": 212520 }, { "epoch": 0.87, "grad_norm": 3.405485153198242, "learning_rate": 0.0002, "loss": 1.5668, "step": 212530 }, { "epoch": 0.87, "grad_norm": 1.3131016492843628, "learning_rate": 0.0002, "loss": 1.5926, "step": 212540 }, { "epoch": 0.87, "grad_norm": 2.858475923538208, "learning_rate": 0.0002, "loss": 1.7169, "step": 212550 }, { "epoch": 0.87, "grad_norm": 2.912485361099243, "learning_rate": 0.0002, "loss": 1.4877, "step": 212560 }, { "epoch": 0.87, "grad_norm": 1.9494189023971558, "learning_rate": 0.0002, "loss": 1.4687, "step": 212570 }, { "epoch": 0.87, "grad_norm": 2.1136043071746826, "learning_rate": 0.0002, "loss": 1.5579, "step": 212580 }, { "epoch": 0.87, "grad_norm": 4.826011657714844, "learning_rate": 0.0002, "loss": 1.6011, "step": 212590 }, { "epoch": 0.87, "grad_norm": 3.204171895980835, "learning_rate": 0.0002, "loss": 1.5859, "step": 212600 }, { "epoch": 0.87, "grad_norm": 2.4420981407165527, "learning_rate": 0.0002, "loss": 1.7206, "step": 212610 }, { "epoch": 0.87, "grad_norm": 2.362748622894287, "learning_rate": 0.0002, "loss": 1.7635, "step": 212620 }, { "epoch": 0.87, "grad_norm": 3.385098695755005, "learning_rate": 0.0002, "loss": 1.7401, "step": 212630 }, { "epoch": 0.87, "grad_norm": 1.6924368143081665, "learning_rate": 0.0002, "loss": 1.6689, "step": 212640 }, { "epoch": 0.87, "grad_norm": 2.6881930828094482, "learning_rate": 0.0002, "loss": 1.4016, "step": 212650 }, { "epoch": 0.87, "grad_norm": 3.1812307834625244, "learning_rate": 0.0002, "loss": 1.616, "step": 212660 }, { "epoch": 0.87, "grad_norm": 2.4594340324401855, "learning_rate": 0.0002, "loss": 1.4622, "step": 212670 }, { "epoch": 0.87, "grad_norm": 3.9204916954040527, "learning_rate": 0.0002, "loss": 1.4049, "step": 212680 }, { "epoch": 0.87, "grad_norm": 2.8177034854888916, "learning_rate": 0.0002, "loss": 1.7859, "step": 212690 }, { "epoch": 0.87, "grad_norm": 3.2650578022003174, "learning_rate": 0.0002, "loss": 1.5736, "step": 212700 }, { "epoch": 0.87, "grad_norm": 3.5404486656188965, "learning_rate": 0.0002, "loss": 1.7425, "step": 212710 }, { "epoch": 0.87, "grad_norm": 2.6597182750701904, "learning_rate": 0.0002, "loss": 1.6154, "step": 212720 }, { "epoch": 0.87, "grad_norm": 2.8536369800567627, "learning_rate": 0.0002, "loss": 1.4003, "step": 212730 }, { "epoch": 0.87, "grad_norm": 3.3027422428131104, "learning_rate": 0.0002, "loss": 1.3195, "step": 212740 }, { "epoch": 0.87, "grad_norm": 4.289485931396484, "learning_rate": 0.0002, "loss": 1.6904, "step": 212750 }, { "epoch": 0.87, "grad_norm": 3.404447555541992, "learning_rate": 0.0002, "loss": 1.5978, "step": 212760 }, { "epoch": 0.87, "grad_norm": 4.017838478088379, "learning_rate": 0.0002, "loss": 1.2876, "step": 212770 }, { "epoch": 0.87, "grad_norm": 1.7838910818099976, "learning_rate": 0.0002, "loss": 1.5123, "step": 212780 }, { "epoch": 0.87, "grad_norm": 3.170180320739746, "learning_rate": 0.0002, "loss": 1.5525, "step": 212790 }, { "epoch": 0.87, "grad_norm": 2.6555511951446533, "learning_rate": 0.0002, "loss": 1.5846, "step": 212800 }, { "epoch": 0.87, "grad_norm": 3.122833013534546, "learning_rate": 0.0002, "loss": 1.5338, "step": 212810 }, { "epoch": 0.87, "grad_norm": 4.1702189445495605, "learning_rate": 0.0002, "loss": 1.6318, "step": 212820 }, { "epoch": 0.87, "grad_norm": 3.1384706497192383, "learning_rate": 0.0002, "loss": 1.3771, "step": 212830 }, { "epoch": 0.87, "grad_norm": 2.197861671447754, "learning_rate": 0.0002, "loss": 1.7287, "step": 212840 }, { "epoch": 0.87, "grad_norm": 3.790940523147583, "learning_rate": 0.0002, "loss": 1.6055, "step": 212850 }, { "epoch": 0.87, "grad_norm": 2.373924493789673, "learning_rate": 0.0002, "loss": 1.5665, "step": 212860 }, { "epoch": 0.87, "grad_norm": 3.682633399963379, "learning_rate": 0.0002, "loss": 1.4423, "step": 212870 }, { "epoch": 0.87, "grad_norm": 3.647024154663086, "learning_rate": 0.0002, "loss": 1.4576, "step": 212880 }, { "epoch": 0.87, "grad_norm": 3.647411346435547, "learning_rate": 0.0002, "loss": 1.5434, "step": 212890 }, { "epoch": 0.87, "grad_norm": 4.225340366363525, "learning_rate": 0.0002, "loss": 1.3894, "step": 212900 }, { "epoch": 0.87, "grad_norm": 2.993344783782959, "learning_rate": 0.0002, "loss": 1.4034, "step": 212910 }, { "epoch": 0.87, "grad_norm": 3.333561897277832, "learning_rate": 0.0002, "loss": 1.4739, "step": 212920 }, { "epoch": 0.87, "grad_norm": 3.254970073699951, "learning_rate": 0.0002, "loss": 1.4888, "step": 212930 }, { "epoch": 0.87, "grad_norm": 2.446671962738037, "learning_rate": 0.0002, "loss": 1.6068, "step": 212940 }, { "epoch": 0.87, "grad_norm": 4.738697052001953, "learning_rate": 0.0002, "loss": 1.5514, "step": 212950 }, { "epoch": 0.87, "grad_norm": 1.5489526987075806, "learning_rate": 0.0002, "loss": 1.6483, "step": 212960 }, { "epoch": 0.87, "grad_norm": 2.0401830673217773, "learning_rate": 0.0002, "loss": 1.6258, "step": 212970 }, { "epoch": 0.87, "grad_norm": 4.106695175170898, "learning_rate": 0.0002, "loss": 1.7188, "step": 212980 }, { "epoch": 0.87, "grad_norm": 2.9722182750701904, "learning_rate": 0.0002, "loss": 1.5614, "step": 212990 }, { "epoch": 0.87, "grad_norm": 2.0845654010772705, "learning_rate": 0.0002, "loss": 1.323, "step": 213000 }, { "epoch": 0.87, "grad_norm": 1.5960334539413452, "learning_rate": 0.0002, "loss": 1.4945, "step": 213010 }, { "epoch": 0.87, "grad_norm": 3.456286668777466, "learning_rate": 0.0002, "loss": 1.5723, "step": 213020 }, { "epoch": 0.87, "grad_norm": 4.799542427062988, "learning_rate": 0.0002, "loss": 1.5921, "step": 213030 }, { "epoch": 0.87, "grad_norm": 1.9273006916046143, "learning_rate": 0.0002, "loss": 1.1365, "step": 213040 }, { "epoch": 0.87, "grad_norm": 2.7167000770568848, "learning_rate": 0.0002, "loss": 1.8302, "step": 213050 }, { "epoch": 0.87, "grad_norm": 3.350252151489258, "learning_rate": 0.0002, "loss": 1.5134, "step": 213060 }, { "epoch": 0.87, "grad_norm": 2.888495683670044, "learning_rate": 0.0002, "loss": 1.8204, "step": 213070 }, { "epoch": 0.87, "grad_norm": 2.8537421226501465, "learning_rate": 0.0002, "loss": 1.544, "step": 213080 }, { "epoch": 0.87, "grad_norm": 2.75579833984375, "learning_rate": 0.0002, "loss": 1.4861, "step": 213090 }, { "epoch": 0.87, "grad_norm": 2.832447052001953, "learning_rate": 0.0002, "loss": 1.7564, "step": 213100 }, { "epoch": 0.87, "grad_norm": 3.591315984725952, "learning_rate": 0.0002, "loss": 1.7586, "step": 213110 }, { "epoch": 0.87, "grad_norm": 5.224267482757568, "learning_rate": 0.0002, "loss": 1.7007, "step": 213120 }, { "epoch": 0.87, "grad_norm": 1.86660635471344, "learning_rate": 0.0002, "loss": 1.4011, "step": 213130 }, { "epoch": 0.87, "grad_norm": 2.3412082195281982, "learning_rate": 0.0002, "loss": 1.4438, "step": 213140 }, { "epoch": 0.87, "grad_norm": 3.4519073963165283, "learning_rate": 0.0002, "loss": 1.4505, "step": 213150 }, { "epoch": 0.87, "grad_norm": 2.730858564376831, "learning_rate": 0.0002, "loss": 1.5135, "step": 213160 }, { "epoch": 0.87, "grad_norm": 6.140113830566406, "learning_rate": 0.0002, "loss": 1.7137, "step": 213170 }, { "epoch": 0.87, "grad_norm": 2.9472193717956543, "learning_rate": 0.0002, "loss": 1.6889, "step": 213180 }, { "epoch": 0.87, "grad_norm": 1.9544799327850342, "learning_rate": 0.0002, "loss": 1.5467, "step": 213190 }, { "epoch": 0.87, "grad_norm": 3.9460811614990234, "learning_rate": 0.0002, "loss": 1.5645, "step": 213200 }, { "epoch": 0.87, "grad_norm": 2.259544610977173, "learning_rate": 0.0002, "loss": 1.4968, "step": 213210 }, { "epoch": 0.87, "grad_norm": 6.1844353675842285, "learning_rate": 0.0002, "loss": 1.5639, "step": 213220 }, { "epoch": 0.87, "grad_norm": 1.920746922492981, "learning_rate": 0.0002, "loss": 1.6527, "step": 213230 }, { "epoch": 0.87, "grad_norm": 2.159586191177368, "learning_rate": 0.0002, "loss": 1.7571, "step": 213240 }, { "epoch": 0.87, "grad_norm": 4.579017639160156, "learning_rate": 0.0002, "loss": 1.4165, "step": 213250 }, { "epoch": 0.87, "grad_norm": 3.869565010070801, "learning_rate": 0.0002, "loss": 1.488, "step": 213260 }, { "epoch": 0.87, "grad_norm": 2.712344169616699, "learning_rate": 0.0002, "loss": 1.5503, "step": 213270 }, { "epoch": 0.87, "grad_norm": 2.098749876022339, "learning_rate": 0.0002, "loss": 1.6787, "step": 213280 }, { "epoch": 0.87, "grad_norm": 2.6814262866973877, "learning_rate": 0.0002, "loss": 1.5566, "step": 213290 }, { "epoch": 0.87, "grad_norm": 2.8837995529174805, "learning_rate": 0.0002, "loss": 1.689, "step": 213300 }, { "epoch": 0.87, "grad_norm": 3.6097412109375, "learning_rate": 0.0002, "loss": 1.6374, "step": 213310 }, { "epoch": 0.87, "grad_norm": 4.129022598266602, "learning_rate": 0.0002, "loss": 1.5926, "step": 213320 }, { "epoch": 0.87, "grad_norm": 3.213148832321167, "learning_rate": 0.0002, "loss": 1.7197, "step": 213330 }, { "epoch": 0.87, "grad_norm": 4.354250907897949, "learning_rate": 0.0002, "loss": 1.6159, "step": 213340 }, { "epoch": 0.87, "grad_norm": 2.3116676807403564, "learning_rate": 0.0002, "loss": 1.431, "step": 213350 }, { "epoch": 0.87, "grad_norm": 2.852776288986206, "learning_rate": 0.0002, "loss": 1.47, "step": 213360 }, { "epoch": 0.87, "grad_norm": 3.964733123779297, "learning_rate": 0.0002, "loss": 1.5632, "step": 213370 }, { "epoch": 0.87, "grad_norm": 5.400793075561523, "learning_rate": 0.0002, "loss": 1.6088, "step": 213380 }, { "epoch": 0.87, "grad_norm": 3.1351053714752197, "learning_rate": 0.0002, "loss": 1.6358, "step": 213390 }, { "epoch": 0.87, "grad_norm": 3.260429620742798, "learning_rate": 0.0002, "loss": 1.5496, "step": 213400 }, { "epoch": 0.87, "grad_norm": 3.853990077972412, "learning_rate": 0.0002, "loss": 1.4295, "step": 213410 }, { "epoch": 0.87, "grad_norm": 3.7912709712982178, "learning_rate": 0.0002, "loss": 1.3485, "step": 213420 }, { "epoch": 0.87, "grad_norm": 2.0932226181030273, "learning_rate": 0.0002, "loss": 1.7566, "step": 213430 }, { "epoch": 0.87, "grad_norm": 3.6242754459381104, "learning_rate": 0.0002, "loss": 1.7963, "step": 213440 }, { "epoch": 0.87, "grad_norm": 2.5935471057891846, "learning_rate": 0.0002, "loss": 1.5344, "step": 213450 }, { "epoch": 0.87, "grad_norm": 2.6776442527770996, "learning_rate": 0.0002, "loss": 1.2373, "step": 213460 }, { "epoch": 0.87, "grad_norm": 2.217304229736328, "learning_rate": 0.0002, "loss": 1.6757, "step": 213470 }, { "epoch": 0.87, "grad_norm": 2.1864640712738037, "learning_rate": 0.0002, "loss": 1.525, "step": 213480 }, { "epoch": 0.87, "grad_norm": 2.203413486480713, "learning_rate": 0.0002, "loss": 1.588, "step": 213490 }, { "epoch": 0.87, "grad_norm": 3.254147529602051, "learning_rate": 0.0002, "loss": 1.5998, "step": 213500 }, { "epoch": 0.87, "grad_norm": 2.9300503730773926, "learning_rate": 0.0002, "loss": 1.5678, "step": 213510 }, { "epoch": 0.87, "grad_norm": 2.540937662124634, "learning_rate": 0.0002, "loss": 1.6489, "step": 213520 }, { "epoch": 0.87, "grad_norm": 3.026468276977539, "learning_rate": 0.0002, "loss": 1.5975, "step": 213530 }, { "epoch": 0.87, "grad_norm": 4.295660972595215, "learning_rate": 0.0002, "loss": 1.6433, "step": 213540 }, { "epoch": 0.87, "grad_norm": 3.159491777420044, "learning_rate": 0.0002, "loss": 1.6247, "step": 213550 }, { "epoch": 0.87, "grad_norm": 2.868217706680298, "learning_rate": 0.0002, "loss": 1.6886, "step": 213560 }, { "epoch": 0.87, "grad_norm": 3.5568056106567383, "learning_rate": 0.0002, "loss": 1.6967, "step": 213570 }, { "epoch": 0.87, "grad_norm": 4.017082214355469, "learning_rate": 0.0002, "loss": 1.5093, "step": 213580 }, { "epoch": 0.87, "grad_norm": 1.9622893333435059, "learning_rate": 0.0002, "loss": 1.625, "step": 213590 }, { "epoch": 0.87, "grad_norm": 3.5903286933898926, "learning_rate": 0.0002, "loss": 1.7556, "step": 213600 }, { "epoch": 0.87, "grad_norm": 2.748425006866455, "learning_rate": 0.0002, "loss": 1.6058, "step": 213610 }, { "epoch": 0.87, "grad_norm": 4.4995927810668945, "learning_rate": 0.0002, "loss": 1.4759, "step": 213620 }, { "epoch": 0.87, "grad_norm": 2.840902805328369, "learning_rate": 0.0002, "loss": 1.7145, "step": 213630 }, { "epoch": 0.87, "grad_norm": 3.9994869232177734, "learning_rate": 0.0002, "loss": 1.4936, "step": 213640 }, { "epoch": 0.87, "grad_norm": 2.7267277240753174, "learning_rate": 0.0002, "loss": 1.6205, "step": 213650 }, { "epoch": 0.87, "grad_norm": 2.0520436763763428, "learning_rate": 0.0002, "loss": 1.5057, "step": 213660 }, { "epoch": 0.87, "grad_norm": 2.0002052783966064, "learning_rate": 0.0002, "loss": 1.5114, "step": 213670 }, { "epoch": 0.87, "grad_norm": 3.059448003768921, "learning_rate": 0.0002, "loss": 1.468, "step": 213680 }, { "epoch": 0.87, "grad_norm": 4.071458339691162, "learning_rate": 0.0002, "loss": 1.6041, "step": 213690 }, { "epoch": 0.87, "grad_norm": 2.613060712814331, "learning_rate": 0.0002, "loss": 1.616, "step": 213700 }, { "epoch": 0.87, "grad_norm": 6.054708003997803, "learning_rate": 0.0002, "loss": 1.5754, "step": 213710 }, { "epoch": 0.87, "grad_norm": 3.0962820053100586, "learning_rate": 0.0002, "loss": 1.4833, "step": 213720 }, { "epoch": 0.87, "grad_norm": 4.670351505279541, "learning_rate": 0.0002, "loss": 1.7671, "step": 213730 }, { "epoch": 0.87, "grad_norm": 2.900961399078369, "learning_rate": 0.0002, "loss": 1.6258, "step": 213740 }, { "epoch": 0.87, "grad_norm": 2.5193116664886475, "learning_rate": 0.0002, "loss": 1.5969, "step": 213750 }, { "epoch": 0.87, "grad_norm": 3.7339282035827637, "learning_rate": 0.0002, "loss": 1.5567, "step": 213760 }, { "epoch": 0.87, "grad_norm": 3.0789833068847656, "learning_rate": 0.0002, "loss": 1.4774, "step": 213770 }, { "epoch": 0.87, "grad_norm": 2.012216567993164, "learning_rate": 0.0002, "loss": 1.6712, "step": 213780 }, { "epoch": 0.87, "grad_norm": 5.597161769866943, "learning_rate": 0.0002, "loss": 1.6572, "step": 213790 }, { "epoch": 0.87, "grad_norm": 4.0488505363464355, "learning_rate": 0.0002, "loss": 1.6181, "step": 213800 }, { "epoch": 0.87, "grad_norm": 2.2713623046875, "learning_rate": 0.0002, "loss": 1.5546, "step": 213810 }, { "epoch": 0.87, "grad_norm": 3.6162571907043457, "learning_rate": 0.0002, "loss": 1.5311, "step": 213820 }, { "epoch": 0.87, "grad_norm": 9.192262649536133, "learning_rate": 0.0002, "loss": 1.7035, "step": 213830 }, { "epoch": 0.87, "grad_norm": 3.2860536575317383, "learning_rate": 0.0002, "loss": 1.518, "step": 213840 }, { "epoch": 0.87, "grad_norm": 4.270340442657471, "learning_rate": 0.0002, "loss": 1.8194, "step": 213850 }, { "epoch": 0.87, "grad_norm": 2.647444725036621, "learning_rate": 0.0002, "loss": 1.559, "step": 213860 }, { "epoch": 0.87, "grad_norm": 2.574906587600708, "learning_rate": 0.0002, "loss": 1.3622, "step": 213870 }, { "epoch": 0.87, "grad_norm": 2.612081527709961, "learning_rate": 0.0002, "loss": 1.5015, "step": 213880 }, { "epoch": 0.87, "grad_norm": 3.226367473602295, "learning_rate": 0.0002, "loss": 1.5963, "step": 213890 }, { "epoch": 0.87, "grad_norm": 2.3890252113342285, "learning_rate": 0.0002, "loss": 1.6299, "step": 213900 }, { "epoch": 0.87, "grad_norm": 5.119050979614258, "learning_rate": 0.0002, "loss": 1.8736, "step": 213910 }, { "epoch": 0.87, "grad_norm": 2.275895118713379, "learning_rate": 0.0002, "loss": 1.4569, "step": 213920 }, { "epoch": 0.87, "grad_norm": 2.328516721725464, "learning_rate": 0.0002, "loss": 1.417, "step": 213930 }, { "epoch": 0.87, "grad_norm": 3.684392213821411, "learning_rate": 0.0002, "loss": 1.7011, "step": 213940 }, { "epoch": 0.87, "grad_norm": 2.7309720516204834, "learning_rate": 0.0002, "loss": 1.5989, "step": 213950 }, { "epoch": 0.87, "grad_norm": 3.571739912033081, "learning_rate": 0.0002, "loss": 1.3411, "step": 213960 }, { "epoch": 0.87, "grad_norm": 5.550792217254639, "learning_rate": 0.0002, "loss": 1.5145, "step": 213970 }, { "epoch": 0.87, "grad_norm": 4.18527364730835, "learning_rate": 0.0002, "loss": 1.5189, "step": 213980 }, { "epoch": 0.87, "grad_norm": 2.831754684448242, "learning_rate": 0.0002, "loss": 1.6, "step": 213990 }, { "epoch": 0.87, "grad_norm": 3.371079921722412, "learning_rate": 0.0002, "loss": 1.6066, "step": 214000 }, { "epoch": 0.87, "grad_norm": 4.210103511810303, "learning_rate": 0.0002, "loss": 1.6759, "step": 214010 }, { "epoch": 0.87, "grad_norm": 1.7147862911224365, "learning_rate": 0.0002, "loss": 1.6121, "step": 214020 }, { "epoch": 0.87, "grad_norm": 3.314901351928711, "learning_rate": 0.0002, "loss": 1.6709, "step": 214030 }, { "epoch": 0.87, "grad_norm": 2.176018238067627, "learning_rate": 0.0002, "loss": 1.514, "step": 214040 }, { "epoch": 0.87, "grad_norm": 2.789649724960327, "learning_rate": 0.0002, "loss": 1.5676, "step": 214050 }, { "epoch": 0.87, "grad_norm": 3.2521047592163086, "learning_rate": 0.0002, "loss": 1.4461, "step": 214060 }, { "epoch": 0.87, "grad_norm": 2.9030871391296387, "learning_rate": 0.0002, "loss": 1.6677, "step": 214070 }, { "epoch": 0.87, "grad_norm": 3.417466878890991, "learning_rate": 0.0002, "loss": 1.5698, "step": 214080 }, { "epoch": 0.87, "grad_norm": 2.628873348236084, "learning_rate": 0.0002, "loss": 1.5223, "step": 214090 }, { "epoch": 0.87, "grad_norm": 2.7348153591156006, "learning_rate": 0.0002, "loss": 1.5264, "step": 214100 }, { "epoch": 0.87, "grad_norm": 3.1464812755584717, "learning_rate": 0.0002, "loss": 1.5484, "step": 214110 }, { "epoch": 0.87, "grad_norm": 3.0856146812438965, "learning_rate": 0.0002, "loss": 1.8951, "step": 214120 }, { "epoch": 0.87, "grad_norm": 2.351318359375, "learning_rate": 0.0002, "loss": 1.5679, "step": 214130 }, { "epoch": 0.87, "grad_norm": 1.5979468822479248, "learning_rate": 0.0002, "loss": 1.6219, "step": 214140 }, { "epoch": 0.87, "grad_norm": 3.7023441791534424, "learning_rate": 0.0002, "loss": 1.6766, "step": 214150 }, { "epoch": 0.87, "grad_norm": 4.21992826461792, "learning_rate": 0.0002, "loss": 1.3498, "step": 214160 }, { "epoch": 0.87, "grad_norm": 4.439578533172607, "learning_rate": 0.0002, "loss": 1.9062, "step": 214170 }, { "epoch": 0.87, "grad_norm": 3.359866142272949, "learning_rate": 0.0002, "loss": 1.8157, "step": 214180 }, { "epoch": 0.87, "grad_norm": 1.831061601638794, "learning_rate": 0.0002, "loss": 1.7566, "step": 214190 }, { "epoch": 0.87, "grad_norm": 3.4672937393188477, "learning_rate": 0.0002, "loss": 1.7712, "step": 214200 }, { "epoch": 0.87, "grad_norm": 3.105541467666626, "learning_rate": 0.0002, "loss": 1.5318, "step": 214210 }, { "epoch": 0.87, "grad_norm": 2.3139517307281494, "learning_rate": 0.0002, "loss": 1.6357, "step": 214220 }, { "epoch": 0.87, "grad_norm": 3.5264976024627686, "learning_rate": 0.0002, "loss": 1.8202, "step": 214230 }, { "epoch": 0.87, "grad_norm": 2.6989328861236572, "learning_rate": 0.0002, "loss": 1.7654, "step": 214240 }, { "epoch": 0.87, "grad_norm": 5.43340539932251, "learning_rate": 0.0002, "loss": 1.7498, "step": 214250 }, { "epoch": 0.87, "grad_norm": 3.8264617919921875, "learning_rate": 0.0002, "loss": 1.4583, "step": 214260 }, { "epoch": 0.87, "grad_norm": 4.367243766784668, "learning_rate": 0.0002, "loss": 1.6223, "step": 214270 }, { "epoch": 0.87, "grad_norm": 3.6260764598846436, "learning_rate": 0.0002, "loss": 1.8238, "step": 214280 }, { "epoch": 0.87, "grad_norm": 3.1791796684265137, "learning_rate": 0.0002, "loss": 1.458, "step": 214290 }, { "epoch": 0.87, "grad_norm": 3.2250583171844482, "learning_rate": 0.0002, "loss": 1.4105, "step": 214300 }, { "epoch": 0.87, "grad_norm": 4.0789408683776855, "learning_rate": 0.0002, "loss": 1.575, "step": 214310 }, { "epoch": 0.87, "grad_norm": 3.995284080505371, "learning_rate": 0.0002, "loss": 1.6068, "step": 214320 }, { "epoch": 0.87, "grad_norm": 2.461466073989868, "learning_rate": 0.0002, "loss": 1.517, "step": 214330 }, { "epoch": 0.87, "grad_norm": 3.9291460514068604, "learning_rate": 0.0002, "loss": 1.5683, "step": 214340 }, { "epoch": 0.87, "grad_norm": 3.250530242919922, "learning_rate": 0.0002, "loss": 1.571, "step": 214350 }, { "epoch": 0.87, "grad_norm": 2.906200647354126, "learning_rate": 0.0002, "loss": 1.585, "step": 214360 }, { "epoch": 0.87, "grad_norm": 3.227914810180664, "learning_rate": 0.0002, "loss": 1.6226, "step": 214370 }, { "epoch": 0.87, "grad_norm": 3.6683225631713867, "learning_rate": 0.0002, "loss": 1.5836, "step": 214380 }, { "epoch": 0.87, "grad_norm": 4.943340301513672, "learning_rate": 0.0002, "loss": 1.392, "step": 214390 }, { "epoch": 0.87, "grad_norm": 4.376307964324951, "learning_rate": 0.0002, "loss": 1.5473, "step": 214400 }, { "epoch": 0.87, "grad_norm": 3.0860910415649414, "learning_rate": 0.0002, "loss": 1.5098, "step": 214410 }, { "epoch": 0.87, "grad_norm": 2.3889551162719727, "learning_rate": 0.0002, "loss": 1.6516, "step": 214420 }, { "epoch": 0.87, "grad_norm": 3.9522809982299805, "learning_rate": 0.0002, "loss": 1.3502, "step": 214430 }, { "epoch": 0.87, "grad_norm": 2.9620227813720703, "learning_rate": 0.0002, "loss": 1.8078, "step": 214440 }, { "epoch": 0.87, "grad_norm": 2.3834760189056396, "learning_rate": 0.0002, "loss": 1.7345, "step": 214450 }, { "epoch": 0.87, "grad_norm": 1.9673194885253906, "learning_rate": 0.0002, "loss": 1.4076, "step": 214460 }, { "epoch": 0.87, "grad_norm": 5.139461517333984, "learning_rate": 0.0002, "loss": 1.742, "step": 214470 }, { "epoch": 0.87, "grad_norm": 5.03922700881958, "learning_rate": 0.0002, "loss": 1.6848, "step": 214480 }, { "epoch": 0.87, "grad_norm": 1.6813968420028687, "learning_rate": 0.0002, "loss": 1.5507, "step": 214490 }, { "epoch": 0.87, "grad_norm": 2.0900681018829346, "learning_rate": 0.0002, "loss": 1.5872, "step": 214500 }, { "epoch": 0.87, "grad_norm": 3.117602825164795, "learning_rate": 0.0002, "loss": 1.3942, "step": 214510 }, { "epoch": 0.87, "grad_norm": 3.6243553161621094, "learning_rate": 0.0002, "loss": 1.4357, "step": 214520 }, { "epoch": 0.87, "grad_norm": 2.5690221786499023, "learning_rate": 0.0002, "loss": 1.5914, "step": 214530 }, { "epoch": 0.87, "grad_norm": 1.6223233938217163, "learning_rate": 0.0002, "loss": 1.5463, "step": 214540 }, { "epoch": 0.87, "grad_norm": 2.9388487339019775, "learning_rate": 0.0002, "loss": 1.9079, "step": 214550 }, { "epoch": 0.87, "grad_norm": 1.838695764541626, "learning_rate": 0.0002, "loss": 1.4464, "step": 214560 }, { "epoch": 0.87, "grad_norm": 2.3431148529052734, "learning_rate": 0.0002, "loss": 1.4724, "step": 214570 }, { "epoch": 0.87, "grad_norm": 2.627843141555786, "learning_rate": 0.0002, "loss": 1.5259, "step": 214580 }, { "epoch": 0.87, "grad_norm": 4.07990026473999, "learning_rate": 0.0002, "loss": 1.7416, "step": 214590 }, { "epoch": 0.87, "grad_norm": 2.756290912628174, "learning_rate": 0.0002, "loss": 1.6132, "step": 214600 }, { "epoch": 0.87, "grad_norm": 4.358999729156494, "learning_rate": 0.0002, "loss": 1.4756, "step": 214610 }, { "epoch": 0.87, "grad_norm": 3.144841432571411, "learning_rate": 0.0002, "loss": 1.5449, "step": 214620 }, { "epoch": 0.87, "grad_norm": 3.8044087886810303, "learning_rate": 0.0002, "loss": 1.8288, "step": 214630 }, { "epoch": 0.87, "grad_norm": 1.9969375133514404, "learning_rate": 0.0002, "loss": 1.6518, "step": 214640 }, { "epoch": 0.87, "grad_norm": 3.406132936477661, "learning_rate": 0.0002, "loss": 1.5632, "step": 214650 }, { "epoch": 0.87, "grad_norm": 2.802267551422119, "learning_rate": 0.0002, "loss": 1.7083, "step": 214660 }, { "epoch": 0.87, "grad_norm": 1.3017855882644653, "learning_rate": 0.0002, "loss": 1.5419, "step": 214670 }, { "epoch": 0.87, "grad_norm": 2.7305610179901123, "learning_rate": 0.0002, "loss": 1.7093, "step": 214680 }, { "epoch": 0.87, "grad_norm": 2.625720500946045, "learning_rate": 0.0002, "loss": 1.6358, "step": 214690 }, { "epoch": 0.87, "grad_norm": 3.006542921066284, "learning_rate": 0.0002, "loss": 1.211, "step": 214700 }, { "epoch": 0.87, "grad_norm": 5.336878776550293, "learning_rate": 0.0002, "loss": 1.7255, "step": 214710 }, { "epoch": 0.87, "grad_norm": 8.4955472946167, "learning_rate": 0.0002, "loss": 1.4254, "step": 214720 }, { "epoch": 0.87, "grad_norm": 1.9490071535110474, "learning_rate": 0.0002, "loss": 1.466, "step": 214730 }, { "epoch": 0.87, "grad_norm": 2.023859977722168, "learning_rate": 0.0002, "loss": 1.5472, "step": 214740 }, { "epoch": 0.87, "grad_norm": 3.8659555912017822, "learning_rate": 0.0002, "loss": 1.6555, "step": 214750 }, { "epoch": 0.87, "grad_norm": 2.0840892791748047, "learning_rate": 0.0002, "loss": 1.6752, "step": 214760 }, { "epoch": 0.87, "grad_norm": 4.053767681121826, "learning_rate": 0.0002, "loss": 1.8941, "step": 214770 }, { "epoch": 0.87, "grad_norm": 2.3940274715423584, "learning_rate": 0.0002, "loss": 1.5737, "step": 214780 }, { "epoch": 0.87, "grad_norm": 2.697950601577759, "learning_rate": 0.0002, "loss": 1.3968, "step": 214790 }, { "epoch": 0.87, "grad_norm": 3.0153889656066895, "learning_rate": 0.0002, "loss": 1.7882, "step": 214800 }, { "epoch": 0.87, "grad_norm": 2.0310590267181396, "learning_rate": 0.0002, "loss": 1.5267, "step": 214810 }, { "epoch": 0.87, "grad_norm": 2.562936544418335, "learning_rate": 0.0002, "loss": 1.5573, "step": 214820 }, { "epoch": 0.87, "grad_norm": 2.915719985961914, "learning_rate": 0.0002, "loss": 1.6654, "step": 214830 }, { "epoch": 0.87, "grad_norm": 2.6132609844207764, "learning_rate": 0.0002, "loss": 1.6191, "step": 214840 }, { "epoch": 0.87, "grad_norm": 2.2241733074188232, "learning_rate": 0.0002, "loss": 1.3697, "step": 214850 }, { "epoch": 0.87, "grad_norm": 3.8811559677124023, "learning_rate": 0.0002, "loss": 1.558, "step": 214860 }, { "epoch": 0.87, "grad_norm": 3.008650302886963, "learning_rate": 0.0002, "loss": 1.4795, "step": 214870 }, { "epoch": 0.87, "grad_norm": 3.115194082260132, "learning_rate": 0.0002, "loss": 1.5226, "step": 214880 }, { "epoch": 0.87, "grad_norm": 1.6891567707061768, "learning_rate": 0.0002, "loss": 1.6694, "step": 214890 }, { "epoch": 0.87, "grad_norm": 3.058863401412964, "learning_rate": 0.0002, "loss": 1.589, "step": 214900 }, { "epoch": 0.87, "grad_norm": 3.795912265777588, "learning_rate": 0.0002, "loss": 1.6229, "step": 214910 }, { "epoch": 0.87, "grad_norm": 2.019773244857788, "learning_rate": 0.0002, "loss": 1.5023, "step": 214920 }, { "epoch": 0.87, "grad_norm": 2.6086316108703613, "learning_rate": 0.0002, "loss": 1.8204, "step": 214930 }, { "epoch": 0.88, "grad_norm": 3.324305772781372, "learning_rate": 0.0002, "loss": 1.5461, "step": 214940 }, { "epoch": 0.88, "grad_norm": 3.414278745651245, "learning_rate": 0.0002, "loss": 1.515, "step": 214950 }, { "epoch": 0.88, "grad_norm": 4.410077095031738, "learning_rate": 0.0002, "loss": 1.6852, "step": 214960 }, { "epoch": 0.88, "grad_norm": 3.4580764770507812, "learning_rate": 0.0002, "loss": 1.8204, "step": 214970 }, { "epoch": 0.88, "grad_norm": 2.672044277191162, "learning_rate": 0.0002, "loss": 1.6945, "step": 214980 }, { "epoch": 0.88, "grad_norm": 1.7196869850158691, "learning_rate": 0.0002, "loss": 1.7053, "step": 214990 }, { "epoch": 0.88, "grad_norm": 2.4784109592437744, "learning_rate": 0.0002, "loss": 1.4458, "step": 215000 }, { "epoch": 0.88, "grad_norm": 2.098165512084961, "learning_rate": 0.0002, "loss": 1.6328, "step": 215010 }, { "epoch": 0.88, "grad_norm": 2.573653221130371, "learning_rate": 0.0002, "loss": 1.6711, "step": 215020 }, { "epoch": 0.88, "grad_norm": 3.2249679565429688, "learning_rate": 0.0002, "loss": 1.4278, "step": 215030 }, { "epoch": 0.88, "grad_norm": 2.84441876411438, "learning_rate": 0.0002, "loss": 1.1967, "step": 215040 }, { "epoch": 0.88, "grad_norm": 2.271507978439331, "learning_rate": 0.0002, "loss": 1.4932, "step": 215050 }, { "epoch": 0.88, "grad_norm": 3.899319648742676, "learning_rate": 0.0002, "loss": 1.2997, "step": 215060 }, { "epoch": 0.88, "grad_norm": 3.7880289554595947, "learning_rate": 0.0002, "loss": 1.6029, "step": 215070 }, { "epoch": 0.88, "grad_norm": 3.7861104011535645, "learning_rate": 0.0002, "loss": 1.6697, "step": 215080 }, { "epoch": 0.88, "grad_norm": 2.9620044231414795, "learning_rate": 0.0002, "loss": 1.5497, "step": 215090 }, { "epoch": 0.88, "grad_norm": 2.816330671310425, "learning_rate": 0.0002, "loss": 1.5883, "step": 215100 }, { "epoch": 0.88, "grad_norm": 4.755077838897705, "learning_rate": 0.0002, "loss": 1.4908, "step": 215110 }, { "epoch": 0.88, "grad_norm": 6.760903358459473, "learning_rate": 0.0002, "loss": 1.5076, "step": 215120 }, { "epoch": 0.88, "grad_norm": 2.980006456375122, "learning_rate": 0.0002, "loss": 1.6492, "step": 215130 }, { "epoch": 0.88, "grad_norm": 3.1553752422332764, "learning_rate": 0.0002, "loss": 1.6556, "step": 215140 }, { "epoch": 0.88, "grad_norm": 2.033864736557007, "learning_rate": 0.0002, "loss": 1.5616, "step": 215150 }, { "epoch": 0.88, "grad_norm": 2.885761022567749, "learning_rate": 0.0002, "loss": 1.4068, "step": 215160 }, { "epoch": 0.88, "grad_norm": 2.6057229042053223, "learning_rate": 0.0002, "loss": 1.8763, "step": 215170 }, { "epoch": 0.88, "grad_norm": 2.0636978149414062, "learning_rate": 0.0002, "loss": 1.56, "step": 215180 }, { "epoch": 0.88, "grad_norm": 2.9369125366210938, "learning_rate": 0.0002, "loss": 1.524, "step": 215190 }, { "epoch": 0.88, "grad_norm": 2.916288375854492, "learning_rate": 0.0002, "loss": 1.4495, "step": 215200 }, { "epoch": 0.88, "grad_norm": 5.153289318084717, "learning_rate": 0.0002, "loss": 1.7724, "step": 215210 }, { "epoch": 0.88, "grad_norm": 3.890876054763794, "learning_rate": 0.0002, "loss": 1.7539, "step": 215220 }, { "epoch": 0.88, "grad_norm": 2.3441028594970703, "learning_rate": 0.0002, "loss": 1.234, "step": 215230 }, { "epoch": 0.88, "grad_norm": 2.279686212539673, "learning_rate": 0.0002, "loss": 1.6578, "step": 215240 }, { "epoch": 0.88, "grad_norm": 2.0332725048065186, "learning_rate": 0.0002, "loss": 1.3342, "step": 215250 }, { "epoch": 0.88, "grad_norm": 2.650373697280884, "learning_rate": 0.0002, "loss": 1.3477, "step": 215260 }, { "epoch": 0.88, "grad_norm": 2.5413708686828613, "learning_rate": 0.0002, "loss": 1.5813, "step": 215270 }, { "epoch": 0.88, "grad_norm": 2.399550676345825, "learning_rate": 0.0002, "loss": 1.4823, "step": 215280 }, { "epoch": 0.88, "grad_norm": 2.418809652328491, "learning_rate": 0.0002, "loss": 1.8438, "step": 215290 }, { "epoch": 0.88, "grad_norm": 3.9616212844848633, "learning_rate": 0.0002, "loss": 2.0082, "step": 215300 }, { "epoch": 0.88, "grad_norm": 4.3128180503845215, "learning_rate": 0.0002, "loss": 1.6066, "step": 215310 }, { "epoch": 0.88, "grad_norm": 4.731466770172119, "learning_rate": 0.0002, "loss": 1.5588, "step": 215320 }, { "epoch": 0.88, "grad_norm": 1.9367377758026123, "learning_rate": 0.0002, "loss": 1.597, "step": 215330 }, { "epoch": 0.88, "grad_norm": 3.6769354343414307, "learning_rate": 0.0002, "loss": 1.7089, "step": 215340 }, { "epoch": 0.88, "grad_norm": 2.8181614875793457, "learning_rate": 0.0002, "loss": 1.623, "step": 215350 }, { "epoch": 0.88, "grad_norm": 3.2665557861328125, "learning_rate": 0.0002, "loss": 1.4897, "step": 215360 }, { "epoch": 0.88, "grad_norm": 2.400042772293091, "learning_rate": 0.0002, "loss": 1.5364, "step": 215370 }, { "epoch": 0.88, "grad_norm": 3.302394390106201, "learning_rate": 0.0002, "loss": 1.6995, "step": 215380 }, { "epoch": 0.88, "grad_norm": 2.2993085384368896, "learning_rate": 0.0002, "loss": 1.5383, "step": 215390 }, { "epoch": 0.88, "grad_norm": 2.573169231414795, "learning_rate": 0.0002, "loss": 1.5606, "step": 215400 }, { "epoch": 0.88, "grad_norm": 3.813751459121704, "learning_rate": 0.0002, "loss": 1.5125, "step": 215410 }, { "epoch": 0.88, "grad_norm": 3.670968770980835, "learning_rate": 0.0002, "loss": 1.5907, "step": 215420 }, { "epoch": 0.88, "grad_norm": 4.373273849487305, "learning_rate": 0.0002, "loss": 1.4607, "step": 215430 }, { "epoch": 0.88, "grad_norm": 2.523989677429199, "learning_rate": 0.0002, "loss": 1.6053, "step": 215440 }, { "epoch": 0.88, "grad_norm": 1.642910361289978, "learning_rate": 0.0002, "loss": 1.512, "step": 215450 }, { "epoch": 0.88, "grad_norm": 2.0201313495635986, "learning_rate": 0.0002, "loss": 1.5167, "step": 215460 }, { "epoch": 0.88, "grad_norm": 2.221010208129883, "learning_rate": 0.0002, "loss": 1.4962, "step": 215470 }, { "epoch": 0.88, "grad_norm": 2.506551742553711, "learning_rate": 0.0002, "loss": 1.5868, "step": 215480 }, { "epoch": 0.88, "grad_norm": 2.573796272277832, "learning_rate": 0.0002, "loss": 1.6166, "step": 215490 }, { "epoch": 0.88, "grad_norm": 3.551518201828003, "learning_rate": 0.0002, "loss": 1.4629, "step": 215500 }, { "epoch": 0.88, "grad_norm": 1.6582096815109253, "learning_rate": 0.0002, "loss": 1.6381, "step": 215510 }, { "epoch": 0.88, "grad_norm": 2.562854290008545, "learning_rate": 0.0002, "loss": 1.6167, "step": 215520 }, { "epoch": 0.88, "grad_norm": 3.3559811115264893, "learning_rate": 0.0002, "loss": 1.4304, "step": 215530 }, { "epoch": 0.88, "grad_norm": 2.167332887649536, "learning_rate": 0.0002, "loss": 1.4449, "step": 215540 }, { "epoch": 0.88, "grad_norm": 2.9823176860809326, "learning_rate": 0.0002, "loss": 1.4722, "step": 215550 }, { "epoch": 0.88, "grad_norm": 2.241990804672241, "learning_rate": 0.0002, "loss": 1.7269, "step": 215560 }, { "epoch": 0.88, "grad_norm": 2.5695431232452393, "learning_rate": 0.0002, "loss": 1.5458, "step": 215570 }, { "epoch": 0.88, "grad_norm": 3.764014720916748, "learning_rate": 0.0002, "loss": 1.4845, "step": 215580 }, { "epoch": 0.88, "grad_norm": 3.3565914630889893, "learning_rate": 0.0002, "loss": 1.6915, "step": 215590 }, { "epoch": 0.88, "grad_norm": 2.6371800899505615, "learning_rate": 0.0002, "loss": 1.5577, "step": 215600 }, { "epoch": 0.88, "grad_norm": 4.017357349395752, "learning_rate": 0.0002, "loss": 1.7376, "step": 215610 }, { "epoch": 0.88, "grad_norm": 2.052442789077759, "learning_rate": 0.0002, "loss": 1.4604, "step": 215620 }, { "epoch": 0.88, "grad_norm": 5.751991271972656, "learning_rate": 0.0002, "loss": 1.5182, "step": 215630 }, { "epoch": 0.88, "grad_norm": 5.915976047515869, "learning_rate": 0.0002, "loss": 1.7031, "step": 215640 }, { "epoch": 0.88, "grad_norm": 2.683776378631592, "learning_rate": 0.0002, "loss": 1.3386, "step": 215650 }, { "epoch": 0.88, "grad_norm": 4.464101314544678, "learning_rate": 0.0002, "loss": 1.6737, "step": 215660 }, { "epoch": 0.88, "grad_norm": 2.185368537902832, "learning_rate": 0.0002, "loss": 1.8439, "step": 215670 }, { "epoch": 0.88, "grad_norm": 2.6222150325775146, "learning_rate": 0.0002, "loss": 1.7097, "step": 215680 }, { "epoch": 0.88, "grad_norm": 3.7066800594329834, "learning_rate": 0.0002, "loss": 1.622, "step": 215690 }, { "epoch": 0.88, "grad_norm": 2.4016168117523193, "learning_rate": 0.0002, "loss": 1.4302, "step": 215700 }, { "epoch": 0.88, "grad_norm": 2.9643688201904297, "learning_rate": 0.0002, "loss": 1.9635, "step": 215710 }, { "epoch": 0.88, "grad_norm": 8.743062019348145, "learning_rate": 0.0002, "loss": 1.5255, "step": 215720 }, { "epoch": 0.88, "grad_norm": 3.4052162170410156, "learning_rate": 0.0002, "loss": 1.5769, "step": 215730 }, { "epoch": 0.88, "grad_norm": 2.0488016605377197, "learning_rate": 0.0002, "loss": 1.6422, "step": 215740 }, { "epoch": 0.88, "grad_norm": 3.353914976119995, "learning_rate": 0.0002, "loss": 1.736, "step": 215750 }, { "epoch": 0.88, "grad_norm": 3.537705898284912, "learning_rate": 0.0002, "loss": 1.5745, "step": 215760 }, { "epoch": 0.88, "grad_norm": 3.0291266441345215, "learning_rate": 0.0002, "loss": 1.4963, "step": 215770 }, { "epoch": 0.88, "grad_norm": 2.835716485977173, "learning_rate": 0.0002, "loss": 1.77, "step": 215780 }, { "epoch": 0.88, "grad_norm": 3.4203250408172607, "learning_rate": 0.0002, "loss": 1.6578, "step": 215790 }, { "epoch": 0.88, "grad_norm": 4.241995811462402, "learning_rate": 0.0002, "loss": 1.4791, "step": 215800 }, { "epoch": 0.88, "grad_norm": 2.9064738750457764, "learning_rate": 0.0002, "loss": 1.7174, "step": 215810 }, { "epoch": 0.88, "grad_norm": 3.1303861141204834, "learning_rate": 0.0002, "loss": 1.6309, "step": 215820 }, { "epoch": 0.88, "grad_norm": 2.4212703704833984, "learning_rate": 0.0002, "loss": 1.642, "step": 215830 }, { "epoch": 0.88, "grad_norm": 3.106551170349121, "learning_rate": 0.0002, "loss": 1.4384, "step": 215840 }, { "epoch": 0.88, "grad_norm": 2.8168954849243164, "learning_rate": 0.0002, "loss": 1.4943, "step": 215850 }, { "epoch": 0.88, "grad_norm": 1.8274343013763428, "learning_rate": 0.0002, "loss": 1.5106, "step": 215860 }, { "epoch": 0.88, "grad_norm": 3.007903575897217, "learning_rate": 0.0002, "loss": 1.4524, "step": 215870 }, { "epoch": 0.88, "grad_norm": 2.915480613708496, "learning_rate": 0.0002, "loss": 1.6041, "step": 215880 }, { "epoch": 0.88, "grad_norm": 3.086873769760132, "learning_rate": 0.0002, "loss": 1.6264, "step": 215890 }, { "epoch": 0.88, "grad_norm": 2.1882760524749756, "learning_rate": 0.0002, "loss": 1.4646, "step": 215900 }, { "epoch": 0.88, "grad_norm": 4.68913459777832, "learning_rate": 0.0002, "loss": 1.8416, "step": 215910 }, { "epoch": 0.88, "grad_norm": 2.8733420372009277, "learning_rate": 0.0002, "loss": 1.7458, "step": 215920 }, { "epoch": 0.88, "grad_norm": 8.182145118713379, "learning_rate": 0.0002, "loss": 1.611, "step": 215930 }, { "epoch": 0.88, "grad_norm": 2.3864402770996094, "learning_rate": 0.0002, "loss": 1.6195, "step": 215940 }, { "epoch": 0.88, "grad_norm": 8.152215957641602, "learning_rate": 0.0002, "loss": 1.5918, "step": 215950 }, { "epoch": 0.88, "grad_norm": 3.1851396560668945, "learning_rate": 0.0002, "loss": 1.6008, "step": 215960 }, { "epoch": 0.88, "grad_norm": 2.0693459510803223, "learning_rate": 0.0002, "loss": 1.6628, "step": 215970 }, { "epoch": 0.88, "grad_norm": 2.3128674030303955, "learning_rate": 0.0002, "loss": 1.6855, "step": 215980 }, { "epoch": 0.88, "grad_norm": 3.256636619567871, "learning_rate": 0.0002, "loss": 1.4839, "step": 215990 }, { "epoch": 0.88, "grad_norm": 4.247089862823486, "learning_rate": 0.0002, "loss": 1.804, "step": 216000 }, { "epoch": 0.88, "grad_norm": 3.1564085483551025, "learning_rate": 0.0002, "loss": 1.4443, "step": 216010 }, { "epoch": 0.88, "grad_norm": 3.510958433151245, "learning_rate": 0.0002, "loss": 1.7, "step": 216020 }, { "epoch": 0.88, "grad_norm": 1.7790158987045288, "learning_rate": 0.0002, "loss": 1.7556, "step": 216030 }, { "epoch": 0.88, "grad_norm": 3.499199390411377, "learning_rate": 0.0002, "loss": 1.5983, "step": 216040 }, { "epoch": 0.88, "grad_norm": 2.6572418212890625, "learning_rate": 0.0002, "loss": 1.4319, "step": 216050 }, { "epoch": 0.88, "grad_norm": 2.1113123893737793, "learning_rate": 0.0002, "loss": 1.6532, "step": 216060 }, { "epoch": 0.88, "grad_norm": 2.478207588195801, "learning_rate": 0.0002, "loss": 1.4999, "step": 216070 }, { "epoch": 0.88, "grad_norm": 3.338073968887329, "learning_rate": 0.0002, "loss": 1.5737, "step": 216080 }, { "epoch": 0.88, "grad_norm": 3.6135501861572266, "learning_rate": 0.0002, "loss": 1.6012, "step": 216090 }, { "epoch": 0.88, "grad_norm": 3.667264699935913, "learning_rate": 0.0002, "loss": 1.4091, "step": 216100 }, { "epoch": 0.88, "grad_norm": 2.565420389175415, "learning_rate": 0.0002, "loss": 1.7752, "step": 216110 }, { "epoch": 0.88, "grad_norm": 4.2501959800720215, "learning_rate": 0.0002, "loss": 1.4948, "step": 216120 }, { "epoch": 0.88, "grad_norm": 4.248227596282959, "learning_rate": 0.0002, "loss": 1.5045, "step": 216130 }, { "epoch": 0.88, "grad_norm": 3.151904344558716, "learning_rate": 0.0002, "loss": 1.6192, "step": 216140 }, { "epoch": 0.88, "grad_norm": 2.1221604347229004, "learning_rate": 0.0002, "loss": 1.6597, "step": 216150 }, { "epoch": 0.88, "grad_norm": 7.017367839813232, "learning_rate": 0.0002, "loss": 1.3154, "step": 216160 }, { "epoch": 0.88, "grad_norm": 3.4650096893310547, "learning_rate": 0.0002, "loss": 1.6917, "step": 216170 }, { "epoch": 0.88, "grad_norm": 3.4144363403320312, "learning_rate": 0.0002, "loss": 1.5202, "step": 216180 }, { "epoch": 0.88, "grad_norm": 3.970240354537964, "learning_rate": 0.0002, "loss": 1.6232, "step": 216190 }, { "epoch": 0.88, "grad_norm": 2.6967594623565674, "learning_rate": 0.0002, "loss": 1.7159, "step": 216200 }, { "epoch": 0.88, "grad_norm": 2.67026424407959, "learning_rate": 0.0002, "loss": 1.5797, "step": 216210 }, { "epoch": 0.88, "grad_norm": 6.885105609893799, "learning_rate": 0.0002, "loss": 1.5765, "step": 216220 }, { "epoch": 0.88, "grad_norm": 3.7513370513916016, "learning_rate": 0.0002, "loss": 1.4124, "step": 216230 }, { "epoch": 0.88, "grad_norm": 2.289397954940796, "learning_rate": 0.0002, "loss": 1.4236, "step": 216240 }, { "epoch": 0.88, "grad_norm": 2.826106071472168, "learning_rate": 0.0002, "loss": 1.4424, "step": 216250 }, { "epoch": 0.88, "grad_norm": 3.0551891326904297, "learning_rate": 0.0002, "loss": 1.3872, "step": 216260 }, { "epoch": 0.88, "grad_norm": 3.425767421722412, "learning_rate": 0.0002, "loss": 1.9192, "step": 216270 }, { "epoch": 0.88, "grad_norm": 2.8978331089019775, "learning_rate": 0.0002, "loss": 1.7658, "step": 216280 }, { "epoch": 0.88, "grad_norm": 2.4011623859405518, "learning_rate": 0.0002, "loss": 1.5357, "step": 216290 }, { "epoch": 0.88, "grad_norm": 3.493204355239868, "learning_rate": 0.0002, "loss": 1.5832, "step": 216300 }, { "epoch": 0.88, "grad_norm": 3.4666354656219482, "learning_rate": 0.0002, "loss": 1.637, "step": 216310 }, { "epoch": 0.88, "grad_norm": 2.517090320587158, "learning_rate": 0.0002, "loss": 1.76, "step": 216320 }, { "epoch": 0.88, "grad_norm": 2.9257848262786865, "learning_rate": 0.0002, "loss": 1.5083, "step": 216330 }, { "epoch": 0.88, "grad_norm": 2.643287181854248, "learning_rate": 0.0002, "loss": 1.6515, "step": 216340 }, { "epoch": 0.88, "grad_norm": 2.696694850921631, "learning_rate": 0.0002, "loss": 1.6979, "step": 216350 }, { "epoch": 0.88, "grad_norm": 2.753899097442627, "learning_rate": 0.0002, "loss": 1.7705, "step": 216360 }, { "epoch": 0.88, "grad_norm": 3.797956943511963, "learning_rate": 0.0002, "loss": 1.7661, "step": 216370 }, { "epoch": 0.88, "grad_norm": 1.8238555192947388, "learning_rate": 0.0002, "loss": 1.6406, "step": 216380 }, { "epoch": 0.88, "grad_norm": 4.035009860992432, "learning_rate": 0.0002, "loss": 1.5805, "step": 216390 }, { "epoch": 0.88, "grad_norm": 2.665480136871338, "learning_rate": 0.0002, "loss": 1.566, "step": 216400 }, { "epoch": 0.88, "grad_norm": 2.533297300338745, "learning_rate": 0.0002, "loss": 1.4494, "step": 216410 }, { "epoch": 0.88, "grad_norm": 3.896186351776123, "learning_rate": 0.0002, "loss": 1.4219, "step": 216420 }, { "epoch": 0.88, "grad_norm": 2.8118271827697754, "learning_rate": 0.0002, "loss": 1.6302, "step": 216430 }, { "epoch": 0.88, "grad_norm": 1.9327192306518555, "learning_rate": 0.0002, "loss": 1.535, "step": 216440 }, { "epoch": 0.88, "grad_norm": 3.021707534790039, "learning_rate": 0.0002, "loss": 1.6855, "step": 216450 }, { "epoch": 0.88, "grad_norm": 2.179969549179077, "learning_rate": 0.0002, "loss": 1.4196, "step": 216460 }, { "epoch": 0.88, "grad_norm": 3.8367600440979004, "learning_rate": 0.0002, "loss": 1.5294, "step": 216470 }, { "epoch": 0.88, "grad_norm": 4.5361738204956055, "learning_rate": 0.0002, "loss": 1.5204, "step": 216480 }, { "epoch": 0.88, "grad_norm": 3.93902325630188, "learning_rate": 0.0002, "loss": 1.7206, "step": 216490 }, { "epoch": 0.88, "grad_norm": 4.2407355308532715, "learning_rate": 0.0002, "loss": 1.6938, "step": 216500 }, { "epoch": 0.88, "grad_norm": 3.6057560443878174, "learning_rate": 0.0002, "loss": 1.4817, "step": 216510 }, { "epoch": 0.88, "grad_norm": 3.0592827796936035, "learning_rate": 0.0002, "loss": 1.6426, "step": 216520 }, { "epoch": 0.88, "grad_norm": 1.9796639680862427, "learning_rate": 0.0002, "loss": 1.574, "step": 216530 }, { "epoch": 0.88, "grad_norm": 2.8599131107330322, "learning_rate": 0.0002, "loss": 1.5476, "step": 216540 }, { "epoch": 0.88, "grad_norm": 3.629204750061035, "learning_rate": 0.0002, "loss": 1.7685, "step": 216550 }, { "epoch": 0.88, "grad_norm": 3.3123066425323486, "learning_rate": 0.0002, "loss": 1.3858, "step": 216560 }, { "epoch": 0.88, "grad_norm": 3.2494332790374756, "learning_rate": 0.0002, "loss": 1.5863, "step": 216570 }, { "epoch": 0.88, "grad_norm": 2.858581781387329, "learning_rate": 0.0002, "loss": 1.3937, "step": 216580 }, { "epoch": 0.88, "grad_norm": 2.907118320465088, "learning_rate": 0.0002, "loss": 1.5966, "step": 216590 }, { "epoch": 0.88, "grad_norm": 3.534271001815796, "learning_rate": 0.0002, "loss": 1.4922, "step": 216600 }, { "epoch": 0.88, "grad_norm": 2.929222345352173, "learning_rate": 0.0002, "loss": 1.5735, "step": 216610 }, { "epoch": 0.88, "grad_norm": 1.8684529066085815, "learning_rate": 0.0002, "loss": 1.6098, "step": 216620 }, { "epoch": 0.88, "grad_norm": 1.8228007555007935, "learning_rate": 0.0002, "loss": 1.9258, "step": 216630 }, { "epoch": 0.88, "grad_norm": 3.04498553276062, "learning_rate": 0.0002, "loss": 1.4782, "step": 216640 }, { "epoch": 0.88, "grad_norm": 2.956385850906372, "learning_rate": 0.0002, "loss": 1.5236, "step": 216650 }, { "epoch": 0.88, "grad_norm": 3.5064799785614014, "learning_rate": 0.0002, "loss": 1.609, "step": 216660 }, { "epoch": 0.88, "grad_norm": 2.140454053878784, "learning_rate": 0.0002, "loss": 1.7384, "step": 216670 }, { "epoch": 0.88, "grad_norm": 2.3024373054504395, "learning_rate": 0.0002, "loss": 1.6856, "step": 216680 }, { "epoch": 0.88, "grad_norm": 3.0307483673095703, "learning_rate": 0.0002, "loss": 1.5226, "step": 216690 }, { "epoch": 0.88, "grad_norm": 4.038818359375, "learning_rate": 0.0002, "loss": 1.4168, "step": 216700 }, { "epoch": 0.88, "grad_norm": 3.036716938018799, "learning_rate": 0.0002, "loss": 1.5244, "step": 216710 }, { "epoch": 0.88, "grad_norm": 2.1801671981811523, "learning_rate": 0.0002, "loss": 1.6887, "step": 216720 }, { "epoch": 0.88, "grad_norm": 2.909454107284546, "learning_rate": 0.0002, "loss": 1.628, "step": 216730 }, { "epoch": 0.88, "grad_norm": 3.262583017349243, "learning_rate": 0.0002, "loss": 1.4547, "step": 216740 }, { "epoch": 0.88, "grad_norm": 2.3895018100738525, "learning_rate": 0.0002, "loss": 1.5775, "step": 216750 }, { "epoch": 0.88, "grad_norm": 6.21674919128418, "learning_rate": 0.0002, "loss": 1.5517, "step": 216760 }, { "epoch": 0.88, "grad_norm": 2.272394895553589, "learning_rate": 0.0002, "loss": 1.7522, "step": 216770 }, { "epoch": 0.88, "grad_norm": 3.0964019298553467, "learning_rate": 0.0002, "loss": 1.4043, "step": 216780 }, { "epoch": 0.88, "grad_norm": 6.908596992492676, "learning_rate": 0.0002, "loss": 1.4393, "step": 216790 }, { "epoch": 0.88, "grad_norm": 2.5942161083221436, "learning_rate": 0.0002, "loss": 1.3409, "step": 216800 }, { "epoch": 0.88, "grad_norm": 4.314878463745117, "learning_rate": 0.0002, "loss": 1.7201, "step": 216810 }, { "epoch": 0.88, "grad_norm": 3.9723362922668457, "learning_rate": 0.0002, "loss": 1.7026, "step": 216820 }, { "epoch": 0.88, "grad_norm": 3.1988685131073, "learning_rate": 0.0002, "loss": 1.6557, "step": 216830 }, { "epoch": 0.88, "grad_norm": 1.7110099792480469, "learning_rate": 0.0002, "loss": 1.5405, "step": 216840 }, { "epoch": 0.88, "grad_norm": 2.567002773284912, "learning_rate": 0.0002, "loss": 1.6204, "step": 216850 }, { "epoch": 0.88, "grad_norm": 4.451435565948486, "learning_rate": 0.0002, "loss": 1.5458, "step": 216860 }, { "epoch": 0.88, "grad_norm": 2.5235908031463623, "learning_rate": 0.0002, "loss": 1.4622, "step": 216870 }, { "epoch": 0.88, "grad_norm": 2.2741072177886963, "learning_rate": 0.0002, "loss": 1.6737, "step": 216880 }, { "epoch": 0.88, "grad_norm": 2.9793996810913086, "learning_rate": 0.0002, "loss": 1.8179, "step": 216890 }, { "epoch": 0.88, "grad_norm": 2.6703379154205322, "learning_rate": 0.0002, "loss": 1.5092, "step": 216900 }, { "epoch": 0.88, "grad_norm": 2.7619259357452393, "learning_rate": 0.0002, "loss": 1.4391, "step": 216910 }, { "epoch": 0.88, "grad_norm": 2.9513933658599854, "learning_rate": 0.0002, "loss": 1.465, "step": 216920 }, { "epoch": 0.88, "grad_norm": 2.3074347972869873, "learning_rate": 0.0002, "loss": 1.9187, "step": 216930 }, { "epoch": 0.88, "grad_norm": 4.308034896850586, "learning_rate": 0.0002, "loss": 1.4501, "step": 216940 }, { "epoch": 0.88, "grad_norm": 3.6908857822418213, "learning_rate": 0.0002, "loss": 1.6277, "step": 216950 }, { "epoch": 0.88, "grad_norm": 2.564082384109497, "learning_rate": 0.0002, "loss": 1.7259, "step": 216960 }, { "epoch": 0.88, "grad_norm": 2.3367505073547363, "learning_rate": 0.0002, "loss": 1.4879, "step": 216970 }, { "epoch": 0.88, "grad_norm": 2.3928840160369873, "learning_rate": 0.0002, "loss": 1.5152, "step": 216980 }, { "epoch": 0.88, "grad_norm": 3.255280017852783, "learning_rate": 0.0002, "loss": 1.4635, "step": 216990 }, { "epoch": 0.88, "grad_norm": 5.563729763031006, "learning_rate": 0.0002, "loss": 1.6092, "step": 217000 }, { "epoch": 0.88, "grad_norm": 2.1855039596557617, "learning_rate": 0.0002, "loss": 1.4457, "step": 217010 }, { "epoch": 0.88, "grad_norm": 3.6753101348876953, "learning_rate": 0.0002, "loss": 1.3983, "step": 217020 }, { "epoch": 0.88, "grad_norm": 2.9632058143615723, "learning_rate": 0.0002, "loss": 1.6594, "step": 217030 }, { "epoch": 0.88, "grad_norm": 3.0125346183776855, "learning_rate": 0.0002, "loss": 1.6986, "step": 217040 }, { "epoch": 0.88, "grad_norm": 2.8066916465759277, "learning_rate": 0.0002, "loss": 1.6541, "step": 217050 }, { "epoch": 0.88, "grad_norm": 2.4785711765289307, "learning_rate": 0.0002, "loss": 1.7268, "step": 217060 }, { "epoch": 0.88, "grad_norm": 3.0666167736053467, "learning_rate": 0.0002, "loss": 1.5121, "step": 217070 }, { "epoch": 0.88, "grad_norm": 2.130405902862549, "learning_rate": 0.0002, "loss": 1.3993, "step": 217080 }, { "epoch": 0.88, "grad_norm": 2.7086143493652344, "learning_rate": 0.0002, "loss": 1.6456, "step": 217090 }, { "epoch": 0.88, "grad_norm": 2.9571900367736816, "learning_rate": 0.0002, "loss": 1.5252, "step": 217100 }, { "epoch": 0.88, "grad_norm": 2.8585875034332275, "learning_rate": 0.0002, "loss": 1.6486, "step": 217110 }, { "epoch": 0.88, "grad_norm": 1.839744210243225, "learning_rate": 0.0002, "loss": 1.3682, "step": 217120 }, { "epoch": 0.88, "grad_norm": 3.339632511138916, "learning_rate": 0.0002, "loss": 1.6402, "step": 217130 }, { "epoch": 0.88, "grad_norm": 1.538462519645691, "learning_rate": 0.0002, "loss": 1.4653, "step": 217140 }, { "epoch": 0.88, "grad_norm": 1.8752288818359375, "learning_rate": 0.0002, "loss": 1.6431, "step": 217150 }, { "epoch": 0.88, "grad_norm": 2.8277125358581543, "learning_rate": 0.0002, "loss": 1.6708, "step": 217160 }, { "epoch": 0.88, "grad_norm": 2.411386728286743, "learning_rate": 0.0002, "loss": 1.4944, "step": 217170 }, { "epoch": 0.88, "grad_norm": 2.60017991065979, "learning_rate": 0.0002, "loss": 1.7552, "step": 217180 }, { "epoch": 0.88, "grad_norm": 3.991551160812378, "learning_rate": 0.0002, "loss": 1.5866, "step": 217190 }, { "epoch": 0.88, "grad_norm": 2.8180019855499268, "learning_rate": 0.0002, "loss": 1.824, "step": 217200 }, { "epoch": 0.88, "grad_norm": 3.6748878955841064, "learning_rate": 0.0002, "loss": 1.4446, "step": 217210 }, { "epoch": 0.88, "grad_norm": 3.6543474197387695, "learning_rate": 0.0002, "loss": 1.6826, "step": 217220 }, { "epoch": 0.88, "grad_norm": 3.7257890701293945, "learning_rate": 0.0002, "loss": 1.4394, "step": 217230 }, { "epoch": 0.88, "grad_norm": 5.04613733291626, "learning_rate": 0.0002, "loss": 1.6801, "step": 217240 }, { "epoch": 0.88, "grad_norm": 2.8316972255706787, "learning_rate": 0.0002, "loss": 1.678, "step": 217250 }, { "epoch": 0.88, "grad_norm": 4.207264423370361, "learning_rate": 0.0002, "loss": 1.4397, "step": 217260 }, { "epoch": 0.88, "grad_norm": 3.0967586040496826, "learning_rate": 0.0002, "loss": 1.6151, "step": 217270 }, { "epoch": 0.88, "grad_norm": 3.204439640045166, "learning_rate": 0.0002, "loss": 1.6362, "step": 217280 }, { "epoch": 0.88, "grad_norm": 1.5046393871307373, "learning_rate": 0.0002, "loss": 1.4403, "step": 217290 }, { "epoch": 0.88, "grad_norm": 2.7336642742156982, "learning_rate": 0.0002, "loss": 1.5984, "step": 217300 }, { "epoch": 0.88, "grad_norm": 1.67238450050354, "learning_rate": 0.0002, "loss": 1.8335, "step": 217310 }, { "epoch": 0.88, "grad_norm": 2.686420202255249, "learning_rate": 0.0002, "loss": 1.6092, "step": 217320 }, { "epoch": 0.88, "grad_norm": 1.6167030334472656, "learning_rate": 0.0002, "loss": 1.3688, "step": 217330 }, { "epoch": 0.88, "grad_norm": 2.957630157470703, "learning_rate": 0.0002, "loss": 1.9567, "step": 217340 }, { "epoch": 0.88, "grad_norm": 3.731330394744873, "learning_rate": 0.0002, "loss": 1.5101, "step": 217350 }, { "epoch": 0.88, "grad_norm": 2.614211320877075, "learning_rate": 0.0002, "loss": 1.6096, "step": 217360 }, { "epoch": 0.88, "grad_norm": 1.8745533227920532, "learning_rate": 0.0002, "loss": 1.7793, "step": 217370 }, { "epoch": 0.88, "grad_norm": 2.6502060890197754, "learning_rate": 0.0002, "loss": 1.6182, "step": 217380 }, { "epoch": 0.88, "grad_norm": 5.0396013259887695, "learning_rate": 0.0002, "loss": 1.4186, "step": 217390 }, { "epoch": 0.89, "grad_norm": 2.1817915439605713, "learning_rate": 0.0002, "loss": 1.6247, "step": 217400 }, { "epoch": 0.89, "grad_norm": 2.6700146198272705, "learning_rate": 0.0002, "loss": 1.4421, "step": 217410 }, { "epoch": 0.89, "grad_norm": 3.4603500366210938, "learning_rate": 0.0002, "loss": 1.4802, "step": 217420 }, { "epoch": 0.89, "grad_norm": 10.918683052062988, "learning_rate": 0.0002, "loss": 1.6354, "step": 217430 }, { "epoch": 0.89, "grad_norm": 2.316232442855835, "learning_rate": 0.0002, "loss": 1.7424, "step": 217440 }, { "epoch": 0.89, "grad_norm": 1.550440788269043, "learning_rate": 0.0002, "loss": 1.5051, "step": 217450 }, { "epoch": 0.89, "grad_norm": 4.147591590881348, "learning_rate": 0.0002, "loss": 1.6443, "step": 217460 }, { "epoch": 0.89, "grad_norm": 2.37955379486084, "learning_rate": 0.0002, "loss": 1.4985, "step": 217470 }, { "epoch": 0.89, "grad_norm": 6.526908874511719, "learning_rate": 0.0002, "loss": 1.5947, "step": 217480 }, { "epoch": 0.89, "grad_norm": 2.322920799255371, "learning_rate": 0.0002, "loss": 1.3176, "step": 217490 }, { "epoch": 0.89, "grad_norm": 2.797950506210327, "learning_rate": 0.0002, "loss": 1.3974, "step": 217500 }, { "epoch": 0.89, "grad_norm": 3.2459988594055176, "learning_rate": 0.0002, "loss": 1.4988, "step": 217510 }, { "epoch": 0.89, "grad_norm": 4.769965648651123, "learning_rate": 0.0002, "loss": 1.5676, "step": 217520 }, { "epoch": 0.89, "grad_norm": 3.3644680976867676, "learning_rate": 0.0002, "loss": 1.6845, "step": 217530 }, { "epoch": 0.89, "grad_norm": 2.579664945602417, "learning_rate": 0.0002, "loss": 2.075, "step": 217540 }, { "epoch": 0.89, "grad_norm": 1.950751543045044, "learning_rate": 0.0002, "loss": 1.5343, "step": 217550 }, { "epoch": 0.89, "grad_norm": 2.3029980659484863, "learning_rate": 0.0002, "loss": 1.408, "step": 217560 }, { "epoch": 0.89, "grad_norm": 3.9422738552093506, "learning_rate": 0.0002, "loss": 1.8134, "step": 217570 }, { "epoch": 0.89, "grad_norm": 2.591372489929199, "learning_rate": 0.0002, "loss": 1.4721, "step": 217580 }, { "epoch": 0.89, "grad_norm": 2.933804988861084, "learning_rate": 0.0002, "loss": 1.3853, "step": 217590 }, { "epoch": 0.89, "grad_norm": 2.866028070449829, "learning_rate": 0.0002, "loss": 1.6006, "step": 217600 }, { "epoch": 0.89, "grad_norm": 1.7424181699752808, "learning_rate": 0.0002, "loss": 1.3264, "step": 217610 }, { "epoch": 0.89, "grad_norm": 2.728675365447998, "learning_rate": 0.0002, "loss": 1.5905, "step": 217620 }, { "epoch": 0.89, "grad_norm": 2.1966164112091064, "learning_rate": 0.0002, "loss": 1.5293, "step": 217630 }, { "epoch": 0.89, "grad_norm": 3.1152307987213135, "learning_rate": 0.0002, "loss": 1.4803, "step": 217640 }, { "epoch": 0.89, "grad_norm": 5.689422130584717, "learning_rate": 0.0002, "loss": 1.5024, "step": 217650 }, { "epoch": 0.89, "grad_norm": 2.7618393898010254, "learning_rate": 0.0002, "loss": 1.4694, "step": 217660 }, { "epoch": 0.89, "grad_norm": 2.933716058731079, "learning_rate": 0.0002, "loss": 1.4182, "step": 217670 }, { "epoch": 0.89, "grad_norm": 3.3035941123962402, "learning_rate": 0.0002, "loss": 1.5324, "step": 217680 }, { "epoch": 0.89, "grad_norm": 3.137199640274048, "learning_rate": 0.0002, "loss": 1.5952, "step": 217690 }, { "epoch": 0.89, "grad_norm": 1.9558682441711426, "learning_rate": 0.0002, "loss": 1.6136, "step": 217700 }, { "epoch": 0.89, "grad_norm": 1.7277737855911255, "learning_rate": 0.0002, "loss": 1.5403, "step": 217710 }, { "epoch": 0.89, "grad_norm": 2.146599531173706, "learning_rate": 0.0002, "loss": 1.5903, "step": 217720 }, { "epoch": 0.89, "grad_norm": 4.12605094909668, "learning_rate": 0.0002, "loss": 1.7818, "step": 217730 }, { "epoch": 0.89, "grad_norm": 2.4654626846313477, "learning_rate": 0.0002, "loss": 1.3399, "step": 217740 }, { "epoch": 0.89, "grad_norm": 3.7981605529785156, "learning_rate": 0.0002, "loss": 1.6553, "step": 217750 }, { "epoch": 0.89, "grad_norm": 2.441005229949951, "learning_rate": 0.0002, "loss": 1.6791, "step": 217760 }, { "epoch": 0.89, "grad_norm": 3.9005134105682373, "learning_rate": 0.0002, "loss": 1.657, "step": 217770 }, { "epoch": 0.89, "grad_norm": 5.318150520324707, "learning_rate": 0.0002, "loss": 1.5777, "step": 217780 }, { "epoch": 0.89, "grad_norm": 1.897047758102417, "learning_rate": 0.0002, "loss": 1.4467, "step": 217790 }, { "epoch": 0.89, "grad_norm": 2.594428539276123, "learning_rate": 0.0002, "loss": 1.5848, "step": 217800 }, { "epoch": 0.89, "grad_norm": 2.2387354373931885, "learning_rate": 0.0002, "loss": 1.5255, "step": 217810 }, { "epoch": 0.89, "grad_norm": 1.4797130823135376, "learning_rate": 0.0002, "loss": 1.4946, "step": 217820 }, { "epoch": 0.89, "grad_norm": 2.6133153438568115, "learning_rate": 0.0002, "loss": 1.3284, "step": 217830 }, { "epoch": 0.89, "grad_norm": 2.0136027336120605, "learning_rate": 0.0002, "loss": 1.4421, "step": 217840 }, { "epoch": 0.89, "grad_norm": 2.9767889976501465, "learning_rate": 0.0002, "loss": 1.4012, "step": 217850 }, { "epoch": 0.89, "grad_norm": 3.8313040733337402, "learning_rate": 0.0002, "loss": 1.7319, "step": 217860 }, { "epoch": 0.89, "grad_norm": 2.6188700199127197, "learning_rate": 0.0002, "loss": 1.4804, "step": 217870 }, { "epoch": 0.89, "grad_norm": 2.051546573638916, "learning_rate": 0.0002, "loss": 1.4663, "step": 217880 }, { "epoch": 0.89, "grad_norm": 2.7406184673309326, "learning_rate": 0.0002, "loss": 1.5531, "step": 217890 }, { "epoch": 0.89, "grad_norm": 3.47685170173645, "learning_rate": 0.0002, "loss": 1.6537, "step": 217900 }, { "epoch": 0.89, "grad_norm": 2.292553424835205, "learning_rate": 0.0002, "loss": 1.5272, "step": 217910 }, { "epoch": 0.89, "grad_norm": 3.2352116107940674, "learning_rate": 0.0002, "loss": 1.7137, "step": 217920 }, { "epoch": 0.89, "grad_norm": 3.445723056793213, "learning_rate": 0.0002, "loss": 1.7422, "step": 217930 }, { "epoch": 0.89, "grad_norm": 3.2106564044952393, "learning_rate": 0.0002, "loss": 1.4586, "step": 217940 }, { "epoch": 0.89, "grad_norm": 2.4503965377807617, "learning_rate": 0.0002, "loss": 1.7552, "step": 217950 }, { "epoch": 0.89, "grad_norm": 2.721907138824463, "learning_rate": 0.0002, "loss": 1.5921, "step": 217960 }, { "epoch": 0.89, "grad_norm": 2.406038284301758, "learning_rate": 0.0002, "loss": 1.7306, "step": 217970 }, { "epoch": 0.89, "grad_norm": 3.5996851921081543, "learning_rate": 0.0002, "loss": 1.369, "step": 217980 }, { "epoch": 0.89, "grad_norm": 2.587477922439575, "learning_rate": 0.0002, "loss": 1.6329, "step": 217990 }, { "epoch": 0.89, "grad_norm": 2.009026288986206, "learning_rate": 0.0002, "loss": 1.7089, "step": 218000 }, { "epoch": 0.89, "grad_norm": 6.613285541534424, "learning_rate": 0.0002, "loss": 1.4643, "step": 218010 }, { "epoch": 0.89, "grad_norm": 8.1007080078125, "learning_rate": 0.0002, "loss": 1.8385, "step": 218020 }, { "epoch": 0.89, "grad_norm": 2.566420078277588, "learning_rate": 0.0002, "loss": 1.3028, "step": 218030 }, { "epoch": 0.89, "grad_norm": 1.6468474864959717, "learning_rate": 0.0002, "loss": 1.7553, "step": 218040 }, { "epoch": 0.89, "grad_norm": 3.7723934650421143, "learning_rate": 0.0002, "loss": 1.5069, "step": 218050 }, { "epoch": 0.89, "grad_norm": 2.8393070697784424, "learning_rate": 0.0002, "loss": 1.5316, "step": 218060 }, { "epoch": 0.89, "grad_norm": 2.3811166286468506, "learning_rate": 0.0002, "loss": 1.6292, "step": 218070 }, { "epoch": 0.89, "grad_norm": 4.192775726318359, "learning_rate": 0.0002, "loss": 1.6969, "step": 218080 }, { "epoch": 0.89, "grad_norm": 2.5643503665924072, "learning_rate": 0.0002, "loss": 1.489, "step": 218090 }, { "epoch": 0.89, "grad_norm": 2.5601868629455566, "learning_rate": 0.0002, "loss": 1.6441, "step": 218100 }, { "epoch": 0.89, "grad_norm": 2.9965126514434814, "learning_rate": 0.0002, "loss": 1.3716, "step": 218110 }, { "epoch": 0.89, "grad_norm": 3.332813024520874, "learning_rate": 0.0002, "loss": 1.5411, "step": 218120 }, { "epoch": 0.89, "grad_norm": 2.945803642272949, "learning_rate": 0.0002, "loss": 1.5189, "step": 218130 }, { "epoch": 0.89, "grad_norm": 3.6565380096435547, "learning_rate": 0.0002, "loss": 1.5867, "step": 218140 }, { "epoch": 0.89, "grad_norm": 5.457348823547363, "learning_rate": 0.0002, "loss": 1.4806, "step": 218150 }, { "epoch": 0.89, "grad_norm": 4.713861465454102, "learning_rate": 0.0002, "loss": 1.5769, "step": 218160 }, { "epoch": 0.89, "grad_norm": 3.0147759914398193, "learning_rate": 0.0002, "loss": 1.5931, "step": 218170 }, { "epoch": 0.89, "grad_norm": 2.704629898071289, "learning_rate": 0.0002, "loss": 1.4232, "step": 218180 }, { "epoch": 0.89, "grad_norm": 5.500534534454346, "learning_rate": 0.0002, "loss": 1.7096, "step": 218190 }, { "epoch": 0.89, "grad_norm": 2.189089775085449, "learning_rate": 0.0002, "loss": 1.3662, "step": 218200 }, { "epoch": 0.89, "grad_norm": 2.795295476913452, "learning_rate": 0.0002, "loss": 1.7765, "step": 218210 }, { "epoch": 0.89, "grad_norm": 2.82686710357666, "learning_rate": 0.0002, "loss": 1.4572, "step": 218220 }, { "epoch": 0.89, "grad_norm": 2.0066983699798584, "learning_rate": 0.0002, "loss": 1.5937, "step": 218230 }, { "epoch": 0.89, "grad_norm": 4.185102939605713, "learning_rate": 0.0002, "loss": 1.6004, "step": 218240 }, { "epoch": 0.89, "grad_norm": 4.262181282043457, "learning_rate": 0.0002, "loss": 1.4064, "step": 218250 }, { "epoch": 0.89, "grad_norm": 3.1980972290039062, "learning_rate": 0.0002, "loss": 1.6533, "step": 218260 }, { "epoch": 0.89, "grad_norm": 2.060167074203491, "learning_rate": 0.0002, "loss": 1.4579, "step": 218270 }, { "epoch": 0.89, "grad_norm": 2.004836320877075, "learning_rate": 0.0002, "loss": 1.2605, "step": 218280 }, { "epoch": 0.89, "grad_norm": 2.195711612701416, "learning_rate": 0.0002, "loss": 1.5335, "step": 218290 }, { "epoch": 0.89, "grad_norm": 4.21524715423584, "learning_rate": 0.0002, "loss": 1.6167, "step": 218300 }, { "epoch": 0.89, "grad_norm": 1.8143341541290283, "learning_rate": 0.0002, "loss": 1.8303, "step": 218310 }, { "epoch": 0.89, "grad_norm": 2.22090220451355, "learning_rate": 0.0002, "loss": 1.4671, "step": 218320 }, { "epoch": 0.89, "grad_norm": 2.291555166244507, "learning_rate": 0.0002, "loss": 1.8081, "step": 218330 }, { "epoch": 0.89, "grad_norm": 2.638338088989258, "learning_rate": 0.0002, "loss": 1.7735, "step": 218340 }, { "epoch": 0.89, "grad_norm": 3.00895357131958, "learning_rate": 0.0002, "loss": 1.5954, "step": 218350 }, { "epoch": 0.89, "grad_norm": 2.1874730587005615, "learning_rate": 0.0002, "loss": 1.6481, "step": 218360 }, { "epoch": 0.89, "grad_norm": 2.4006142616271973, "learning_rate": 0.0002, "loss": 1.7561, "step": 218370 }, { "epoch": 0.89, "grad_norm": 2.6595590114593506, "learning_rate": 0.0002, "loss": 1.5251, "step": 218380 }, { "epoch": 0.89, "grad_norm": 2.404318332672119, "learning_rate": 0.0002, "loss": 1.3211, "step": 218390 }, { "epoch": 0.89, "grad_norm": 1.873221755027771, "learning_rate": 0.0002, "loss": 1.6528, "step": 218400 }, { "epoch": 0.89, "grad_norm": 3.8119685649871826, "learning_rate": 0.0002, "loss": 1.5189, "step": 218410 }, { "epoch": 0.89, "grad_norm": 4.221464157104492, "learning_rate": 0.0002, "loss": 1.8405, "step": 218420 }, { "epoch": 0.89, "grad_norm": 2.55024790763855, "learning_rate": 0.0002, "loss": 1.6227, "step": 218430 }, { "epoch": 0.89, "grad_norm": 3.1585683822631836, "learning_rate": 0.0002, "loss": 1.6126, "step": 218440 }, { "epoch": 0.89, "grad_norm": 2.107452869415283, "learning_rate": 0.0002, "loss": 1.6874, "step": 218450 }, { "epoch": 0.89, "grad_norm": 2.931398630142212, "learning_rate": 0.0002, "loss": 1.6937, "step": 218460 }, { "epoch": 0.89, "grad_norm": 4.965715408325195, "learning_rate": 0.0002, "loss": 1.4433, "step": 218470 }, { "epoch": 0.89, "grad_norm": 7.043684959411621, "learning_rate": 0.0002, "loss": 1.5663, "step": 218480 }, { "epoch": 0.89, "grad_norm": 2.995147705078125, "learning_rate": 0.0002, "loss": 1.5925, "step": 218490 }, { "epoch": 0.89, "grad_norm": 2.081897020339966, "learning_rate": 0.0002, "loss": 1.7366, "step": 218500 }, { "epoch": 0.89, "grad_norm": 2.7659196853637695, "learning_rate": 0.0002, "loss": 1.7344, "step": 218510 }, { "epoch": 0.89, "grad_norm": 2.969061851501465, "learning_rate": 0.0002, "loss": 1.7827, "step": 218520 }, { "epoch": 0.89, "grad_norm": 3.807474374771118, "learning_rate": 0.0002, "loss": 1.5698, "step": 218530 }, { "epoch": 0.89, "grad_norm": 3.3422365188598633, "learning_rate": 0.0002, "loss": 1.678, "step": 218540 }, { "epoch": 0.89, "grad_norm": 6.899348258972168, "learning_rate": 0.0002, "loss": 1.3078, "step": 218550 }, { "epoch": 0.89, "grad_norm": Infinity, "learning_rate": 0.0002, "loss": 1.6706, "step": 218560 }, { "epoch": 0.89, "grad_norm": 3.3451404571533203, "learning_rate": 0.0002, "loss": 1.4641, "step": 218570 }, { "epoch": 0.89, "grad_norm": 3.044728994369507, "learning_rate": 0.0002, "loss": 1.4969, "step": 218580 }, { "epoch": 0.89, "grad_norm": 5.129693984985352, "learning_rate": 0.0002, "loss": 1.4908, "step": 218590 }, { "epoch": 0.89, "grad_norm": 3.645185708999634, "learning_rate": 0.0002, "loss": 1.6559, "step": 218600 }, { "epoch": 0.89, "grad_norm": 3.5563855171203613, "learning_rate": 0.0002, "loss": 1.5653, "step": 218610 }, { "epoch": 0.89, "grad_norm": 2.944605827331543, "learning_rate": 0.0002, "loss": 1.7859, "step": 218620 }, { "epoch": 0.89, "grad_norm": 2.8296761512756348, "learning_rate": 0.0002, "loss": 1.6092, "step": 218630 }, { "epoch": 0.89, "grad_norm": 2.870058536529541, "learning_rate": 0.0002, "loss": 1.5884, "step": 218640 }, { "epoch": 0.89, "grad_norm": 11.943922996520996, "learning_rate": 0.0002, "loss": 1.6079, "step": 218650 }, { "epoch": 0.89, "grad_norm": 3.2765467166900635, "learning_rate": 0.0002, "loss": 1.6698, "step": 218660 }, { "epoch": 0.89, "grad_norm": 3.239179849624634, "learning_rate": 0.0002, "loss": 1.6457, "step": 218670 }, { "epoch": 0.89, "grad_norm": 3.875237226486206, "learning_rate": 0.0002, "loss": 1.7807, "step": 218680 }, { "epoch": 0.89, "grad_norm": 4.134183406829834, "learning_rate": 0.0002, "loss": 1.52, "step": 218690 }, { "epoch": 0.89, "grad_norm": 3.8403451442718506, "learning_rate": 0.0002, "loss": 1.6005, "step": 218700 }, { "epoch": 0.89, "grad_norm": 2.9983835220336914, "learning_rate": 0.0002, "loss": 1.3726, "step": 218710 }, { "epoch": 0.89, "grad_norm": 5.16373348236084, "learning_rate": 0.0002, "loss": 1.5542, "step": 218720 }, { "epoch": 0.89, "grad_norm": 3.987222671508789, "learning_rate": 0.0002, "loss": 1.6964, "step": 218730 }, { "epoch": 0.89, "grad_norm": 2.8610126972198486, "learning_rate": 0.0002, "loss": 1.6527, "step": 218740 }, { "epoch": 0.89, "grad_norm": 12.56823444366455, "learning_rate": 0.0002, "loss": 1.6249, "step": 218750 }, { "epoch": 0.89, "grad_norm": 4.373598098754883, "learning_rate": 0.0002, "loss": 1.6301, "step": 218760 }, { "epoch": 0.89, "grad_norm": 6.467920303344727, "learning_rate": 0.0002, "loss": 1.6868, "step": 218770 }, { "epoch": 0.89, "grad_norm": 2.190232753753662, "learning_rate": 0.0002, "loss": 1.553, "step": 218780 }, { "epoch": 0.89, "grad_norm": 3.296799898147583, "learning_rate": 0.0002, "loss": 1.3966, "step": 218790 }, { "epoch": 0.89, "grad_norm": 2.2867813110351562, "learning_rate": 0.0002, "loss": 1.5992, "step": 218800 }, { "epoch": 0.89, "grad_norm": 2.7020015716552734, "learning_rate": 0.0002, "loss": 1.5007, "step": 218810 }, { "epoch": 0.89, "grad_norm": 1.5732229948043823, "learning_rate": 0.0002, "loss": 1.5871, "step": 218820 }, { "epoch": 0.89, "grad_norm": 3.8011343479156494, "learning_rate": 0.0002, "loss": 1.6555, "step": 218830 }, { "epoch": 0.89, "grad_norm": 1.5840626955032349, "learning_rate": 0.0002, "loss": 1.6812, "step": 218840 }, { "epoch": 0.89, "grad_norm": 4.155569553375244, "learning_rate": 0.0002, "loss": 1.1535, "step": 218850 }, { "epoch": 0.89, "grad_norm": 2.923536539077759, "learning_rate": 0.0002, "loss": 1.5775, "step": 218860 }, { "epoch": 0.89, "grad_norm": 3.4516360759735107, "learning_rate": 0.0002, "loss": 1.644, "step": 218870 }, { "epoch": 0.89, "grad_norm": 4.46136474609375, "learning_rate": 0.0002, "loss": 1.5397, "step": 218880 }, { "epoch": 0.89, "grad_norm": 3.110459327697754, "learning_rate": 0.0002, "loss": 1.7622, "step": 218890 }, { "epoch": 0.89, "grad_norm": 1.9311245679855347, "learning_rate": 0.0002, "loss": 1.5175, "step": 218900 }, { "epoch": 0.89, "grad_norm": 3.8403117656707764, "learning_rate": 0.0002, "loss": 1.613, "step": 218910 }, { "epoch": 0.89, "grad_norm": 2.132228374481201, "learning_rate": 0.0002, "loss": 1.714, "step": 218920 }, { "epoch": 0.89, "grad_norm": 3.6456003189086914, "learning_rate": 0.0002, "loss": 1.6364, "step": 218930 }, { "epoch": 0.89, "grad_norm": 2.6623172760009766, "learning_rate": 0.0002, "loss": 1.3148, "step": 218940 }, { "epoch": 0.89, "grad_norm": 3.0446267127990723, "learning_rate": 0.0002, "loss": 1.524, "step": 218950 }, { "epoch": 0.89, "grad_norm": 4.805114269256592, "learning_rate": 0.0002, "loss": 1.6611, "step": 218960 }, { "epoch": 0.89, "grad_norm": 2.128215789794922, "learning_rate": 0.0002, "loss": 1.637, "step": 218970 }, { "epoch": 0.89, "grad_norm": 2.7959420680999756, "learning_rate": 0.0002, "loss": 1.4679, "step": 218980 }, { "epoch": 0.89, "grad_norm": 7.265779495239258, "learning_rate": 0.0002, "loss": 1.5905, "step": 218990 }, { "epoch": 0.89, "grad_norm": 3.5967981815338135, "learning_rate": 0.0002, "loss": 1.4432, "step": 219000 }, { "epoch": 0.89, "grad_norm": 6.399229049682617, "learning_rate": 0.0002, "loss": 1.5946, "step": 219010 }, { "epoch": 0.89, "grad_norm": 3.772244453430176, "learning_rate": 0.0002, "loss": 1.6758, "step": 219020 }, { "epoch": 0.89, "grad_norm": 2.8176820278167725, "learning_rate": 0.0002, "loss": 1.683, "step": 219030 }, { "epoch": 0.89, "grad_norm": 2.1896276473999023, "learning_rate": 0.0002, "loss": 1.6562, "step": 219040 }, { "epoch": 0.89, "grad_norm": 2.782219886779785, "learning_rate": 0.0002, "loss": 1.3768, "step": 219050 }, { "epoch": 0.89, "grad_norm": 4.7264485359191895, "learning_rate": 0.0002, "loss": 1.594, "step": 219060 }, { "epoch": 0.89, "grad_norm": 3.9608075618743896, "learning_rate": 0.0002, "loss": 1.5079, "step": 219070 }, { "epoch": 0.89, "grad_norm": 3.0322515964508057, "learning_rate": 0.0002, "loss": 1.5122, "step": 219080 }, { "epoch": 0.89, "grad_norm": 3.0107831954956055, "learning_rate": 0.0002, "loss": 1.6776, "step": 219090 }, { "epoch": 0.89, "grad_norm": 3.8882930278778076, "learning_rate": 0.0002, "loss": 1.5806, "step": 219100 }, { "epoch": 0.89, "grad_norm": 2.2735109329223633, "learning_rate": 0.0002, "loss": 1.8448, "step": 219110 }, { "epoch": 0.89, "grad_norm": 1.5026731491088867, "learning_rate": 0.0002, "loss": 1.5617, "step": 219120 }, { "epoch": 0.89, "grad_norm": 2.5869226455688477, "learning_rate": 0.0002, "loss": 1.4856, "step": 219130 }, { "epoch": 0.89, "grad_norm": 3.073392391204834, "learning_rate": 0.0002, "loss": 1.6203, "step": 219140 }, { "epoch": 0.89, "grad_norm": 3.112438678741455, "learning_rate": 0.0002, "loss": 1.4549, "step": 219150 }, { "epoch": 0.89, "grad_norm": 3.3224799633026123, "learning_rate": 0.0002, "loss": 1.5479, "step": 219160 }, { "epoch": 0.89, "grad_norm": 2.5478806495666504, "learning_rate": 0.0002, "loss": 1.5668, "step": 219170 }, { "epoch": 0.89, "grad_norm": 2.212636709213257, "learning_rate": 0.0002, "loss": 1.8731, "step": 219180 }, { "epoch": 0.89, "grad_norm": 2.121865749359131, "learning_rate": 0.0002, "loss": 1.6412, "step": 219190 }, { "epoch": 0.89, "grad_norm": 3.9493484497070312, "learning_rate": 0.0002, "loss": 1.4111, "step": 219200 }, { "epoch": 0.89, "grad_norm": 2.9883229732513428, "learning_rate": 0.0002, "loss": 1.5356, "step": 219210 }, { "epoch": 0.89, "grad_norm": 3.8496625423431396, "learning_rate": 0.0002, "loss": 1.5615, "step": 219220 }, { "epoch": 0.89, "grad_norm": 2.9740958213806152, "learning_rate": 0.0002, "loss": 1.5874, "step": 219230 }, { "epoch": 0.89, "grad_norm": 2.559475898742676, "learning_rate": 0.0002, "loss": 1.7137, "step": 219240 }, { "epoch": 0.89, "grad_norm": 2.0391228199005127, "learning_rate": 0.0002, "loss": 1.699, "step": 219250 }, { "epoch": 0.89, "grad_norm": 3.683560371398926, "learning_rate": 0.0002, "loss": 1.2885, "step": 219260 }, { "epoch": 0.89, "grad_norm": 4.468015193939209, "learning_rate": 0.0002, "loss": 1.5853, "step": 219270 }, { "epoch": 0.89, "grad_norm": 3.5453059673309326, "learning_rate": 0.0002, "loss": 1.7017, "step": 219280 }, { "epoch": 0.89, "grad_norm": 1.816307783126831, "learning_rate": 0.0002, "loss": 1.7276, "step": 219290 }, { "epoch": 0.89, "grad_norm": 2.6723968982696533, "learning_rate": 0.0002, "loss": 1.669, "step": 219300 }, { "epoch": 0.89, "grad_norm": 3.502286911010742, "learning_rate": 0.0002, "loss": 1.6383, "step": 219310 }, { "epoch": 0.89, "grad_norm": 1.9382399320602417, "learning_rate": 0.0002, "loss": 1.2759, "step": 219320 }, { "epoch": 0.89, "grad_norm": 3.4434731006622314, "learning_rate": 0.0002, "loss": 1.5327, "step": 219330 }, { "epoch": 0.89, "grad_norm": 2.760981321334839, "learning_rate": 0.0002, "loss": 1.77, "step": 219340 }, { "epoch": 0.89, "grad_norm": 1.941558599472046, "learning_rate": 0.0002, "loss": 1.5845, "step": 219350 }, { "epoch": 0.89, "grad_norm": 2.28267502784729, "learning_rate": 0.0002, "loss": 1.5519, "step": 219360 }, { "epoch": 0.89, "grad_norm": 2.590475559234619, "learning_rate": 0.0002, "loss": 1.4487, "step": 219370 }, { "epoch": 0.89, "grad_norm": 3.7841684818267822, "learning_rate": 0.0002, "loss": 1.4393, "step": 219380 }, { "epoch": 0.89, "grad_norm": 1.7293310165405273, "learning_rate": 0.0002, "loss": 1.5178, "step": 219390 }, { "epoch": 0.89, "grad_norm": 4.353542804718018, "learning_rate": 0.0002, "loss": 1.4076, "step": 219400 }, { "epoch": 0.89, "grad_norm": 2.3531992435455322, "learning_rate": 0.0002, "loss": 1.7009, "step": 219410 }, { "epoch": 0.89, "grad_norm": 2.8138628005981445, "learning_rate": 0.0002, "loss": 1.5178, "step": 219420 }, { "epoch": 0.89, "grad_norm": 4.349743843078613, "learning_rate": 0.0002, "loss": 1.531, "step": 219430 }, { "epoch": 0.89, "grad_norm": 3.566415786743164, "learning_rate": 0.0002, "loss": 1.5404, "step": 219440 }, { "epoch": 0.89, "grad_norm": 2.267997980117798, "learning_rate": 0.0002, "loss": 1.5825, "step": 219450 }, { "epoch": 0.89, "grad_norm": 4.55786657333374, "learning_rate": 0.0002, "loss": 1.4733, "step": 219460 }, { "epoch": 0.89, "grad_norm": 2.809427499771118, "learning_rate": 0.0002, "loss": 1.7476, "step": 219470 }, { "epoch": 0.89, "grad_norm": 3.5882070064544678, "learning_rate": 0.0002, "loss": 1.6455, "step": 219480 }, { "epoch": 0.89, "grad_norm": 3.824880838394165, "learning_rate": 0.0002, "loss": 1.5886, "step": 219490 }, { "epoch": 0.89, "grad_norm": 2.5551888942718506, "learning_rate": 0.0002, "loss": 1.5788, "step": 219500 }, { "epoch": 0.89, "grad_norm": 1.798225998878479, "learning_rate": 0.0002, "loss": 1.6694, "step": 219510 }, { "epoch": 0.89, "grad_norm": 2.2835123538970947, "learning_rate": 0.0002, "loss": 1.4794, "step": 219520 }, { "epoch": 0.89, "grad_norm": 2.1102139949798584, "learning_rate": 0.0002, "loss": 1.4876, "step": 219530 }, { "epoch": 0.89, "grad_norm": 5.156291961669922, "learning_rate": 0.0002, "loss": 1.5076, "step": 219540 }, { "epoch": 0.89, "grad_norm": 3.150876998901367, "learning_rate": 0.0002, "loss": 1.7801, "step": 219550 }, { "epoch": 0.89, "grad_norm": 6.373408317565918, "learning_rate": 0.0002, "loss": 1.5419, "step": 219560 }, { "epoch": 0.89, "grad_norm": 2.9672577381134033, "learning_rate": 0.0002, "loss": 1.715, "step": 219570 }, { "epoch": 0.89, "grad_norm": 2.240882635116577, "learning_rate": 0.0002, "loss": 1.5702, "step": 219580 }, { "epoch": 0.89, "grad_norm": 2.8449018001556396, "learning_rate": 0.0002, "loss": 1.6528, "step": 219590 }, { "epoch": 0.89, "grad_norm": 2.1915009021759033, "learning_rate": 0.0002, "loss": 1.4878, "step": 219600 }, { "epoch": 0.89, "grad_norm": 4.088776588439941, "learning_rate": 0.0002, "loss": 1.3962, "step": 219610 }, { "epoch": 0.89, "grad_norm": 3.72794508934021, "learning_rate": 0.0002, "loss": 1.5959, "step": 219620 }, { "epoch": 0.89, "grad_norm": 3.6281397342681885, "learning_rate": 0.0002, "loss": 1.537, "step": 219630 }, { "epoch": 0.89, "grad_norm": 2.5634028911590576, "learning_rate": 0.0002, "loss": 1.7789, "step": 219640 }, { "epoch": 0.89, "grad_norm": 2.8808395862579346, "learning_rate": 0.0002, "loss": 1.6363, "step": 219650 }, { "epoch": 0.89, "grad_norm": 2.1387338638305664, "learning_rate": 0.0002, "loss": 1.4105, "step": 219660 }, { "epoch": 0.89, "grad_norm": 5.968246936798096, "learning_rate": 0.0002, "loss": 1.6126, "step": 219670 }, { "epoch": 0.89, "grad_norm": 2.941040277481079, "learning_rate": 0.0002, "loss": 1.7421, "step": 219680 }, { "epoch": 0.89, "grad_norm": 11.127263069152832, "learning_rate": 0.0002, "loss": 1.7209, "step": 219690 }, { "epoch": 0.89, "grad_norm": 2.4482336044311523, "learning_rate": 0.0002, "loss": 1.5009, "step": 219700 }, { "epoch": 0.89, "grad_norm": 2.980114221572876, "learning_rate": 0.0002, "loss": 1.594, "step": 219710 }, { "epoch": 0.89, "grad_norm": 2.1807987689971924, "learning_rate": 0.0002, "loss": 1.373, "step": 219720 }, { "epoch": 0.89, "grad_norm": 2.1742241382598877, "learning_rate": 0.0002, "loss": 1.6011, "step": 219730 }, { "epoch": 0.89, "grad_norm": 2.5443875789642334, "learning_rate": 0.0002, "loss": 1.3222, "step": 219740 }, { "epoch": 0.89, "grad_norm": 2.5952184200286865, "learning_rate": 0.0002, "loss": 1.7193, "step": 219750 }, { "epoch": 0.89, "grad_norm": 4.759688377380371, "learning_rate": 0.0002, "loss": 1.8011, "step": 219760 }, { "epoch": 0.89, "grad_norm": 2.6692519187927246, "learning_rate": 0.0002, "loss": 1.4959, "step": 219770 }, { "epoch": 0.89, "grad_norm": 4.302036762237549, "learning_rate": 0.0002, "loss": 1.3394, "step": 219780 }, { "epoch": 0.89, "grad_norm": 3.2543702125549316, "learning_rate": 0.0002, "loss": 1.5944, "step": 219790 }, { "epoch": 0.89, "grad_norm": 2.2184343338012695, "learning_rate": 0.0002, "loss": 1.3877, "step": 219800 }, { "epoch": 0.89, "grad_norm": 2.988109827041626, "learning_rate": 0.0002, "loss": 1.6815, "step": 219810 }, { "epoch": 0.89, "grad_norm": 4.904331684112549, "learning_rate": 0.0002, "loss": 1.5088, "step": 219820 }, { "epoch": 0.89, "grad_norm": 2.2248952388763428, "learning_rate": 0.0002, "loss": 1.5468, "step": 219830 }, { "epoch": 0.89, "grad_norm": 3.358187198638916, "learning_rate": 0.0002, "loss": 1.4935, "step": 219840 }, { "epoch": 0.89, "grad_norm": 3.2369439601898193, "learning_rate": 0.0002, "loss": 1.5869, "step": 219850 }, { "epoch": 0.9, "grad_norm": 3.505666971206665, "learning_rate": 0.0002, "loss": 1.622, "step": 219860 }, { "epoch": 0.9, "grad_norm": 3.6864140033721924, "learning_rate": 0.0002, "loss": 1.505, "step": 219870 }, { "epoch": 0.9, "grad_norm": 3.3938682079315186, "learning_rate": 0.0002, "loss": 1.634, "step": 219880 }, { "epoch": 0.9, "grad_norm": 2.802523136138916, "learning_rate": 0.0002, "loss": 1.7351, "step": 219890 }, { "epoch": 0.9, "grad_norm": 5.084243297576904, "learning_rate": 0.0002, "loss": 1.6637, "step": 219900 }, { "epoch": 0.9, "grad_norm": 3.8064448833465576, "learning_rate": 0.0002, "loss": 1.5109, "step": 219910 }, { "epoch": 0.9, "grad_norm": 3.785489797592163, "learning_rate": 0.0002, "loss": 1.4251, "step": 219920 }, { "epoch": 0.9, "grad_norm": 2.45819354057312, "learning_rate": 0.0002, "loss": 1.6765, "step": 219930 }, { "epoch": 0.9, "grad_norm": 4.238645076751709, "learning_rate": 0.0002, "loss": 1.5157, "step": 219940 }, { "epoch": 0.9, "grad_norm": 3.8702526092529297, "learning_rate": 0.0002, "loss": 1.7859, "step": 219950 }, { "epoch": 0.9, "grad_norm": 3.026172399520874, "learning_rate": 0.0002, "loss": 1.684, "step": 219960 }, { "epoch": 0.9, "grad_norm": 2.8317220211029053, "learning_rate": 0.0002, "loss": 1.7322, "step": 219970 }, { "epoch": 0.9, "grad_norm": 4.216511249542236, "learning_rate": 0.0002, "loss": 1.6691, "step": 219980 }, { "epoch": 0.9, "grad_norm": 2.8850369453430176, "learning_rate": 0.0002, "loss": 1.4456, "step": 219990 }, { "epoch": 0.9, "grad_norm": 2.553043842315674, "learning_rate": 0.0002, "loss": 1.5186, "step": 220000 }, { "epoch": 0.9, "grad_norm": 2.9670984745025635, "learning_rate": 0.0002, "loss": 1.3182, "step": 220010 }, { "epoch": 0.9, "grad_norm": 6.1721906661987305, "learning_rate": 0.0002, "loss": 1.6077, "step": 220020 }, { "epoch": 0.9, "grad_norm": 3.437495470046997, "learning_rate": 0.0002, "loss": 1.4532, "step": 220030 }, { "epoch": 0.9, "grad_norm": 2.8806166648864746, "learning_rate": 0.0002, "loss": 1.5705, "step": 220040 }, { "epoch": 0.9, "grad_norm": 2.6128432750701904, "learning_rate": 0.0002, "loss": 1.7023, "step": 220050 }, { "epoch": 0.9, "grad_norm": 1.8146201372146606, "learning_rate": 0.0002, "loss": 1.4563, "step": 220060 }, { "epoch": 0.9, "grad_norm": 2.5107462406158447, "learning_rate": 0.0002, "loss": 1.5382, "step": 220070 }, { "epoch": 0.9, "grad_norm": 4.788967132568359, "learning_rate": 0.0002, "loss": 1.7373, "step": 220080 }, { "epoch": 0.9, "grad_norm": 3.768333673477173, "learning_rate": 0.0002, "loss": 1.9441, "step": 220090 }, { "epoch": 0.9, "grad_norm": 3.6688268184661865, "learning_rate": 0.0002, "loss": 1.7614, "step": 220100 }, { "epoch": 0.9, "grad_norm": 2.9940357208251953, "learning_rate": 0.0002, "loss": 1.3455, "step": 220110 }, { "epoch": 0.9, "grad_norm": 3.841564178466797, "learning_rate": 0.0002, "loss": 1.8508, "step": 220120 }, { "epoch": 0.9, "grad_norm": 3.338325023651123, "learning_rate": 0.0002, "loss": 1.6655, "step": 220130 }, { "epoch": 0.9, "grad_norm": 2.883826971054077, "learning_rate": 0.0002, "loss": 1.6209, "step": 220140 }, { "epoch": 0.9, "grad_norm": 3.7935097217559814, "learning_rate": 0.0002, "loss": 1.5921, "step": 220150 }, { "epoch": 0.9, "grad_norm": 3.2290055751800537, "learning_rate": 0.0002, "loss": 1.6475, "step": 220160 }, { "epoch": 0.9, "grad_norm": 2.395338296890259, "learning_rate": 0.0002, "loss": 1.5014, "step": 220170 }, { "epoch": 0.9, "grad_norm": 1.833881139755249, "learning_rate": 0.0002, "loss": 1.5192, "step": 220180 }, { "epoch": 0.9, "grad_norm": 3.0842721462249756, "learning_rate": 0.0002, "loss": 1.5289, "step": 220190 }, { "epoch": 0.9, "grad_norm": 3.160065174102783, "learning_rate": 0.0002, "loss": 1.6134, "step": 220200 }, { "epoch": 0.9, "grad_norm": 2.549976110458374, "learning_rate": 0.0002, "loss": 1.5119, "step": 220210 }, { "epoch": 0.9, "grad_norm": 3.1109395027160645, "learning_rate": 0.0002, "loss": 1.7355, "step": 220220 }, { "epoch": 0.9, "grad_norm": 2.763678550720215, "learning_rate": 0.0002, "loss": 1.3829, "step": 220230 }, { "epoch": 0.9, "grad_norm": 1.9338338375091553, "learning_rate": 0.0002, "loss": 1.5694, "step": 220240 }, { "epoch": 0.9, "grad_norm": 1.4394961595535278, "learning_rate": 0.0002, "loss": 1.5711, "step": 220250 }, { "epoch": 0.9, "grad_norm": 3.254542589187622, "learning_rate": 0.0002, "loss": 1.531, "step": 220260 }, { "epoch": 0.9, "grad_norm": 1.9504214525222778, "learning_rate": 0.0002, "loss": 1.4424, "step": 220270 }, { "epoch": 0.9, "grad_norm": 3.1953721046447754, "learning_rate": 0.0002, "loss": 1.5314, "step": 220280 }, { "epoch": 0.9, "grad_norm": 2.626223564147949, "learning_rate": 0.0002, "loss": 1.679, "step": 220290 }, { "epoch": 0.9, "grad_norm": 3.1195974349975586, "learning_rate": 0.0002, "loss": 1.6147, "step": 220300 }, { "epoch": 0.9, "grad_norm": 3.2546632289886475, "learning_rate": 0.0002, "loss": 1.5828, "step": 220310 }, { "epoch": 0.9, "grad_norm": 2.6319940090179443, "learning_rate": 0.0002, "loss": 1.6899, "step": 220320 }, { "epoch": 0.9, "grad_norm": 4.284821510314941, "learning_rate": 0.0002, "loss": 1.446, "step": 220330 }, { "epoch": 0.9, "grad_norm": 2.2031984329223633, "learning_rate": 0.0002, "loss": 1.439, "step": 220340 }, { "epoch": 0.9, "grad_norm": 2.1882121562957764, "learning_rate": 0.0002, "loss": 1.3785, "step": 220350 }, { "epoch": 0.9, "grad_norm": 3.567741632461548, "learning_rate": 0.0002, "loss": 1.3008, "step": 220360 }, { "epoch": 0.9, "grad_norm": 3.4242184162139893, "learning_rate": 0.0002, "loss": 1.6153, "step": 220370 }, { "epoch": 0.9, "grad_norm": 2.5140416622161865, "learning_rate": 0.0002, "loss": 1.99, "step": 220380 }, { "epoch": 0.9, "grad_norm": 3.1370925903320312, "learning_rate": 0.0002, "loss": 1.6015, "step": 220390 }, { "epoch": 0.9, "grad_norm": 2.7341136932373047, "learning_rate": 0.0002, "loss": 1.6575, "step": 220400 }, { "epoch": 0.9, "grad_norm": 3.525130271911621, "learning_rate": 0.0002, "loss": 1.4539, "step": 220410 }, { "epoch": 0.9, "grad_norm": 2.6973328590393066, "learning_rate": 0.0002, "loss": 1.4885, "step": 220420 }, { "epoch": 0.9, "grad_norm": 2.2731471061706543, "learning_rate": 0.0002, "loss": 1.3367, "step": 220430 }, { "epoch": 0.9, "grad_norm": 2.56762957572937, "learning_rate": 0.0002, "loss": 1.5456, "step": 220440 }, { "epoch": 0.9, "grad_norm": 3.0318474769592285, "learning_rate": 0.0002, "loss": 1.4643, "step": 220450 }, { "epoch": 0.9, "grad_norm": 2.770524263381958, "learning_rate": 0.0002, "loss": 1.7567, "step": 220460 }, { "epoch": 0.9, "grad_norm": 2.3312807083129883, "learning_rate": 0.0002, "loss": 1.7202, "step": 220470 }, { "epoch": 0.9, "grad_norm": 3.2201433181762695, "learning_rate": 0.0002, "loss": 1.4458, "step": 220480 }, { "epoch": 0.9, "grad_norm": 1.8149086236953735, "learning_rate": 0.0002, "loss": 1.563, "step": 220490 }, { "epoch": 0.9, "grad_norm": 3.275298833847046, "learning_rate": 0.0002, "loss": 1.7548, "step": 220500 }, { "epoch": 0.9, "grad_norm": 2.2306621074676514, "learning_rate": 0.0002, "loss": 1.66, "step": 220510 }, { "epoch": 0.9, "grad_norm": 3.130363941192627, "learning_rate": 0.0002, "loss": 1.5502, "step": 220520 }, { "epoch": 0.9, "grad_norm": 3.3452417850494385, "learning_rate": 0.0002, "loss": 1.7848, "step": 220530 }, { "epoch": 0.9, "grad_norm": 2.7264907360076904, "learning_rate": 0.0002, "loss": 1.416, "step": 220540 }, { "epoch": 0.9, "grad_norm": 3.7970383167266846, "learning_rate": 0.0002, "loss": 1.5806, "step": 220550 }, { "epoch": 0.9, "grad_norm": 3.702338933944702, "learning_rate": 0.0002, "loss": 1.646, "step": 220560 }, { "epoch": 0.9, "grad_norm": 2.038224697113037, "learning_rate": 0.0002, "loss": 1.6065, "step": 220570 }, { "epoch": 0.9, "grad_norm": 4.344458103179932, "learning_rate": 0.0002, "loss": 1.5445, "step": 220580 }, { "epoch": 0.9, "grad_norm": 3.615354061126709, "learning_rate": 0.0002, "loss": 1.4409, "step": 220590 }, { "epoch": 0.9, "grad_norm": 3.7276859283447266, "learning_rate": 0.0002, "loss": 1.702, "step": 220600 }, { "epoch": 0.9, "grad_norm": 3.04825758934021, "learning_rate": 0.0002, "loss": 1.6673, "step": 220610 }, { "epoch": 0.9, "grad_norm": 2.0637896060943604, "learning_rate": 0.0002, "loss": 1.4604, "step": 220620 }, { "epoch": 0.9, "grad_norm": 2.016862392425537, "learning_rate": 0.0002, "loss": 1.4795, "step": 220630 }, { "epoch": 0.9, "grad_norm": 2.8304474353790283, "learning_rate": 0.0002, "loss": 1.5139, "step": 220640 }, { "epoch": 0.9, "grad_norm": 1.9007039070129395, "learning_rate": 0.0002, "loss": 1.3153, "step": 220650 }, { "epoch": 0.9, "grad_norm": 2.884249448776245, "learning_rate": 0.0002, "loss": 1.5132, "step": 220660 }, { "epoch": 0.9, "grad_norm": 3.600889205932617, "learning_rate": 0.0002, "loss": 1.7813, "step": 220670 }, { "epoch": 0.9, "grad_norm": 2.3514368534088135, "learning_rate": 0.0002, "loss": 1.3683, "step": 220680 }, { "epoch": 0.9, "grad_norm": 3.0926780700683594, "learning_rate": 0.0002, "loss": 1.7083, "step": 220690 }, { "epoch": 0.9, "grad_norm": 2.4412150382995605, "learning_rate": 0.0002, "loss": 1.5119, "step": 220700 }, { "epoch": 0.9, "grad_norm": 3.125439405441284, "learning_rate": 0.0002, "loss": 1.4935, "step": 220710 }, { "epoch": 0.9, "grad_norm": 7.839357852935791, "learning_rate": 0.0002, "loss": 1.4942, "step": 220720 }, { "epoch": 0.9, "grad_norm": 2.4219822883605957, "learning_rate": 0.0002, "loss": 1.7984, "step": 220730 }, { "epoch": 0.9, "grad_norm": 3.9724295139312744, "learning_rate": 0.0002, "loss": 1.922, "step": 220740 }, { "epoch": 0.9, "grad_norm": 4.73814058303833, "learning_rate": 0.0002, "loss": 1.8018, "step": 220750 }, { "epoch": 0.9, "grad_norm": 2.27514386177063, "learning_rate": 0.0002, "loss": 1.7189, "step": 220760 }, { "epoch": 0.9, "grad_norm": 3.455087423324585, "learning_rate": 0.0002, "loss": 1.5887, "step": 220770 }, { "epoch": 0.9, "grad_norm": 2.631800651550293, "learning_rate": 0.0002, "loss": 1.4781, "step": 220780 }, { "epoch": 0.9, "grad_norm": 2.2016801834106445, "learning_rate": 0.0002, "loss": 1.8173, "step": 220790 }, { "epoch": 0.9, "grad_norm": 2.4584877490997314, "learning_rate": 0.0002, "loss": 1.5562, "step": 220800 }, { "epoch": 0.9, "grad_norm": 2.933816432952881, "learning_rate": 0.0002, "loss": 1.5245, "step": 220810 }, { "epoch": 0.9, "grad_norm": 2.5736541748046875, "learning_rate": 0.0002, "loss": 1.6757, "step": 220820 }, { "epoch": 0.9, "grad_norm": 11.815399169921875, "learning_rate": 0.0002, "loss": 1.478, "step": 220830 }, { "epoch": 0.9, "grad_norm": 2.4062983989715576, "learning_rate": 0.0002, "loss": 1.506, "step": 220840 }, { "epoch": 0.9, "grad_norm": 0.9801672697067261, "learning_rate": 0.0002, "loss": 1.3634, "step": 220850 }, { "epoch": 0.9, "grad_norm": 3.2884042263031006, "learning_rate": 0.0002, "loss": 1.8644, "step": 220860 }, { "epoch": 0.9, "grad_norm": 2.8098537921905518, "learning_rate": 0.0002, "loss": 1.6948, "step": 220870 }, { "epoch": 0.9, "grad_norm": 3.0715909004211426, "learning_rate": 0.0002, "loss": 1.7491, "step": 220880 }, { "epoch": 0.9, "grad_norm": 2.391155242919922, "learning_rate": 0.0002, "loss": 1.7979, "step": 220890 }, { "epoch": 0.9, "grad_norm": 2.031425714492798, "learning_rate": 0.0002, "loss": 1.3647, "step": 220900 }, { "epoch": 0.9, "grad_norm": 4.4795002937316895, "learning_rate": 0.0002, "loss": 1.6602, "step": 220910 }, { "epoch": 0.9, "grad_norm": 3.7014150619506836, "learning_rate": 0.0002, "loss": 1.7367, "step": 220920 }, { "epoch": 0.9, "grad_norm": 3.5287275314331055, "learning_rate": 0.0002, "loss": 1.5254, "step": 220930 }, { "epoch": 0.9, "grad_norm": 3.717794179916382, "learning_rate": 0.0002, "loss": 1.9247, "step": 220940 }, { "epoch": 0.9, "grad_norm": 4.14795446395874, "learning_rate": 0.0002, "loss": 1.3464, "step": 220950 }, { "epoch": 0.9, "grad_norm": 3.010510206222534, "learning_rate": 0.0002, "loss": 1.8727, "step": 220960 }, { "epoch": 0.9, "grad_norm": 2.3197882175445557, "learning_rate": 0.0002, "loss": 1.5469, "step": 220970 }, { "epoch": 0.9, "grad_norm": 4.576319217681885, "learning_rate": 0.0002, "loss": 1.4946, "step": 220980 }, { "epoch": 0.9, "grad_norm": 4.664529323577881, "learning_rate": 0.0002, "loss": 1.5826, "step": 220990 }, { "epoch": 0.9, "grad_norm": 2.4810361862182617, "learning_rate": 0.0002, "loss": 1.4202, "step": 221000 }, { "epoch": 0.9, "grad_norm": 2.3145134449005127, "learning_rate": 0.0002, "loss": 1.631, "step": 221010 }, { "epoch": 0.9, "grad_norm": 2.4257240295410156, "learning_rate": 0.0002, "loss": 1.5209, "step": 221020 }, { "epoch": 0.9, "grad_norm": 2.3836605548858643, "learning_rate": 0.0002, "loss": 1.5143, "step": 221030 }, { "epoch": 0.9, "grad_norm": 2.0248613357543945, "learning_rate": 0.0002, "loss": 1.7002, "step": 221040 }, { "epoch": 0.9, "grad_norm": 2.7229151725769043, "learning_rate": 0.0002, "loss": 1.5576, "step": 221050 }, { "epoch": 0.9, "grad_norm": 2.7987658977508545, "learning_rate": 0.0002, "loss": 1.5566, "step": 221060 }, { "epoch": 0.9, "grad_norm": 3.025092363357544, "learning_rate": 0.0002, "loss": 1.6019, "step": 221070 }, { "epoch": 0.9, "grad_norm": 2.7659249305725098, "learning_rate": 0.0002, "loss": 1.6503, "step": 221080 }, { "epoch": 0.9, "grad_norm": 5.144567966461182, "learning_rate": 0.0002, "loss": 1.8336, "step": 221090 }, { "epoch": 0.9, "grad_norm": 4.969081401824951, "learning_rate": 0.0002, "loss": 1.7845, "step": 221100 }, { "epoch": 0.9, "grad_norm": 3.4848601818084717, "learning_rate": 0.0002, "loss": 1.3785, "step": 221110 }, { "epoch": 0.9, "grad_norm": 3.2288713455200195, "learning_rate": 0.0002, "loss": 1.68, "step": 221120 }, { "epoch": 0.9, "grad_norm": 3.629371404647827, "learning_rate": 0.0002, "loss": 1.7585, "step": 221130 }, { "epoch": 0.9, "grad_norm": 2.0478830337524414, "learning_rate": 0.0002, "loss": 1.9372, "step": 221140 }, { "epoch": 0.9, "grad_norm": 3.138272523880005, "learning_rate": 0.0002, "loss": 1.4753, "step": 221150 }, { "epoch": 0.9, "grad_norm": 4.03861665725708, "learning_rate": 0.0002, "loss": 1.6852, "step": 221160 }, { "epoch": 0.9, "grad_norm": 3.437068223953247, "learning_rate": 0.0002, "loss": 1.4818, "step": 221170 }, { "epoch": 0.9, "grad_norm": 4.168688774108887, "learning_rate": 0.0002, "loss": 1.5754, "step": 221180 }, { "epoch": 0.9, "grad_norm": 2.695824384689331, "learning_rate": 0.0002, "loss": 1.5312, "step": 221190 }, { "epoch": 0.9, "grad_norm": 2.354126214981079, "learning_rate": 0.0002, "loss": 1.57, "step": 221200 }, { "epoch": 0.9, "grad_norm": 1.6211858987808228, "learning_rate": 0.0002, "loss": 1.4085, "step": 221210 }, { "epoch": 0.9, "grad_norm": 4.625411510467529, "learning_rate": 0.0002, "loss": 1.71, "step": 221220 }, { "epoch": 0.9, "grad_norm": 2.555997371673584, "learning_rate": 0.0002, "loss": 1.5704, "step": 221230 }, { "epoch": 0.9, "grad_norm": 3.3985257148742676, "learning_rate": 0.0002, "loss": 1.5733, "step": 221240 }, { "epoch": 0.9, "grad_norm": 2.7764081954956055, "learning_rate": 0.0002, "loss": 1.6872, "step": 221250 }, { "epoch": 0.9, "grad_norm": 2.7870874404907227, "learning_rate": 0.0002, "loss": 1.2118, "step": 221260 }, { "epoch": 0.9, "grad_norm": 4.582884311676025, "learning_rate": 0.0002, "loss": 1.6635, "step": 221270 }, { "epoch": 0.9, "grad_norm": 2.9243404865264893, "learning_rate": 0.0002, "loss": 1.6366, "step": 221280 }, { "epoch": 0.9, "grad_norm": 3.6971845626831055, "learning_rate": 0.0002, "loss": 1.6758, "step": 221290 }, { "epoch": 0.9, "grad_norm": 1.4437038898468018, "learning_rate": 0.0002, "loss": 1.7288, "step": 221300 }, { "epoch": 0.9, "grad_norm": 3.7762367725372314, "learning_rate": 0.0002, "loss": 1.6364, "step": 221310 }, { "epoch": 0.9, "grad_norm": 2.203557252883911, "learning_rate": 0.0002, "loss": 1.6439, "step": 221320 }, { "epoch": 0.9, "grad_norm": 3.21099591255188, "learning_rate": 0.0002, "loss": 1.4386, "step": 221330 }, { "epoch": 0.9, "grad_norm": 2.9900362491607666, "learning_rate": 0.0002, "loss": 1.4066, "step": 221340 }, { "epoch": 0.9, "grad_norm": 2.5630414485931396, "learning_rate": 0.0002, "loss": 1.7053, "step": 221350 }, { "epoch": 0.9, "grad_norm": 2.5003044605255127, "learning_rate": 0.0002, "loss": 1.4861, "step": 221360 }, { "epoch": 0.9, "grad_norm": 4.394453048706055, "learning_rate": 0.0002, "loss": 1.4602, "step": 221370 }, { "epoch": 0.9, "grad_norm": 3.065812110900879, "learning_rate": 0.0002, "loss": 1.4968, "step": 221380 }, { "epoch": 0.9, "grad_norm": 3.758005380630493, "learning_rate": 0.0002, "loss": 1.441, "step": 221390 }, { "epoch": 0.9, "grad_norm": 4.460184574127197, "learning_rate": 0.0002, "loss": 1.5581, "step": 221400 }, { "epoch": 0.9, "grad_norm": 4.466839790344238, "learning_rate": 0.0002, "loss": 1.7168, "step": 221410 }, { "epoch": 0.9, "grad_norm": 2.0593667030334473, "learning_rate": 0.0002, "loss": 1.7684, "step": 221420 }, { "epoch": 0.9, "grad_norm": 3.1349070072174072, "learning_rate": 0.0002, "loss": 1.744, "step": 221430 }, { "epoch": 0.9, "grad_norm": 3.332200765609741, "learning_rate": 0.0002, "loss": 1.4984, "step": 221440 }, { "epoch": 0.9, "grad_norm": 2.033627510070801, "learning_rate": 0.0002, "loss": 1.4965, "step": 221450 }, { "epoch": 0.9, "grad_norm": 2.9524850845336914, "learning_rate": 0.0002, "loss": 1.7885, "step": 221460 }, { "epoch": 0.9, "grad_norm": 2.0538675785064697, "learning_rate": 0.0002, "loss": 1.5009, "step": 221470 }, { "epoch": 0.9, "grad_norm": 3.836404323577881, "learning_rate": 0.0002, "loss": 1.3018, "step": 221480 }, { "epoch": 0.9, "grad_norm": 2.7688567638397217, "learning_rate": 0.0002, "loss": 1.6331, "step": 221490 }, { "epoch": 0.9, "grad_norm": 2.597411632537842, "learning_rate": 0.0002, "loss": 1.3931, "step": 221500 }, { "epoch": 0.9, "grad_norm": 2.26633882522583, "learning_rate": 0.0002, "loss": 1.6607, "step": 221510 }, { "epoch": 0.9, "grad_norm": 2.851717233657837, "learning_rate": 0.0002, "loss": 1.3546, "step": 221520 }, { "epoch": 0.9, "grad_norm": 1.9504438638687134, "learning_rate": 0.0002, "loss": 1.628, "step": 221530 }, { "epoch": 0.9, "grad_norm": 3.4519922733306885, "learning_rate": 0.0002, "loss": 1.773, "step": 221540 }, { "epoch": 0.9, "grad_norm": 3.836643695831299, "learning_rate": 0.0002, "loss": 1.7789, "step": 221550 }, { "epoch": 0.9, "grad_norm": 3.337865114212036, "learning_rate": 0.0002, "loss": 1.8105, "step": 221560 }, { "epoch": 0.9, "grad_norm": 14.990540504455566, "learning_rate": 0.0002, "loss": 1.3128, "step": 221570 }, { "epoch": 0.9, "grad_norm": 2.8333160877227783, "learning_rate": 0.0002, "loss": 1.7832, "step": 221580 }, { "epoch": 0.9, "grad_norm": 2.4050443172454834, "learning_rate": 0.0002, "loss": 1.4889, "step": 221590 }, { "epoch": 0.9, "grad_norm": 5.321252346038818, "learning_rate": 0.0002, "loss": 1.5829, "step": 221600 }, { "epoch": 0.9, "grad_norm": 2.0139706134796143, "learning_rate": 0.0002, "loss": 1.4503, "step": 221610 }, { "epoch": 0.9, "grad_norm": 2.73732328414917, "learning_rate": 0.0002, "loss": 1.7108, "step": 221620 }, { "epoch": 0.9, "grad_norm": 4.113279342651367, "learning_rate": 0.0002, "loss": 1.5947, "step": 221630 }, { "epoch": 0.9, "grad_norm": 3.9969635009765625, "learning_rate": 0.0002, "loss": 1.8128, "step": 221640 }, { "epoch": 0.9, "grad_norm": 2.666274309158325, "learning_rate": 0.0002, "loss": 1.6794, "step": 221650 }, { "epoch": 0.9, "grad_norm": 2.808708667755127, "learning_rate": 0.0002, "loss": 1.7258, "step": 221660 }, { "epoch": 0.9, "grad_norm": 2.819986581802368, "learning_rate": 0.0002, "loss": 1.5009, "step": 221670 }, { "epoch": 0.9, "grad_norm": 3.1166744232177734, "learning_rate": 0.0002, "loss": 1.4636, "step": 221680 }, { "epoch": 0.9, "grad_norm": 2.7140092849731445, "learning_rate": 0.0002, "loss": 1.5665, "step": 221690 }, { "epoch": 0.9, "grad_norm": 2.7380306720733643, "learning_rate": 0.0002, "loss": 1.5728, "step": 221700 }, { "epoch": 0.9, "grad_norm": 6.452209949493408, "learning_rate": 0.0002, "loss": 1.8055, "step": 221710 }, { "epoch": 0.9, "grad_norm": 4.029209136962891, "learning_rate": 0.0002, "loss": 1.8146, "step": 221720 }, { "epoch": 0.9, "grad_norm": 2.880458116531372, "learning_rate": 0.0002, "loss": 1.6085, "step": 221730 }, { "epoch": 0.9, "grad_norm": 5.325296401977539, "learning_rate": 0.0002, "loss": 1.807, "step": 221740 }, { "epoch": 0.9, "grad_norm": 2.2033884525299072, "learning_rate": 0.0002, "loss": 1.3162, "step": 221750 }, { "epoch": 0.9, "grad_norm": 2.1954801082611084, "learning_rate": 0.0002, "loss": 1.6196, "step": 221760 }, { "epoch": 0.9, "grad_norm": 3.9969568252563477, "learning_rate": 0.0002, "loss": 1.6086, "step": 221770 }, { "epoch": 0.9, "grad_norm": 2.434173583984375, "learning_rate": 0.0002, "loss": 1.9193, "step": 221780 }, { "epoch": 0.9, "grad_norm": 3.072239875793457, "learning_rate": 0.0002, "loss": 1.4284, "step": 221790 }, { "epoch": 0.9, "grad_norm": 3.9775028228759766, "learning_rate": 0.0002, "loss": 1.5989, "step": 221800 }, { "epoch": 0.9, "grad_norm": 3.2916860580444336, "learning_rate": 0.0002, "loss": 1.5942, "step": 221810 }, { "epoch": 0.9, "grad_norm": 3.123490333557129, "learning_rate": 0.0002, "loss": 1.4647, "step": 221820 }, { "epoch": 0.9, "grad_norm": 2.1846365928649902, "learning_rate": 0.0002, "loss": 1.6849, "step": 221830 }, { "epoch": 0.9, "grad_norm": 3.150541305541992, "learning_rate": 0.0002, "loss": 1.624, "step": 221840 }, { "epoch": 0.9, "grad_norm": 1.5960890054702759, "learning_rate": 0.0002, "loss": 1.6963, "step": 221850 }, { "epoch": 0.9, "grad_norm": 2.1551520824432373, "learning_rate": 0.0002, "loss": 1.6744, "step": 221860 }, { "epoch": 0.9, "grad_norm": 3.076392889022827, "learning_rate": 0.0002, "loss": 1.5091, "step": 221870 }, { "epoch": 0.9, "grad_norm": 3.85062837600708, "learning_rate": 0.0002, "loss": 1.6859, "step": 221880 }, { "epoch": 0.9, "grad_norm": 4.1510748863220215, "learning_rate": 0.0002, "loss": 1.9077, "step": 221890 }, { "epoch": 0.9, "grad_norm": 3.5935473442077637, "learning_rate": 0.0002, "loss": 1.4544, "step": 221900 }, { "epoch": 0.9, "grad_norm": 1.9113391637802124, "learning_rate": 0.0002, "loss": 1.485, "step": 221910 }, { "epoch": 0.9, "grad_norm": 2.60349440574646, "learning_rate": 0.0002, "loss": 1.5759, "step": 221920 }, { "epoch": 0.9, "grad_norm": 3.676405906677246, "learning_rate": 0.0002, "loss": 1.703, "step": 221930 }, { "epoch": 0.9, "grad_norm": 3.6116955280303955, "learning_rate": 0.0002, "loss": 1.7205, "step": 221940 }, { "epoch": 0.9, "grad_norm": 4.344801425933838, "learning_rate": 0.0002, "loss": 1.5439, "step": 221950 }, { "epoch": 0.9, "grad_norm": 3.7583699226379395, "learning_rate": 0.0002, "loss": 1.7357, "step": 221960 }, { "epoch": 0.9, "grad_norm": 3.1561379432678223, "learning_rate": 0.0002, "loss": 1.652, "step": 221970 }, { "epoch": 0.9, "grad_norm": 2.9369547367095947, "learning_rate": 0.0002, "loss": 1.298, "step": 221980 }, { "epoch": 0.9, "grad_norm": 2.7913002967834473, "learning_rate": 0.0002, "loss": 1.4824, "step": 221990 }, { "epoch": 0.9, "grad_norm": 3.131664514541626, "learning_rate": 0.0002, "loss": 1.4156, "step": 222000 }, { "epoch": 0.9, "grad_norm": 4.155826568603516, "learning_rate": 0.0002, "loss": 1.4299, "step": 222010 }, { "epoch": 0.9, "grad_norm": 2.470686912536621, "learning_rate": 0.0002, "loss": 1.3462, "step": 222020 }, { "epoch": 0.9, "grad_norm": 2.4909110069274902, "learning_rate": 0.0002, "loss": 1.4971, "step": 222030 }, { "epoch": 0.9, "grad_norm": 3.2599380016326904, "learning_rate": 0.0002, "loss": 1.8545, "step": 222040 }, { "epoch": 0.9, "grad_norm": 3.248948097229004, "learning_rate": 0.0002, "loss": 1.3293, "step": 222050 }, { "epoch": 0.9, "grad_norm": 2.5052576065063477, "learning_rate": 0.0002, "loss": 1.4557, "step": 222060 }, { "epoch": 0.9, "grad_norm": 7.408921241760254, "learning_rate": 0.0002, "loss": 1.6346, "step": 222070 }, { "epoch": 0.9, "grad_norm": 2.750539779663086, "learning_rate": 0.0002, "loss": 1.2881, "step": 222080 }, { "epoch": 0.9, "grad_norm": 2.8687283992767334, "learning_rate": 0.0002, "loss": 1.6578, "step": 222090 }, { "epoch": 0.9, "grad_norm": 2.0373952388763428, "learning_rate": 0.0002, "loss": 1.7932, "step": 222100 }, { "epoch": 0.9, "grad_norm": 4.775883674621582, "learning_rate": 0.0002, "loss": 1.815, "step": 222110 }, { "epoch": 0.9, "grad_norm": 4.469463348388672, "learning_rate": 0.0002, "loss": 1.6913, "step": 222120 }, { "epoch": 0.9, "grad_norm": 2.5179686546325684, "learning_rate": 0.0002, "loss": 1.5258, "step": 222130 }, { "epoch": 0.9, "grad_norm": 1.8215999603271484, "learning_rate": 0.0002, "loss": 1.5721, "step": 222140 }, { "epoch": 0.9, "grad_norm": 4.400111675262451, "learning_rate": 0.0002, "loss": 1.5517, "step": 222150 }, { "epoch": 0.9, "grad_norm": 1.5361806154251099, "learning_rate": 0.0002, "loss": 1.4503, "step": 222160 }, { "epoch": 0.9, "grad_norm": 2.8253366947174072, "learning_rate": 0.0002, "loss": 1.6687, "step": 222170 }, { "epoch": 0.9, "grad_norm": 3.3853957653045654, "learning_rate": 0.0002, "loss": 1.6743, "step": 222180 }, { "epoch": 0.9, "grad_norm": 1.953284740447998, "learning_rate": 0.0002, "loss": 1.4701, "step": 222190 }, { "epoch": 0.9, "grad_norm": 2.407388687133789, "learning_rate": 0.0002, "loss": 1.7221, "step": 222200 }, { "epoch": 0.9, "grad_norm": 2.7733829021453857, "learning_rate": 0.0002, "loss": 1.3763, "step": 222210 }, { "epoch": 0.9, "grad_norm": 6.353353977203369, "learning_rate": 0.0002, "loss": 1.6016, "step": 222220 }, { "epoch": 0.9, "grad_norm": 2.7173643112182617, "learning_rate": 0.0002, "loss": 1.6761, "step": 222230 }, { "epoch": 0.9, "grad_norm": 2.349343776702881, "learning_rate": 0.0002, "loss": 1.5854, "step": 222240 }, { "epoch": 0.9, "grad_norm": 2.2508530616760254, "learning_rate": 0.0002, "loss": 1.4617, "step": 222250 }, { "epoch": 0.9, "grad_norm": 2.481353282928467, "learning_rate": 0.0002, "loss": 1.6241, "step": 222260 }, { "epoch": 0.9, "grad_norm": 2.463801860809326, "learning_rate": 0.0002, "loss": 1.5526, "step": 222270 }, { "epoch": 0.9, "grad_norm": 3.133815050125122, "learning_rate": 0.0002, "loss": 1.4488, "step": 222280 }, { "epoch": 0.9, "grad_norm": 3.1148180961608887, "learning_rate": 0.0002, "loss": 1.7278, "step": 222290 }, { "epoch": 0.9, "grad_norm": 7.581659317016602, "learning_rate": 0.0002, "loss": 1.7768, "step": 222300 }, { "epoch": 0.91, "grad_norm": 2.0170819759368896, "learning_rate": 0.0002, "loss": 1.8361, "step": 222310 }, { "epoch": 0.91, "grad_norm": 2.406593084335327, "learning_rate": 0.0002, "loss": 1.4649, "step": 222320 }, { "epoch": 0.91, "grad_norm": 2.4877851009368896, "learning_rate": 0.0002, "loss": 1.5263, "step": 222330 }, { "epoch": 0.91, "grad_norm": 3.2625648975372314, "learning_rate": 0.0002, "loss": 1.2822, "step": 222340 }, { "epoch": 0.91, "grad_norm": 3.640900135040283, "learning_rate": 0.0002, "loss": 1.5199, "step": 222350 }, { "epoch": 0.91, "grad_norm": 4.038167476654053, "learning_rate": 0.0002, "loss": 1.6054, "step": 222360 }, { "epoch": 0.91, "grad_norm": 2.9832136631011963, "learning_rate": 0.0002, "loss": 1.6351, "step": 222370 }, { "epoch": 0.91, "grad_norm": 2.3688812255859375, "learning_rate": 0.0002, "loss": 1.6899, "step": 222380 }, { "epoch": 0.91, "grad_norm": 1.613352656364441, "learning_rate": 0.0002, "loss": 1.5186, "step": 222390 }, { "epoch": 0.91, "grad_norm": 2.3854541778564453, "learning_rate": 0.0002, "loss": 1.2536, "step": 222400 }, { "epoch": 0.91, "grad_norm": 3.0651514530181885, "learning_rate": 0.0002, "loss": 1.5414, "step": 222410 }, { "epoch": 0.91, "grad_norm": 2.5666518211364746, "learning_rate": 0.0002, "loss": 1.6965, "step": 222420 }, { "epoch": 0.91, "grad_norm": 1.9140228033065796, "learning_rate": 0.0002, "loss": 1.6042, "step": 222430 }, { "epoch": 0.91, "grad_norm": 3.497262477874756, "learning_rate": 0.0002, "loss": 1.7262, "step": 222440 }, { "epoch": 0.91, "grad_norm": 2.7846875190734863, "learning_rate": 0.0002, "loss": 1.5637, "step": 222450 }, { "epoch": 0.91, "grad_norm": 2.0363197326660156, "learning_rate": 0.0002, "loss": 1.6101, "step": 222460 }, { "epoch": 0.91, "grad_norm": 3.718188762664795, "learning_rate": 0.0002, "loss": 1.6004, "step": 222470 }, { "epoch": 0.91, "grad_norm": 2.922759771347046, "learning_rate": 0.0002, "loss": 1.6512, "step": 222480 }, { "epoch": 0.91, "grad_norm": 5.3035502433776855, "learning_rate": 0.0002, "loss": 1.6173, "step": 222490 }, { "epoch": 0.91, "grad_norm": 1.9861944913864136, "learning_rate": 0.0002, "loss": 1.5547, "step": 222500 }, { "epoch": 0.91, "grad_norm": 3.2457399368286133, "learning_rate": 0.0002, "loss": 1.5128, "step": 222510 }, { "epoch": 0.91, "grad_norm": 2.6102135181427, "learning_rate": 0.0002, "loss": 1.7042, "step": 222520 }, { "epoch": 0.91, "grad_norm": 4.141398906707764, "learning_rate": 0.0002, "loss": 1.5753, "step": 222530 }, { "epoch": 0.91, "grad_norm": 2.7313711643218994, "learning_rate": 0.0002, "loss": 1.5479, "step": 222540 }, { "epoch": 0.91, "grad_norm": 3.425572395324707, "learning_rate": 0.0002, "loss": 1.4778, "step": 222550 }, { "epoch": 0.91, "grad_norm": 6.070044994354248, "learning_rate": 0.0002, "loss": 1.7026, "step": 222560 }, { "epoch": 0.91, "grad_norm": 1.6184602975845337, "learning_rate": 0.0002, "loss": 1.5469, "step": 222570 }, { "epoch": 0.91, "grad_norm": 2.83695650100708, "learning_rate": 0.0002, "loss": 1.6785, "step": 222580 }, { "epoch": 0.91, "grad_norm": 2.5670969486236572, "learning_rate": 0.0002, "loss": 1.4535, "step": 222590 }, { "epoch": 0.91, "grad_norm": 3.6510794162750244, "learning_rate": 0.0002, "loss": 1.6701, "step": 222600 }, { "epoch": 0.91, "grad_norm": 2.7031102180480957, "learning_rate": 0.0002, "loss": 1.529, "step": 222610 }, { "epoch": 0.91, "grad_norm": 5.421843528747559, "learning_rate": 0.0002, "loss": 1.6821, "step": 222620 }, { "epoch": 0.91, "grad_norm": 3.4892284870147705, "learning_rate": 0.0002, "loss": 1.5628, "step": 222630 }, { "epoch": 0.91, "grad_norm": 1.8528261184692383, "learning_rate": 0.0002, "loss": 1.4754, "step": 222640 }, { "epoch": 0.91, "grad_norm": 2.7281432151794434, "learning_rate": 0.0002, "loss": 1.4579, "step": 222650 }, { "epoch": 0.91, "grad_norm": 2.339589834213257, "learning_rate": 0.0002, "loss": 1.5132, "step": 222660 }, { "epoch": 0.91, "grad_norm": 3.9064478874206543, "learning_rate": 0.0002, "loss": 1.4427, "step": 222670 }, { "epoch": 0.91, "grad_norm": 2.5330312252044678, "learning_rate": 0.0002, "loss": 1.5694, "step": 222680 }, { "epoch": 0.91, "grad_norm": 3.5390162467956543, "learning_rate": 0.0002, "loss": 1.749, "step": 222690 }, { "epoch": 0.91, "grad_norm": 4.526688575744629, "learning_rate": 0.0002, "loss": 1.8139, "step": 222700 }, { "epoch": 0.91, "grad_norm": 2.4249885082244873, "learning_rate": 0.0002, "loss": 1.5827, "step": 222710 }, { "epoch": 0.91, "grad_norm": 3.2073874473571777, "learning_rate": 0.0002, "loss": 1.2967, "step": 222720 }, { "epoch": 0.91, "grad_norm": 2.7426364421844482, "learning_rate": 0.0002, "loss": 1.648, "step": 222730 }, { "epoch": 0.91, "grad_norm": 2.641289710998535, "learning_rate": 0.0002, "loss": 1.551, "step": 222740 }, { "epoch": 0.91, "grad_norm": 3.723400592803955, "learning_rate": 0.0002, "loss": 1.6867, "step": 222750 }, { "epoch": 0.91, "grad_norm": 2.3062543869018555, "learning_rate": 0.0002, "loss": 1.3722, "step": 222760 }, { "epoch": 0.91, "grad_norm": 2.732811450958252, "learning_rate": 0.0002, "loss": 1.7744, "step": 222770 }, { "epoch": 0.91, "grad_norm": 5.496709823608398, "learning_rate": 0.0002, "loss": 1.6451, "step": 222780 }, { "epoch": 0.91, "grad_norm": 3.0345025062561035, "learning_rate": 0.0002, "loss": 1.6072, "step": 222790 }, { "epoch": 0.91, "grad_norm": 1.2135273218154907, "learning_rate": 0.0002, "loss": 1.6174, "step": 222800 }, { "epoch": 0.91, "grad_norm": 2.7456932067871094, "learning_rate": 0.0002, "loss": 1.5441, "step": 222810 }, { "epoch": 0.91, "grad_norm": 2.9018843173980713, "learning_rate": 0.0002, "loss": 1.5744, "step": 222820 }, { "epoch": 0.91, "grad_norm": 3.054788827896118, "learning_rate": 0.0002, "loss": 1.46, "step": 222830 }, { "epoch": 0.91, "grad_norm": 2.789202928543091, "learning_rate": 0.0002, "loss": 1.4989, "step": 222840 }, { "epoch": 0.91, "grad_norm": 2.203991413116455, "learning_rate": 0.0002, "loss": 1.3899, "step": 222850 }, { "epoch": 0.91, "grad_norm": 2.542840003967285, "learning_rate": 0.0002, "loss": 1.6392, "step": 222860 }, { "epoch": 0.91, "grad_norm": 2.5295422077178955, "learning_rate": 0.0002, "loss": 1.6565, "step": 222870 }, { "epoch": 0.91, "grad_norm": 2.9611763954162598, "learning_rate": 0.0002, "loss": 1.4035, "step": 222880 }, { "epoch": 0.91, "grad_norm": 14.126307487487793, "learning_rate": 0.0002, "loss": 1.6847, "step": 222890 }, { "epoch": 0.91, "grad_norm": 3.5319125652313232, "learning_rate": 0.0002, "loss": 1.5853, "step": 222900 }, { "epoch": 0.91, "grad_norm": 2.5620946884155273, "learning_rate": 0.0002, "loss": 1.5212, "step": 222910 }, { "epoch": 0.91, "grad_norm": 4.632358551025391, "learning_rate": 0.0002, "loss": 1.3635, "step": 222920 }, { "epoch": 0.91, "grad_norm": 3.3039541244506836, "learning_rate": 0.0002, "loss": 1.6967, "step": 222930 }, { "epoch": 0.91, "grad_norm": 3.786041498184204, "learning_rate": 0.0002, "loss": 1.6209, "step": 222940 }, { "epoch": 0.91, "grad_norm": 1.879833459854126, "learning_rate": 0.0002, "loss": 1.5675, "step": 222950 }, { "epoch": 0.91, "grad_norm": 2.447019577026367, "learning_rate": 0.0002, "loss": 1.7392, "step": 222960 }, { "epoch": 0.91, "grad_norm": 4.162329196929932, "learning_rate": 0.0002, "loss": 1.5374, "step": 222970 }, { "epoch": 0.91, "grad_norm": 2.2632710933685303, "learning_rate": 0.0002, "loss": 1.6024, "step": 222980 }, { "epoch": 0.91, "grad_norm": 3.050668478012085, "learning_rate": 0.0002, "loss": 1.5524, "step": 222990 }, { "epoch": 0.91, "grad_norm": 4.1411309242248535, "learning_rate": 0.0002, "loss": 1.69, "step": 223000 }, { "epoch": 0.91, "grad_norm": 3.232228994369507, "learning_rate": 0.0002, "loss": 1.6127, "step": 223010 }, { "epoch": 0.91, "grad_norm": 3.805094003677368, "learning_rate": 0.0002, "loss": 1.3791, "step": 223020 }, { "epoch": 0.91, "grad_norm": 3.766998529434204, "learning_rate": 0.0002, "loss": 1.7064, "step": 223030 }, { "epoch": 0.91, "grad_norm": 2.8680002689361572, "learning_rate": 0.0002, "loss": 1.8035, "step": 223040 }, { "epoch": 0.91, "grad_norm": 2.3420565128326416, "learning_rate": 0.0002, "loss": 1.4381, "step": 223050 }, { "epoch": 0.91, "grad_norm": 3.129852294921875, "learning_rate": 0.0002, "loss": 1.6503, "step": 223060 }, { "epoch": 0.91, "grad_norm": 2.3045942783355713, "learning_rate": 0.0002, "loss": 1.616, "step": 223070 }, { "epoch": 0.91, "grad_norm": 2.537994623184204, "learning_rate": 0.0002, "loss": 1.7356, "step": 223080 }, { "epoch": 0.91, "grad_norm": 2.9583663940429688, "learning_rate": 0.0002, "loss": 1.6096, "step": 223090 }, { "epoch": 0.91, "grad_norm": 3.3435709476470947, "learning_rate": 0.0002, "loss": 1.3372, "step": 223100 }, { "epoch": 0.91, "grad_norm": 4.406556606292725, "learning_rate": 0.0002, "loss": 1.5562, "step": 223110 }, { "epoch": 0.91, "grad_norm": 2.3927271366119385, "learning_rate": 0.0002, "loss": 1.6567, "step": 223120 }, { "epoch": 0.91, "grad_norm": 2.6826770305633545, "learning_rate": 0.0002, "loss": 1.6638, "step": 223130 }, { "epoch": 0.91, "grad_norm": 3.2707159519195557, "learning_rate": 0.0002, "loss": 1.6163, "step": 223140 }, { "epoch": 0.91, "grad_norm": 3.3902652263641357, "learning_rate": 0.0002, "loss": 1.4768, "step": 223150 }, { "epoch": 0.91, "grad_norm": 3.596311330795288, "learning_rate": 0.0002, "loss": 1.4544, "step": 223160 }, { "epoch": 0.91, "grad_norm": 4.478122234344482, "learning_rate": 0.0002, "loss": 1.3455, "step": 223170 }, { "epoch": 0.91, "grad_norm": 1.907572627067566, "learning_rate": 0.0002, "loss": 1.6408, "step": 223180 }, { "epoch": 0.91, "grad_norm": 5.84427547454834, "learning_rate": 0.0002, "loss": 1.5111, "step": 223190 }, { "epoch": 0.91, "grad_norm": 2.0038042068481445, "learning_rate": 0.0002, "loss": 1.6604, "step": 223200 }, { "epoch": 0.91, "grad_norm": 2.3225479125976562, "learning_rate": 0.0002, "loss": 1.4319, "step": 223210 }, { "epoch": 0.91, "grad_norm": 2.6640703678131104, "learning_rate": 0.0002, "loss": 1.5738, "step": 223220 }, { "epoch": 0.91, "grad_norm": 4.544785022735596, "learning_rate": 0.0002, "loss": 1.5819, "step": 223230 }, { "epoch": 0.91, "grad_norm": 2.32318115234375, "learning_rate": 0.0002, "loss": 1.5354, "step": 223240 }, { "epoch": 0.91, "grad_norm": 2.684386730194092, "learning_rate": 0.0002, "loss": 1.5616, "step": 223250 }, { "epoch": 0.91, "grad_norm": 3.882449150085449, "learning_rate": 0.0002, "loss": 1.6098, "step": 223260 }, { "epoch": 0.91, "grad_norm": 2.6433680057525635, "learning_rate": 0.0002, "loss": 1.478, "step": 223270 }, { "epoch": 0.91, "grad_norm": 4.471866607666016, "learning_rate": 0.0002, "loss": 1.4629, "step": 223280 }, { "epoch": 0.91, "grad_norm": 2.4303510189056396, "learning_rate": 0.0002, "loss": 1.6575, "step": 223290 }, { "epoch": 0.91, "grad_norm": 3.0622899532318115, "learning_rate": 0.0002, "loss": 1.5419, "step": 223300 }, { "epoch": 0.91, "grad_norm": 2.866306781768799, "learning_rate": 0.0002, "loss": 1.6918, "step": 223310 }, { "epoch": 0.91, "grad_norm": 3.029656171798706, "learning_rate": 0.0002, "loss": 1.4068, "step": 223320 }, { "epoch": 0.91, "grad_norm": 3.5519330501556396, "learning_rate": 0.0002, "loss": 1.3302, "step": 223330 }, { "epoch": 0.91, "grad_norm": 3.1829447746276855, "learning_rate": 0.0002, "loss": 1.7845, "step": 223340 }, { "epoch": 0.91, "grad_norm": 4.392543315887451, "learning_rate": 0.0002, "loss": 1.7857, "step": 223350 }, { "epoch": 0.91, "grad_norm": 2.290179491043091, "learning_rate": 0.0002, "loss": 1.5587, "step": 223360 }, { "epoch": 0.91, "grad_norm": 3.0326921939849854, "learning_rate": 0.0002, "loss": 1.5201, "step": 223370 }, { "epoch": 0.91, "grad_norm": 3.292370319366455, "learning_rate": 0.0002, "loss": 1.7941, "step": 223380 }, { "epoch": 0.91, "grad_norm": 1.567761778831482, "learning_rate": 0.0002, "loss": 1.649, "step": 223390 }, { "epoch": 0.91, "grad_norm": 3.281719923019409, "learning_rate": 0.0002, "loss": 1.5096, "step": 223400 }, { "epoch": 0.91, "grad_norm": 2.333469867706299, "learning_rate": 0.0002, "loss": 1.6101, "step": 223410 }, { "epoch": 0.91, "grad_norm": 2.8643975257873535, "learning_rate": 0.0002, "loss": 1.3853, "step": 223420 }, { "epoch": 0.91, "grad_norm": 2.2852888107299805, "learning_rate": 0.0002, "loss": 1.7308, "step": 223430 }, { "epoch": 0.91, "grad_norm": 2.7947428226470947, "learning_rate": 0.0002, "loss": 1.4356, "step": 223440 }, { "epoch": 0.91, "grad_norm": 3.0361454486846924, "learning_rate": 0.0002, "loss": 1.5486, "step": 223450 }, { "epoch": 0.91, "grad_norm": 3.0885188579559326, "learning_rate": 0.0002, "loss": 1.701, "step": 223460 }, { "epoch": 0.91, "grad_norm": 2.2322797775268555, "learning_rate": 0.0002, "loss": 1.7521, "step": 223470 }, { "epoch": 0.91, "grad_norm": 3.316256046295166, "learning_rate": 0.0002, "loss": 1.3483, "step": 223480 }, { "epoch": 0.91, "grad_norm": 2.9942731857299805, "learning_rate": 0.0002, "loss": 1.5835, "step": 223490 }, { "epoch": 0.91, "grad_norm": 2.2466838359832764, "learning_rate": 0.0002, "loss": 1.6659, "step": 223500 }, { "epoch": 0.91, "grad_norm": 2.9005534648895264, "learning_rate": 0.0002, "loss": 1.6158, "step": 223510 }, { "epoch": 0.91, "grad_norm": 4.5519208908081055, "learning_rate": 0.0002, "loss": 1.3941, "step": 223520 }, { "epoch": 0.91, "grad_norm": 5.69690465927124, "learning_rate": 0.0002, "loss": 1.4213, "step": 223530 }, { "epoch": 0.91, "grad_norm": 6.099032878875732, "learning_rate": 0.0002, "loss": 1.6754, "step": 223540 }, { "epoch": 0.91, "grad_norm": 2.3147876262664795, "learning_rate": 0.0002, "loss": 1.3404, "step": 223550 }, { "epoch": 0.91, "grad_norm": 3.2932286262512207, "learning_rate": 0.0002, "loss": 1.595, "step": 223560 }, { "epoch": 0.91, "grad_norm": 3.2926900386810303, "learning_rate": 0.0002, "loss": 1.3317, "step": 223570 }, { "epoch": 0.91, "grad_norm": 5.127131462097168, "learning_rate": 0.0002, "loss": 1.7181, "step": 223580 }, { "epoch": 0.91, "grad_norm": 1.632983922958374, "learning_rate": 0.0002, "loss": 1.4801, "step": 223590 }, { "epoch": 0.91, "grad_norm": 3.9028255939483643, "learning_rate": 0.0002, "loss": 1.7129, "step": 223600 }, { "epoch": 0.91, "grad_norm": 3.0803256034851074, "learning_rate": 0.0002, "loss": 1.6358, "step": 223610 }, { "epoch": 0.91, "grad_norm": 2.970402240753174, "learning_rate": 0.0002, "loss": 1.6286, "step": 223620 }, { "epoch": 0.91, "grad_norm": 3.3003039360046387, "learning_rate": 0.0002, "loss": 1.7347, "step": 223630 }, { "epoch": 0.91, "grad_norm": 2.782567024230957, "learning_rate": 0.0002, "loss": 1.4701, "step": 223640 }, { "epoch": 0.91, "grad_norm": 2.8169515132904053, "learning_rate": 0.0002, "loss": 1.4302, "step": 223650 }, { "epoch": 0.91, "grad_norm": 1.925421953201294, "learning_rate": 0.0002, "loss": 1.872, "step": 223660 }, { "epoch": 0.91, "grad_norm": 2.6263294219970703, "learning_rate": 0.0002, "loss": 1.6232, "step": 223670 }, { "epoch": 0.91, "grad_norm": 3.0270307064056396, "learning_rate": 0.0002, "loss": 1.6283, "step": 223680 }, { "epoch": 0.91, "grad_norm": 2.0564374923706055, "learning_rate": 0.0002, "loss": 1.3173, "step": 223690 }, { "epoch": 0.91, "grad_norm": 1.9343150854110718, "learning_rate": 0.0002, "loss": 1.6118, "step": 223700 }, { "epoch": 0.91, "grad_norm": 4.452401638031006, "learning_rate": 0.0002, "loss": 1.328, "step": 223710 }, { "epoch": 0.91, "grad_norm": 3.6082520484924316, "learning_rate": 0.0002, "loss": 1.5953, "step": 223720 }, { "epoch": 0.91, "grad_norm": 2.9716928005218506, "learning_rate": 0.0002, "loss": 1.7537, "step": 223730 }, { "epoch": 0.91, "grad_norm": 3.152451992034912, "learning_rate": 0.0002, "loss": 1.4377, "step": 223740 }, { "epoch": 0.91, "grad_norm": 3.269758462905884, "learning_rate": 0.0002, "loss": 1.7241, "step": 223750 }, { "epoch": 0.91, "grad_norm": 3.1256020069122314, "learning_rate": 0.0002, "loss": 1.6333, "step": 223760 }, { "epoch": 0.91, "grad_norm": 2.290123701095581, "learning_rate": 0.0002, "loss": 1.7583, "step": 223770 }, { "epoch": 0.91, "grad_norm": 2.935025453567505, "learning_rate": 0.0002, "loss": 1.7084, "step": 223780 }, { "epoch": 0.91, "grad_norm": 2.537606716156006, "learning_rate": 0.0002, "loss": 1.7032, "step": 223790 }, { "epoch": 0.91, "grad_norm": 1.7660902738571167, "learning_rate": 0.0002, "loss": 1.4857, "step": 223800 }, { "epoch": 0.91, "grad_norm": 2.5741889476776123, "learning_rate": 0.0002, "loss": 1.4316, "step": 223810 }, { "epoch": 0.91, "grad_norm": 4.286808967590332, "learning_rate": 0.0002, "loss": 1.6713, "step": 223820 }, { "epoch": 0.91, "grad_norm": 4.537230968475342, "learning_rate": 0.0002, "loss": 1.5129, "step": 223830 }, { "epoch": 0.91, "grad_norm": 1.8692561388015747, "learning_rate": 0.0002, "loss": 1.5179, "step": 223840 }, { "epoch": 0.91, "grad_norm": 2.504031181335449, "learning_rate": 0.0002, "loss": 1.5964, "step": 223850 }, { "epoch": 0.91, "grad_norm": 3.2641897201538086, "learning_rate": 0.0002, "loss": 1.4799, "step": 223860 }, { "epoch": 0.91, "grad_norm": 2.9906067848205566, "learning_rate": 0.0002, "loss": 1.5319, "step": 223870 }, { "epoch": 0.91, "grad_norm": 3.115262269973755, "learning_rate": 0.0002, "loss": 1.7212, "step": 223880 }, { "epoch": 0.91, "grad_norm": 3.3854947090148926, "learning_rate": 0.0002, "loss": 1.6873, "step": 223890 }, { "epoch": 0.91, "grad_norm": 4.401646137237549, "learning_rate": 0.0002, "loss": 1.5111, "step": 223900 }, { "epoch": 0.91, "grad_norm": 2.4256865978240967, "learning_rate": 0.0002, "loss": 1.4579, "step": 223910 }, { "epoch": 0.91, "grad_norm": 3.2645225524902344, "learning_rate": 0.0002, "loss": 1.7503, "step": 223920 }, { "epoch": 0.91, "grad_norm": 1.6418713331222534, "learning_rate": 0.0002, "loss": 1.6486, "step": 223930 }, { "epoch": 0.91, "grad_norm": 5.042799472808838, "learning_rate": 0.0002, "loss": 1.4686, "step": 223940 }, { "epoch": 0.91, "grad_norm": 4.790139198303223, "learning_rate": 0.0002, "loss": 1.5155, "step": 223950 }, { "epoch": 0.91, "grad_norm": 6.01533317565918, "learning_rate": 0.0002, "loss": 1.4924, "step": 223960 }, { "epoch": 0.91, "grad_norm": 4.718958377838135, "learning_rate": 0.0002, "loss": 1.6458, "step": 223970 }, { "epoch": 0.91, "grad_norm": 3.0666394233703613, "learning_rate": 0.0002, "loss": 1.5423, "step": 223980 }, { "epoch": 0.91, "grad_norm": 3.0818357467651367, "learning_rate": 0.0002, "loss": 1.5842, "step": 223990 }, { "epoch": 0.91, "grad_norm": 4.391469955444336, "learning_rate": 0.0002, "loss": 1.5546, "step": 224000 }, { "epoch": 0.91, "grad_norm": 3.5908520221710205, "learning_rate": 0.0002, "loss": 1.636, "step": 224010 }, { "epoch": 0.91, "grad_norm": 2.804532289505005, "learning_rate": 0.0002, "loss": 1.5934, "step": 224020 }, { "epoch": 0.91, "grad_norm": 2.0909602642059326, "learning_rate": 0.0002, "loss": 1.6174, "step": 224030 }, { "epoch": 0.91, "grad_norm": 3.0945281982421875, "learning_rate": 0.0002, "loss": 1.5927, "step": 224040 }, { "epoch": 0.91, "grad_norm": 3.536606788635254, "learning_rate": 0.0002, "loss": 1.6442, "step": 224050 }, { "epoch": 0.91, "grad_norm": 2.7768757343292236, "learning_rate": 0.0002, "loss": 1.5537, "step": 224060 }, { "epoch": 0.91, "grad_norm": 4.101917743682861, "learning_rate": 0.0002, "loss": 1.5772, "step": 224070 }, { "epoch": 0.91, "grad_norm": 3.2531111240386963, "learning_rate": 0.0002, "loss": 1.6043, "step": 224080 }, { "epoch": 0.91, "grad_norm": 2.561382293701172, "learning_rate": 0.0002, "loss": 1.5028, "step": 224090 }, { "epoch": 0.91, "grad_norm": 3.6604275703430176, "learning_rate": 0.0002, "loss": 1.5434, "step": 224100 }, { "epoch": 0.91, "grad_norm": 2.447648525238037, "learning_rate": 0.0002, "loss": 1.5607, "step": 224110 }, { "epoch": 0.91, "grad_norm": 2.6115593910217285, "learning_rate": 0.0002, "loss": 1.6155, "step": 224120 }, { "epoch": 0.91, "grad_norm": 3.5103235244750977, "learning_rate": 0.0002, "loss": 1.7151, "step": 224130 }, { "epoch": 0.91, "grad_norm": 4.696992874145508, "learning_rate": 0.0002, "loss": 1.3865, "step": 224140 }, { "epoch": 0.91, "grad_norm": 3.2118544578552246, "learning_rate": 0.0002, "loss": 1.4904, "step": 224150 }, { "epoch": 0.91, "grad_norm": 4.563102722167969, "learning_rate": 0.0002, "loss": 1.3711, "step": 224160 }, { "epoch": 0.91, "grad_norm": 3.8978488445281982, "learning_rate": 0.0002, "loss": 1.4409, "step": 224170 }, { "epoch": 0.91, "grad_norm": 3.258859395980835, "learning_rate": 0.0002, "loss": 1.8117, "step": 224180 }, { "epoch": 0.91, "grad_norm": 3.920651435852051, "learning_rate": 0.0002, "loss": 1.6979, "step": 224190 }, { "epoch": 0.91, "grad_norm": 3.0478286743164062, "learning_rate": 0.0002, "loss": 1.4024, "step": 224200 }, { "epoch": 0.91, "grad_norm": 2.5524213314056396, "learning_rate": 0.0002, "loss": 1.4893, "step": 224210 }, { "epoch": 0.91, "grad_norm": 3.705310344696045, "learning_rate": 0.0002, "loss": 1.6452, "step": 224220 }, { "epoch": 0.91, "grad_norm": 1.972611904144287, "learning_rate": 0.0002, "loss": 1.6588, "step": 224230 }, { "epoch": 0.91, "grad_norm": 6.037472248077393, "learning_rate": 0.0002, "loss": 1.8079, "step": 224240 }, { "epoch": 0.91, "grad_norm": 2.7814865112304688, "learning_rate": 0.0002, "loss": 1.6433, "step": 224250 }, { "epoch": 0.91, "grad_norm": 3.974184036254883, "learning_rate": 0.0002, "loss": 1.5819, "step": 224260 }, { "epoch": 0.91, "grad_norm": 1.7678248882293701, "learning_rate": 0.0002, "loss": 1.6384, "step": 224270 }, { "epoch": 0.91, "grad_norm": 2.079411029815674, "learning_rate": 0.0002, "loss": 1.5493, "step": 224280 }, { "epoch": 0.91, "grad_norm": 3.1752238273620605, "learning_rate": 0.0002, "loss": 1.526, "step": 224290 }, { "epoch": 0.91, "grad_norm": 2.1715712547302246, "learning_rate": 0.0002, "loss": 1.5178, "step": 224300 }, { "epoch": 0.91, "grad_norm": 9.0551176071167, "learning_rate": 0.0002, "loss": 1.707, "step": 224310 }, { "epoch": 0.91, "grad_norm": 3.1787941455841064, "learning_rate": 0.0002, "loss": 1.5826, "step": 224320 }, { "epoch": 0.91, "grad_norm": 3.4448423385620117, "learning_rate": 0.0002, "loss": 1.5847, "step": 224330 }, { "epoch": 0.91, "grad_norm": 2.595982313156128, "learning_rate": 0.0002, "loss": 1.6161, "step": 224340 }, { "epoch": 0.91, "grad_norm": 3.2587831020355225, "learning_rate": 0.0002, "loss": 1.5218, "step": 224350 }, { "epoch": 0.91, "grad_norm": 1.7087594270706177, "learning_rate": 0.0002, "loss": 1.598, "step": 224360 }, { "epoch": 0.91, "grad_norm": 4.1743974685668945, "learning_rate": 0.0002, "loss": 1.9992, "step": 224370 }, { "epoch": 0.91, "grad_norm": 2.256618022918701, "learning_rate": 0.0002, "loss": 1.573, "step": 224380 }, { "epoch": 0.91, "grad_norm": 3.3204777240753174, "learning_rate": 0.0002, "loss": 1.53, "step": 224390 }, { "epoch": 0.91, "grad_norm": 2.7605032920837402, "learning_rate": 0.0002, "loss": 1.5788, "step": 224400 }, { "epoch": 0.91, "grad_norm": 4.7788190841674805, "learning_rate": 0.0002, "loss": 1.5838, "step": 224410 }, { "epoch": 0.91, "grad_norm": 1.5870932340621948, "learning_rate": 0.0002, "loss": 1.5184, "step": 224420 }, { "epoch": 0.91, "grad_norm": 3.2015912532806396, "learning_rate": 0.0002, "loss": 1.7925, "step": 224430 }, { "epoch": 0.91, "grad_norm": 2.8425188064575195, "learning_rate": 0.0002, "loss": 1.582, "step": 224440 }, { "epoch": 0.91, "grad_norm": 2.3040339946746826, "learning_rate": 0.0002, "loss": 1.924, "step": 224450 }, { "epoch": 0.91, "grad_norm": 3.069925308227539, "learning_rate": 0.0002, "loss": 1.4682, "step": 224460 }, { "epoch": 0.91, "grad_norm": 2.981861114501953, "learning_rate": 0.0002, "loss": 1.6041, "step": 224470 }, { "epoch": 0.91, "grad_norm": 4.225673198699951, "learning_rate": 0.0002, "loss": 1.7506, "step": 224480 }, { "epoch": 0.91, "grad_norm": 3.033310651779175, "learning_rate": 0.0002, "loss": 1.6022, "step": 224490 }, { "epoch": 0.91, "grad_norm": 2.8407349586486816, "learning_rate": 0.0002, "loss": 1.6193, "step": 224500 }, { "epoch": 0.91, "grad_norm": 3.4808170795440674, "learning_rate": 0.0002, "loss": 1.6021, "step": 224510 }, { "epoch": 0.91, "grad_norm": 2.0634608268737793, "learning_rate": 0.0002, "loss": 1.6695, "step": 224520 }, { "epoch": 0.91, "grad_norm": 3.2606420516967773, "learning_rate": 0.0002, "loss": 1.5094, "step": 224530 }, { "epoch": 0.91, "grad_norm": 2.5121090412139893, "learning_rate": 0.0002, "loss": 1.4588, "step": 224540 }, { "epoch": 0.91, "grad_norm": 4.002740383148193, "learning_rate": 0.0002, "loss": 1.5902, "step": 224550 }, { "epoch": 0.91, "grad_norm": 2.7013869285583496, "learning_rate": 0.0002, "loss": 1.6059, "step": 224560 }, { "epoch": 0.91, "grad_norm": 2.471554756164551, "learning_rate": 0.0002, "loss": 1.6645, "step": 224570 }, { "epoch": 0.91, "grad_norm": 3.650576591491699, "learning_rate": 0.0002, "loss": 1.8165, "step": 224580 }, { "epoch": 0.91, "grad_norm": 2.2018754482269287, "learning_rate": 0.0002, "loss": 1.595, "step": 224590 }, { "epoch": 0.91, "grad_norm": 2.0263895988464355, "learning_rate": 0.0002, "loss": 1.5212, "step": 224600 }, { "epoch": 0.91, "grad_norm": 2.198812246322632, "learning_rate": 0.0002, "loss": 1.2985, "step": 224610 }, { "epoch": 0.91, "grad_norm": 2.907975912094116, "learning_rate": 0.0002, "loss": 1.8441, "step": 224620 }, { "epoch": 0.91, "grad_norm": 2.324582815170288, "learning_rate": 0.0002, "loss": 1.4251, "step": 224630 }, { "epoch": 0.91, "grad_norm": 1.3481346368789673, "learning_rate": 0.0002, "loss": 1.5352, "step": 224640 }, { "epoch": 0.91, "grad_norm": 4.67061710357666, "learning_rate": 0.0002, "loss": 1.5214, "step": 224650 }, { "epoch": 0.91, "grad_norm": 2.2854020595550537, "learning_rate": 0.0002, "loss": 1.6256, "step": 224660 }, { "epoch": 0.91, "grad_norm": 3.4138896465301514, "learning_rate": 0.0002, "loss": 1.683, "step": 224670 }, { "epoch": 0.91, "grad_norm": 2.631852388381958, "learning_rate": 0.0002, "loss": 1.4814, "step": 224680 }, { "epoch": 0.91, "grad_norm": 3.867886543273926, "learning_rate": 0.0002, "loss": 1.5934, "step": 224690 }, { "epoch": 0.91, "grad_norm": 2.444089889526367, "learning_rate": 0.0002, "loss": 1.5602, "step": 224700 }, { "epoch": 0.91, "grad_norm": 3.510321617126465, "learning_rate": 0.0002, "loss": 1.9278, "step": 224710 }, { "epoch": 0.91, "grad_norm": 2.637063980102539, "learning_rate": 0.0002, "loss": 1.3771, "step": 224720 }, { "epoch": 0.91, "grad_norm": 4.150793552398682, "learning_rate": 0.0002, "loss": 1.5905, "step": 224730 }, { "epoch": 0.91, "grad_norm": 4.705293655395508, "learning_rate": 0.0002, "loss": 1.5444, "step": 224740 }, { "epoch": 0.91, "grad_norm": 3.1794657707214355, "learning_rate": 0.0002, "loss": 1.735, "step": 224750 }, { "epoch": 0.91, "grad_norm": 3.199246406555176, "learning_rate": 0.0002, "loss": 1.6991, "step": 224760 }, { "epoch": 0.92, "grad_norm": 2.401400327682495, "learning_rate": 0.0002, "loss": 1.7333, "step": 224770 }, { "epoch": 0.92, "grad_norm": 2.2156996726989746, "learning_rate": 0.0002, "loss": 1.6105, "step": 224780 }, { "epoch": 0.92, "grad_norm": 3.9231748580932617, "learning_rate": 0.0002, "loss": 1.8751, "step": 224790 }, { "epoch": 0.92, "grad_norm": 3.794231653213501, "learning_rate": 0.0002, "loss": 1.6416, "step": 224800 }, { "epoch": 0.92, "grad_norm": 4.817555904388428, "learning_rate": 0.0002, "loss": 1.5478, "step": 224810 }, { "epoch": 0.92, "grad_norm": 2.3802099227905273, "learning_rate": 0.0002, "loss": 1.7482, "step": 224820 }, { "epoch": 0.92, "grad_norm": 3.497535228729248, "learning_rate": 0.0002, "loss": 1.5658, "step": 224830 }, { "epoch": 0.92, "grad_norm": 2.7923734188079834, "learning_rate": 0.0002, "loss": 1.6796, "step": 224840 }, { "epoch": 0.92, "grad_norm": 3.6520702838897705, "learning_rate": 0.0002, "loss": 1.6329, "step": 224850 }, { "epoch": 0.92, "grad_norm": 4.263904571533203, "learning_rate": 0.0002, "loss": 1.5151, "step": 224860 }, { "epoch": 0.92, "grad_norm": 2.7618982791900635, "learning_rate": 0.0002, "loss": 1.8617, "step": 224870 }, { "epoch": 0.92, "grad_norm": 3.793882131576538, "learning_rate": 0.0002, "loss": 1.5529, "step": 224880 }, { "epoch": 0.92, "grad_norm": 1.9857033491134644, "learning_rate": 0.0002, "loss": 1.6558, "step": 224890 }, { "epoch": 0.92, "grad_norm": 2.979196786880493, "learning_rate": 0.0002, "loss": 1.8577, "step": 224900 }, { "epoch": 0.92, "grad_norm": 1.9903433322906494, "learning_rate": 0.0002, "loss": 1.2544, "step": 224910 }, { "epoch": 0.92, "grad_norm": 3.2960453033447266, "learning_rate": 0.0002, "loss": 1.7221, "step": 224920 }, { "epoch": 0.92, "grad_norm": 3.435347080230713, "learning_rate": 0.0002, "loss": 1.4282, "step": 224930 }, { "epoch": 0.92, "grad_norm": 3.043972969055176, "learning_rate": 0.0002, "loss": 1.6005, "step": 224940 }, { "epoch": 0.92, "grad_norm": 4.274830341339111, "learning_rate": 0.0002, "loss": 1.3586, "step": 224950 }, { "epoch": 0.92, "grad_norm": 3.5846283435821533, "learning_rate": 0.0002, "loss": 1.7429, "step": 224960 }, { "epoch": 0.92, "grad_norm": 1.4860409498214722, "learning_rate": 0.0002, "loss": 1.7097, "step": 224970 }, { "epoch": 0.92, "grad_norm": 2.600799083709717, "learning_rate": 0.0002, "loss": 1.5223, "step": 224980 }, { "epoch": 0.92, "grad_norm": 4.129011631011963, "learning_rate": 0.0002, "loss": 1.2905, "step": 224990 }, { "epoch": 0.92, "grad_norm": 2.829547166824341, "learning_rate": 0.0002, "loss": 1.7069, "step": 225000 }, { "epoch": 0.92, "grad_norm": 4.114347457885742, "learning_rate": 0.0002, "loss": 1.44, "step": 225010 }, { "epoch": 0.92, "grad_norm": 2.7934110164642334, "learning_rate": 0.0002, "loss": 1.616, "step": 225020 }, { "epoch": 0.92, "grad_norm": 2.772120952606201, "learning_rate": 0.0002, "loss": 1.6188, "step": 225030 }, { "epoch": 0.92, "grad_norm": 3.0073466300964355, "learning_rate": 0.0002, "loss": 1.6022, "step": 225040 }, { "epoch": 0.92, "grad_norm": 2.894348621368408, "learning_rate": 0.0002, "loss": 1.5325, "step": 225050 }, { "epoch": 0.92, "grad_norm": 3.38297176361084, "learning_rate": 0.0002, "loss": 1.5644, "step": 225060 }, { "epoch": 0.92, "grad_norm": 2.2882423400878906, "learning_rate": 0.0002, "loss": 1.7503, "step": 225070 }, { "epoch": 0.92, "grad_norm": 3.096653938293457, "learning_rate": 0.0002, "loss": 1.5401, "step": 225080 }, { "epoch": 0.92, "grad_norm": 2.984355926513672, "learning_rate": 0.0002, "loss": 1.4462, "step": 225090 }, { "epoch": 0.92, "grad_norm": 2.978148937225342, "learning_rate": 0.0002, "loss": 1.5929, "step": 225100 }, { "epoch": 0.92, "grad_norm": 2.492243766784668, "learning_rate": 0.0002, "loss": 1.7623, "step": 225110 }, { "epoch": 0.92, "grad_norm": 2.331977605819702, "learning_rate": 0.0002, "loss": 1.9576, "step": 225120 }, { "epoch": 0.92, "grad_norm": 2.4925756454467773, "learning_rate": 0.0002, "loss": 1.6397, "step": 225130 }, { "epoch": 0.92, "grad_norm": 7.23982572555542, "learning_rate": 0.0002, "loss": 1.4846, "step": 225140 }, { "epoch": 0.92, "grad_norm": 2.6628410816192627, "learning_rate": 0.0002, "loss": 1.8511, "step": 225150 }, { "epoch": 0.92, "grad_norm": 2.8698055744171143, "learning_rate": 0.0002, "loss": 1.4803, "step": 225160 }, { "epoch": 0.92, "grad_norm": 2.4187450408935547, "learning_rate": 0.0002, "loss": 1.6064, "step": 225170 }, { "epoch": 0.92, "grad_norm": 3.1672449111938477, "learning_rate": 0.0002, "loss": 1.4909, "step": 225180 }, { "epoch": 0.92, "grad_norm": 6.701659202575684, "learning_rate": 0.0002, "loss": 1.5754, "step": 225190 }, { "epoch": 0.92, "grad_norm": 2.9739956855773926, "learning_rate": 0.0002, "loss": 1.5343, "step": 225200 }, { "epoch": 0.92, "grad_norm": 2.867605686187744, "learning_rate": 0.0002, "loss": 1.7219, "step": 225210 }, { "epoch": 0.92, "grad_norm": 3.811717987060547, "learning_rate": 0.0002, "loss": 1.6465, "step": 225220 }, { "epoch": 0.92, "grad_norm": 2.9107682704925537, "learning_rate": 0.0002, "loss": 1.7119, "step": 225230 }, { "epoch": 0.92, "grad_norm": 6.310198783874512, "learning_rate": 0.0002, "loss": 1.891, "step": 225240 }, { "epoch": 0.92, "grad_norm": 2.0256752967834473, "learning_rate": 0.0002, "loss": 1.7507, "step": 225250 }, { "epoch": 0.92, "grad_norm": 2.7347617149353027, "learning_rate": 0.0002, "loss": 1.6877, "step": 225260 }, { "epoch": 0.92, "grad_norm": 2.1654489040374756, "learning_rate": 0.0002, "loss": 1.6281, "step": 225270 }, { "epoch": 0.92, "grad_norm": 2.467890977859497, "learning_rate": 0.0002, "loss": 1.5952, "step": 225280 }, { "epoch": 0.92, "grad_norm": 3.9622604846954346, "learning_rate": 0.0002, "loss": 1.6925, "step": 225290 }, { "epoch": 0.92, "grad_norm": 6.6077961921691895, "learning_rate": 0.0002, "loss": 1.5142, "step": 225300 }, { "epoch": 0.92, "grad_norm": 2.4186019897460938, "learning_rate": 0.0002, "loss": 1.6287, "step": 225310 }, { "epoch": 0.92, "grad_norm": 5.6981658935546875, "learning_rate": 0.0002, "loss": 1.6202, "step": 225320 }, { "epoch": 0.92, "grad_norm": 1.3077389001846313, "learning_rate": 0.0002, "loss": 1.5261, "step": 225330 }, { "epoch": 0.92, "grad_norm": 2.570493698120117, "learning_rate": 0.0002, "loss": 1.7444, "step": 225340 }, { "epoch": 0.92, "grad_norm": 3.3415067195892334, "learning_rate": 0.0002, "loss": 1.6918, "step": 225350 }, { "epoch": 0.92, "grad_norm": 2.326788902282715, "learning_rate": 0.0002, "loss": 1.5932, "step": 225360 }, { "epoch": 0.92, "grad_norm": 3.3453400135040283, "learning_rate": 0.0002, "loss": 1.7763, "step": 225370 }, { "epoch": 0.92, "grad_norm": 4.600463390350342, "learning_rate": 0.0002, "loss": 1.5445, "step": 225380 }, { "epoch": 0.92, "grad_norm": 4.495297431945801, "learning_rate": 0.0002, "loss": 1.3894, "step": 225390 }, { "epoch": 0.92, "grad_norm": 3.4480888843536377, "learning_rate": 0.0002, "loss": 1.6258, "step": 225400 }, { "epoch": 0.92, "grad_norm": 2.9511709213256836, "learning_rate": 0.0002, "loss": 1.5608, "step": 225410 }, { "epoch": 0.92, "grad_norm": 2.5243310928344727, "learning_rate": 0.0002, "loss": 1.4523, "step": 225420 }, { "epoch": 0.92, "grad_norm": 4.509182453155518, "learning_rate": 0.0002, "loss": 1.6068, "step": 225430 }, { "epoch": 0.92, "grad_norm": 3.8519537448883057, "learning_rate": 0.0002, "loss": 1.3394, "step": 225440 }, { "epoch": 0.92, "grad_norm": 2.788576602935791, "learning_rate": 0.0002, "loss": 1.6732, "step": 225450 }, { "epoch": 0.92, "grad_norm": 3.2632319927215576, "learning_rate": 0.0002, "loss": 1.5621, "step": 225460 }, { "epoch": 0.92, "grad_norm": 2.679201602935791, "learning_rate": 0.0002, "loss": 1.5583, "step": 225470 }, { "epoch": 0.92, "grad_norm": 3.244222402572632, "learning_rate": 0.0002, "loss": 1.6243, "step": 225480 }, { "epoch": 0.92, "grad_norm": 1.9854519367218018, "learning_rate": 0.0002, "loss": 1.593, "step": 225490 }, { "epoch": 0.92, "grad_norm": 2.5809719562530518, "learning_rate": 0.0002, "loss": 1.5744, "step": 225500 }, { "epoch": 0.92, "grad_norm": 3.7204363346099854, "learning_rate": 0.0002, "loss": 1.6172, "step": 225510 }, { "epoch": 0.92, "grad_norm": 5.264348030090332, "learning_rate": 0.0002, "loss": 1.6507, "step": 225520 }, { "epoch": 0.92, "grad_norm": 4.014188766479492, "learning_rate": 0.0002, "loss": 1.4569, "step": 225530 }, { "epoch": 0.92, "grad_norm": 4.089007377624512, "learning_rate": 0.0002, "loss": 1.7314, "step": 225540 }, { "epoch": 0.92, "grad_norm": 2.6870486736297607, "learning_rate": 0.0002, "loss": 1.5141, "step": 225550 }, { "epoch": 0.92, "grad_norm": 2.0025129318237305, "learning_rate": 0.0002, "loss": 1.8206, "step": 225560 }, { "epoch": 0.92, "grad_norm": 2.337801218032837, "learning_rate": 0.0002, "loss": 1.6197, "step": 225570 }, { "epoch": 0.92, "grad_norm": 2.72538161277771, "learning_rate": 0.0002, "loss": 1.558, "step": 225580 }, { "epoch": 0.92, "grad_norm": 2.914991617202759, "learning_rate": 0.0002, "loss": 1.6218, "step": 225590 }, { "epoch": 0.92, "grad_norm": 3.801180362701416, "learning_rate": 0.0002, "loss": 1.7205, "step": 225600 }, { "epoch": 0.92, "grad_norm": 3.2754766941070557, "learning_rate": 0.0002, "loss": 1.833, "step": 225610 }, { "epoch": 0.92, "grad_norm": 3.8776345252990723, "learning_rate": 0.0002, "loss": 1.6626, "step": 225620 }, { "epoch": 0.92, "grad_norm": 3.3862948417663574, "learning_rate": 0.0002, "loss": 1.5012, "step": 225630 }, { "epoch": 0.92, "grad_norm": 3.1601850986480713, "learning_rate": 0.0002, "loss": 1.8198, "step": 225640 }, { "epoch": 0.92, "grad_norm": 3.229253053665161, "learning_rate": 0.0002, "loss": 1.5998, "step": 225650 }, { "epoch": 0.92, "grad_norm": 5.568780899047852, "learning_rate": 0.0002, "loss": 1.4358, "step": 225660 }, { "epoch": 0.92, "grad_norm": 3.619676113128662, "learning_rate": 0.0002, "loss": 1.6099, "step": 225670 }, { "epoch": 0.92, "grad_norm": 2.638887643814087, "learning_rate": 0.0002, "loss": 1.4268, "step": 225680 }, { "epoch": 0.92, "grad_norm": 2.9622092247009277, "learning_rate": 0.0002, "loss": 1.4889, "step": 225690 }, { "epoch": 0.92, "grad_norm": 3.055997133255005, "learning_rate": 0.0002, "loss": 1.6002, "step": 225700 }, { "epoch": 0.92, "grad_norm": 3.440997362136841, "learning_rate": 0.0002, "loss": 1.8612, "step": 225710 }, { "epoch": 0.92, "grad_norm": 2.715017557144165, "learning_rate": 0.0002, "loss": 1.7165, "step": 225720 }, { "epoch": 0.92, "grad_norm": 4.783120155334473, "learning_rate": 0.0002, "loss": 1.5976, "step": 225730 }, { "epoch": 0.92, "grad_norm": 1.9581284523010254, "learning_rate": 0.0002, "loss": 1.5151, "step": 225740 }, { "epoch": 0.92, "grad_norm": 2.3039088249206543, "learning_rate": 0.0002, "loss": 1.7445, "step": 225750 }, { "epoch": 0.92, "grad_norm": 2.9244627952575684, "learning_rate": 0.0002, "loss": 1.5394, "step": 225760 }, { "epoch": 0.92, "grad_norm": 2.260265350341797, "learning_rate": 0.0002, "loss": 1.5679, "step": 225770 }, { "epoch": 0.92, "grad_norm": 4.074460983276367, "learning_rate": 0.0002, "loss": 1.7638, "step": 225780 }, { "epoch": 0.92, "grad_norm": 1.5231475830078125, "learning_rate": 0.0002, "loss": 1.5287, "step": 225790 }, { "epoch": 0.92, "grad_norm": 2.982222318649292, "learning_rate": 0.0002, "loss": 1.6676, "step": 225800 }, { "epoch": 0.92, "grad_norm": 3.149132251739502, "learning_rate": 0.0002, "loss": 1.6612, "step": 225810 }, { "epoch": 0.92, "grad_norm": 5.380035400390625, "learning_rate": 0.0002, "loss": 1.5329, "step": 225820 }, { "epoch": 0.92, "grad_norm": 2.556708335876465, "learning_rate": 0.0002, "loss": 1.3371, "step": 225830 }, { "epoch": 0.92, "grad_norm": 3.1319751739501953, "learning_rate": 0.0002, "loss": 1.8531, "step": 225840 }, { "epoch": 0.92, "grad_norm": 2.6488420963287354, "learning_rate": 0.0002, "loss": 1.4527, "step": 225850 }, { "epoch": 0.92, "grad_norm": 3.6790170669555664, "learning_rate": 0.0002, "loss": 1.5744, "step": 225860 }, { "epoch": 0.92, "grad_norm": 2.3287129402160645, "learning_rate": 0.0002, "loss": 1.7181, "step": 225870 }, { "epoch": 0.92, "grad_norm": 3.378556489944458, "learning_rate": 0.0002, "loss": 1.6019, "step": 225880 }, { "epoch": 0.92, "grad_norm": 2.4311535358428955, "learning_rate": 0.0002, "loss": 1.5928, "step": 225890 }, { "epoch": 0.92, "grad_norm": 2.3409552574157715, "learning_rate": 0.0002, "loss": 1.791, "step": 225900 }, { "epoch": 0.92, "grad_norm": 2.4592294692993164, "learning_rate": 0.0002, "loss": 1.3455, "step": 225910 }, { "epoch": 0.92, "grad_norm": 3.265439033508301, "learning_rate": 0.0002, "loss": 1.5145, "step": 225920 }, { "epoch": 0.92, "grad_norm": 2.1899592876434326, "learning_rate": 0.0002, "loss": 1.3738, "step": 225930 }, { "epoch": 0.92, "grad_norm": 2.2166335582733154, "learning_rate": 0.0002, "loss": 1.7738, "step": 225940 }, { "epoch": 0.92, "grad_norm": 3.422696828842163, "learning_rate": 0.0002, "loss": 1.5427, "step": 225950 }, { "epoch": 0.92, "grad_norm": 3.4243996143341064, "learning_rate": 0.0002, "loss": 1.4149, "step": 225960 }, { "epoch": 0.92, "grad_norm": 2.411928653717041, "learning_rate": 0.0002, "loss": 1.6421, "step": 225970 }, { "epoch": 0.92, "grad_norm": 4.438014030456543, "learning_rate": 0.0002, "loss": 1.5609, "step": 225980 }, { "epoch": 0.92, "grad_norm": 2.843738555908203, "learning_rate": 0.0002, "loss": 1.5003, "step": 225990 }, { "epoch": 0.92, "grad_norm": 2.993281602859497, "learning_rate": 0.0002, "loss": 1.4696, "step": 226000 }, { "epoch": 0.92, "grad_norm": 4.143527507781982, "learning_rate": 0.0002, "loss": 1.8217, "step": 226010 }, { "epoch": 0.92, "grad_norm": 1.9072675704956055, "learning_rate": 0.0002, "loss": 1.4382, "step": 226020 }, { "epoch": 0.92, "grad_norm": 3.8359286785125732, "learning_rate": 0.0002, "loss": 1.5006, "step": 226030 }, { "epoch": 0.92, "grad_norm": 2.1609561443328857, "learning_rate": 0.0002, "loss": 1.9082, "step": 226040 }, { "epoch": 0.92, "grad_norm": 3.2556588649749756, "learning_rate": 0.0002, "loss": 1.5706, "step": 226050 }, { "epoch": 0.92, "grad_norm": 4.838181972503662, "learning_rate": 0.0002, "loss": 1.525, "step": 226060 }, { "epoch": 0.92, "grad_norm": 3.7630558013916016, "learning_rate": 0.0002, "loss": 1.7165, "step": 226070 }, { "epoch": 0.92, "grad_norm": 1.799187183380127, "learning_rate": 0.0002, "loss": 1.661, "step": 226080 }, { "epoch": 0.92, "grad_norm": 2.1862423419952393, "learning_rate": 0.0002, "loss": 1.5685, "step": 226090 }, { "epoch": 0.92, "grad_norm": 4.519053936004639, "learning_rate": 0.0002, "loss": 1.5081, "step": 226100 }, { "epoch": 0.92, "grad_norm": 2.658848285675049, "learning_rate": 0.0002, "loss": 1.695, "step": 226110 }, { "epoch": 0.92, "grad_norm": 4.091358184814453, "learning_rate": 0.0002, "loss": 1.704, "step": 226120 }, { "epoch": 0.92, "grad_norm": 3.542555570602417, "learning_rate": 0.0002, "loss": 1.6528, "step": 226130 }, { "epoch": 0.92, "grad_norm": 3.355872631072998, "learning_rate": 0.0002, "loss": 1.5991, "step": 226140 }, { "epoch": 0.92, "grad_norm": 2.585564136505127, "learning_rate": 0.0002, "loss": 1.6419, "step": 226150 }, { "epoch": 0.92, "grad_norm": 4.1026740074157715, "learning_rate": 0.0002, "loss": 1.7254, "step": 226160 }, { "epoch": 0.92, "grad_norm": 2.2287023067474365, "learning_rate": 0.0002, "loss": 1.5805, "step": 226170 }, { "epoch": 0.92, "grad_norm": 3.8311004638671875, "learning_rate": 0.0002, "loss": 1.4967, "step": 226180 }, { "epoch": 0.92, "grad_norm": 2.2196662425994873, "learning_rate": 0.0002, "loss": 1.6682, "step": 226190 }, { "epoch": 0.92, "grad_norm": 2.511322259902954, "learning_rate": 0.0002, "loss": 1.7845, "step": 226200 }, { "epoch": 0.92, "grad_norm": 3.983685255050659, "learning_rate": 0.0002, "loss": 1.5449, "step": 226210 }, { "epoch": 0.92, "grad_norm": 2.0650384426116943, "learning_rate": 0.0002, "loss": 1.5964, "step": 226220 }, { "epoch": 0.92, "grad_norm": 3.1446473598480225, "learning_rate": 0.0002, "loss": 1.8872, "step": 226230 }, { "epoch": 0.92, "grad_norm": 2.865166664123535, "learning_rate": 0.0002, "loss": 1.6799, "step": 226240 }, { "epoch": 0.92, "grad_norm": 2.5827159881591797, "learning_rate": 0.0002, "loss": 1.6516, "step": 226250 }, { "epoch": 0.92, "grad_norm": 2.5472097396850586, "learning_rate": 0.0002, "loss": 1.5699, "step": 226260 }, { "epoch": 0.92, "grad_norm": 3.140397310256958, "learning_rate": 0.0002, "loss": 1.6881, "step": 226270 }, { "epoch": 0.92, "grad_norm": 2.522597551345825, "learning_rate": 0.0002, "loss": 1.3372, "step": 226280 }, { "epoch": 0.92, "grad_norm": 1.934754490852356, "learning_rate": 0.0002, "loss": 1.4927, "step": 226290 }, { "epoch": 0.92, "grad_norm": 3.075166702270508, "learning_rate": 0.0002, "loss": 1.3856, "step": 226300 }, { "epoch": 0.92, "grad_norm": 2.992293119430542, "learning_rate": 0.0002, "loss": 1.5759, "step": 226310 }, { "epoch": 0.92, "grad_norm": 2.887178659439087, "learning_rate": 0.0002, "loss": 1.3912, "step": 226320 }, { "epoch": 0.92, "grad_norm": 2.1930906772613525, "learning_rate": 0.0002, "loss": 1.3823, "step": 226330 }, { "epoch": 0.92, "grad_norm": 2.683854818344116, "learning_rate": 0.0002, "loss": 1.4034, "step": 226340 }, { "epoch": 0.92, "grad_norm": 4.2730865478515625, "learning_rate": 0.0002, "loss": 1.6505, "step": 226350 }, { "epoch": 0.92, "grad_norm": 2.2135305404663086, "learning_rate": 0.0002, "loss": 1.6116, "step": 226360 }, { "epoch": 0.92, "grad_norm": 7.706422805786133, "learning_rate": 0.0002, "loss": 1.5032, "step": 226370 }, { "epoch": 0.92, "grad_norm": 4.025771617889404, "learning_rate": 0.0002, "loss": 1.8174, "step": 226380 }, { "epoch": 0.92, "grad_norm": 3.2174978256225586, "learning_rate": 0.0002, "loss": 1.6823, "step": 226390 }, { "epoch": 0.92, "grad_norm": 3.871750831604004, "learning_rate": 0.0002, "loss": 1.3608, "step": 226400 }, { "epoch": 0.92, "grad_norm": 2.9244208335876465, "learning_rate": 0.0002, "loss": 1.6694, "step": 226410 }, { "epoch": 0.92, "grad_norm": 2.337928056716919, "learning_rate": 0.0002, "loss": 1.6249, "step": 226420 }, { "epoch": 0.92, "grad_norm": 1.8178973197937012, "learning_rate": 0.0002, "loss": 1.4001, "step": 226430 }, { "epoch": 0.92, "grad_norm": 2.4274134635925293, "learning_rate": 0.0002, "loss": 1.6367, "step": 226440 }, { "epoch": 0.92, "grad_norm": 2.4259486198425293, "learning_rate": 0.0002, "loss": 1.7407, "step": 226450 }, { "epoch": 0.92, "grad_norm": 2.6485564708709717, "learning_rate": 0.0002, "loss": 1.7815, "step": 226460 }, { "epoch": 0.92, "grad_norm": 2.9848241806030273, "learning_rate": 0.0002, "loss": 1.5157, "step": 226470 }, { "epoch": 0.92, "grad_norm": 3.3455440998077393, "learning_rate": 0.0002, "loss": 1.5966, "step": 226480 }, { "epoch": 0.92, "grad_norm": 1.9181694984436035, "learning_rate": 0.0002, "loss": 1.6573, "step": 226490 }, { "epoch": 0.92, "grad_norm": 4.084582328796387, "learning_rate": 0.0002, "loss": 1.5012, "step": 226500 }, { "epoch": 0.92, "grad_norm": 3.3960225582122803, "learning_rate": 0.0002, "loss": 1.4766, "step": 226510 }, { "epoch": 0.92, "grad_norm": 2.3978512287139893, "learning_rate": 0.0002, "loss": 1.621, "step": 226520 }, { "epoch": 0.92, "grad_norm": 3.66373872756958, "learning_rate": 0.0002, "loss": 1.6238, "step": 226530 }, { "epoch": 0.92, "grad_norm": 3.4414308071136475, "learning_rate": 0.0002, "loss": 1.4298, "step": 226540 }, { "epoch": 0.92, "grad_norm": 2.1780459880828857, "learning_rate": 0.0002, "loss": 1.4715, "step": 226550 }, { "epoch": 0.92, "grad_norm": 2.2956159114837646, "learning_rate": 0.0002, "loss": 1.5822, "step": 226560 }, { "epoch": 0.92, "grad_norm": 3.0597546100616455, "learning_rate": 0.0002, "loss": 1.5654, "step": 226570 }, { "epoch": 0.92, "grad_norm": 6.371210098266602, "learning_rate": 0.0002, "loss": 1.8017, "step": 226580 }, { "epoch": 0.92, "grad_norm": 2.8529160022735596, "learning_rate": 0.0002, "loss": 1.7521, "step": 226590 }, { "epoch": 0.92, "grad_norm": 2.083690881729126, "learning_rate": 0.0002, "loss": 1.5762, "step": 226600 }, { "epoch": 0.92, "grad_norm": 2.6515233516693115, "learning_rate": 0.0002, "loss": 1.6451, "step": 226610 }, { "epoch": 0.92, "grad_norm": 2.0885419845581055, "learning_rate": 0.0002, "loss": 1.5786, "step": 226620 }, { "epoch": 0.92, "grad_norm": 1.3883461952209473, "learning_rate": 0.0002, "loss": 1.8069, "step": 226630 }, { "epoch": 0.92, "grad_norm": 2.8035728931427, "learning_rate": 0.0002, "loss": 1.6301, "step": 226640 }, { "epoch": 0.92, "grad_norm": 8.854748725891113, "learning_rate": 0.0002, "loss": 1.5258, "step": 226650 }, { "epoch": 0.92, "grad_norm": 3.4786300659179688, "learning_rate": 0.0002, "loss": 1.8686, "step": 226660 }, { "epoch": 0.92, "grad_norm": 4.065461158752441, "learning_rate": 0.0002, "loss": 1.4837, "step": 226670 }, { "epoch": 0.92, "grad_norm": 4.651946067810059, "learning_rate": 0.0002, "loss": 1.4153, "step": 226680 }, { "epoch": 0.92, "grad_norm": 3.6132304668426514, "learning_rate": 0.0002, "loss": 1.6898, "step": 226690 }, { "epoch": 0.92, "grad_norm": 2.3636839389801025, "learning_rate": 0.0002, "loss": 1.702, "step": 226700 }, { "epoch": 0.92, "grad_norm": 3.2129437923431396, "learning_rate": 0.0002, "loss": 1.5226, "step": 226710 }, { "epoch": 0.92, "grad_norm": 2.390465259552002, "learning_rate": 0.0002, "loss": 1.6749, "step": 226720 }, { "epoch": 0.92, "grad_norm": 1.8214858770370483, "learning_rate": 0.0002, "loss": 1.4859, "step": 226730 }, { "epoch": 0.92, "grad_norm": 3.0903072357177734, "learning_rate": 0.0002, "loss": 1.6512, "step": 226740 }, { "epoch": 0.92, "grad_norm": 2.6384711265563965, "learning_rate": 0.0002, "loss": 1.5508, "step": 226750 }, { "epoch": 0.92, "grad_norm": 3.3619790077209473, "learning_rate": 0.0002, "loss": 1.8221, "step": 226760 }, { "epoch": 0.92, "grad_norm": 6.765284538269043, "learning_rate": 0.0002, "loss": 1.6941, "step": 226770 }, { "epoch": 0.92, "grad_norm": 2.6226346492767334, "learning_rate": 0.0002, "loss": 1.354, "step": 226780 }, { "epoch": 0.92, "grad_norm": 2.832956075668335, "learning_rate": 0.0002, "loss": 1.6302, "step": 226790 }, { "epoch": 0.92, "grad_norm": 3.3907461166381836, "learning_rate": 0.0002, "loss": 1.3302, "step": 226800 }, { "epoch": 0.92, "grad_norm": 2.332443952560425, "learning_rate": 0.0002, "loss": 1.8055, "step": 226810 }, { "epoch": 0.92, "grad_norm": 3.8545241355895996, "learning_rate": 0.0002, "loss": 1.4229, "step": 226820 }, { "epoch": 0.92, "grad_norm": 3.6360580921173096, "learning_rate": 0.0002, "loss": 1.5449, "step": 226830 }, { "epoch": 0.92, "grad_norm": 2.6160004138946533, "learning_rate": 0.0002, "loss": 1.4476, "step": 226840 }, { "epoch": 0.92, "grad_norm": 5.621588230133057, "learning_rate": 0.0002, "loss": 1.5421, "step": 226850 }, { "epoch": 0.92, "grad_norm": 3.4218928813934326, "learning_rate": 0.0002, "loss": 1.5732, "step": 226860 }, { "epoch": 0.92, "grad_norm": 1.6250362396240234, "learning_rate": 0.0002, "loss": 1.6356, "step": 226870 }, { "epoch": 0.92, "grad_norm": 2.641028881072998, "learning_rate": 0.0002, "loss": 1.4287, "step": 226880 }, { "epoch": 0.92, "grad_norm": 3.478070020675659, "learning_rate": 0.0002, "loss": 1.7887, "step": 226890 }, { "epoch": 0.92, "grad_norm": 1.8380006551742554, "learning_rate": 0.0002, "loss": 1.5823, "step": 226900 }, { "epoch": 0.92, "grad_norm": 5.042342185974121, "learning_rate": 0.0002, "loss": 1.4051, "step": 226910 }, { "epoch": 0.92, "grad_norm": 4.954797267913818, "learning_rate": 0.0002, "loss": 1.5746, "step": 226920 }, { "epoch": 0.92, "grad_norm": 3.5813019275665283, "learning_rate": 0.0002, "loss": 1.4185, "step": 226930 }, { "epoch": 0.92, "grad_norm": 2.8056983947753906, "learning_rate": 0.0002, "loss": 1.6077, "step": 226940 }, { "epoch": 0.92, "grad_norm": 2.188131332397461, "learning_rate": 0.0002, "loss": 1.5751, "step": 226950 }, { "epoch": 0.92, "grad_norm": 3.44838547706604, "learning_rate": 0.0002, "loss": 1.442, "step": 226960 }, { "epoch": 0.92, "grad_norm": 2.521444082260132, "learning_rate": 0.0002, "loss": 1.6131, "step": 226970 }, { "epoch": 0.92, "grad_norm": 1.6763511896133423, "learning_rate": 0.0002, "loss": 1.5072, "step": 226980 }, { "epoch": 0.92, "grad_norm": 2.775317907333374, "learning_rate": 0.0002, "loss": 1.6317, "step": 226990 }, { "epoch": 0.92, "grad_norm": 4.367018699645996, "learning_rate": 0.0002, "loss": 1.6056, "step": 227000 }, { "epoch": 0.92, "grad_norm": 4.239157199859619, "learning_rate": 0.0002, "loss": 1.6227, "step": 227010 }, { "epoch": 0.92, "grad_norm": 2.111677408218384, "learning_rate": 0.0002, "loss": 1.5747, "step": 227020 }, { "epoch": 0.92, "grad_norm": 3.5570170879364014, "learning_rate": 0.0002, "loss": 1.7396, "step": 227030 }, { "epoch": 0.92, "grad_norm": 4.889623641967773, "learning_rate": 0.0002, "loss": 1.4195, "step": 227040 }, { "epoch": 0.92, "grad_norm": 4.614323616027832, "learning_rate": 0.0002, "loss": 1.5116, "step": 227050 }, { "epoch": 0.92, "grad_norm": 1.722371220588684, "learning_rate": 0.0002, "loss": 1.5174, "step": 227060 }, { "epoch": 0.92, "grad_norm": 3.976651191711426, "learning_rate": 0.0002, "loss": 1.5936, "step": 227070 }, { "epoch": 0.92, "grad_norm": 3.037036895751953, "learning_rate": 0.0002, "loss": 1.7521, "step": 227080 }, { "epoch": 0.92, "grad_norm": 4.0918169021606445, "learning_rate": 0.0002, "loss": 1.7994, "step": 227090 }, { "epoch": 0.92, "grad_norm": 2.9985060691833496, "learning_rate": 0.0002, "loss": 1.5357, "step": 227100 }, { "epoch": 0.92, "grad_norm": 4.464623928070068, "learning_rate": 0.0002, "loss": 1.3007, "step": 227110 }, { "epoch": 0.92, "grad_norm": 1.8868786096572876, "learning_rate": 0.0002, "loss": 1.5189, "step": 227120 }, { "epoch": 0.92, "grad_norm": 2.8838109970092773, "learning_rate": 0.0002, "loss": 1.4679, "step": 227130 }, { "epoch": 0.92, "grad_norm": 2.1337974071502686, "learning_rate": 0.0002, "loss": 1.5563, "step": 227140 }, { "epoch": 0.92, "grad_norm": 4.221001148223877, "learning_rate": 0.0002, "loss": 1.5499, "step": 227150 }, { "epoch": 0.92, "grad_norm": 2.142639398574829, "learning_rate": 0.0002, "loss": 1.5552, "step": 227160 }, { "epoch": 0.92, "grad_norm": 3.60717511177063, "learning_rate": 0.0002, "loss": 1.4443, "step": 227170 }, { "epoch": 0.92, "grad_norm": 2.9190518856048584, "learning_rate": 0.0002, "loss": 1.388, "step": 227180 }, { "epoch": 0.92, "grad_norm": 3.2542998790740967, "learning_rate": 0.0002, "loss": 1.551, "step": 227190 }, { "epoch": 0.92, "grad_norm": 3.8095571994781494, "learning_rate": 0.0002, "loss": 1.6501, "step": 227200 }, { "epoch": 0.92, "grad_norm": 2.957890510559082, "learning_rate": 0.0002, "loss": 1.5372, "step": 227210 }, { "epoch": 0.92, "grad_norm": 2.8176565170288086, "learning_rate": 0.0002, "loss": 1.5334, "step": 227220 }, { "epoch": 0.93, "grad_norm": 1.8814094066619873, "learning_rate": 0.0002, "loss": 1.2597, "step": 227230 }, { "epoch": 0.93, "grad_norm": 3.1187896728515625, "learning_rate": 0.0002, "loss": 1.5867, "step": 227240 }, { "epoch": 0.93, "grad_norm": 1.846800446510315, "learning_rate": 0.0002, "loss": 1.317, "step": 227250 }, { "epoch": 0.93, "grad_norm": 2.110638380050659, "learning_rate": 0.0002, "loss": 1.6677, "step": 227260 }, { "epoch": 0.93, "grad_norm": 2.075948476791382, "learning_rate": 0.0002, "loss": 1.4985, "step": 227270 }, { "epoch": 0.93, "grad_norm": 2.4317166805267334, "learning_rate": 0.0002, "loss": 1.3624, "step": 227280 }, { "epoch": 0.93, "grad_norm": 2.503540277481079, "learning_rate": 0.0002, "loss": 1.692, "step": 227290 }, { "epoch": 0.93, "grad_norm": 3.178276300430298, "learning_rate": 0.0002, "loss": 1.4803, "step": 227300 }, { "epoch": 0.93, "grad_norm": 2.713013172149658, "learning_rate": 0.0002, "loss": 1.9085, "step": 227310 }, { "epoch": 0.93, "grad_norm": 2.5002059936523438, "learning_rate": 0.0002, "loss": 1.7179, "step": 227320 }, { "epoch": 0.93, "grad_norm": 1.560314655303955, "learning_rate": 0.0002, "loss": 1.5036, "step": 227330 }, { "epoch": 0.93, "grad_norm": 4.112673759460449, "learning_rate": 0.0002, "loss": 1.6401, "step": 227340 }, { "epoch": 0.93, "grad_norm": 2.614093780517578, "learning_rate": 0.0002, "loss": 1.6055, "step": 227350 }, { "epoch": 0.93, "grad_norm": 4.502505779266357, "learning_rate": 0.0002, "loss": 1.7289, "step": 227360 }, { "epoch": 0.93, "grad_norm": 4.64753532409668, "learning_rate": 0.0002, "loss": 1.8945, "step": 227370 }, { "epoch": 0.93, "grad_norm": 2.045093059539795, "learning_rate": 0.0002, "loss": 1.6721, "step": 227380 }, { "epoch": 0.93, "grad_norm": 2.5478382110595703, "learning_rate": 0.0002, "loss": 1.7444, "step": 227390 }, { "epoch": 0.93, "grad_norm": 3.2446799278259277, "learning_rate": 0.0002, "loss": 1.3371, "step": 227400 }, { "epoch": 0.93, "grad_norm": 3.4573090076446533, "learning_rate": 0.0002, "loss": 1.7469, "step": 227410 }, { "epoch": 0.93, "grad_norm": 1.6356977224349976, "learning_rate": 0.0002, "loss": 1.6929, "step": 227420 }, { "epoch": 0.93, "grad_norm": 1.9782003164291382, "learning_rate": 0.0002, "loss": 1.4901, "step": 227430 }, { "epoch": 0.93, "grad_norm": 2.6989452838897705, "learning_rate": 0.0002, "loss": 1.6019, "step": 227440 }, { "epoch": 0.93, "grad_norm": 4.210150241851807, "learning_rate": 0.0002, "loss": 1.4707, "step": 227450 }, { "epoch": 0.93, "grad_norm": 2.6568002700805664, "learning_rate": 0.0002, "loss": 1.3094, "step": 227460 }, { "epoch": 0.93, "grad_norm": 6.5683369636535645, "learning_rate": 0.0002, "loss": 1.7162, "step": 227470 }, { "epoch": 0.93, "grad_norm": 2.4850378036499023, "learning_rate": 0.0002, "loss": 1.5427, "step": 227480 }, { "epoch": 0.93, "grad_norm": 4.690985202789307, "learning_rate": 0.0002, "loss": 1.3848, "step": 227490 }, { "epoch": 0.93, "grad_norm": 2.9714250564575195, "learning_rate": 0.0002, "loss": 1.6449, "step": 227500 }, { "epoch": 0.93, "grad_norm": 2.567960262298584, "learning_rate": 0.0002, "loss": 1.6957, "step": 227510 }, { "epoch": 0.93, "grad_norm": 2.371079683303833, "learning_rate": 0.0002, "loss": 1.5279, "step": 227520 }, { "epoch": 0.93, "grad_norm": 1.9613231420516968, "learning_rate": 0.0002, "loss": 1.8055, "step": 227530 }, { "epoch": 0.93, "grad_norm": 2.1636979579925537, "learning_rate": 0.0002, "loss": 1.6513, "step": 227540 }, { "epoch": 0.93, "grad_norm": 4.5839457511901855, "learning_rate": 0.0002, "loss": 1.8152, "step": 227550 }, { "epoch": 0.93, "grad_norm": 1.9530246257781982, "learning_rate": 0.0002, "loss": 1.3359, "step": 227560 }, { "epoch": 0.93, "grad_norm": 3.468647003173828, "learning_rate": 0.0002, "loss": 1.3937, "step": 227570 }, { "epoch": 0.93, "grad_norm": 2.7528858184814453, "learning_rate": 0.0002, "loss": 1.3195, "step": 227580 }, { "epoch": 0.93, "grad_norm": 1.6866912841796875, "learning_rate": 0.0002, "loss": 1.6424, "step": 227590 }, { "epoch": 0.93, "grad_norm": 3.0628228187561035, "learning_rate": 0.0002, "loss": 1.4985, "step": 227600 }, { "epoch": 0.93, "grad_norm": 3.5563879013061523, "learning_rate": 0.0002, "loss": 1.4453, "step": 227610 }, { "epoch": 0.93, "grad_norm": 5.822734355926514, "learning_rate": 0.0002, "loss": 1.6045, "step": 227620 }, { "epoch": 0.93, "grad_norm": 2.316787004470825, "learning_rate": 0.0002, "loss": 1.5334, "step": 227630 }, { "epoch": 0.93, "grad_norm": 2.353576183319092, "learning_rate": 0.0002, "loss": 1.5266, "step": 227640 }, { "epoch": 0.93, "grad_norm": 3.1527316570281982, "learning_rate": 0.0002, "loss": 1.641, "step": 227650 }, { "epoch": 0.93, "grad_norm": 3.1283719539642334, "learning_rate": 0.0002, "loss": 1.9964, "step": 227660 }, { "epoch": 0.93, "grad_norm": 2.4529640674591064, "learning_rate": 0.0002, "loss": 1.6877, "step": 227670 }, { "epoch": 0.93, "grad_norm": 3.342057228088379, "learning_rate": 0.0002, "loss": 1.3524, "step": 227680 }, { "epoch": 0.93, "grad_norm": 3.226984739303589, "learning_rate": 0.0002, "loss": 1.7607, "step": 227690 }, { "epoch": 0.93, "grad_norm": 2.605600357055664, "learning_rate": 0.0002, "loss": 1.4894, "step": 227700 }, { "epoch": 0.93, "grad_norm": 2.70471453666687, "learning_rate": 0.0002, "loss": 1.7543, "step": 227710 }, { "epoch": 0.93, "grad_norm": 3.223606586456299, "learning_rate": 0.0002, "loss": 1.478, "step": 227720 }, { "epoch": 0.93, "grad_norm": 3.554372787475586, "learning_rate": 0.0002, "loss": 1.4007, "step": 227730 }, { "epoch": 0.93, "grad_norm": 2.312882900238037, "learning_rate": 0.0002, "loss": 1.3659, "step": 227740 }, { "epoch": 0.93, "grad_norm": 2.5292325019836426, "learning_rate": 0.0002, "loss": 1.4355, "step": 227750 }, { "epoch": 0.93, "grad_norm": 2.1133766174316406, "learning_rate": 0.0002, "loss": 1.5578, "step": 227760 }, { "epoch": 0.93, "grad_norm": 3.4292256832122803, "learning_rate": 0.0002, "loss": 1.6556, "step": 227770 }, { "epoch": 0.93, "grad_norm": 3.435137987136841, "learning_rate": 0.0002, "loss": 1.5344, "step": 227780 }, { "epoch": 0.93, "grad_norm": 3.8795182704925537, "learning_rate": 0.0002, "loss": 1.5451, "step": 227790 }, { "epoch": 0.93, "grad_norm": 3.8766658306121826, "learning_rate": 0.0002, "loss": 1.6481, "step": 227800 }, { "epoch": 0.93, "grad_norm": 2.273394823074341, "learning_rate": 0.0002, "loss": 1.6699, "step": 227810 }, { "epoch": 0.93, "grad_norm": 2.794421434402466, "learning_rate": 0.0002, "loss": 1.5764, "step": 227820 }, { "epoch": 0.93, "grad_norm": 1.8602162599563599, "learning_rate": 0.0002, "loss": 1.6447, "step": 227830 }, { "epoch": 0.93, "grad_norm": 4.075484752655029, "learning_rate": 0.0002, "loss": 1.6178, "step": 227840 }, { "epoch": 0.93, "grad_norm": 3.8704683780670166, "learning_rate": 0.0002, "loss": 1.5609, "step": 227850 }, { "epoch": 0.93, "grad_norm": 3.9928383827209473, "learning_rate": 0.0002, "loss": 1.8015, "step": 227860 }, { "epoch": 0.93, "grad_norm": 4.277327537536621, "learning_rate": 0.0002, "loss": 1.5487, "step": 227870 }, { "epoch": 0.93, "grad_norm": 4.851487159729004, "learning_rate": 0.0002, "loss": 1.4224, "step": 227880 }, { "epoch": 0.93, "grad_norm": 3.14160418510437, "learning_rate": 0.0002, "loss": 1.9207, "step": 227890 }, { "epoch": 0.93, "grad_norm": 1.4476380348205566, "learning_rate": 0.0002, "loss": 1.5488, "step": 227900 }, { "epoch": 0.93, "grad_norm": 3.476975202560425, "learning_rate": 0.0002, "loss": 1.3659, "step": 227910 }, { "epoch": 0.93, "grad_norm": 4.237102508544922, "learning_rate": 0.0002, "loss": 1.6788, "step": 227920 }, { "epoch": 0.93, "grad_norm": 2.824148178100586, "learning_rate": 0.0002, "loss": 1.575, "step": 227930 }, { "epoch": 0.93, "grad_norm": 3.236039400100708, "learning_rate": 0.0002, "loss": 1.7011, "step": 227940 }, { "epoch": 0.93, "grad_norm": 5.004766464233398, "learning_rate": 0.0002, "loss": 1.4602, "step": 227950 }, { "epoch": 0.93, "grad_norm": 3.0979385375976562, "learning_rate": 0.0002, "loss": 1.8187, "step": 227960 }, { "epoch": 0.93, "grad_norm": 3.091193199157715, "learning_rate": 0.0002, "loss": 1.999, "step": 227970 }, { "epoch": 0.93, "grad_norm": 3.466695785522461, "learning_rate": 0.0002, "loss": 1.8282, "step": 227980 }, { "epoch": 0.93, "grad_norm": 3.3297924995422363, "learning_rate": 0.0002, "loss": 1.5682, "step": 227990 }, { "epoch": 0.93, "grad_norm": 2.1766533851623535, "learning_rate": 0.0002, "loss": 1.5769, "step": 228000 }, { "epoch": 0.93, "grad_norm": 2.9274356365203857, "learning_rate": 0.0002, "loss": 1.6007, "step": 228010 }, { "epoch": 0.93, "grad_norm": 3.201441526412964, "learning_rate": 0.0002, "loss": 1.5727, "step": 228020 }, { "epoch": 0.93, "grad_norm": 2.2137327194213867, "learning_rate": 0.0002, "loss": 1.569, "step": 228030 }, { "epoch": 0.93, "grad_norm": 2.976512908935547, "learning_rate": 0.0002, "loss": 1.6694, "step": 228040 }, { "epoch": 0.93, "grad_norm": 2.0068318843841553, "learning_rate": 0.0002, "loss": 1.5029, "step": 228050 }, { "epoch": 0.93, "grad_norm": 2.101243734359741, "learning_rate": 0.0002, "loss": 1.6679, "step": 228060 }, { "epoch": 0.93, "grad_norm": 2.567915916442871, "learning_rate": 0.0002, "loss": 1.4068, "step": 228070 }, { "epoch": 0.93, "grad_norm": 3.5385591983795166, "learning_rate": 0.0002, "loss": 1.5317, "step": 228080 }, { "epoch": 0.93, "grad_norm": 2.5569446086883545, "learning_rate": 0.0002, "loss": 1.6307, "step": 228090 }, { "epoch": 0.93, "grad_norm": 3.0798532962799072, "learning_rate": 0.0002, "loss": 1.5779, "step": 228100 }, { "epoch": 0.93, "grad_norm": 3.710949182510376, "learning_rate": 0.0002, "loss": 1.6212, "step": 228110 }, { "epoch": 0.93, "grad_norm": 3.702859878540039, "learning_rate": 0.0002, "loss": 1.5537, "step": 228120 }, { "epoch": 0.93, "grad_norm": 3.746877431869507, "learning_rate": 0.0002, "loss": 1.5824, "step": 228130 }, { "epoch": 0.93, "grad_norm": 4.924861431121826, "learning_rate": 0.0002, "loss": 1.7235, "step": 228140 }, { "epoch": 0.93, "grad_norm": 2.3722031116485596, "learning_rate": 0.0002, "loss": 1.4575, "step": 228150 }, { "epoch": 0.93, "grad_norm": 3.565147876739502, "learning_rate": 0.0002, "loss": 1.4808, "step": 228160 }, { "epoch": 0.93, "grad_norm": 2.400578260421753, "learning_rate": 0.0002, "loss": 1.5266, "step": 228170 }, { "epoch": 0.93, "grad_norm": 2.351050853729248, "learning_rate": 0.0002, "loss": 1.646, "step": 228180 }, { "epoch": 0.93, "grad_norm": 2.491865873336792, "learning_rate": 0.0002, "loss": 1.5576, "step": 228190 }, { "epoch": 0.93, "grad_norm": 2.8008952140808105, "learning_rate": 0.0002, "loss": 1.5647, "step": 228200 }, { "epoch": 0.93, "grad_norm": 2.8061676025390625, "learning_rate": 0.0002, "loss": 1.5506, "step": 228210 }, { "epoch": 0.93, "grad_norm": 2.9402554035186768, "learning_rate": 0.0002, "loss": 1.7633, "step": 228220 }, { "epoch": 0.93, "grad_norm": 2.839468240737915, "learning_rate": 0.0002, "loss": 1.6106, "step": 228230 }, { "epoch": 0.93, "grad_norm": 2.775843381881714, "learning_rate": 0.0002, "loss": 1.555, "step": 228240 }, { "epoch": 0.93, "grad_norm": 2.9673290252685547, "learning_rate": 0.0002, "loss": 1.5643, "step": 228250 }, { "epoch": 0.93, "grad_norm": 3.338367223739624, "learning_rate": 0.0002, "loss": 1.6645, "step": 228260 }, { "epoch": 0.93, "grad_norm": 3.3122735023498535, "learning_rate": 0.0002, "loss": 1.4174, "step": 228270 }, { "epoch": 0.93, "grad_norm": 4.765654563903809, "learning_rate": 0.0002, "loss": 1.5078, "step": 228280 }, { "epoch": 0.93, "grad_norm": 1.9882471561431885, "learning_rate": 0.0002, "loss": 1.679, "step": 228290 }, { "epoch": 0.93, "grad_norm": 3.348200798034668, "learning_rate": 0.0002, "loss": 1.6589, "step": 228300 }, { "epoch": 0.93, "grad_norm": 3.1048452854156494, "learning_rate": 0.0002, "loss": 1.5046, "step": 228310 }, { "epoch": 0.93, "grad_norm": 3.151477813720703, "learning_rate": 0.0002, "loss": 1.3969, "step": 228320 }, { "epoch": 0.93, "grad_norm": 3.2148597240448, "learning_rate": 0.0002, "loss": 1.5094, "step": 228330 }, { "epoch": 0.93, "grad_norm": 3.6392018795013428, "learning_rate": 0.0002, "loss": 1.5778, "step": 228340 }, { "epoch": 0.93, "grad_norm": 3.0104243755340576, "learning_rate": 0.0002, "loss": 1.5029, "step": 228350 }, { "epoch": 0.93, "grad_norm": 3.759084701538086, "learning_rate": 0.0002, "loss": 1.6648, "step": 228360 }, { "epoch": 0.93, "grad_norm": 7.119109630584717, "learning_rate": 0.0002, "loss": 1.4654, "step": 228370 }, { "epoch": 0.93, "grad_norm": 3.23004150390625, "learning_rate": 0.0002, "loss": 1.4788, "step": 228380 }, { "epoch": 0.93, "grad_norm": 2.5314130783081055, "learning_rate": 0.0002, "loss": 1.4956, "step": 228390 }, { "epoch": 0.93, "grad_norm": 3.649533987045288, "learning_rate": 0.0002, "loss": 1.6107, "step": 228400 }, { "epoch": 0.93, "grad_norm": 3.288780450820923, "learning_rate": 0.0002, "loss": 1.669, "step": 228410 }, { "epoch": 0.93, "grad_norm": 3.2429802417755127, "learning_rate": 0.0002, "loss": 1.5115, "step": 228420 }, { "epoch": 0.93, "grad_norm": 5.097744941711426, "learning_rate": 0.0002, "loss": 1.4759, "step": 228430 }, { "epoch": 0.93, "grad_norm": 3.440314292907715, "learning_rate": 0.0002, "loss": 1.3878, "step": 228440 }, { "epoch": 0.93, "grad_norm": 4.595001697540283, "learning_rate": 0.0002, "loss": 1.4579, "step": 228450 }, { "epoch": 0.93, "grad_norm": 2.587002992630005, "learning_rate": 0.0002, "loss": 1.5235, "step": 228460 }, { "epoch": 0.93, "grad_norm": 4.695349216461182, "learning_rate": 0.0002, "loss": 1.5879, "step": 228470 }, { "epoch": 0.93, "grad_norm": 2.4829301834106445, "learning_rate": 0.0002, "loss": 1.7692, "step": 228480 }, { "epoch": 0.93, "grad_norm": 2.645127534866333, "learning_rate": 0.0002, "loss": 1.4858, "step": 228490 }, { "epoch": 0.93, "grad_norm": 3.6088922023773193, "learning_rate": 0.0002, "loss": 1.7435, "step": 228500 }, { "epoch": 0.93, "grad_norm": 2.801154136657715, "learning_rate": 0.0002, "loss": 1.5416, "step": 228510 }, { "epoch": 0.93, "grad_norm": 3.1952626705169678, "learning_rate": 0.0002, "loss": 1.4608, "step": 228520 }, { "epoch": 0.93, "grad_norm": 5.183440685272217, "learning_rate": 0.0002, "loss": 1.4308, "step": 228530 }, { "epoch": 0.93, "grad_norm": 6.51840877532959, "learning_rate": 0.0002, "loss": 1.4229, "step": 228540 }, { "epoch": 0.93, "grad_norm": 3.8590495586395264, "learning_rate": 0.0002, "loss": 1.6143, "step": 228550 }, { "epoch": 0.93, "grad_norm": 3.477405071258545, "learning_rate": 0.0002, "loss": 1.4463, "step": 228560 }, { "epoch": 0.93, "grad_norm": 2.7232422828674316, "learning_rate": 0.0002, "loss": 1.5015, "step": 228570 }, { "epoch": 0.93, "grad_norm": 3.444744348526001, "learning_rate": 0.0002, "loss": 1.8196, "step": 228580 }, { "epoch": 0.93, "grad_norm": 3.6354482173919678, "learning_rate": 0.0002, "loss": 1.4496, "step": 228590 }, { "epoch": 0.93, "grad_norm": 3.2384378910064697, "learning_rate": 0.0002, "loss": 1.4925, "step": 228600 }, { "epoch": 0.93, "grad_norm": 4.179108142852783, "learning_rate": 0.0002, "loss": 1.4985, "step": 228610 }, { "epoch": 0.93, "grad_norm": 2.9544625282287598, "learning_rate": 0.0002, "loss": 1.4002, "step": 228620 }, { "epoch": 0.93, "grad_norm": 5.599368095397949, "learning_rate": 0.0002, "loss": 1.5599, "step": 228630 }, { "epoch": 0.93, "grad_norm": 3.834378957748413, "learning_rate": 0.0002, "loss": 1.5282, "step": 228640 }, { "epoch": 0.93, "grad_norm": 1.7868633270263672, "learning_rate": 0.0002, "loss": 1.5585, "step": 228650 }, { "epoch": 0.93, "grad_norm": 3.883700370788574, "learning_rate": 0.0002, "loss": 1.4879, "step": 228660 }, { "epoch": 0.93, "grad_norm": 5.179433822631836, "learning_rate": 0.0002, "loss": 1.6186, "step": 228670 }, { "epoch": 0.93, "grad_norm": 3.5226714611053467, "learning_rate": 0.0002, "loss": 1.4483, "step": 228680 }, { "epoch": 0.93, "grad_norm": 2.4545297622680664, "learning_rate": 0.0002, "loss": 1.7395, "step": 228690 }, { "epoch": 0.93, "grad_norm": 5.444394111633301, "learning_rate": 0.0002, "loss": 1.5285, "step": 228700 }, { "epoch": 0.93, "grad_norm": 3.8227860927581787, "learning_rate": 0.0002, "loss": 1.7699, "step": 228710 }, { "epoch": 0.93, "grad_norm": 2.512913465499878, "learning_rate": 0.0002, "loss": 1.648, "step": 228720 }, { "epoch": 0.93, "grad_norm": 2.5598647594451904, "learning_rate": 0.0002, "loss": 1.3049, "step": 228730 }, { "epoch": 0.93, "grad_norm": 3.103626012802124, "learning_rate": 0.0002, "loss": 1.3995, "step": 228740 }, { "epoch": 0.93, "grad_norm": 3.0216453075408936, "learning_rate": 0.0002, "loss": 1.7533, "step": 228750 }, { "epoch": 0.93, "grad_norm": 2.0059666633605957, "learning_rate": 0.0002, "loss": 1.5146, "step": 228760 }, { "epoch": 0.93, "grad_norm": 3.3871872425079346, "learning_rate": 0.0002, "loss": 1.5145, "step": 228770 }, { "epoch": 0.93, "grad_norm": 1.7916197776794434, "learning_rate": 0.0002, "loss": 1.5948, "step": 228780 }, { "epoch": 0.93, "grad_norm": 1.6229714155197144, "learning_rate": 0.0002, "loss": 1.5858, "step": 228790 }, { "epoch": 0.93, "grad_norm": 2.808788776397705, "learning_rate": 0.0002, "loss": 1.6084, "step": 228800 }, { "epoch": 0.93, "grad_norm": 2.1265745162963867, "learning_rate": 0.0002, "loss": 1.6735, "step": 228810 }, { "epoch": 0.93, "grad_norm": 2.7433321475982666, "learning_rate": 0.0002, "loss": 1.4339, "step": 228820 }, { "epoch": 0.93, "grad_norm": 3.424391746520996, "learning_rate": 0.0002, "loss": 1.5396, "step": 228830 }, { "epoch": 0.93, "grad_norm": 1.903378963470459, "learning_rate": 0.0002, "loss": 1.5478, "step": 228840 }, { "epoch": 0.93, "grad_norm": 3.051400661468506, "learning_rate": 0.0002, "loss": 1.651, "step": 228850 }, { "epoch": 0.93, "grad_norm": 1.1249419450759888, "learning_rate": 0.0002, "loss": 1.4967, "step": 228860 }, { "epoch": 0.93, "grad_norm": 2.2234323024749756, "learning_rate": 0.0002, "loss": 1.481, "step": 228870 }, { "epoch": 0.93, "grad_norm": 4.548048496246338, "learning_rate": 0.0002, "loss": 1.4886, "step": 228880 }, { "epoch": 0.93, "grad_norm": 2.6439905166625977, "learning_rate": 0.0002, "loss": 1.5674, "step": 228890 }, { "epoch": 0.93, "grad_norm": 2.1213552951812744, "learning_rate": 0.0002, "loss": 1.7243, "step": 228900 }, { "epoch": 0.93, "grad_norm": 3.9833340644836426, "learning_rate": 0.0002, "loss": 1.4052, "step": 228910 }, { "epoch": 0.93, "grad_norm": 2.303337574005127, "learning_rate": 0.0002, "loss": 1.6895, "step": 228920 }, { "epoch": 0.93, "grad_norm": 2.9344563484191895, "learning_rate": 0.0002, "loss": 1.6047, "step": 228930 }, { "epoch": 0.93, "grad_norm": 2.8216919898986816, "learning_rate": 0.0002, "loss": 1.8073, "step": 228940 }, { "epoch": 0.93, "grad_norm": 1.796280860900879, "learning_rate": 0.0002, "loss": 1.5825, "step": 228950 }, { "epoch": 0.93, "grad_norm": 2.730619430541992, "learning_rate": 0.0002, "loss": 1.7611, "step": 228960 }, { "epoch": 0.93, "grad_norm": 3.2922539710998535, "learning_rate": 0.0002, "loss": 1.7066, "step": 228970 }, { "epoch": 0.93, "grad_norm": 3.347443103790283, "learning_rate": 0.0002, "loss": 1.6171, "step": 228980 }, { "epoch": 0.93, "grad_norm": 2.381030321121216, "learning_rate": 0.0002, "loss": 1.569, "step": 228990 }, { "epoch": 0.93, "grad_norm": 1.5652788877487183, "learning_rate": 0.0002, "loss": 1.2972, "step": 229000 }, { "epoch": 0.93, "grad_norm": 1.6789637804031372, "learning_rate": 0.0002, "loss": 1.5528, "step": 229010 }, { "epoch": 0.93, "grad_norm": 3.4427084922790527, "learning_rate": 0.0002, "loss": 1.7045, "step": 229020 }, { "epoch": 0.93, "grad_norm": 2.827495574951172, "learning_rate": 0.0002, "loss": 1.6739, "step": 229030 }, { "epoch": 0.93, "grad_norm": 4.850096225738525, "learning_rate": 0.0002, "loss": 1.5108, "step": 229040 }, { "epoch": 0.93, "grad_norm": 2.7747457027435303, "learning_rate": 0.0002, "loss": 1.6303, "step": 229050 }, { "epoch": 0.93, "grad_norm": 3.4377338886260986, "learning_rate": 0.0002, "loss": 1.4859, "step": 229060 }, { "epoch": 0.93, "grad_norm": 3.795724630355835, "learning_rate": 0.0002, "loss": 1.542, "step": 229070 }, { "epoch": 0.93, "grad_norm": 2.6704890727996826, "learning_rate": 0.0002, "loss": 1.8104, "step": 229080 }, { "epoch": 0.93, "grad_norm": 4.281625270843506, "learning_rate": 0.0002, "loss": 1.4221, "step": 229090 }, { "epoch": 0.93, "grad_norm": 2.803309440612793, "learning_rate": 0.0002, "loss": 1.5785, "step": 229100 }, { "epoch": 0.93, "grad_norm": 2.0230069160461426, "learning_rate": 0.0002, "loss": 1.8503, "step": 229110 }, { "epoch": 0.93, "grad_norm": 3.8296799659729004, "learning_rate": 0.0002, "loss": 1.5437, "step": 229120 }, { "epoch": 0.93, "grad_norm": 13.053448677062988, "learning_rate": 0.0002, "loss": 1.8437, "step": 229130 }, { "epoch": 0.93, "grad_norm": 2.98146653175354, "learning_rate": 0.0002, "loss": 1.5439, "step": 229140 }, { "epoch": 0.93, "grad_norm": 2.804588794708252, "learning_rate": 0.0002, "loss": 1.6487, "step": 229150 }, { "epoch": 0.93, "grad_norm": 2.7892136573791504, "learning_rate": 0.0002, "loss": 1.5776, "step": 229160 }, { "epoch": 0.93, "grad_norm": 3.2742676734924316, "learning_rate": 0.0002, "loss": 1.3788, "step": 229170 }, { "epoch": 0.93, "grad_norm": 1.7979822158813477, "learning_rate": 0.0002, "loss": 1.8006, "step": 229180 }, { "epoch": 0.93, "grad_norm": 2.708238124847412, "learning_rate": 0.0002, "loss": 1.8045, "step": 229190 }, { "epoch": 0.93, "grad_norm": 2.3499577045440674, "learning_rate": 0.0002, "loss": 1.3399, "step": 229200 }, { "epoch": 0.93, "grad_norm": 3.041649103164673, "learning_rate": 0.0002, "loss": 1.5251, "step": 229210 }, { "epoch": 0.93, "grad_norm": 2.929271697998047, "learning_rate": 0.0002, "loss": 1.4736, "step": 229220 }, { "epoch": 0.93, "grad_norm": 3.5271403789520264, "learning_rate": 0.0002, "loss": 1.6755, "step": 229230 }, { "epoch": 0.93, "grad_norm": 3.3155102729797363, "learning_rate": 0.0002, "loss": 1.7317, "step": 229240 }, { "epoch": 0.93, "grad_norm": 2.344515562057495, "learning_rate": 0.0002, "loss": 1.6027, "step": 229250 }, { "epoch": 0.93, "grad_norm": 4.076652526855469, "learning_rate": 0.0002, "loss": 1.489, "step": 229260 }, { "epoch": 0.93, "grad_norm": 2.6802971363067627, "learning_rate": 0.0002, "loss": 1.6314, "step": 229270 }, { "epoch": 0.93, "grad_norm": 3.84374737739563, "learning_rate": 0.0002, "loss": 1.4381, "step": 229280 }, { "epoch": 0.93, "grad_norm": 2.3665616512298584, "learning_rate": 0.0002, "loss": 1.455, "step": 229290 }, { "epoch": 0.93, "grad_norm": 3.9428935050964355, "learning_rate": 0.0002, "loss": 1.5875, "step": 229300 }, { "epoch": 0.93, "grad_norm": 2.420870542526245, "learning_rate": 0.0002, "loss": 1.5554, "step": 229310 }, { "epoch": 0.93, "grad_norm": 3.615997552871704, "learning_rate": 0.0002, "loss": 1.5688, "step": 229320 }, { "epoch": 0.93, "grad_norm": 2.4085822105407715, "learning_rate": 0.0002, "loss": 1.407, "step": 229330 }, { "epoch": 0.93, "grad_norm": 1.689160704612732, "learning_rate": 0.0002, "loss": 1.5173, "step": 229340 }, { "epoch": 0.93, "grad_norm": 1.650755763053894, "learning_rate": 0.0002, "loss": 1.3537, "step": 229350 }, { "epoch": 0.93, "grad_norm": 4.39990234375, "learning_rate": 0.0002, "loss": 1.8566, "step": 229360 }, { "epoch": 0.93, "grad_norm": 3.5756375789642334, "learning_rate": 0.0002, "loss": 1.6234, "step": 229370 }, { "epoch": 0.93, "grad_norm": 2.6061713695526123, "learning_rate": 0.0002, "loss": 1.5202, "step": 229380 }, { "epoch": 0.93, "grad_norm": 3.6961846351623535, "learning_rate": 0.0002, "loss": 1.5682, "step": 229390 }, { "epoch": 0.93, "grad_norm": 1.5955175161361694, "learning_rate": 0.0002, "loss": 1.5437, "step": 229400 }, { "epoch": 0.93, "grad_norm": 2.7761199474334717, "learning_rate": 0.0002, "loss": 1.5932, "step": 229410 }, { "epoch": 0.93, "grad_norm": 2.838412284851074, "learning_rate": 0.0002, "loss": 1.7221, "step": 229420 }, { "epoch": 0.93, "grad_norm": 4.273672103881836, "learning_rate": 0.0002, "loss": 1.599, "step": 229430 }, { "epoch": 0.93, "grad_norm": 5.012815952301025, "learning_rate": 0.0002, "loss": 1.5647, "step": 229440 }, { "epoch": 0.93, "grad_norm": 2.788478374481201, "learning_rate": 0.0002, "loss": 1.6017, "step": 229450 }, { "epoch": 0.93, "grad_norm": 4.411409378051758, "learning_rate": 0.0002, "loss": 1.5364, "step": 229460 }, { "epoch": 0.93, "grad_norm": 2.8633532524108887, "learning_rate": 0.0002, "loss": 1.849, "step": 229470 }, { "epoch": 0.93, "grad_norm": 3.6263632774353027, "learning_rate": 0.0002, "loss": 1.6697, "step": 229480 }, { "epoch": 0.93, "grad_norm": 2.9722607135772705, "learning_rate": 0.0002, "loss": 1.8729, "step": 229490 }, { "epoch": 0.93, "grad_norm": 3.6932213306427, "learning_rate": 0.0002, "loss": 1.7297, "step": 229500 }, { "epoch": 0.93, "grad_norm": 2.493051528930664, "learning_rate": 0.0002, "loss": 1.7215, "step": 229510 }, { "epoch": 0.93, "grad_norm": 1.9894388914108276, "learning_rate": 0.0002, "loss": 1.5891, "step": 229520 }, { "epoch": 0.93, "grad_norm": 2.8619282245635986, "learning_rate": 0.0002, "loss": 1.6681, "step": 229530 }, { "epoch": 0.93, "grad_norm": 4.712116241455078, "learning_rate": 0.0002, "loss": 1.6553, "step": 229540 }, { "epoch": 0.93, "grad_norm": 2.909752607345581, "learning_rate": 0.0002, "loss": 1.7271, "step": 229550 }, { "epoch": 0.93, "grad_norm": 2.273477554321289, "learning_rate": 0.0002, "loss": 1.6204, "step": 229560 }, { "epoch": 0.93, "grad_norm": 3.670863151550293, "learning_rate": 0.0002, "loss": 1.8201, "step": 229570 }, { "epoch": 0.93, "grad_norm": 2.7914633750915527, "learning_rate": 0.0002, "loss": 1.7983, "step": 229580 }, { "epoch": 0.93, "grad_norm": 4.339376449584961, "learning_rate": 0.0002, "loss": 1.442, "step": 229590 }, { "epoch": 0.93, "grad_norm": 2.6545276641845703, "learning_rate": 0.0002, "loss": 1.624, "step": 229600 }, { "epoch": 0.93, "grad_norm": 2.8170230388641357, "learning_rate": 0.0002, "loss": 1.502, "step": 229610 }, { "epoch": 0.93, "grad_norm": 5.136733531951904, "learning_rate": 0.0002, "loss": 1.6092, "step": 229620 }, { "epoch": 0.93, "grad_norm": 3.329066514968872, "learning_rate": 0.0002, "loss": 1.6173, "step": 229630 }, { "epoch": 0.93, "grad_norm": 4.101750373840332, "learning_rate": 0.0002, "loss": 1.7674, "step": 229640 }, { "epoch": 0.93, "grad_norm": 2.9508540630340576, "learning_rate": 0.0002, "loss": 1.5339, "step": 229650 }, { "epoch": 0.93, "grad_norm": 1.8245000839233398, "learning_rate": 0.0002, "loss": 1.4441, "step": 229660 }, { "epoch": 0.93, "grad_norm": 4.735082626342773, "learning_rate": 0.0002, "loss": 1.4427, "step": 229670 }, { "epoch": 0.94, "grad_norm": 2.805896759033203, "learning_rate": 0.0002, "loss": 1.6034, "step": 229680 }, { "epoch": 0.94, "grad_norm": 2.329190969467163, "learning_rate": 0.0002, "loss": 1.4584, "step": 229690 }, { "epoch": 0.94, "grad_norm": 3.9374186992645264, "learning_rate": 0.0002, "loss": 1.5631, "step": 229700 }, { "epoch": 0.94, "grad_norm": 3.051922559738159, "learning_rate": 0.0002, "loss": 1.6705, "step": 229710 }, { "epoch": 0.94, "grad_norm": 2.6167335510253906, "learning_rate": 0.0002, "loss": 1.7341, "step": 229720 }, { "epoch": 0.94, "grad_norm": 3.5657477378845215, "learning_rate": 0.0002, "loss": 1.5627, "step": 229730 }, { "epoch": 0.94, "grad_norm": 3.0797154903411865, "learning_rate": 0.0002, "loss": 1.7682, "step": 229740 }, { "epoch": 0.94, "grad_norm": 2.9101057052612305, "learning_rate": 0.0002, "loss": 1.349, "step": 229750 }, { "epoch": 0.94, "grad_norm": 3.3741257190704346, "learning_rate": 0.0002, "loss": 1.7728, "step": 229760 }, { "epoch": 0.94, "grad_norm": 2.1931333541870117, "learning_rate": 0.0002, "loss": 1.6011, "step": 229770 }, { "epoch": 0.94, "grad_norm": 3.2904305458068848, "learning_rate": 0.0002, "loss": 1.9116, "step": 229780 }, { "epoch": 0.94, "grad_norm": 3.3527016639709473, "learning_rate": 0.0002, "loss": 1.4572, "step": 229790 }, { "epoch": 0.94, "grad_norm": 2.5425148010253906, "learning_rate": 0.0002, "loss": 1.4753, "step": 229800 }, { "epoch": 0.94, "grad_norm": 2.1558563709259033, "learning_rate": 0.0002, "loss": 1.3822, "step": 229810 }, { "epoch": 0.94, "grad_norm": 2.8837485313415527, "learning_rate": 0.0002, "loss": 1.5304, "step": 229820 }, { "epoch": 0.94, "grad_norm": 3.127439498901367, "learning_rate": 0.0002, "loss": 1.563, "step": 229830 }, { "epoch": 0.94, "grad_norm": 2.23612117767334, "learning_rate": 0.0002, "loss": 1.362, "step": 229840 }, { "epoch": 0.94, "grad_norm": 2.042680025100708, "learning_rate": 0.0002, "loss": 1.6561, "step": 229850 }, { "epoch": 0.94, "grad_norm": 6.188562870025635, "learning_rate": 0.0002, "loss": 1.7433, "step": 229860 }, { "epoch": 0.94, "grad_norm": 2.5187270641326904, "learning_rate": 0.0002, "loss": 1.4704, "step": 229870 }, { "epoch": 0.94, "grad_norm": 2.949275016784668, "learning_rate": 0.0002, "loss": 1.743, "step": 229880 }, { "epoch": 0.94, "grad_norm": 3.494187593460083, "learning_rate": 0.0002, "loss": 1.6417, "step": 229890 }, { "epoch": 0.94, "grad_norm": 3.2087836265563965, "learning_rate": 0.0002, "loss": 1.3036, "step": 229900 }, { "epoch": 0.94, "grad_norm": 3.97033429145813, "learning_rate": 0.0002, "loss": 1.5066, "step": 229910 }, { "epoch": 0.94, "grad_norm": 3.7630598545074463, "learning_rate": 0.0002, "loss": 1.7349, "step": 229920 }, { "epoch": 0.94, "grad_norm": 3.216409683227539, "learning_rate": 0.0002, "loss": 1.5594, "step": 229930 }, { "epoch": 0.94, "grad_norm": 2.109266996383667, "learning_rate": 0.0002, "loss": 1.7005, "step": 229940 }, { "epoch": 0.94, "grad_norm": 2.1559979915618896, "learning_rate": 0.0002, "loss": 1.503, "step": 229950 }, { "epoch": 0.94, "grad_norm": 4.29383659362793, "learning_rate": 0.0002, "loss": 1.5729, "step": 229960 }, { "epoch": 0.94, "grad_norm": 2.735499620437622, "learning_rate": 0.0002, "loss": 1.4528, "step": 229970 }, { "epoch": 0.94, "grad_norm": 2.753190279006958, "learning_rate": 0.0002, "loss": 1.5726, "step": 229980 }, { "epoch": 0.94, "grad_norm": 1.342747688293457, "learning_rate": 0.0002, "loss": 1.5313, "step": 229990 }, { "epoch": 0.94, "grad_norm": 2.9737510681152344, "learning_rate": 0.0002, "loss": 1.8267, "step": 230000 }, { "epoch": 0.94, "grad_norm": 3.048703670501709, "learning_rate": 0.0002, "loss": 1.6752, "step": 230010 }, { "epoch": 0.94, "grad_norm": 6.889738082885742, "learning_rate": 0.0002, "loss": 1.43, "step": 230020 }, { "epoch": 0.94, "grad_norm": 2.2815732955932617, "learning_rate": 0.0002, "loss": 1.2397, "step": 230030 }, { "epoch": 0.94, "grad_norm": 2.222914457321167, "learning_rate": 0.0002, "loss": 1.5049, "step": 230040 }, { "epoch": 0.94, "grad_norm": 1.7567439079284668, "learning_rate": 0.0002, "loss": 1.7447, "step": 230050 }, { "epoch": 0.94, "grad_norm": 3.1420888900756836, "learning_rate": 0.0002, "loss": 1.4993, "step": 230060 }, { "epoch": 0.94, "grad_norm": 2.8549036979675293, "learning_rate": 0.0002, "loss": 1.4715, "step": 230070 }, { "epoch": 0.94, "grad_norm": 3.390316963195801, "learning_rate": 0.0002, "loss": 1.594, "step": 230080 }, { "epoch": 0.94, "grad_norm": 4.840744495391846, "learning_rate": 0.0002, "loss": 1.6174, "step": 230090 }, { "epoch": 0.94, "grad_norm": 2.9179399013519287, "learning_rate": 0.0002, "loss": 1.6721, "step": 230100 }, { "epoch": 0.94, "grad_norm": 2.3430309295654297, "learning_rate": 0.0002, "loss": 1.7932, "step": 230110 }, { "epoch": 0.94, "grad_norm": 3.956174612045288, "learning_rate": 0.0002, "loss": 1.4681, "step": 230120 }, { "epoch": 0.94, "grad_norm": 5.277463912963867, "learning_rate": 0.0002, "loss": 1.5844, "step": 230130 }, { "epoch": 0.94, "grad_norm": 5.2980780601501465, "learning_rate": 0.0002, "loss": 1.8163, "step": 230140 }, { "epoch": 0.94, "grad_norm": 2.6556169986724854, "learning_rate": 0.0002, "loss": 1.5374, "step": 230150 }, { "epoch": 0.94, "grad_norm": 2.5142524242401123, "learning_rate": 0.0002, "loss": 1.9211, "step": 230160 }, { "epoch": 0.94, "grad_norm": 4.1348395347595215, "learning_rate": 0.0002, "loss": 1.4136, "step": 230170 }, { "epoch": 0.94, "grad_norm": 4.441339492797852, "learning_rate": 0.0002, "loss": 1.5583, "step": 230180 }, { "epoch": 0.94, "grad_norm": 1.961857795715332, "learning_rate": 0.0002, "loss": 1.4725, "step": 230190 }, { "epoch": 0.94, "grad_norm": 2.259500503540039, "learning_rate": 0.0002, "loss": 1.5946, "step": 230200 }, { "epoch": 0.94, "grad_norm": 2.3788840770721436, "learning_rate": 0.0002, "loss": 1.6991, "step": 230210 }, { "epoch": 0.94, "grad_norm": 3.3280351161956787, "learning_rate": 0.0002, "loss": 1.483, "step": 230220 }, { "epoch": 0.94, "grad_norm": 4.082042694091797, "learning_rate": 0.0002, "loss": 1.6379, "step": 230230 }, { "epoch": 0.94, "grad_norm": 2.905862808227539, "learning_rate": 0.0002, "loss": 1.7509, "step": 230240 }, { "epoch": 0.94, "grad_norm": 3.060534715652466, "learning_rate": 0.0002, "loss": 1.5443, "step": 230250 }, { "epoch": 0.94, "grad_norm": 2.381072521209717, "learning_rate": 0.0002, "loss": 1.6656, "step": 230260 }, { "epoch": 0.94, "grad_norm": 3.6178343296051025, "learning_rate": 0.0002, "loss": 1.7924, "step": 230270 }, { "epoch": 0.94, "grad_norm": 2.24165415763855, "learning_rate": 0.0002, "loss": 1.8343, "step": 230280 }, { "epoch": 0.94, "grad_norm": 4.8264665603637695, "learning_rate": 0.0002, "loss": 1.3509, "step": 230290 }, { "epoch": 0.94, "grad_norm": 3.019395112991333, "learning_rate": 0.0002, "loss": 1.1546, "step": 230300 }, { "epoch": 0.94, "grad_norm": 3.604710340499878, "learning_rate": 0.0002, "loss": 1.6954, "step": 230310 }, { "epoch": 0.94, "grad_norm": 3.047614574432373, "learning_rate": 0.0002, "loss": 1.5539, "step": 230320 }, { "epoch": 0.94, "grad_norm": 3.5854363441467285, "learning_rate": 0.0002, "loss": 1.5134, "step": 230330 }, { "epoch": 0.94, "grad_norm": 1.9811948537826538, "learning_rate": 0.0002, "loss": 1.47, "step": 230340 }, { "epoch": 0.94, "grad_norm": 1.8017770051956177, "learning_rate": 0.0002, "loss": 1.5789, "step": 230350 }, { "epoch": 0.94, "grad_norm": 3.401177167892456, "learning_rate": 0.0002, "loss": 1.5816, "step": 230360 }, { "epoch": 0.94, "grad_norm": 3.0880863666534424, "learning_rate": 0.0002, "loss": 1.543, "step": 230370 }, { "epoch": 0.94, "grad_norm": 2.8355348110198975, "learning_rate": 0.0002, "loss": 1.7528, "step": 230380 }, { "epoch": 0.94, "grad_norm": 3.0537397861480713, "learning_rate": 0.0002, "loss": 1.8929, "step": 230390 }, { "epoch": 0.94, "grad_norm": 3.260615110397339, "learning_rate": 0.0002, "loss": 1.4547, "step": 230400 }, { "epoch": 0.94, "grad_norm": 3.916747808456421, "learning_rate": 0.0002, "loss": 1.7833, "step": 230410 }, { "epoch": 0.94, "grad_norm": 3.216521739959717, "learning_rate": 0.0002, "loss": 1.483, "step": 230420 }, { "epoch": 0.94, "grad_norm": 4.027027606964111, "learning_rate": 0.0002, "loss": 1.7511, "step": 230430 }, { "epoch": 0.94, "grad_norm": 2.6397674083709717, "learning_rate": 0.0002, "loss": 1.7193, "step": 230440 }, { "epoch": 0.94, "grad_norm": 2.561086416244507, "learning_rate": 0.0002, "loss": 1.5781, "step": 230450 }, { "epoch": 0.94, "grad_norm": 1.9401801824569702, "learning_rate": 0.0002, "loss": 1.6431, "step": 230460 }, { "epoch": 0.94, "grad_norm": 3.8977317810058594, "learning_rate": 0.0002, "loss": 1.6481, "step": 230470 }, { "epoch": 0.94, "grad_norm": 4.454480171203613, "learning_rate": 0.0002, "loss": 1.5712, "step": 230480 }, { "epoch": 0.94, "grad_norm": 2.611527442932129, "learning_rate": 0.0002, "loss": 1.5077, "step": 230490 }, { "epoch": 0.94, "grad_norm": 2.793018341064453, "learning_rate": 0.0002, "loss": 1.4072, "step": 230500 }, { "epoch": 0.94, "grad_norm": 4.077794075012207, "learning_rate": 0.0002, "loss": 1.6395, "step": 230510 }, { "epoch": 0.94, "grad_norm": 3.1058926582336426, "learning_rate": 0.0002, "loss": 1.6373, "step": 230520 }, { "epoch": 0.94, "grad_norm": 4.811814308166504, "learning_rate": 0.0002, "loss": 1.4221, "step": 230530 }, { "epoch": 0.94, "grad_norm": 4.2172346115112305, "learning_rate": 0.0002, "loss": 1.4827, "step": 230540 }, { "epoch": 0.94, "grad_norm": 2.5038137435913086, "learning_rate": 0.0002, "loss": 1.5722, "step": 230550 }, { "epoch": 0.94, "grad_norm": 4.337973117828369, "learning_rate": 0.0002, "loss": 1.636, "step": 230560 }, { "epoch": 0.94, "grad_norm": 1.9997175931930542, "learning_rate": 0.0002, "loss": 1.6111, "step": 230570 }, { "epoch": 0.94, "grad_norm": 2.572990894317627, "learning_rate": 0.0002, "loss": 1.702, "step": 230580 }, { "epoch": 0.94, "grad_norm": 2.8181238174438477, "learning_rate": 0.0002, "loss": 1.5973, "step": 230590 }, { "epoch": 0.94, "grad_norm": 1.7750887870788574, "learning_rate": 0.0002, "loss": 1.7269, "step": 230600 }, { "epoch": 0.94, "grad_norm": 3.623260259628296, "learning_rate": 0.0002, "loss": 1.6174, "step": 230610 }, { "epoch": 0.94, "grad_norm": 6.908071517944336, "learning_rate": 0.0002, "loss": 1.5205, "step": 230620 }, { "epoch": 0.94, "grad_norm": 2.9235730171203613, "learning_rate": 0.0002, "loss": 1.5324, "step": 230630 }, { "epoch": 0.94, "grad_norm": 4.085180759429932, "learning_rate": 0.0002, "loss": 1.3891, "step": 230640 }, { "epoch": 0.94, "grad_norm": 4.141042232513428, "learning_rate": 0.0002, "loss": 1.6599, "step": 230650 }, { "epoch": 0.94, "grad_norm": 2.4402523040771484, "learning_rate": 0.0002, "loss": 1.6155, "step": 230660 }, { "epoch": 0.94, "grad_norm": 3.326404094696045, "learning_rate": 0.0002, "loss": 1.6889, "step": 230670 }, { "epoch": 0.94, "grad_norm": 3.366725206375122, "learning_rate": 0.0002, "loss": 1.4228, "step": 230680 }, { "epoch": 0.94, "grad_norm": 1.4383323192596436, "learning_rate": 0.0002, "loss": 1.2184, "step": 230690 }, { "epoch": 0.94, "grad_norm": 3.1189467906951904, "learning_rate": 0.0002, "loss": 1.6512, "step": 230700 }, { "epoch": 0.94, "grad_norm": 2.323920965194702, "learning_rate": 0.0002, "loss": 1.5974, "step": 230710 }, { "epoch": 0.94, "grad_norm": 3.073749303817749, "learning_rate": 0.0002, "loss": 1.4321, "step": 230720 }, { "epoch": 0.94, "grad_norm": 2.3234806060791016, "learning_rate": 0.0002, "loss": 1.6285, "step": 230730 }, { "epoch": 0.94, "grad_norm": 3.064480781555176, "learning_rate": 0.0002, "loss": 1.8564, "step": 230740 }, { "epoch": 0.94, "grad_norm": 2.3812716007232666, "learning_rate": 0.0002, "loss": 1.4374, "step": 230750 }, { "epoch": 0.94, "grad_norm": 6.145968914031982, "learning_rate": 0.0002, "loss": 1.6074, "step": 230760 }, { "epoch": 0.94, "grad_norm": 2.872368097305298, "learning_rate": 0.0002, "loss": 1.7879, "step": 230770 }, { "epoch": 0.94, "grad_norm": 3.24617862701416, "learning_rate": 0.0002, "loss": 1.7707, "step": 230780 }, { "epoch": 0.94, "grad_norm": 4.029052734375, "learning_rate": 0.0002, "loss": 1.6716, "step": 230790 }, { "epoch": 0.94, "grad_norm": 4.596087455749512, "learning_rate": 0.0002, "loss": 1.5373, "step": 230800 }, { "epoch": 0.94, "grad_norm": 1.8436481952667236, "learning_rate": 0.0002, "loss": 1.4463, "step": 230810 }, { "epoch": 0.94, "grad_norm": 2.1640262603759766, "learning_rate": 0.0002, "loss": 1.5187, "step": 230820 }, { "epoch": 0.94, "grad_norm": 2.3200442790985107, "learning_rate": 0.0002, "loss": 1.5682, "step": 230830 }, { "epoch": 0.94, "grad_norm": 2.5393998622894287, "learning_rate": 0.0002, "loss": 1.5851, "step": 230840 }, { "epoch": 0.94, "grad_norm": 2.6445152759552, "learning_rate": 0.0002, "loss": 1.5938, "step": 230850 }, { "epoch": 0.94, "grad_norm": 3.2611281871795654, "learning_rate": 0.0002, "loss": 1.6777, "step": 230860 }, { "epoch": 0.94, "grad_norm": 3.0316948890686035, "learning_rate": 0.0002, "loss": 1.7852, "step": 230870 }, { "epoch": 0.94, "grad_norm": 2.928987503051758, "learning_rate": 0.0002, "loss": 1.4838, "step": 230880 }, { "epoch": 0.94, "grad_norm": 3.488927125930786, "learning_rate": 0.0002, "loss": 1.4298, "step": 230890 }, { "epoch": 0.94, "grad_norm": 3.966736078262329, "learning_rate": 0.0002, "loss": 1.6117, "step": 230900 }, { "epoch": 0.94, "grad_norm": 3.7084786891937256, "learning_rate": 0.0002, "loss": 1.7253, "step": 230910 }, { "epoch": 0.94, "grad_norm": 3.6247167587280273, "learning_rate": 0.0002, "loss": 1.5065, "step": 230920 }, { "epoch": 0.94, "grad_norm": 3.4086837768554688, "learning_rate": 0.0002, "loss": 1.6107, "step": 230930 }, { "epoch": 0.94, "grad_norm": 3.7983453273773193, "learning_rate": 0.0002, "loss": 1.5653, "step": 230940 }, { "epoch": 0.94, "grad_norm": 1.9602563381195068, "learning_rate": 0.0002, "loss": 1.5431, "step": 230950 }, { "epoch": 0.94, "grad_norm": 2.849940538406372, "learning_rate": 0.0002, "loss": 1.5481, "step": 230960 }, { "epoch": 0.94, "grad_norm": 1.9127973318099976, "learning_rate": 0.0002, "loss": 1.5559, "step": 230970 }, { "epoch": 0.94, "grad_norm": 3.7686429023742676, "learning_rate": 0.0002, "loss": 1.4913, "step": 230980 }, { "epoch": 0.94, "grad_norm": 2.6568048000335693, "learning_rate": 0.0002, "loss": 1.8837, "step": 230990 }, { "epoch": 0.94, "grad_norm": 2.251516819000244, "learning_rate": 0.0002, "loss": 1.8218, "step": 231000 }, { "epoch": 0.94, "grad_norm": 2.538949728012085, "learning_rate": 0.0002, "loss": 1.5141, "step": 231010 }, { "epoch": 0.94, "grad_norm": 3.0799548625946045, "learning_rate": 0.0002, "loss": 1.3902, "step": 231020 }, { "epoch": 0.94, "grad_norm": 3.534329414367676, "learning_rate": 0.0002, "loss": 1.4631, "step": 231030 }, { "epoch": 0.94, "grad_norm": 2.403146982192993, "learning_rate": 0.0002, "loss": 1.6247, "step": 231040 }, { "epoch": 0.94, "grad_norm": 2.3614494800567627, "learning_rate": 0.0002, "loss": 1.5977, "step": 231050 }, { "epoch": 0.94, "grad_norm": 4.168054580688477, "learning_rate": 0.0002, "loss": 1.5329, "step": 231060 }, { "epoch": 0.94, "grad_norm": 4.382872104644775, "learning_rate": 0.0002, "loss": 1.1895, "step": 231070 }, { "epoch": 0.94, "grad_norm": 2.3340539932250977, "learning_rate": 0.0002, "loss": 1.5898, "step": 231080 }, { "epoch": 0.94, "grad_norm": 3.2126591205596924, "learning_rate": 0.0002, "loss": 1.4928, "step": 231090 }, { "epoch": 0.94, "grad_norm": 4.051944255828857, "learning_rate": 0.0002, "loss": 1.8128, "step": 231100 }, { "epoch": 0.94, "grad_norm": 2.181143045425415, "learning_rate": 0.0002, "loss": 1.6001, "step": 231110 }, { "epoch": 0.94, "grad_norm": 3.0388925075531006, "learning_rate": 0.0002, "loss": 1.6174, "step": 231120 }, { "epoch": 0.94, "grad_norm": 2.0976078510284424, "learning_rate": 0.0002, "loss": 1.5924, "step": 231130 }, { "epoch": 0.94, "grad_norm": 1.9907803535461426, "learning_rate": 0.0002, "loss": 1.7437, "step": 231140 }, { "epoch": 0.94, "grad_norm": 3.8857827186584473, "learning_rate": 0.0002, "loss": 1.5093, "step": 231150 }, { "epoch": 0.94, "grad_norm": 6.198359489440918, "learning_rate": 0.0002, "loss": 1.3884, "step": 231160 }, { "epoch": 0.94, "grad_norm": 3.2527997493743896, "learning_rate": 0.0002, "loss": 1.7431, "step": 231170 }, { "epoch": 0.94, "grad_norm": 2.45934796333313, "learning_rate": 0.0002, "loss": 1.5777, "step": 231180 }, { "epoch": 0.94, "grad_norm": 2.021458148956299, "learning_rate": 0.0002, "loss": 1.7329, "step": 231190 }, { "epoch": 0.94, "grad_norm": 2.590970516204834, "learning_rate": 0.0002, "loss": 1.5763, "step": 231200 }, { "epoch": 0.94, "grad_norm": 3.3430094718933105, "learning_rate": 0.0002, "loss": 1.5868, "step": 231210 }, { "epoch": 0.94, "grad_norm": 2.6249866485595703, "learning_rate": 0.0002, "loss": 1.7747, "step": 231220 }, { "epoch": 0.94, "grad_norm": 3.517545223236084, "learning_rate": 0.0002, "loss": 1.7208, "step": 231230 }, { "epoch": 0.94, "grad_norm": 4.664552211761475, "learning_rate": 0.0002, "loss": 1.5509, "step": 231240 }, { "epoch": 0.94, "grad_norm": 2.7060530185699463, "learning_rate": 0.0002, "loss": 1.4296, "step": 231250 }, { "epoch": 0.94, "grad_norm": 2.825103521347046, "learning_rate": 0.0002, "loss": 1.7075, "step": 231260 }, { "epoch": 0.94, "grad_norm": 2.2556886672973633, "learning_rate": 0.0002, "loss": 1.5287, "step": 231270 }, { "epoch": 0.94, "grad_norm": 4.352172374725342, "learning_rate": 0.0002, "loss": 1.4378, "step": 231280 }, { "epoch": 0.94, "grad_norm": 2.3427464962005615, "learning_rate": 0.0002, "loss": 1.532, "step": 231290 }, { "epoch": 0.94, "grad_norm": 3.841249465942383, "learning_rate": 0.0002, "loss": 1.4789, "step": 231300 }, { "epoch": 0.94, "grad_norm": 3.0105905532836914, "learning_rate": 0.0002, "loss": 1.6427, "step": 231310 }, { "epoch": 0.94, "grad_norm": 2.0532050132751465, "learning_rate": 0.0002, "loss": 1.5436, "step": 231320 }, { "epoch": 0.94, "grad_norm": 2.1809637546539307, "learning_rate": 0.0002, "loss": 1.5326, "step": 231330 }, { "epoch": 0.94, "grad_norm": 3.7472896575927734, "learning_rate": 0.0002, "loss": 1.7234, "step": 231340 }, { "epoch": 0.94, "grad_norm": 1.6164826154708862, "learning_rate": 0.0002, "loss": 1.6044, "step": 231350 }, { "epoch": 0.94, "grad_norm": 3.6931777000427246, "learning_rate": 0.0002, "loss": 1.5146, "step": 231360 }, { "epoch": 0.94, "grad_norm": 2.3798182010650635, "learning_rate": 0.0002, "loss": 1.4975, "step": 231370 }, { "epoch": 0.94, "grad_norm": 4.212082386016846, "learning_rate": 0.0002, "loss": 1.4329, "step": 231380 }, { "epoch": 0.94, "grad_norm": 2.9003021717071533, "learning_rate": 0.0002, "loss": 1.7434, "step": 231390 }, { "epoch": 0.94, "grad_norm": 2.4279232025146484, "learning_rate": 0.0002, "loss": 1.4872, "step": 231400 }, { "epoch": 0.94, "grad_norm": 3.7189130783081055, "learning_rate": 0.0002, "loss": 1.6015, "step": 231410 }, { "epoch": 0.94, "grad_norm": 3.1876165866851807, "learning_rate": 0.0002, "loss": 1.5747, "step": 231420 }, { "epoch": 0.94, "grad_norm": 2.3373970985412598, "learning_rate": 0.0002, "loss": 1.629, "step": 231430 }, { "epoch": 0.94, "grad_norm": 3.448241710662842, "learning_rate": 0.0002, "loss": 1.6696, "step": 231440 }, { "epoch": 0.94, "grad_norm": 1.703595519065857, "learning_rate": 0.0002, "loss": 1.5897, "step": 231450 }, { "epoch": 0.94, "grad_norm": 3.285292148590088, "learning_rate": 0.0002, "loss": 1.7887, "step": 231460 }, { "epoch": 0.94, "grad_norm": 2.5341601371765137, "learning_rate": 0.0002, "loss": 1.4833, "step": 231470 }, { "epoch": 0.94, "grad_norm": 2.9615237712860107, "learning_rate": 0.0002, "loss": 1.4299, "step": 231480 }, { "epoch": 0.94, "grad_norm": 4.017309665679932, "learning_rate": 0.0002, "loss": 1.6107, "step": 231490 }, { "epoch": 0.94, "grad_norm": 5.592817306518555, "learning_rate": 0.0002, "loss": 1.5785, "step": 231500 }, { "epoch": 0.94, "grad_norm": 2.4068644046783447, "learning_rate": 0.0002, "loss": 1.7327, "step": 231510 }, { "epoch": 0.94, "grad_norm": 2.9442105293273926, "learning_rate": 0.0002, "loss": 1.529, "step": 231520 }, { "epoch": 0.94, "grad_norm": 4.6327595710754395, "learning_rate": 0.0002, "loss": 1.4483, "step": 231530 }, { "epoch": 0.94, "grad_norm": 5.36979341506958, "learning_rate": 0.0002, "loss": 1.4012, "step": 231540 }, { "epoch": 0.94, "grad_norm": 1.7918709516525269, "learning_rate": 0.0002, "loss": 1.4415, "step": 231550 }, { "epoch": 0.94, "grad_norm": 1.8337318897247314, "learning_rate": 0.0002, "loss": 1.4726, "step": 231560 }, { "epoch": 0.94, "grad_norm": 2.7867565155029297, "learning_rate": 0.0002, "loss": 1.6457, "step": 231570 }, { "epoch": 0.94, "grad_norm": 3.5292797088623047, "learning_rate": 0.0002, "loss": 1.7025, "step": 231580 }, { "epoch": 0.94, "grad_norm": 2.4036741256713867, "learning_rate": 0.0002, "loss": 1.4722, "step": 231590 }, { "epoch": 0.94, "grad_norm": 4.174699783325195, "learning_rate": 0.0002, "loss": 1.7818, "step": 231600 }, { "epoch": 0.94, "grad_norm": 4.645886421203613, "learning_rate": 0.0002, "loss": 1.5038, "step": 231610 }, { "epoch": 0.94, "grad_norm": 3.1697566509246826, "learning_rate": 0.0002, "loss": 1.7285, "step": 231620 }, { "epoch": 0.94, "grad_norm": 3.913830280303955, "learning_rate": 0.0002, "loss": 1.5284, "step": 231630 }, { "epoch": 0.94, "grad_norm": 4.181804656982422, "learning_rate": 0.0002, "loss": 1.4116, "step": 231640 }, { "epoch": 0.94, "grad_norm": 2.6926651000976562, "learning_rate": 0.0002, "loss": 1.4819, "step": 231650 }, { "epoch": 0.94, "grad_norm": 3.8690690994262695, "learning_rate": 0.0002, "loss": 1.584, "step": 231660 }, { "epoch": 0.94, "grad_norm": 1.9805433750152588, "learning_rate": 0.0002, "loss": 1.5211, "step": 231670 }, { "epoch": 0.94, "grad_norm": 2.232038736343384, "learning_rate": 0.0002, "loss": 1.4059, "step": 231680 }, { "epoch": 0.94, "grad_norm": 2.688230037689209, "learning_rate": 0.0002, "loss": 1.655, "step": 231690 }, { "epoch": 0.94, "grad_norm": 4.593257904052734, "learning_rate": 0.0002, "loss": 1.7398, "step": 231700 }, { "epoch": 0.94, "grad_norm": 2.613325595855713, "learning_rate": 0.0002, "loss": 1.5827, "step": 231710 }, { "epoch": 0.94, "grad_norm": 2.675405740737915, "learning_rate": 0.0002, "loss": 1.5266, "step": 231720 }, { "epoch": 0.94, "grad_norm": 3.394272804260254, "learning_rate": 0.0002, "loss": 1.507, "step": 231730 }, { "epoch": 0.94, "grad_norm": 2.9410147666931152, "learning_rate": 0.0002, "loss": 1.4904, "step": 231740 }, { "epoch": 0.94, "grad_norm": 2.1807215213775635, "learning_rate": 0.0002, "loss": 1.6002, "step": 231750 }, { "epoch": 0.94, "grad_norm": 1.9795880317687988, "learning_rate": 0.0002, "loss": 1.4131, "step": 231760 }, { "epoch": 0.94, "grad_norm": 3.3420252799987793, "learning_rate": 0.0002, "loss": 1.4481, "step": 231770 }, { "epoch": 0.94, "grad_norm": 4.4040656089782715, "learning_rate": 0.0002, "loss": 1.6731, "step": 231780 }, { "epoch": 0.94, "grad_norm": 3.8526148796081543, "learning_rate": 0.0002, "loss": 1.6131, "step": 231790 }, { "epoch": 0.94, "grad_norm": 2.234585762023926, "learning_rate": 0.0002, "loss": 1.481, "step": 231800 }, { "epoch": 0.94, "grad_norm": 2.8221731185913086, "learning_rate": 0.0002, "loss": 1.47, "step": 231810 }, { "epoch": 0.94, "grad_norm": 3.6883180141448975, "learning_rate": 0.0002, "loss": 1.514, "step": 231820 }, { "epoch": 0.94, "grad_norm": 4.036888599395752, "learning_rate": 0.0002, "loss": 1.7979, "step": 231830 }, { "epoch": 0.94, "grad_norm": 3.256469964981079, "learning_rate": 0.0002, "loss": 1.8973, "step": 231840 }, { "epoch": 0.94, "grad_norm": 3.48382568359375, "learning_rate": 0.0002, "loss": 1.4424, "step": 231850 }, { "epoch": 0.94, "grad_norm": 2.7555699348449707, "learning_rate": 0.0002, "loss": 1.5387, "step": 231860 }, { "epoch": 0.94, "grad_norm": 1.6483222246170044, "learning_rate": 0.0002, "loss": 1.4077, "step": 231870 }, { "epoch": 0.94, "grad_norm": 3.929142951965332, "learning_rate": 0.0002, "loss": 1.6169, "step": 231880 }, { "epoch": 0.94, "grad_norm": 1.8688257932662964, "learning_rate": 0.0002, "loss": 1.4712, "step": 231890 }, { "epoch": 0.94, "grad_norm": 6.536464214324951, "learning_rate": 0.0002, "loss": 1.5039, "step": 231900 }, { "epoch": 0.94, "grad_norm": 2.443117380142212, "learning_rate": 0.0002, "loss": 1.383, "step": 231910 }, { "epoch": 0.94, "grad_norm": 2.5249886512756348, "learning_rate": 0.0002, "loss": 1.503, "step": 231920 }, { "epoch": 0.94, "grad_norm": 2.653987169265747, "learning_rate": 0.0002, "loss": 1.5957, "step": 231930 }, { "epoch": 0.94, "grad_norm": 2.0957489013671875, "learning_rate": 0.0002, "loss": 1.728, "step": 231940 }, { "epoch": 0.94, "grad_norm": 2.729079008102417, "learning_rate": 0.0002, "loss": 1.6098, "step": 231950 }, { "epoch": 0.94, "grad_norm": 3.4335293769836426, "learning_rate": 0.0002, "loss": 1.6757, "step": 231960 }, { "epoch": 0.94, "grad_norm": 2.675536870956421, "learning_rate": 0.0002, "loss": 1.8312, "step": 231970 }, { "epoch": 0.94, "grad_norm": 3.6433515548706055, "learning_rate": 0.0002, "loss": 1.6155, "step": 231980 }, { "epoch": 0.94, "grad_norm": 2.0374462604522705, "learning_rate": 0.0002, "loss": 1.8173, "step": 231990 }, { "epoch": 0.94, "grad_norm": 2.58144474029541, "learning_rate": 0.0002, "loss": 1.8153, "step": 232000 }, { "epoch": 0.94, "grad_norm": 2.5211992263793945, "learning_rate": 0.0002, "loss": 1.5718, "step": 232010 }, { "epoch": 0.94, "grad_norm": 2.8865156173706055, "learning_rate": 0.0002, "loss": 1.6241, "step": 232020 }, { "epoch": 0.94, "grad_norm": 2.24123215675354, "learning_rate": 0.0002, "loss": 1.5456, "step": 232030 }, { "epoch": 0.94, "grad_norm": 7.132985591888428, "learning_rate": 0.0002, "loss": 1.6694, "step": 232040 }, { "epoch": 0.94, "grad_norm": 2.372598886489868, "learning_rate": 0.0002, "loss": 1.5406, "step": 232050 }, { "epoch": 0.94, "grad_norm": 2.1464459896087646, "learning_rate": 0.0002, "loss": 1.5187, "step": 232060 }, { "epoch": 0.94, "grad_norm": 2.4221861362457275, "learning_rate": 0.0002, "loss": 1.7888, "step": 232070 }, { "epoch": 0.94, "grad_norm": 3.16378116607666, "learning_rate": 0.0002, "loss": 1.3189, "step": 232080 }, { "epoch": 0.94, "grad_norm": 2.4847822189331055, "learning_rate": 0.0002, "loss": 1.6304, "step": 232090 }, { "epoch": 0.94, "grad_norm": 1.758455753326416, "learning_rate": 0.0002, "loss": 1.5412, "step": 232100 }, { "epoch": 0.94, "grad_norm": 3.305738687515259, "learning_rate": 0.0002, "loss": 1.6795, "step": 232110 }, { "epoch": 0.94, "grad_norm": 2.5789082050323486, "learning_rate": 0.0002, "loss": 1.5632, "step": 232120 }, { "epoch": 0.94, "grad_norm": 3.4224050045013428, "learning_rate": 0.0002, "loss": 1.5779, "step": 232130 }, { "epoch": 0.95, "grad_norm": 2.244373083114624, "learning_rate": 0.0002, "loss": 1.7788, "step": 232140 }, { "epoch": 0.95, "grad_norm": 3.407796859741211, "learning_rate": 0.0002, "loss": 1.4903, "step": 232150 }, { "epoch": 0.95, "grad_norm": 1.9918574094772339, "learning_rate": 0.0002, "loss": 1.7261, "step": 232160 }, { "epoch": 0.95, "grad_norm": 3.6733360290527344, "learning_rate": 0.0002, "loss": 1.6844, "step": 232170 }, { "epoch": 0.95, "grad_norm": 4.0516839027404785, "learning_rate": 0.0002, "loss": 1.6851, "step": 232180 }, { "epoch": 0.95, "grad_norm": 2.1742372512817383, "learning_rate": 0.0002, "loss": 1.5846, "step": 232190 }, { "epoch": 0.95, "grad_norm": 3.023206949234009, "learning_rate": 0.0002, "loss": 1.6917, "step": 232200 }, { "epoch": 0.95, "grad_norm": 2.535186767578125, "learning_rate": 0.0002, "loss": 1.7393, "step": 232210 }, { "epoch": 0.95, "grad_norm": 3.1273159980773926, "learning_rate": 0.0002, "loss": 1.6703, "step": 232220 }, { "epoch": 0.95, "grad_norm": 2.38570237159729, "learning_rate": 0.0002, "loss": 1.6086, "step": 232230 }, { "epoch": 0.95, "grad_norm": 2.896594762802124, "learning_rate": 0.0002, "loss": 1.5429, "step": 232240 }, { "epoch": 0.95, "grad_norm": 1.8068866729736328, "learning_rate": 0.0002, "loss": 1.5813, "step": 232250 }, { "epoch": 0.95, "grad_norm": 3.2805798053741455, "learning_rate": 0.0002, "loss": 1.558, "step": 232260 }, { "epoch": 0.95, "grad_norm": 2.386321783065796, "learning_rate": 0.0002, "loss": 1.5736, "step": 232270 }, { "epoch": 0.95, "grad_norm": 3.0612616539001465, "learning_rate": 0.0002, "loss": 1.7099, "step": 232280 }, { "epoch": 0.95, "grad_norm": 2.5034162998199463, "learning_rate": 0.0002, "loss": 1.6684, "step": 232290 }, { "epoch": 0.95, "grad_norm": 3.2486863136291504, "learning_rate": 0.0002, "loss": 1.7702, "step": 232300 }, { "epoch": 0.95, "grad_norm": 2.9164011478424072, "learning_rate": 0.0002, "loss": 1.5117, "step": 232310 }, { "epoch": 0.95, "grad_norm": 3.2511818408966064, "learning_rate": 0.0002, "loss": 1.646, "step": 232320 }, { "epoch": 0.95, "grad_norm": 1.6369611024856567, "learning_rate": 0.0002, "loss": 1.6852, "step": 232330 }, { "epoch": 0.95, "grad_norm": 2.772509813308716, "learning_rate": 0.0002, "loss": 1.8085, "step": 232340 }, { "epoch": 0.95, "grad_norm": 3.117731809616089, "learning_rate": 0.0002, "loss": 1.5848, "step": 232350 }, { "epoch": 0.95, "grad_norm": 1.946698546409607, "learning_rate": 0.0002, "loss": 1.3789, "step": 232360 }, { "epoch": 0.95, "grad_norm": 2.45306396484375, "learning_rate": 0.0002, "loss": 1.6126, "step": 232370 }, { "epoch": 0.95, "grad_norm": 2.8340606689453125, "learning_rate": 0.0002, "loss": 1.583, "step": 232380 }, { "epoch": 0.95, "grad_norm": 2.9233322143554688, "learning_rate": 0.0002, "loss": 1.3509, "step": 232390 }, { "epoch": 0.95, "grad_norm": 2.172081708908081, "learning_rate": 0.0002, "loss": 1.5136, "step": 232400 }, { "epoch": 0.95, "grad_norm": 3.2302825450897217, "learning_rate": 0.0002, "loss": 1.8243, "step": 232410 }, { "epoch": 0.95, "grad_norm": 3.322761058807373, "learning_rate": 0.0002, "loss": 1.8217, "step": 232420 }, { "epoch": 0.95, "grad_norm": 4.1476850509643555, "learning_rate": 0.0002, "loss": 1.4592, "step": 232430 }, { "epoch": 0.95, "grad_norm": 2.435554027557373, "learning_rate": 0.0002, "loss": 1.4517, "step": 232440 }, { "epoch": 0.95, "grad_norm": 3.89668607711792, "learning_rate": 0.0002, "loss": 1.57, "step": 232450 }, { "epoch": 0.95, "grad_norm": 4.096686840057373, "learning_rate": 0.0002, "loss": 1.6626, "step": 232460 }, { "epoch": 0.95, "grad_norm": 3.609537124633789, "learning_rate": 0.0002, "loss": 1.3438, "step": 232470 }, { "epoch": 0.95, "grad_norm": 2.3458592891693115, "learning_rate": 0.0002, "loss": 1.6685, "step": 232480 }, { "epoch": 0.95, "grad_norm": 3.4111664295196533, "learning_rate": 0.0002, "loss": 1.5404, "step": 232490 }, { "epoch": 0.95, "grad_norm": 2.702749729156494, "learning_rate": 0.0002, "loss": 1.5269, "step": 232500 }, { "epoch": 0.95, "grad_norm": 3.9642813205718994, "learning_rate": 0.0002, "loss": 1.5013, "step": 232510 }, { "epoch": 0.95, "grad_norm": 3.0758039951324463, "learning_rate": 0.0002, "loss": 1.5544, "step": 232520 }, { "epoch": 0.95, "grad_norm": 3.51789927482605, "learning_rate": 0.0002, "loss": 1.6943, "step": 232530 }, { "epoch": 0.95, "grad_norm": 3.8200936317443848, "learning_rate": 0.0002, "loss": 1.7526, "step": 232540 }, { "epoch": 0.95, "grad_norm": 2.1199474334716797, "learning_rate": 0.0002, "loss": 1.5766, "step": 232550 }, { "epoch": 0.95, "grad_norm": 2.7722268104553223, "learning_rate": 0.0002, "loss": 1.7114, "step": 232560 }, { "epoch": 0.95, "grad_norm": 6.912868022918701, "learning_rate": 0.0002, "loss": 1.5395, "step": 232570 }, { "epoch": 0.95, "grad_norm": 3.3546195030212402, "learning_rate": 0.0002, "loss": 1.6906, "step": 232580 }, { "epoch": 0.95, "grad_norm": 4.179385185241699, "learning_rate": 0.0002, "loss": 1.6919, "step": 232590 }, { "epoch": 0.95, "grad_norm": 1.8163796663284302, "learning_rate": 0.0002, "loss": 1.6003, "step": 232600 }, { "epoch": 0.95, "grad_norm": 3.8119401931762695, "learning_rate": 0.0002, "loss": 1.6233, "step": 232610 }, { "epoch": 0.95, "grad_norm": 1.691579818725586, "learning_rate": 0.0002, "loss": 1.6806, "step": 232620 }, { "epoch": 0.95, "grad_norm": 2.0359549522399902, "learning_rate": 0.0002, "loss": 1.615, "step": 232630 }, { "epoch": 0.95, "grad_norm": 4.3024725914001465, "learning_rate": 0.0002, "loss": 1.7715, "step": 232640 }, { "epoch": 0.95, "grad_norm": 3.288288116455078, "learning_rate": 0.0002, "loss": 1.5534, "step": 232650 }, { "epoch": 0.95, "grad_norm": 3.357290029525757, "learning_rate": 0.0002, "loss": 1.6585, "step": 232660 }, { "epoch": 0.95, "grad_norm": 1.97527015209198, "learning_rate": 0.0002, "loss": 1.6147, "step": 232670 }, { "epoch": 0.95, "grad_norm": 2.3171231746673584, "learning_rate": 0.0002, "loss": 1.5347, "step": 232680 }, { "epoch": 0.95, "grad_norm": 2.279132127761841, "learning_rate": 0.0002, "loss": 1.3275, "step": 232690 }, { "epoch": 0.95, "grad_norm": 3.2060041427612305, "learning_rate": 0.0002, "loss": 1.5636, "step": 232700 }, { "epoch": 0.95, "grad_norm": 2.8103203773498535, "learning_rate": 0.0002, "loss": 1.6619, "step": 232710 }, { "epoch": 0.95, "grad_norm": 2.7038233280181885, "learning_rate": 0.0002, "loss": 1.321, "step": 232720 }, { "epoch": 0.95, "grad_norm": 2.835097551345825, "learning_rate": 0.0002, "loss": 1.6943, "step": 232730 }, { "epoch": 0.95, "grad_norm": 2.5940420627593994, "learning_rate": 0.0002, "loss": 1.3325, "step": 232740 }, { "epoch": 0.95, "grad_norm": 4.292984962463379, "learning_rate": 0.0002, "loss": 1.4804, "step": 232750 }, { "epoch": 0.95, "grad_norm": 2.943955421447754, "learning_rate": 0.0002, "loss": 1.4518, "step": 232760 }, { "epoch": 0.95, "grad_norm": 5.219200611114502, "learning_rate": 0.0002, "loss": 1.6366, "step": 232770 }, { "epoch": 0.95, "grad_norm": 2.6011595726013184, "learning_rate": 0.0002, "loss": 1.4062, "step": 232780 }, { "epoch": 0.95, "grad_norm": 3.4681899547576904, "learning_rate": 0.0002, "loss": 1.594, "step": 232790 }, { "epoch": 0.95, "grad_norm": 3.2275617122650146, "learning_rate": 0.0002, "loss": 1.8783, "step": 232800 }, { "epoch": 0.95, "grad_norm": 2.4577927589416504, "learning_rate": 0.0002, "loss": 1.5417, "step": 232810 }, { "epoch": 0.95, "grad_norm": 3.2889328002929688, "learning_rate": 0.0002, "loss": 1.6411, "step": 232820 }, { "epoch": 0.95, "grad_norm": 2.728367328643799, "learning_rate": 0.0002, "loss": 1.567, "step": 232830 }, { "epoch": 0.95, "grad_norm": 5.493232250213623, "learning_rate": 0.0002, "loss": 1.4024, "step": 232840 }, { "epoch": 0.95, "grad_norm": 2.5467641353607178, "learning_rate": 0.0002, "loss": 1.3038, "step": 232850 }, { "epoch": 0.95, "grad_norm": 5.223058700561523, "learning_rate": 0.0002, "loss": 1.5432, "step": 232860 }, { "epoch": 0.95, "grad_norm": 3.372685194015503, "learning_rate": 0.0002, "loss": 1.6, "step": 232870 }, { "epoch": 0.95, "grad_norm": 4.224725723266602, "learning_rate": 0.0002, "loss": 1.6341, "step": 232880 }, { "epoch": 0.95, "grad_norm": 3.422165870666504, "learning_rate": 0.0002, "loss": 1.86, "step": 232890 }, { "epoch": 0.95, "grad_norm": 2.7182626724243164, "learning_rate": 0.0002, "loss": 1.3779, "step": 232900 }, { "epoch": 0.95, "grad_norm": 3.39162540435791, "learning_rate": 0.0002, "loss": 1.3125, "step": 232910 }, { "epoch": 0.95, "grad_norm": 1.9773914813995361, "learning_rate": 0.0002, "loss": 1.532, "step": 232920 }, { "epoch": 0.95, "grad_norm": 3.6344895362854004, "learning_rate": 0.0002, "loss": 1.5078, "step": 232930 }, { "epoch": 0.95, "grad_norm": 2.458353281021118, "learning_rate": 0.0002, "loss": 1.504, "step": 232940 }, { "epoch": 0.95, "grad_norm": 4.540797233581543, "learning_rate": 0.0002, "loss": 1.7111, "step": 232950 }, { "epoch": 0.95, "grad_norm": 1.528603434562683, "learning_rate": 0.0002, "loss": 1.7795, "step": 232960 }, { "epoch": 0.95, "grad_norm": 3.1491637229919434, "learning_rate": 0.0002, "loss": 1.6463, "step": 232970 }, { "epoch": 0.95, "grad_norm": 4.043532371520996, "learning_rate": 0.0002, "loss": 1.6226, "step": 232980 }, { "epoch": 0.95, "grad_norm": 2.358128309249878, "learning_rate": 0.0002, "loss": 1.5207, "step": 232990 }, { "epoch": 0.95, "grad_norm": 4.392256259918213, "learning_rate": 0.0002, "loss": 1.6963, "step": 233000 }, { "epoch": 0.95, "grad_norm": 3.7535150051116943, "learning_rate": 0.0002, "loss": 1.6371, "step": 233010 }, { "epoch": 0.95, "grad_norm": 2.1021692752838135, "learning_rate": 0.0002, "loss": 1.4181, "step": 233020 }, { "epoch": 0.95, "grad_norm": 2.16408634185791, "learning_rate": 0.0002, "loss": 1.7917, "step": 233030 }, { "epoch": 0.95, "grad_norm": 2.016939640045166, "learning_rate": 0.0002, "loss": 1.6404, "step": 233040 }, { "epoch": 0.95, "grad_norm": 3.7333264350891113, "learning_rate": 0.0002, "loss": 1.6963, "step": 233050 }, { "epoch": 0.95, "grad_norm": 2.061343193054199, "learning_rate": 0.0002, "loss": 1.468, "step": 233060 }, { "epoch": 0.95, "grad_norm": 2.3960018157958984, "learning_rate": 0.0002, "loss": 1.4204, "step": 233070 }, { "epoch": 0.95, "grad_norm": 3.5281333923339844, "learning_rate": 0.0002, "loss": 1.7381, "step": 233080 }, { "epoch": 0.95, "grad_norm": 3.3519232273101807, "learning_rate": 0.0002, "loss": 1.5409, "step": 233090 }, { "epoch": 0.95, "grad_norm": 2.472371816635132, "learning_rate": 0.0002, "loss": 1.5677, "step": 233100 }, { "epoch": 0.95, "grad_norm": 3.206516742706299, "learning_rate": 0.0002, "loss": 1.8042, "step": 233110 }, { "epoch": 0.95, "grad_norm": 3.032470703125, "learning_rate": 0.0002, "loss": 1.6937, "step": 233120 }, { "epoch": 0.95, "grad_norm": 2.316779136657715, "learning_rate": 0.0002, "loss": 1.5276, "step": 233130 }, { "epoch": 0.95, "grad_norm": 3.579317808151245, "learning_rate": 0.0002, "loss": 1.3747, "step": 233140 }, { "epoch": 0.95, "grad_norm": 4.073686599731445, "learning_rate": 0.0002, "loss": 1.6284, "step": 233150 }, { "epoch": 0.95, "grad_norm": 2.482562780380249, "learning_rate": 0.0002, "loss": 1.4811, "step": 233160 }, { "epoch": 0.95, "grad_norm": 3.190908432006836, "learning_rate": 0.0002, "loss": 1.4684, "step": 233170 }, { "epoch": 0.95, "grad_norm": 3.1485917568206787, "learning_rate": 0.0002, "loss": 1.7125, "step": 233180 }, { "epoch": 0.95, "grad_norm": 3.002153158187866, "learning_rate": 0.0002, "loss": 1.5202, "step": 233190 }, { "epoch": 0.95, "grad_norm": 2.653114080429077, "learning_rate": 0.0002, "loss": 1.3953, "step": 233200 }, { "epoch": 0.95, "grad_norm": 2.456235647201538, "learning_rate": 0.0002, "loss": 1.5899, "step": 233210 }, { "epoch": 0.95, "grad_norm": 2.099903106689453, "learning_rate": 0.0002, "loss": 1.579, "step": 233220 }, { "epoch": 0.95, "grad_norm": 2.6517691612243652, "learning_rate": 0.0002, "loss": 1.3513, "step": 233230 }, { "epoch": 0.95, "grad_norm": 5.040988922119141, "learning_rate": 0.0002, "loss": 1.6225, "step": 233240 }, { "epoch": 0.95, "grad_norm": 3.610891580581665, "learning_rate": 0.0002, "loss": 1.3356, "step": 233250 }, { "epoch": 0.95, "grad_norm": 1.8970561027526855, "learning_rate": 0.0002, "loss": 1.6264, "step": 233260 }, { "epoch": 0.95, "grad_norm": 3.313115119934082, "learning_rate": 0.0002, "loss": 1.4803, "step": 233270 }, { "epoch": 0.95, "grad_norm": 3.0369246006011963, "learning_rate": 0.0002, "loss": 1.5833, "step": 233280 }, { "epoch": 0.95, "grad_norm": 4.673581600189209, "learning_rate": 0.0002, "loss": 1.5189, "step": 233290 }, { "epoch": 0.95, "grad_norm": 3.6248221397399902, "learning_rate": 0.0002, "loss": 1.6637, "step": 233300 }, { "epoch": 0.95, "grad_norm": 2.52894926071167, "learning_rate": 0.0002, "loss": 1.437, "step": 233310 }, { "epoch": 0.95, "grad_norm": 3.382223129272461, "learning_rate": 0.0002, "loss": 1.6402, "step": 233320 }, { "epoch": 0.95, "grad_norm": 1.8235241174697876, "learning_rate": 0.0002, "loss": 1.6625, "step": 233330 }, { "epoch": 0.95, "grad_norm": 4.007238864898682, "learning_rate": 0.0002, "loss": 1.4606, "step": 233340 }, { "epoch": 0.95, "grad_norm": 3.526803493499756, "learning_rate": 0.0002, "loss": 1.4825, "step": 233350 }, { "epoch": 0.95, "grad_norm": 2.8675737380981445, "learning_rate": 0.0002, "loss": 1.797, "step": 233360 }, { "epoch": 0.95, "grad_norm": 2.28812837600708, "learning_rate": 0.0002, "loss": 1.5618, "step": 233370 }, { "epoch": 0.95, "grad_norm": 2.587496280670166, "learning_rate": 0.0002, "loss": 1.6878, "step": 233380 }, { "epoch": 0.95, "grad_norm": 3.830390214920044, "learning_rate": 0.0002, "loss": 1.4901, "step": 233390 }, { "epoch": 0.95, "grad_norm": 6.816972732543945, "learning_rate": 0.0002, "loss": 1.602, "step": 233400 }, { "epoch": 0.95, "grad_norm": 2.3607892990112305, "learning_rate": 0.0002, "loss": 1.4058, "step": 233410 }, { "epoch": 0.95, "grad_norm": 2.195486068725586, "learning_rate": 0.0002, "loss": 1.2557, "step": 233420 }, { "epoch": 0.95, "grad_norm": 2.6797966957092285, "learning_rate": 0.0002, "loss": 1.4419, "step": 233430 }, { "epoch": 0.95, "grad_norm": 3.2879722118377686, "learning_rate": 0.0002, "loss": 1.1919, "step": 233440 }, { "epoch": 0.95, "grad_norm": 3.3272383213043213, "learning_rate": 0.0002, "loss": 1.5799, "step": 233450 }, { "epoch": 0.95, "grad_norm": 2.67954683303833, "learning_rate": 0.0002, "loss": 1.6027, "step": 233460 }, { "epoch": 0.95, "grad_norm": 4.468661308288574, "learning_rate": 0.0002, "loss": 1.4034, "step": 233470 }, { "epoch": 0.95, "grad_norm": 3.1395671367645264, "learning_rate": 0.0002, "loss": 1.6361, "step": 233480 }, { "epoch": 0.95, "grad_norm": 2.140178918838501, "learning_rate": 0.0002, "loss": 1.6347, "step": 233490 }, { "epoch": 0.95, "grad_norm": 5.0372796058654785, "learning_rate": 0.0002, "loss": 1.6189, "step": 233500 }, { "epoch": 0.95, "grad_norm": 2.226865768432617, "learning_rate": 0.0002, "loss": 1.7881, "step": 233510 }, { "epoch": 0.95, "grad_norm": 2.780257225036621, "learning_rate": 0.0002, "loss": 1.3153, "step": 233520 }, { "epoch": 0.95, "grad_norm": 3.2255640029907227, "learning_rate": 0.0002, "loss": 1.5478, "step": 233530 }, { "epoch": 0.95, "grad_norm": 2.9167959690093994, "learning_rate": 0.0002, "loss": 1.4029, "step": 233540 }, { "epoch": 0.95, "grad_norm": 2.7504355907440186, "learning_rate": 0.0002, "loss": 1.5189, "step": 233550 }, { "epoch": 0.95, "grad_norm": 2.635761022567749, "learning_rate": 0.0002, "loss": 1.6604, "step": 233560 }, { "epoch": 0.95, "grad_norm": 2.767721652984619, "learning_rate": 0.0002, "loss": 1.7751, "step": 233570 }, { "epoch": 0.95, "grad_norm": 2.0008158683776855, "learning_rate": 0.0002, "loss": 1.7392, "step": 233580 }, { "epoch": 0.95, "grad_norm": 3.436494827270508, "learning_rate": 0.0002, "loss": 1.7318, "step": 233590 }, { "epoch": 0.95, "grad_norm": 2.8154234886169434, "learning_rate": 0.0002, "loss": 1.7435, "step": 233600 }, { "epoch": 0.95, "grad_norm": 2.938819169998169, "learning_rate": 0.0002, "loss": 1.8715, "step": 233610 }, { "epoch": 0.95, "grad_norm": 3.05129337310791, "learning_rate": 0.0002, "loss": 1.5277, "step": 233620 }, { "epoch": 0.95, "grad_norm": 3.790119171142578, "learning_rate": 0.0002, "loss": 1.4791, "step": 233630 }, { "epoch": 0.95, "grad_norm": 2.4710440635681152, "learning_rate": 0.0002, "loss": 1.7062, "step": 233640 }, { "epoch": 0.95, "grad_norm": 2.2123358249664307, "learning_rate": 0.0002, "loss": 1.6369, "step": 233650 }, { "epoch": 0.95, "grad_norm": 5.54049015045166, "learning_rate": 0.0002, "loss": 1.5404, "step": 233660 }, { "epoch": 0.95, "grad_norm": 1.9483554363250732, "learning_rate": 0.0002, "loss": 1.3239, "step": 233670 }, { "epoch": 0.95, "grad_norm": 2.7608606815338135, "learning_rate": 0.0002, "loss": 1.7352, "step": 233680 }, { "epoch": 0.95, "grad_norm": 2.8702614307403564, "learning_rate": 0.0002, "loss": 1.7897, "step": 233690 }, { "epoch": 0.95, "grad_norm": 5.556624412536621, "learning_rate": 0.0002, "loss": 1.6135, "step": 233700 }, { "epoch": 0.95, "grad_norm": 3.4939143657684326, "learning_rate": 0.0002, "loss": 1.834, "step": 233710 }, { "epoch": 0.95, "grad_norm": 3.167654514312744, "learning_rate": 0.0002, "loss": 1.5609, "step": 233720 }, { "epoch": 0.95, "grad_norm": 3.604686975479126, "learning_rate": 0.0002, "loss": 1.7636, "step": 233730 }, { "epoch": 0.95, "grad_norm": 3.237020254135132, "learning_rate": 0.0002, "loss": 1.6042, "step": 233740 }, { "epoch": 0.95, "grad_norm": 3.258028030395508, "learning_rate": 0.0002, "loss": 1.6336, "step": 233750 }, { "epoch": 0.95, "grad_norm": 2.673887014389038, "learning_rate": 0.0002, "loss": 1.3994, "step": 233760 }, { "epoch": 0.95, "grad_norm": 4.017027378082275, "learning_rate": 0.0002, "loss": 1.5835, "step": 233770 }, { "epoch": 0.95, "grad_norm": 1.9828842878341675, "learning_rate": 0.0002, "loss": 1.5795, "step": 233780 }, { "epoch": 0.95, "grad_norm": 2.9772140979766846, "learning_rate": 0.0002, "loss": 1.4319, "step": 233790 }, { "epoch": 0.95, "grad_norm": 3.687340021133423, "learning_rate": 0.0002, "loss": 1.5531, "step": 233800 }, { "epoch": 0.95, "grad_norm": 4.4231367111206055, "learning_rate": 0.0002, "loss": 1.2966, "step": 233810 }, { "epoch": 0.95, "grad_norm": 2.4661483764648438, "learning_rate": 0.0002, "loss": 1.6036, "step": 233820 }, { "epoch": 0.95, "grad_norm": 3.259045362472534, "learning_rate": 0.0002, "loss": 1.8044, "step": 233830 }, { "epoch": 0.95, "grad_norm": 1.167605996131897, "learning_rate": 0.0002, "loss": 1.3181, "step": 233840 }, { "epoch": 0.95, "grad_norm": 1.7410606145858765, "learning_rate": 0.0002, "loss": 1.5909, "step": 233850 }, { "epoch": 0.95, "grad_norm": 3.010044813156128, "learning_rate": 0.0002, "loss": 1.3703, "step": 233860 }, { "epoch": 0.95, "grad_norm": 2.9116315841674805, "learning_rate": 0.0002, "loss": 1.7788, "step": 233870 }, { "epoch": 0.95, "grad_norm": 2.352428674697876, "learning_rate": 0.0002, "loss": 1.5631, "step": 233880 }, { "epoch": 0.95, "grad_norm": 3.366403818130493, "learning_rate": 0.0002, "loss": 1.536, "step": 233890 }, { "epoch": 0.95, "grad_norm": 2.7402820587158203, "learning_rate": 0.0002, "loss": 1.3316, "step": 233900 }, { "epoch": 0.95, "grad_norm": 3.518951892852783, "learning_rate": 0.0002, "loss": 1.4857, "step": 233910 }, { "epoch": 0.95, "grad_norm": 4.1281304359436035, "learning_rate": 0.0002, "loss": 1.5636, "step": 233920 }, { "epoch": 0.95, "grad_norm": 3.225895404815674, "learning_rate": 0.0002, "loss": 1.4537, "step": 233930 }, { "epoch": 0.95, "grad_norm": 2.4944522380828857, "learning_rate": 0.0002, "loss": 1.401, "step": 233940 }, { "epoch": 0.95, "grad_norm": 2.9507193565368652, "learning_rate": 0.0002, "loss": 1.7442, "step": 233950 }, { "epoch": 0.95, "grad_norm": 3.40016770362854, "learning_rate": 0.0002, "loss": 1.7844, "step": 233960 }, { "epoch": 0.95, "grad_norm": 3.2450361251831055, "learning_rate": 0.0002, "loss": 1.5593, "step": 233970 }, { "epoch": 0.95, "grad_norm": 2.5923731327056885, "learning_rate": 0.0002, "loss": 1.7989, "step": 233980 }, { "epoch": 0.95, "grad_norm": 3.5041513442993164, "learning_rate": 0.0002, "loss": 1.8509, "step": 233990 }, { "epoch": 0.95, "grad_norm": 4.001319885253906, "learning_rate": 0.0002, "loss": 1.8703, "step": 234000 }, { "epoch": 0.95, "grad_norm": 2.154183864593506, "learning_rate": 0.0002, "loss": 1.5121, "step": 234010 }, { "epoch": 0.95, "grad_norm": 3.80873441696167, "learning_rate": 0.0002, "loss": 1.3661, "step": 234020 }, { "epoch": 0.95, "grad_norm": 3.77245831489563, "learning_rate": 0.0002, "loss": 1.5276, "step": 234030 }, { "epoch": 0.95, "grad_norm": 2.7587649822235107, "learning_rate": 0.0002, "loss": 1.5979, "step": 234040 }, { "epoch": 0.95, "grad_norm": 3.902284622192383, "learning_rate": 0.0002, "loss": 1.6513, "step": 234050 }, { "epoch": 0.95, "grad_norm": 2.7526440620422363, "learning_rate": 0.0002, "loss": 1.4867, "step": 234060 }, { "epoch": 0.95, "grad_norm": 1.9364192485809326, "learning_rate": 0.0002, "loss": 1.7495, "step": 234070 }, { "epoch": 0.95, "grad_norm": 1.771058201789856, "learning_rate": 0.0002, "loss": 1.5073, "step": 234080 }, { "epoch": 0.95, "grad_norm": 3.085923671722412, "learning_rate": 0.0002, "loss": 1.7343, "step": 234090 }, { "epoch": 0.95, "grad_norm": 3.4587295055389404, "learning_rate": 0.0002, "loss": 1.4069, "step": 234100 }, { "epoch": 0.95, "grad_norm": 2.520218849182129, "learning_rate": 0.0002, "loss": 1.4991, "step": 234110 }, { "epoch": 0.95, "grad_norm": 2.918753147125244, "learning_rate": 0.0002, "loss": 1.9133, "step": 234120 }, { "epoch": 0.95, "grad_norm": 3.5761208534240723, "learning_rate": 0.0002, "loss": 1.7112, "step": 234130 }, { "epoch": 0.95, "grad_norm": 6.346073627471924, "learning_rate": 0.0002, "loss": 1.3902, "step": 234140 }, { "epoch": 0.95, "grad_norm": 2.2903895378112793, "learning_rate": 0.0002, "loss": 1.576, "step": 234150 }, { "epoch": 0.95, "grad_norm": 3.2072370052337646, "learning_rate": 0.0002, "loss": 1.5774, "step": 234160 }, { "epoch": 0.95, "grad_norm": 2.268846035003662, "learning_rate": 0.0002, "loss": 1.4606, "step": 234170 }, { "epoch": 0.95, "grad_norm": 2.665783405303955, "learning_rate": 0.0002, "loss": 1.5982, "step": 234180 }, { "epoch": 0.95, "grad_norm": 2.605149269104004, "learning_rate": 0.0002, "loss": 1.6571, "step": 234190 }, { "epoch": 0.95, "grad_norm": 1.564572811126709, "learning_rate": 0.0002, "loss": 1.9286, "step": 234200 }, { "epoch": 0.95, "grad_norm": 3.2293949127197266, "learning_rate": 0.0002, "loss": 1.502, "step": 234210 }, { "epoch": 0.95, "grad_norm": 2.727039337158203, "learning_rate": 0.0002, "loss": 1.7219, "step": 234220 }, { "epoch": 0.95, "grad_norm": 1.8932875394821167, "learning_rate": 0.0002, "loss": 1.593, "step": 234230 }, { "epoch": 0.95, "grad_norm": 3.5771994590759277, "learning_rate": 0.0002, "loss": 1.586, "step": 234240 }, { "epoch": 0.95, "grad_norm": 7.136636734008789, "learning_rate": 0.0002, "loss": 1.6745, "step": 234250 }, { "epoch": 0.95, "grad_norm": 3.992609739303589, "learning_rate": 0.0002, "loss": 1.6901, "step": 234260 }, { "epoch": 0.95, "grad_norm": 3.3681623935699463, "learning_rate": 0.0002, "loss": 1.8255, "step": 234270 }, { "epoch": 0.95, "grad_norm": 2.2900195121765137, "learning_rate": 0.0002, "loss": 1.5442, "step": 234280 }, { "epoch": 0.95, "grad_norm": 2.5584309101104736, "learning_rate": 0.0002, "loss": 1.3613, "step": 234290 }, { "epoch": 0.95, "grad_norm": 2.0259318351745605, "learning_rate": 0.0002, "loss": 1.8371, "step": 234300 }, { "epoch": 0.95, "grad_norm": 3.0251286029815674, "learning_rate": 0.0002, "loss": 1.7735, "step": 234310 }, { "epoch": 0.95, "grad_norm": 2.7813072204589844, "learning_rate": 0.0002, "loss": 1.3752, "step": 234320 }, { "epoch": 0.95, "grad_norm": 2.168928861618042, "learning_rate": 0.0002, "loss": 1.5805, "step": 234330 }, { "epoch": 0.95, "grad_norm": 3.283524751663208, "learning_rate": 0.0002, "loss": 1.6211, "step": 234340 }, { "epoch": 0.95, "grad_norm": 3.65562105178833, "learning_rate": 0.0002, "loss": 1.4324, "step": 234350 }, { "epoch": 0.95, "grad_norm": 4.41299295425415, "learning_rate": 0.0002, "loss": 1.5567, "step": 234360 }, { "epoch": 0.95, "grad_norm": 2.989718198776245, "learning_rate": 0.0002, "loss": 1.7654, "step": 234370 }, { "epoch": 0.95, "grad_norm": 3.698651075363159, "learning_rate": 0.0002, "loss": 1.5343, "step": 234380 }, { "epoch": 0.95, "grad_norm": 1.8185327053070068, "learning_rate": 0.0002, "loss": 1.7344, "step": 234390 }, { "epoch": 0.95, "grad_norm": 3.6843907833099365, "learning_rate": 0.0002, "loss": 1.6838, "step": 234400 }, { "epoch": 0.95, "grad_norm": 2.5609426498413086, "learning_rate": 0.0002, "loss": 1.3615, "step": 234410 }, { "epoch": 0.95, "grad_norm": 3.8253095149993896, "learning_rate": 0.0002, "loss": 1.4385, "step": 234420 }, { "epoch": 0.95, "grad_norm": 2.3939521312713623, "learning_rate": 0.0002, "loss": 1.6954, "step": 234430 }, { "epoch": 0.95, "grad_norm": 2.6314644813537598, "learning_rate": 0.0002, "loss": 1.6347, "step": 234440 }, { "epoch": 0.95, "grad_norm": 2.8989248275756836, "learning_rate": 0.0002, "loss": 1.732, "step": 234450 }, { "epoch": 0.95, "grad_norm": 3.3632454872131348, "learning_rate": 0.0002, "loss": 1.4179, "step": 234460 }, { "epoch": 0.95, "grad_norm": 3.151496171951294, "learning_rate": 0.0002, "loss": 1.5468, "step": 234470 }, { "epoch": 0.95, "grad_norm": 2.754734992980957, "learning_rate": 0.0002, "loss": 1.7311, "step": 234480 }, { "epoch": 0.95, "grad_norm": 2.5754544734954834, "learning_rate": 0.0002, "loss": 1.587, "step": 234490 }, { "epoch": 0.95, "grad_norm": 2.858637809753418, "learning_rate": 0.0002, "loss": 1.7349, "step": 234500 }, { "epoch": 0.95, "grad_norm": 5.160152435302734, "learning_rate": 0.0002, "loss": 1.7082, "step": 234510 }, { "epoch": 0.95, "grad_norm": 3.1827552318573, "learning_rate": 0.0002, "loss": 1.584, "step": 234520 }, { "epoch": 0.95, "grad_norm": 2.4433040618896484, "learning_rate": 0.0002, "loss": 1.8931, "step": 234530 }, { "epoch": 0.95, "grad_norm": 3.0807573795318604, "learning_rate": 0.0002, "loss": 1.6812, "step": 234540 }, { "epoch": 0.95, "grad_norm": 2.4635705947875977, "learning_rate": 0.0002, "loss": 1.5222, "step": 234550 }, { "epoch": 0.95, "grad_norm": 3.2501165866851807, "learning_rate": 0.0002, "loss": 1.5013, "step": 234560 }, { "epoch": 0.95, "grad_norm": 2.350131034851074, "learning_rate": 0.0002, "loss": 1.4235, "step": 234570 }, { "epoch": 0.95, "grad_norm": 4.633877754211426, "learning_rate": 0.0002, "loss": 1.4083, "step": 234580 }, { "epoch": 0.95, "grad_norm": 3.382469892501831, "learning_rate": 0.0002, "loss": 1.6932, "step": 234590 }, { "epoch": 0.96, "grad_norm": 2.670098066329956, "learning_rate": 0.0002, "loss": 1.717, "step": 234600 }, { "epoch": 0.96, "grad_norm": 2.5984466075897217, "learning_rate": 0.0002, "loss": 1.7195, "step": 234610 }, { "epoch": 0.96, "grad_norm": 3.34798526763916, "learning_rate": 0.0002, "loss": 1.5701, "step": 234620 }, { "epoch": 0.96, "grad_norm": 2.6131997108459473, "learning_rate": 0.0002, "loss": 1.4449, "step": 234630 }, { "epoch": 0.96, "grad_norm": 2.481961965560913, "learning_rate": 0.0002, "loss": 1.2164, "step": 234640 }, { "epoch": 0.96, "grad_norm": 3.7027108669281006, "learning_rate": 0.0002, "loss": 1.5491, "step": 234650 }, { "epoch": 0.96, "grad_norm": 3.622253894805908, "learning_rate": 0.0002, "loss": 1.5464, "step": 234660 }, { "epoch": 0.96, "grad_norm": 2.731523275375366, "learning_rate": 0.0002, "loss": 1.2218, "step": 234670 }, { "epoch": 0.96, "grad_norm": 4.391806125640869, "learning_rate": 0.0002, "loss": 1.2362, "step": 234680 }, { "epoch": 0.96, "grad_norm": 5.889267444610596, "learning_rate": 0.0002, "loss": 1.8527, "step": 234690 }, { "epoch": 0.96, "grad_norm": 2.269777774810791, "learning_rate": 0.0002, "loss": 1.2908, "step": 234700 }, { "epoch": 0.96, "grad_norm": 1.847333312034607, "learning_rate": 0.0002, "loss": 1.6947, "step": 234710 }, { "epoch": 0.96, "grad_norm": 3.5358541011810303, "learning_rate": 0.0002, "loss": 1.484, "step": 234720 }, { "epoch": 0.96, "grad_norm": 2.532362699508667, "learning_rate": 0.0002, "loss": 1.6588, "step": 234730 }, { "epoch": 0.96, "grad_norm": 3.7138402462005615, "learning_rate": 0.0002, "loss": 1.2368, "step": 234740 }, { "epoch": 0.96, "grad_norm": 2.3125314712524414, "learning_rate": 0.0002, "loss": 1.5419, "step": 234750 }, { "epoch": 0.96, "grad_norm": 3.175200939178467, "learning_rate": 0.0002, "loss": 1.4669, "step": 234760 }, { "epoch": 0.96, "grad_norm": 4.394792556762695, "learning_rate": 0.0002, "loss": 1.8382, "step": 234770 }, { "epoch": 0.96, "grad_norm": 5.236793518066406, "learning_rate": 0.0002, "loss": 1.8284, "step": 234780 }, { "epoch": 0.96, "grad_norm": 1.980358362197876, "learning_rate": 0.0002, "loss": 1.4189, "step": 234790 }, { "epoch": 0.96, "grad_norm": 4.307941436767578, "learning_rate": 0.0002, "loss": 1.4702, "step": 234800 }, { "epoch": 0.96, "grad_norm": 3.6425998210906982, "learning_rate": 0.0002, "loss": 1.6643, "step": 234810 }, { "epoch": 0.96, "grad_norm": 3.553719997406006, "learning_rate": 0.0002, "loss": 1.6013, "step": 234820 }, { "epoch": 0.96, "grad_norm": 2.772456407546997, "learning_rate": 0.0002, "loss": 1.6672, "step": 234830 }, { "epoch": 0.96, "grad_norm": 3.288653612136841, "learning_rate": 0.0002, "loss": 1.3329, "step": 234840 }, { "epoch": 0.96, "grad_norm": 3.356436252593994, "learning_rate": 0.0002, "loss": 1.5638, "step": 234850 }, { "epoch": 0.96, "grad_norm": 3.344299554824829, "learning_rate": 0.0002, "loss": 1.6253, "step": 234860 }, { "epoch": 0.96, "grad_norm": 5.445404529571533, "learning_rate": 0.0002, "loss": 1.6568, "step": 234870 }, { "epoch": 0.96, "grad_norm": 2.8139212131500244, "learning_rate": 0.0002, "loss": 1.6815, "step": 234880 }, { "epoch": 0.96, "grad_norm": 3.854295492172241, "learning_rate": 0.0002, "loss": 1.699, "step": 234890 }, { "epoch": 0.96, "grad_norm": 1.9824765920639038, "learning_rate": 0.0002, "loss": 1.5733, "step": 234900 }, { "epoch": 0.96, "grad_norm": 2.808361291885376, "learning_rate": 0.0002, "loss": 1.5453, "step": 234910 }, { "epoch": 0.96, "grad_norm": 3.447444438934326, "learning_rate": 0.0002, "loss": 1.8382, "step": 234920 }, { "epoch": 0.96, "grad_norm": 2.513448476791382, "learning_rate": 0.0002, "loss": 1.6896, "step": 234930 }, { "epoch": 0.96, "grad_norm": 3.4277594089508057, "learning_rate": 0.0002, "loss": 1.5065, "step": 234940 }, { "epoch": 0.96, "grad_norm": 2.870826005935669, "learning_rate": 0.0002, "loss": 1.6073, "step": 234950 }, { "epoch": 0.96, "grad_norm": 3.132702112197876, "learning_rate": 0.0002, "loss": 1.6601, "step": 234960 }, { "epoch": 0.96, "grad_norm": 3.9662065505981445, "learning_rate": 0.0002, "loss": 1.7013, "step": 234970 }, { "epoch": 0.96, "grad_norm": 2.9869415760040283, "learning_rate": 0.0002, "loss": 1.563, "step": 234980 }, { "epoch": 0.96, "grad_norm": 2.0014140605926514, "learning_rate": 0.0002, "loss": 1.2948, "step": 234990 }, { "epoch": 0.96, "grad_norm": 3.1258630752563477, "learning_rate": 0.0002, "loss": 1.62, "step": 235000 }, { "epoch": 0.96, "grad_norm": 2.3920843601226807, "learning_rate": 0.0002, "loss": 1.634, "step": 235010 }, { "epoch": 0.96, "grad_norm": 6.084268093109131, "learning_rate": 0.0002, "loss": 1.4411, "step": 235020 }, { "epoch": 0.96, "grad_norm": 3.3753466606140137, "learning_rate": 0.0002, "loss": 1.7839, "step": 235030 }, { "epoch": 0.96, "grad_norm": 3.2561323642730713, "learning_rate": 0.0002, "loss": 1.6119, "step": 235040 }, { "epoch": 0.96, "grad_norm": 4.206699848175049, "learning_rate": 0.0002, "loss": 1.675, "step": 235050 }, { "epoch": 0.96, "grad_norm": 2.0310213565826416, "learning_rate": 0.0002, "loss": 1.5255, "step": 235060 }, { "epoch": 0.96, "grad_norm": 2.7873871326446533, "learning_rate": 0.0002, "loss": 1.659, "step": 235070 }, { "epoch": 0.96, "grad_norm": 3.106132984161377, "learning_rate": 0.0002, "loss": 1.7296, "step": 235080 }, { "epoch": 0.96, "grad_norm": 3.6692726612091064, "learning_rate": 0.0002, "loss": 1.7385, "step": 235090 }, { "epoch": 0.96, "grad_norm": 2.5635852813720703, "learning_rate": 0.0002, "loss": 1.4404, "step": 235100 }, { "epoch": 0.96, "grad_norm": 2.588594913482666, "learning_rate": 0.0002, "loss": 1.7333, "step": 235110 }, { "epoch": 0.96, "grad_norm": 2.938826084136963, "learning_rate": 0.0002, "loss": 1.5132, "step": 235120 }, { "epoch": 0.96, "grad_norm": 2.0836260318756104, "learning_rate": 0.0002, "loss": 1.4218, "step": 235130 }, { "epoch": 0.96, "grad_norm": 2.284268856048584, "learning_rate": 0.0002, "loss": 1.5347, "step": 235140 }, { "epoch": 0.96, "grad_norm": 3.1130149364471436, "learning_rate": 0.0002, "loss": 1.4801, "step": 235150 }, { "epoch": 0.96, "grad_norm": 2.8962795734405518, "learning_rate": 0.0002, "loss": 1.7698, "step": 235160 }, { "epoch": 0.96, "grad_norm": 3.443847179412842, "learning_rate": 0.0002, "loss": 1.547, "step": 235170 }, { "epoch": 0.96, "grad_norm": 4.983170032501221, "learning_rate": 0.0002, "loss": 1.5949, "step": 235180 }, { "epoch": 0.96, "grad_norm": 4.031501770019531, "learning_rate": 0.0002, "loss": 1.6336, "step": 235190 }, { "epoch": 0.96, "grad_norm": 3.4203100204467773, "learning_rate": 0.0002, "loss": 1.5289, "step": 235200 }, { "epoch": 0.96, "grad_norm": 2.6223700046539307, "learning_rate": 0.0002, "loss": 1.5376, "step": 235210 }, { "epoch": 0.96, "grad_norm": 2.1469476222991943, "learning_rate": 0.0002, "loss": 1.4635, "step": 235220 }, { "epoch": 0.96, "grad_norm": 2.609990119934082, "learning_rate": 0.0002, "loss": 1.8551, "step": 235230 }, { "epoch": 0.96, "grad_norm": 2.7846970558166504, "learning_rate": 0.0002, "loss": 1.6492, "step": 235240 }, { "epoch": 0.96, "grad_norm": 2.143618106842041, "learning_rate": 0.0002, "loss": 1.5184, "step": 235250 }, { "epoch": 0.96, "grad_norm": 5.798407077789307, "learning_rate": 0.0002, "loss": 1.6527, "step": 235260 }, { "epoch": 0.96, "grad_norm": 3.3543996810913086, "learning_rate": 0.0002, "loss": 1.6837, "step": 235270 }, { "epoch": 0.96, "grad_norm": 2.8750312328338623, "learning_rate": 0.0002, "loss": 1.5343, "step": 235280 }, { "epoch": 0.96, "grad_norm": 1.6883190870285034, "learning_rate": 0.0002, "loss": 1.5826, "step": 235290 }, { "epoch": 0.96, "grad_norm": 2.5013413429260254, "learning_rate": 0.0002, "loss": 1.5308, "step": 235300 }, { "epoch": 0.96, "grad_norm": 2.186471462249756, "learning_rate": 0.0002, "loss": 1.2434, "step": 235310 }, { "epoch": 0.96, "grad_norm": 4.047539710998535, "learning_rate": 0.0002, "loss": 1.3668, "step": 235320 }, { "epoch": 0.96, "grad_norm": 3.41589093208313, "learning_rate": 0.0002, "loss": 1.6246, "step": 235330 }, { "epoch": 0.96, "grad_norm": 6.983996868133545, "learning_rate": 0.0002, "loss": 1.6534, "step": 235340 }, { "epoch": 0.96, "grad_norm": 1.9940928220748901, "learning_rate": 0.0002, "loss": 1.5262, "step": 235350 }, { "epoch": 0.96, "grad_norm": 2.9754390716552734, "learning_rate": 0.0002, "loss": 1.8717, "step": 235360 }, { "epoch": 0.96, "grad_norm": 3.7492218017578125, "learning_rate": 0.0002, "loss": 1.3672, "step": 235370 }, { "epoch": 0.96, "grad_norm": 5.07816219329834, "learning_rate": 0.0002, "loss": 1.6502, "step": 235380 }, { "epoch": 0.96, "grad_norm": 3.3165817260742188, "learning_rate": 0.0002, "loss": 1.6954, "step": 235390 }, { "epoch": 0.96, "grad_norm": 7.793699741363525, "learning_rate": 0.0002, "loss": 1.4828, "step": 235400 }, { "epoch": 0.96, "grad_norm": 6.223214149475098, "learning_rate": 0.0002, "loss": 1.8343, "step": 235410 }, { "epoch": 0.96, "grad_norm": 4.591248035430908, "learning_rate": 0.0002, "loss": 1.3714, "step": 235420 }, { "epoch": 0.96, "grad_norm": 3.556506633758545, "learning_rate": 0.0002, "loss": 1.5082, "step": 235430 }, { "epoch": 0.96, "grad_norm": 3.0685858726501465, "learning_rate": 0.0002, "loss": 1.4645, "step": 235440 }, { "epoch": 0.96, "grad_norm": 2.4094910621643066, "learning_rate": 0.0002, "loss": 1.3363, "step": 235450 }, { "epoch": 0.96, "grad_norm": 2.1951839923858643, "learning_rate": 0.0002, "loss": 1.6553, "step": 235460 }, { "epoch": 0.96, "grad_norm": 2.0424203872680664, "learning_rate": 0.0002, "loss": 1.8797, "step": 235470 }, { "epoch": 0.96, "grad_norm": 3.0413289070129395, "learning_rate": 0.0002, "loss": 1.4156, "step": 235480 }, { "epoch": 0.96, "grad_norm": 8.1217041015625, "learning_rate": 0.0002, "loss": 1.6938, "step": 235490 }, { "epoch": 0.96, "grad_norm": 3.880598545074463, "learning_rate": 0.0002, "loss": 1.634, "step": 235500 }, { "epoch": 0.96, "grad_norm": 3.972191095352173, "learning_rate": 0.0002, "loss": 1.3334, "step": 235510 }, { "epoch": 0.96, "grad_norm": 3.2964320182800293, "learning_rate": 0.0002, "loss": 1.8233, "step": 235520 }, { "epoch": 0.96, "grad_norm": 3.253977060317993, "learning_rate": 0.0002, "loss": 1.6759, "step": 235530 }, { "epoch": 0.96, "grad_norm": 3.362295150756836, "learning_rate": 0.0002, "loss": 1.489, "step": 235540 }, { "epoch": 0.96, "grad_norm": 3.0295677185058594, "learning_rate": 0.0002, "loss": 1.671, "step": 235550 }, { "epoch": 0.96, "grad_norm": 2.606174945831299, "learning_rate": 0.0002, "loss": 1.6285, "step": 235560 }, { "epoch": 0.96, "grad_norm": 2.6580238342285156, "learning_rate": 0.0002, "loss": 1.7419, "step": 235570 }, { "epoch": 0.96, "grad_norm": 1.8744863271713257, "learning_rate": 0.0002, "loss": 1.3513, "step": 235580 }, { "epoch": 0.96, "grad_norm": 2.8433518409729004, "learning_rate": 0.0002, "loss": 1.6537, "step": 235590 }, { "epoch": 0.96, "grad_norm": 2.2346179485321045, "learning_rate": 0.0002, "loss": 1.3986, "step": 235600 }, { "epoch": 0.96, "grad_norm": 3.4192161560058594, "learning_rate": 0.0002, "loss": 1.5294, "step": 235610 }, { "epoch": 0.96, "grad_norm": 2.94884991645813, "learning_rate": 0.0002, "loss": 1.2986, "step": 235620 }, { "epoch": 0.96, "grad_norm": 2.795593738555908, "learning_rate": 0.0002, "loss": 1.6328, "step": 235630 }, { "epoch": 0.96, "grad_norm": 3.165011405944824, "learning_rate": 0.0002, "loss": 1.4184, "step": 235640 }, { "epoch": 0.96, "grad_norm": 2.560124158859253, "learning_rate": 0.0002, "loss": 1.4578, "step": 235650 }, { "epoch": 0.96, "grad_norm": 3.0035502910614014, "learning_rate": 0.0002, "loss": 1.5334, "step": 235660 }, { "epoch": 0.96, "grad_norm": 2.1492080688476562, "learning_rate": 0.0002, "loss": 1.7653, "step": 235670 }, { "epoch": 0.96, "grad_norm": 2.227566719055176, "learning_rate": 0.0002, "loss": 1.6964, "step": 235680 }, { "epoch": 0.96, "grad_norm": 2.622070074081421, "learning_rate": 0.0002, "loss": 1.8327, "step": 235690 }, { "epoch": 0.96, "grad_norm": 3.244405508041382, "learning_rate": 0.0002, "loss": 1.785, "step": 235700 }, { "epoch": 0.96, "grad_norm": 3.4797112941741943, "learning_rate": 0.0002, "loss": 1.7178, "step": 235710 }, { "epoch": 0.96, "grad_norm": 2.9141945838928223, "learning_rate": 0.0002, "loss": 1.6011, "step": 235720 }, { "epoch": 0.96, "grad_norm": 3.7810475826263428, "learning_rate": 0.0002, "loss": 1.5348, "step": 235730 }, { "epoch": 0.96, "grad_norm": 2.857024669647217, "learning_rate": 0.0002, "loss": 1.5971, "step": 235740 }, { "epoch": 0.96, "grad_norm": 5.207771301269531, "learning_rate": 0.0002, "loss": 1.6992, "step": 235750 }, { "epoch": 0.96, "grad_norm": 17.357051849365234, "learning_rate": 0.0002, "loss": 1.58, "step": 235760 }, { "epoch": 0.96, "grad_norm": 2.7841527462005615, "learning_rate": 0.0002, "loss": 1.7367, "step": 235770 }, { "epoch": 0.96, "grad_norm": 1.863463282585144, "learning_rate": 0.0002, "loss": 1.6912, "step": 235780 }, { "epoch": 0.96, "grad_norm": 3.39217209815979, "learning_rate": 0.0002, "loss": 1.5842, "step": 235790 }, { "epoch": 0.96, "grad_norm": 2.9937522411346436, "learning_rate": 0.0002, "loss": 1.6448, "step": 235800 }, { "epoch": 0.96, "grad_norm": 3.7385740280151367, "learning_rate": 0.0002, "loss": 1.6672, "step": 235810 }, { "epoch": 0.96, "grad_norm": 4.029402256011963, "learning_rate": 0.0002, "loss": 1.3504, "step": 235820 }, { "epoch": 0.96, "grad_norm": 2.8884072303771973, "learning_rate": 0.0002, "loss": 1.517, "step": 235830 }, { "epoch": 0.96, "grad_norm": 3.987360954284668, "learning_rate": 0.0002, "loss": 1.6315, "step": 235840 }, { "epoch": 0.96, "grad_norm": 2.0814402103424072, "learning_rate": 0.0002, "loss": 1.554, "step": 235850 }, { "epoch": 0.96, "grad_norm": 3.1833624839782715, "learning_rate": 0.0002, "loss": 1.5242, "step": 235860 }, { "epoch": 0.96, "grad_norm": 3.2112889289855957, "learning_rate": 0.0002, "loss": 1.6313, "step": 235870 }, { "epoch": 0.96, "grad_norm": 4.6082377433776855, "learning_rate": 0.0002, "loss": 1.6066, "step": 235880 }, { "epoch": 0.96, "grad_norm": 3.344123601913452, "learning_rate": 0.0002, "loss": 1.2604, "step": 235890 }, { "epoch": 0.96, "grad_norm": 3.690312385559082, "learning_rate": 0.0002, "loss": 1.908, "step": 235900 }, { "epoch": 0.96, "grad_norm": 4.247154235839844, "learning_rate": 0.0002, "loss": 1.4684, "step": 235910 }, { "epoch": 0.96, "grad_norm": 4.753109931945801, "learning_rate": 0.0002, "loss": 1.5681, "step": 235920 }, { "epoch": 0.96, "grad_norm": 3.6649417877197266, "learning_rate": 0.0002, "loss": 1.7751, "step": 235930 }, { "epoch": 0.96, "grad_norm": 3.0245981216430664, "learning_rate": 0.0002, "loss": 1.6971, "step": 235940 }, { "epoch": 0.96, "grad_norm": 2.138009786605835, "learning_rate": 0.0002, "loss": 1.6265, "step": 235950 }, { "epoch": 0.96, "grad_norm": 2.7667806148529053, "learning_rate": 0.0002, "loss": 1.3594, "step": 235960 }, { "epoch": 0.96, "grad_norm": 2.5491273403167725, "learning_rate": 0.0002, "loss": 1.6419, "step": 235970 }, { "epoch": 0.96, "grad_norm": 6.480526924133301, "learning_rate": 0.0002, "loss": 1.5655, "step": 235980 }, { "epoch": 0.96, "grad_norm": 2.036339521408081, "learning_rate": 0.0002, "loss": 1.3624, "step": 235990 }, { "epoch": 0.96, "grad_norm": 2.6672258377075195, "learning_rate": 0.0002, "loss": 1.6095, "step": 236000 }, { "epoch": 0.96, "grad_norm": 2.254328489303589, "learning_rate": 0.0002, "loss": 1.6735, "step": 236010 }, { "epoch": 0.96, "grad_norm": 2.409912109375, "learning_rate": 0.0002, "loss": 1.6434, "step": 236020 }, { "epoch": 0.96, "grad_norm": 5.4918084144592285, "learning_rate": 0.0002, "loss": 1.6181, "step": 236030 }, { "epoch": 0.96, "grad_norm": 2.6880040168762207, "learning_rate": 0.0002, "loss": 1.4916, "step": 236040 }, { "epoch": 0.96, "grad_norm": 2.78340482711792, "learning_rate": 0.0002, "loss": 1.5906, "step": 236050 }, { "epoch": 0.96, "grad_norm": 3.161102294921875, "learning_rate": 0.0002, "loss": 1.3907, "step": 236060 }, { "epoch": 0.96, "grad_norm": 2.6542153358459473, "learning_rate": 0.0002, "loss": 1.3407, "step": 236070 }, { "epoch": 0.96, "grad_norm": 2.6210880279541016, "learning_rate": 0.0002, "loss": 1.5898, "step": 236080 }, { "epoch": 0.96, "grad_norm": 2.959149122238159, "learning_rate": 0.0002, "loss": 1.4228, "step": 236090 }, { "epoch": 0.96, "grad_norm": 2.857264995574951, "learning_rate": 0.0002, "loss": 1.7201, "step": 236100 }, { "epoch": 0.96, "grad_norm": 3.371938705444336, "learning_rate": 0.0002, "loss": 1.5167, "step": 236110 }, { "epoch": 0.96, "grad_norm": 1.7778033018112183, "learning_rate": 0.0002, "loss": 1.617, "step": 236120 }, { "epoch": 0.96, "grad_norm": 4.75790548324585, "learning_rate": 0.0002, "loss": 1.5711, "step": 236130 }, { "epoch": 0.96, "grad_norm": 5.51469087600708, "learning_rate": 0.0002, "loss": 1.7747, "step": 236140 }, { "epoch": 0.96, "grad_norm": 3.547452449798584, "learning_rate": 0.0002, "loss": 1.7315, "step": 236150 }, { "epoch": 0.96, "grad_norm": 1.8149235248565674, "learning_rate": 0.0002, "loss": 1.3829, "step": 236160 }, { "epoch": 0.96, "grad_norm": 3.0061705112457275, "learning_rate": 0.0002, "loss": 1.5576, "step": 236170 }, { "epoch": 0.96, "grad_norm": 1.8851001262664795, "learning_rate": 0.0002, "loss": 1.6185, "step": 236180 }, { "epoch": 0.96, "grad_norm": 3.8924083709716797, "learning_rate": 0.0002, "loss": 1.4539, "step": 236190 }, { "epoch": 0.96, "grad_norm": 3.334524393081665, "learning_rate": 0.0002, "loss": 1.5745, "step": 236200 }, { "epoch": 0.96, "grad_norm": 3.9662270545959473, "learning_rate": 0.0002, "loss": 1.519, "step": 236210 }, { "epoch": 0.96, "grad_norm": 2.060396194458008, "learning_rate": 0.0002, "loss": 1.4753, "step": 236220 }, { "epoch": 0.96, "grad_norm": 2.802711009979248, "learning_rate": 0.0002, "loss": 1.4716, "step": 236230 }, { "epoch": 0.96, "grad_norm": 2.452360153198242, "learning_rate": 0.0002, "loss": 1.3104, "step": 236240 }, { "epoch": 0.96, "grad_norm": 2.5964105129241943, "learning_rate": 0.0002, "loss": 1.5924, "step": 236250 }, { "epoch": 0.96, "grad_norm": 2.2408838272094727, "learning_rate": 0.0002, "loss": 1.7426, "step": 236260 }, { "epoch": 0.96, "grad_norm": 2.3101043701171875, "learning_rate": 0.0002, "loss": 1.533, "step": 236270 }, { "epoch": 0.96, "grad_norm": 6.073798179626465, "learning_rate": 0.0002, "loss": 1.7396, "step": 236280 }, { "epoch": 0.96, "grad_norm": 3.0510547161102295, "learning_rate": 0.0002, "loss": 1.6441, "step": 236290 }, { "epoch": 0.96, "grad_norm": 3.164367437362671, "learning_rate": 0.0002, "loss": 1.5205, "step": 236300 }, { "epoch": 0.96, "grad_norm": 2.606207847595215, "learning_rate": 0.0002, "loss": 1.5089, "step": 236310 }, { "epoch": 0.96, "grad_norm": 2.370161771774292, "learning_rate": 0.0002, "loss": 1.7281, "step": 236320 }, { "epoch": 0.96, "grad_norm": 3.1869845390319824, "learning_rate": 0.0002, "loss": 1.6163, "step": 236330 }, { "epoch": 0.96, "grad_norm": 4.001168251037598, "learning_rate": 0.0002, "loss": 1.6334, "step": 236340 }, { "epoch": 0.96, "grad_norm": 2.0084733963012695, "learning_rate": 0.0002, "loss": 1.469, "step": 236350 }, { "epoch": 0.96, "grad_norm": 5.406243801116943, "learning_rate": 0.0002, "loss": 1.4214, "step": 236360 }, { "epoch": 0.96, "grad_norm": 3.7573375701904297, "learning_rate": 0.0002, "loss": 1.4868, "step": 236370 }, { "epoch": 0.96, "grad_norm": 4.073435306549072, "learning_rate": 0.0002, "loss": 1.4654, "step": 236380 }, { "epoch": 0.96, "grad_norm": 2.2954070568084717, "learning_rate": 0.0002, "loss": 1.4486, "step": 236390 }, { "epoch": 0.96, "grad_norm": 3.008990526199341, "learning_rate": 0.0002, "loss": 1.4702, "step": 236400 }, { "epoch": 0.96, "grad_norm": 3.2644717693328857, "learning_rate": 0.0002, "loss": 1.7216, "step": 236410 }, { "epoch": 0.96, "grad_norm": 2.6695590019226074, "learning_rate": 0.0002, "loss": 1.5679, "step": 236420 }, { "epoch": 0.96, "grad_norm": 2.6665871143341064, "learning_rate": 0.0002, "loss": 1.7139, "step": 236430 }, { "epoch": 0.96, "grad_norm": 4.740972518920898, "learning_rate": 0.0002, "loss": 1.1872, "step": 236440 }, { "epoch": 0.96, "grad_norm": 2.3083791732788086, "learning_rate": 0.0002, "loss": 1.4777, "step": 236450 }, { "epoch": 0.96, "grad_norm": 1.8739792108535767, "learning_rate": 0.0002, "loss": 1.5424, "step": 236460 }, { "epoch": 0.96, "grad_norm": 4.45789909362793, "learning_rate": 0.0002, "loss": 1.6051, "step": 236470 }, { "epoch": 0.96, "grad_norm": 2.6332225799560547, "learning_rate": 0.0002, "loss": 1.6677, "step": 236480 }, { "epoch": 0.96, "grad_norm": 2.902754783630371, "learning_rate": 0.0002, "loss": 1.6307, "step": 236490 }, { "epoch": 0.96, "grad_norm": 2.7826290130615234, "learning_rate": 0.0002, "loss": 1.4713, "step": 236500 }, { "epoch": 0.96, "grad_norm": 2.9806060791015625, "learning_rate": 0.0002, "loss": 1.5176, "step": 236510 }, { "epoch": 0.96, "grad_norm": 2.6455910205841064, "learning_rate": 0.0002, "loss": 1.5757, "step": 236520 }, { "epoch": 0.96, "grad_norm": 2.1239264011383057, "learning_rate": 0.0002, "loss": 1.6006, "step": 236530 }, { "epoch": 0.96, "grad_norm": 2.702712297439575, "learning_rate": 0.0002, "loss": 1.6314, "step": 236540 }, { "epoch": 0.96, "grad_norm": 2.3218932151794434, "learning_rate": 0.0002, "loss": 1.6022, "step": 236550 }, { "epoch": 0.96, "grad_norm": 3.403564691543579, "learning_rate": 0.0002, "loss": 1.6669, "step": 236560 }, { "epoch": 0.96, "grad_norm": 3.668644905090332, "learning_rate": 0.0002, "loss": 1.3182, "step": 236570 }, { "epoch": 0.96, "grad_norm": 2.695063829421997, "learning_rate": 0.0002, "loss": 1.8304, "step": 236580 }, { "epoch": 0.96, "grad_norm": 3.4966602325439453, "learning_rate": 0.0002, "loss": 1.657, "step": 236590 }, { "epoch": 0.96, "grad_norm": 3.724604845046997, "learning_rate": 0.0002, "loss": 1.5481, "step": 236600 }, { "epoch": 0.96, "grad_norm": 3.4213035106658936, "learning_rate": 0.0002, "loss": 1.686, "step": 236610 }, { "epoch": 0.96, "grad_norm": 3.3807919025421143, "learning_rate": 0.0002, "loss": 1.5209, "step": 236620 }, { "epoch": 0.96, "grad_norm": 2.127268075942993, "learning_rate": 0.0002, "loss": 1.6287, "step": 236630 }, { "epoch": 0.96, "grad_norm": 3.8009331226348877, "learning_rate": 0.0002, "loss": 1.79, "step": 236640 }, { "epoch": 0.96, "grad_norm": 3.867119312286377, "learning_rate": 0.0002, "loss": 1.6434, "step": 236650 }, { "epoch": 0.96, "grad_norm": 3.0846590995788574, "learning_rate": 0.0002, "loss": 1.6157, "step": 236660 }, { "epoch": 0.96, "grad_norm": 3.7479031085968018, "learning_rate": 0.0002, "loss": 1.4765, "step": 236670 }, { "epoch": 0.96, "grad_norm": 3.9538910388946533, "learning_rate": 0.0002, "loss": 1.6339, "step": 236680 }, { "epoch": 0.96, "grad_norm": 1.7419244050979614, "learning_rate": 0.0002, "loss": 1.4224, "step": 236690 }, { "epoch": 0.96, "grad_norm": 4.608674049377441, "learning_rate": 0.0002, "loss": 1.5422, "step": 236700 }, { "epoch": 0.96, "grad_norm": 3.7904722690582275, "learning_rate": 0.0002, "loss": 1.516, "step": 236710 }, { "epoch": 0.96, "grad_norm": 3.227290630340576, "learning_rate": 0.0002, "loss": 1.7058, "step": 236720 }, { "epoch": 0.96, "grad_norm": 5.453657150268555, "learning_rate": 0.0002, "loss": 1.6904, "step": 236730 }, { "epoch": 0.96, "grad_norm": 3.3359248638153076, "learning_rate": 0.0002, "loss": 1.6455, "step": 236740 }, { "epoch": 0.96, "grad_norm": 3.5699033737182617, "learning_rate": 0.0002, "loss": 1.7298, "step": 236750 }, { "epoch": 0.96, "grad_norm": 3.397411584854126, "learning_rate": 0.0002, "loss": 1.5432, "step": 236760 }, { "epoch": 0.96, "grad_norm": 3.895131826400757, "learning_rate": 0.0002, "loss": 1.4568, "step": 236770 }, { "epoch": 0.96, "grad_norm": 2.687401056289673, "learning_rate": 0.0002, "loss": 1.3677, "step": 236780 }, { "epoch": 0.96, "grad_norm": 1.652880311012268, "learning_rate": 0.0002, "loss": 1.7841, "step": 236790 }, { "epoch": 0.96, "grad_norm": 2.954458713531494, "learning_rate": 0.0002, "loss": 1.6482, "step": 236800 }, { "epoch": 0.96, "grad_norm": 1.4619015455245972, "learning_rate": 0.0002, "loss": 1.6198, "step": 236810 }, { "epoch": 0.96, "grad_norm": 2.68894362449646, "learning_rate": 0.0002, "loss": 1.5887, "step": 236820 }, { "epoch": 0.96, "grad_norm": 2.5480995178222656, "learning_rate": 0.0002, "loss": 1.261, "step": 236830 }, { "epoch": 0.96, "grad_norm": 2.7992522716522217, "learning_rate": 0.0002, "loss": 1.6981, "step": 236840 }, { "epoch": 0.96, "grad_norm": 3.2927932739257812, "learning_rate": 0.0002, "loss": 1.4579, "step": 236850 }, { "epoch": 0.96, "grad_norm": 4.828125953674316, "learning_rate": 0.0002, "loss": 1.6431, "step": 236860 }, { "epoch": 0.96, "grad_norm": 4.903231620788574, "learning_rate": 0.0002, "loss": 1.4375, "step": 236870 }, { "epoch": 0.96, "grad_norm": 4.239526748657227, "learning_rate": 0.0002, "loss": 1.7376, "step": 236880 }, { "epoch": 0.96, "grad_norm": 2.6034255027770996, "learning_rate": 0.0002, "loss": 1.7165, "step": 236890 }, { "epoch": 0.96, "grad_norm": 1.7308403253555298, "learning_rate": 0.0002, "loss": 1.3012, "step": 236900 }, { "epoch": 0.96, "grad_norm": 2.5787081718444824, "learning_rate": 0.0002, "loss": 1.4452, "step": 236910 }, { "epoch": 0.96, "grad_norm": 3.2116024494171143, "learning_rate": 0.0002, "loss": 1.6379, "step": 236920 }, { "epoch": 0.96, "grad_norm": 2.7567479610443115, "learning_rate": 0.0002, "loss": 1.427, "step": 236930 }, { "epoch": 0.96, "grad_norm": 2.126694917678833, "learning_rate": 0.0002, "loss": 1.4203, "step": 236940 }, { "epoch": 0.96, "grad_norm": 3.2252614498138428, "learning_rate": 0.0002, "loss": 1.6819, "step": 236950 }, { "epoch": 0.96, "grad_norm": 2.643529176712036, "learning_rate": 0.0002, "loss": 1.5825, "step": 236960 }, { "epoch": 0.96, "grad_norm": 2.2847068309783936, "learning_rate": 0.0002, "loss": 1.6874, "step": 236970 }, { "epoch": 0.96, "grad_norm": 2.9862942695617676, "learning_rate": 0.0002, "loss": 1.5536, "step": 236980 }, { "epoch": 0.96, "grad_norm": 3.9388210773468018, "learning_rate": 0.0002, "loss": 1.539, "step": 236990 }, { "epoch": 0.96, "grad_norm": 2.8160769939422607, "learning_rate": 0.0002, "loss": 1.5203, "step": 237000 }, { "epoch": 0.96, "grad_norm": 5.990671157836914, "learning_rate": 0.0002, "loss": 1.6007, "step": 237010 }, { "epoch": 0.96, "grad_norm": 3.2156460285186768, "learning_rate": 0.0002, "loss": 1.7685, "step": 237020 }, { "epoch": 0.96, "grad_norm": 2.809518814086914, "learning_rate": 0.0002, "loss": 1.8686, "step": 237030 }, { "epoch": 0.96, "grad_norm": 3.0091991424560547, "learning_rate": 0.0002, "loss": 1.569, "step": 237040 }, { "epoch": 0.97, "grad_norm": 2.903122663497925, "learning_rate": 0.0002, "loss": 1.4366, "step": 237050 }, { "epoch": 0.97, "grad_norm": 2.2696783542633057, "learning_rate": 0.0002, "loss": 1.6777, "step": 237060 }, { "epoch": 0.97, "grad_norm": 3.7705798149108887, "learning_rate": 0.0002, "loss": 1.9121, "step": 237070 }, { "epoch": 0.97, "grad_norm": 3.0762698650360107, "learning_rate": 0.0002, "loss": 1.5047, "step": 237080 }, { "epoch": 0.97, "grad_norm": 2.3446755409240723, "learning_rate": 0.0002, "loss": 1.8568, "step": 237090 }, { "epoch": 0.97, "grad_norm": 2.8800034523010254, "learning_rate": 0.0002, "loss": 1.5415, "step": 237100 }, { "epoch": 0.97, "grad_norm": 2.3113934993743896, "learning_rate": 0.0002, "loss": 1.4931, "step": 237110 }, { "epoch": 0.97, "grad_norm": 3.268192768096924, "learning_rate": 0.0002, "loss": 1.4447, "step": 237120 }, { "epoch": 0.97, "grad_norm": 2.71349835395813, "learning_rate": 0.0002, "loss": 1.4906, "step": 237130 }, { "epoch": 0.97, "grad_norm": 1.8386434316635132, "learning_rate": 0.0002, "loss": 1.692, "step": 237140 }, { "epoch": 0.97, "grad_norm": 3.416412830352783, "learning_rate": 0.0002, "loss": 1.7332, "step": 237150 }, { "epoch": 0.97, "grad_norm": 4.4959211349487305, "learning_rate": 0.0002, "loss": 1.6735, "step": 237160 }, { "epoch": 0.97, "grad_norm": 3.1243648529052734, "learning_rate": 0.0002, "loss": 1.5488, "step": 237170 }, { "epoch": 0.97, "grad_norm": 2.3034627437591553, "learning_rate": 0.0002, "loss": 1.5321, "step": 237180 }, { "epoch": 0.97, "grad_norm": 2.087886333465576, "learning_rate": 0.0002, "loss": 1.6295, "step": 237190 }, { "epoch": 0.97, "grad_norm": 2.771764039993286, "learning_rate": 0.0002, "loss": 1.8299, "step": 237200 }, { "epoch": 0.97, "grad_norm": 2.899583101272583, "learning_rate": 0.0002, "loss": 1.7381, "step": 237210 }, { "epoch": 0.97, "grad_norm": 2.7468254566192627, "learning_rate": 0.0002, "loss": 1.5476, "step": 237220 }, { "epoch": 0.97, "grad_norm": 2.7271506786346436, "learning_rate": 0.0002, "loss": 1.5674, "step": 237230 }, { "epoch": 0.97, "grad_norm": 2.6234474182128906, "learning_rate": 0.0002, "loss": 1.6116, "step": 237240 }, { "epoch": 0.97, "grad_norm": 4.291882514953613, "learning_rate": 0.0002, "loss": 1.581, "step": 237250 }, { "epoch": 0.97, "grad_norm": 2.7035958766937256, "learning_rate": 0.0002, "loss": 1.5159, "step": 237260 }, { "epoch": 0.97, "grad_norm": 2.2556235790252686, "learning_rate": 0.0002, "loss": 1.5323, "step": 237270 }, { "epoch": 0.97, "grad_norm": 4.310585021972656, "learning_rate": 0.0002, "loss": 1.5945, "step": 237280 }, { "epoch": 0.97, "grad_norm": 2.0906012058258057, "learning_rate": 0.0002, "loss": 1.3206, "step": 237290 }, { "epoch": 0.97, "grad_norm": 5.89113712310791, "learning_rate": 0.0002, "loss": 1.5724, "step": 237300 }, { "epoch": 0.97, "grad_norm": 3.6219990253448486, "learning_rate": 0.0002, "loss": 1.5359, "step": 237310 }, { "epoch": 0.97, "grad_norm": 3.9096100330352783, "learning_rate": 0.0002, "loss": 1.5008, "step": 237320 }, { "epoch": 0.97, "grad_norm": 3.688666582107544, "learning_rate": 0.0002, "loss": 1.5541, "step": 237330 }, { "epoch": 0.97, "grad_norm": 3.1804733276367188, "learning_rate": 0.0002, "loss": 1.311, "step": 237340 }, { "epoch": 0.97, "grad_norm": 3.513305187225342, "learning_rate": 0.0002, "loss": 1.4351, "step": 237350 }, { "epoch": 0.97, "grad_norm": 4.115547180175781, "learning_rate": 0.0002, "loss": 1.6429, "step": 237360 }, { "epoch": 0.97, "grad_norm": 1.2171294689178467, "learning_rate": 0.0002, "loss": 1.551, "step": 237370 }, { "epoch": 0.97, "grad_norm": 3.2122364044189453, "learning_rate": 0.0002, "loss": 1.4897, "step": 237380 }, { "epoch": 0.97, "grad_norm": 2.5647401809692383, "learning_rate": 0.0002, "loss": 1.6284, "step": 237390 }, { "epoch": 0.97, "grad_norm": 3.690183401107788, "learning_rate": 0.0002, "loss": 1.5557, "step": 237400 }, { "epoch": 0.97, "grad_norm": 3.7601964473724365, "learning_rate": 0.0002, "loss": 1.5757, "step": 237410 }, { "epoch": 0.97, "grad_norm": 3.389362096786499, "learning_rate": 0.0002, "loss": 1.5931, "step": 237420 }, { "epoch": 0.97, "grad_norm": 2.573080062866211, "learning_rate": 0.0002, "loss": 1.3976, "step": 237430 }, { "epoch": 0.97, "grad_norm": 4.571253299713135, "learning_rate": 0.0002, "loss": 1.6218, "step": 237440 }, { "epoch": 0.97, "grad_norm": 1.8524693250656128, "learning_rate": 0.0002, "loss": 1.6103, "step": 237450 }, { "epoch": 0.97, "grad_norm": 9.300558090209961, "learning_rate": 0.0002, "loss": 1.7674, "step": 237460 }, { "epoch": 0.97, "grad_norm": 3.723066806793213, "learning_rate": 0.0002, "loss": 1.671, "step": 237470 }, { "epoch": 0.97, "grad_norm": 4.218088150024414, "learning_rate": 0.0002, "loss": 1.7715, "step": 237480 }, { "epoch": 0.97, "grad_norm": 3.4126617908477783, "learning_rate": 0.0002, "loss": 1.5589, "step": 237490 }, { "epoch": 0.97, "grad_norm": 2.2173776626586914, "learning_rate": 0.0002, "loss": 1.4642, "step": 237500 }, { "epoch": 0.97, "grad_norm": 1.852068305015564, "learning_rate": 0.0002, "loss": 1.2615, "step": 237510 }, { "epoch": 0.97, "grad_norm": 2.884559154510498, "learning_rate": 0.0002, "loss": 1.4835, "step": 237520 }, { "epoch": 0.97, "grad_norm": 1.8348383903503418, "learning_rate": 0.0002, "loss": 1.6598, "step": 237530 }, { "epoch": 0.97, "grad_norm": 2.256216049194336, "learning_rate": 0.0002, "loss": 1.2794, "step": 237540 }, { "epoch": 0.97, "grad_norm": 5.353307247161865, "learning_rate": 0.0002, "loss": 1.7809, "step": 237550 }, { "epoch": 0.97, "grad_norm": 2.3520002365112305, "learning_rate": 0.0002, "loss": 1.5045, "step": 237560 }, { "epoch": 0.97, "grad_norm": 1.9223746061325073, "learning_rate": 0.0002, "loss": 1.6272, "step": 237570 }, { "epoch": 0.97, "grad_norm": 2.4520771503448486, "learning_rate": 0.0002, "loss": 1.3699, "step": 237580 }, { "epoch": 0.97, "grad_norm": 2.307299852371216, "learning_rate": 0.0002, "loss": 1.4403, "step": 237590 }, { "epoch": 0.97, "grad_norm": 3.333299398422241, "learning_rate": 0.0002, "loss": 1.5828, "step": 237600 }, { "epoch": 0.97, "grad_norm": 3.4548747539520264, "learning_rate": 0.0002, "loss": 1.4184, "step": 237610 }, { "epoch": 0.97, "grad_norm": 2.7900073528289795, "learning_rate": 0.0002, "loss": 1.2978, "step": 237620 }, { "epoch": 0.97, "grad_norm": 1.5918043851852417, "learning_rate": 0.0002, "loss": 1.4789, "step": 237630 }, { "epoch": 0.97, "grad_norm": 2.9213552474975586, "learning_rate": 0.0002, "loss": 1.498, "step": 237640 }, { "epoch": 0.97, "grad_norm": 6.998333930969238, "learning_rate": 0.0002, "loss": 1.6313, "step": 237650 }, { "epoch": 0.97, "grad_norm": 4.651315212249756, "learning_rate": 0.0002, "loss": 1.6316, "step": 237660 }, { "epoch": 0.97, "grad_norm": 2.3752517700195312, "learning_rate": 0.0002, "loss": 1.5535, "step": 237670 }, { "epoch": 0.97, "grad_norm": 2.8167474269866943, "learning_rate": 0.0002, "loss": 1.5205, "step": 237680 }, { "epoch": 0.97, "grad_norm": 5.355797290802002, "learning_rate": 0.0002, "loss": 1.6323, "step": 237690 }, { "epoch": 0.97, "grad_norm": 2.8657612800598145, "learning_rate": 0.0002, "loss": 1.3278, "step": 237700 }, { "epoch": 0.97, "grad_norm": 4.185975074768066, "learning_rate": 0.0002, "loss": 1.5383, "step": 237710 }, { "epoch": 0.97, "grad_norm": 3.49180269241333, "learning_rate": 0.0002, "loss": 1.7995, "step": 237720 }, { "epoch": 0.97, "grad_norm": 2.8922879695892334, "learning_rate": 0.0002, "loss": 1.6484, "step": 237730 }, { "epoch": 0.97, "grad_norm": 3.146604537963867, "learning_rate": 0.0002, "loss": 1.3392, "step": 237740 }, { "epoch": 0.97, "grad_norm": 5.596922397613525, "learning_rate": 0.0002, "loss": 1.405, "step": 237750 }, { "epoch": 0.97, "grad_norm": 2.493593454360962, "learning_rate": 0.0002, "loss": 1.5461, "step": 237760 }, { "epoch": 0.97, "grad_norm": 4.709272384643555, "learning_rate": 0.0002, "loss": 1.728, "step": 237770 }, { "epoch": 0.97, "grad_norm": 2.9771289825439453, "learning_rate": 0.0002, "loss": 1.5539, "step": 237780 }, { "epoch": 0.97, "grad_norm": 2.298429489135742, "learning_rate": 0.0002, "loss": 1.5182, "step": 237790 }, { "epoch": 0.97, "grad_norm": 2.9973907470703125, "learning_rate": 0.0002, "loss": 1.5486, "step": 237800 }, { "epoch": 0.97, "grad_norm": 3.252981185913086, "learning_rate": 0.0002, "loss": 1.3942, "step": 237810 }, { "epoch": 0.97, "grad_norm": 2.0260276794433594, "learning_rate": 0.0002, "loss": 1.7385, "step": 237820 }, { "epoch": 0.97, "grad_norm": 2.920762538909912, "learning_rate": 0.0002, "loss": 1.4729, "step": 237830 }, { "epoch": 0.97, "grad_norm": 2.9187915325164795, "learning_rate": 0.0002, "loss": 1.5026, "step": 237840 }, { "epoch": 0.97, "grad_norm": 2.49798321723938, "learning_rate": 0.0002, "loss": 1.6101, "step": 237850 }, { "epoch": 0.97, "grad_norm": 2.0870981216430664, "learning_rate": 0.0002, "loss": 1.6771, "step": 237860 }, { "epoch": 0.97, "grad_norm": 2.5762712955474854, "learning_rate": 0.0002, "loss": 1.495, "step": 237870 }, { "epoch": 0.97, "grad_norm": 2.018437623977661, "learning_rate": 0.0002, "loss": 1.4826, "step": 237880 }, { "epoch": 0.97, "grad_norm": 3.6411564350128174, "learning_rate": 0.0002, "loss": 1.5823, "step": 237890 }, { "epoch": 0.97, "grad_norm": 3.6409895420074463, "learning_rate": 0.0002, "loss": 1.7806, "step": 237900 }, { "epoch": 0.97, "grad_norm": 3.6035101413726807, "learning_rate": 0.0002, "loss": 1.5618, "step": 237910 }, { "epoch": 0.97, "grad_norm": 4.751997470855713, "learning_rate": 0.0002, "loss": 1.524, "step": 237920 }, { "epoch": 0.97, "grad_norm": 3.384389638900757, "learning_rate": 0.0002, "loss": 1.6139, "step": 237930 }, { "epoch": 0.97, "grad_norm": 3.7521872520446777, "learning_rate": 0.0002, "loss": 1.5186, "step": 237940 }, { "epoch": 0.97, "grad_norm": 1.3192992210388184, "learning_rate": 0.0002, "loss": 1.6832, "step": 237950 }, { "epoch": 0.97, "grad_norm": 3.398016929626465, "learning_rate": 0.0002, "loss": 1.4212, "step": 237960 }, { "epoch": 0.97, "grad_norm": 2.7078628540039062, "learning_rate": 0.0002, "loss": 1.7721, "step": 237970 }, { "epoch": 0.97, "grad_norm": 3.263516426086426, "learning_rate": 0.0002, "loss": 1.5253, "step": 237980 }, { "epoch": 0.97, "grad_norm": 3.3737668991088867, "learning_rate": 0.0002, "loss": 1.4991, "step": 237990 }, { "epoch": 0.97, "grad_norm": 1.8315544128417969, "learning_rate": 0.0002, "loss": 1.3823, "step": 238000 }, { "epoch": 0.97, "grad_norm": 3.3652708530426025, "learning_rate": 0.0002, "loss": 1.7139, "step": 238010 }, { "epoch": 0.97, "grad_norm": 2.584925889968872, "learning_rate": 0.0002, "loss": 1.7497, "step": 238020 }, { "epoch": 0.97, "grad_norm": 3.6452620029449463, "learning_rate": 0.0002, "loss": 1.5349, "step": 238030 }, { "epoch": 0.97, "grad_norm": 2.790822982788086, "learning_rate": 0.0002, "loss": 1.6374, "step": 238040 }, { "epoch": 0.97, "grad_norm": 2.839926242828369, "learning_rate": 0.0002, "loss": 1.5722, "step": 238050 }, { "epoch": 0.97, "grad_norm": 5.598869323730469, "learning_rate": 0.0002, "loss": 1.7033, "step": 238060 }, { "epoch": 0.97, "grad_norm": 6.678884983062744, "learning_rate": 0.0002, "loss": 1.4636, "step": 238070 }, { "epoch": 0.97, "grad_norm": 3.101328134536743, "learning_rate": 0.0002, "loss": 1.6127, "step": 238080 }, { "epoch": 0.97, "grad_norm": 3.0076065063476562, "learning_rate": 0.0002, "loss": 1.4887, "step": 238090 }, { "epoch": 0.97, "grad_norm": 3.728761672973633, "learning_rate": 0.0002, "loss": 1.324, "step": 238100 }, { "epoch": 0.97, "grad_norm": 2.9647939205169678, "learning_rate": 0.0002, "loss": 1.5617, "step": 238110 }, { "epoch": 0.97, "grad_norm": 2.3802027702331543, "learning_rate": 0.0002, "loss": 1.574, "step": 238120 }, { "epoch": 0.97, "grad_norm": 4.55814790725708, "learning_rate": 0.0002, "loss": 1.5507, "step": 238130 }, { "epoch": 0.97, "grad_norm": 1.9978656768798828, "learning_rate": 0.0002, "loss": 1.7636, "step": 238140 }, { "epoch": 0.97, "grad_norm": 2.403913736343384, "learning_rate": 0.0002, "loss": 1.604, "step": 238150 }, { "epoch": 0.97, "grad_norm": 2.2631912231445312, "learning_rate": 0.0002, "loss": 1.7277, "step": 238160 }, { "epoch": 0.97, "grad_norm": 2.033939838409424, "learning_rate": 0.0002, "loss": 1.4879, "step": 238170 }, { "epoch": 0.97, "grad_norm": 3.2903270721435547, "learning_rate": 0.0002, "loss": 1.7609, "step": 238180 }, { "epoch": 0.97, "grad_norm": 3.028688669204712, "learning_rate": 0.0002, "loss": 1.2777, "step": 238190 }, { "epoch": 0.97, "grad_norm": 2.4146673679351807, "learning_rate": 0.0002, "loss": 1.4828, "step": 238200 }, { "epoch": 0.97, "grad_norm": 3.231322765350342, "learning_rate": 0.0002, "loss": 1.7161, "step": 238210 }, { "epoch": 0.97, "grad_norm": 4.8315653800964355, "learning_rate": 0.0002, "loss": 1.5033, "step": 238220 }, { "epoch": 0.97, "grad_norm": 2.7481963634490967, "learning_rate": 0.0002, "loss": 1.7478, "step": 238230 }, { "epoch": 0.97, "grad_norm": 3.905240774154663, "learning_rate": 0.0002, "loss": 1.8736, "step": 238240 }, { "epoch": 0.97, "grad_norm": 1.6641576290130615, "learning_rate": 0.0002, "loss": 1.6663, "step": 238250 }, { "epoch": 0.97, "grad_norm": 4.8109917640686035, "learning_rate": 0.0002, "loss": 1.7333, "step": 238260 }, { "epoch": 0.97, "grad_norm": 2.6613235473632812, "learning_rate": 0.0002, "loss": 1.6274, "step": 238270 }, { "epoch": 0.97, "grad_norm": 3.3209826946258545, "learning_rate": 0.0002, "loss": 1.5452, "step": 238280 }, { "epoch": 0.97, "grad_norm": 2.141714096069336, "learning_rate": 0.0002, "loss": 1.4956, "step": 238290 }, { "epoch": 0.97, "grad_norm": 2.508819103240967, "learning_rate": 0.0002, "loss": 1.7323, "step": 238300 }, { "epoch": 0.97, "grad_norm": 2.936819314956665, "learning_rate": 0.0002, "loss": 1.5497, "step": 238310 }, { "epoch": 0.97, "grad_norm": 2.718047618865967, "learning_rate": 0.0002, "loss": 1.6616, "step": 238320 }, { "epoch": 0.97, "grad_norm": 3.3071157932281494, "learning_rate": 0.0002, "loss": 1.6012, "step": 238330 }, { "epoch": 0.97, "grad_norm": 2.3321373462677, "learning_rate": 0.0002, "loss": 1.5259, "step": 238340 }, { "epoch": 0.97, "grad_norm": 2.3570587635040283, "learning_rate": 0.0002, "loss": 1.856, "step": 238350 }, { "epoch": 0.97, "grad_norm": 2.3388633728027344, "learning_rate": 0.0002, "loss": 1.4523, "step": 238360 }, { "epoch": 0.97, "grad_norm": 2.766686201095581, "learning_rate": 0.0002, "loss": 1.4317, "step": 238370 }, { "epoch": 0.97, "grad_norm": 4.081692695617676, "learning_rate": 0.0002, "loss": 1.7645, "step": 238380 }, { "epoch": 0.97, "grad_norm": 3.542257785797119, "learning_rate": 0.0002, "loss": 1.8777, "step": 238390 }, { "epoch": 0.97, "grad_norm": 3.6773457527160645, "learning_rate": 0.0002, "loss": 1.4802, "step": 238400 }, { "epoch": 0.97, "grad_norm": 3.800715208053589, "learning_rate": 0.0002, "loss": 1.7145, "step": 238410 }, { "epoch": 0.97, "grad_norm": 3.8842382431030273, "learning_rate": 0.0002, "loss": 1.9003, "step": 238420 }, { "epoch": 0.97, "grad_norm": 2.71809458732605, "learning_rate": 0.0002, "loss": 1.8255, "step": 238430 }, { "epoch": 0.97, "grad_norm": 2.949103355407715, "learning_rate": 0.0002, "loss": 1.7116, "step": 238440 }, { "epoch": 0.97, "grad_norm": 3.7457339763641357, "learning_rate": 0.0002, "loss": 1.8842, "step": 238450 }, { "epoch": 0.97, "grad_norm": 3.5201313495635986, "learning_rate": 0.0002, "loss": 1.6464, "step": 238460 }, { "epoch": 0.97, "grad_norm": 2.7953951358795166, "learning_rate": 0.0002, "loss": 1.471, "step": 238470 }, { "epoch": 0.97, "grad_norm": 6.722105026245117, "learning_rate": 0.0002, "loss": 1.3274, "step": 238480 }, { "epoch": 0.97, "grad_norm": 3.041804075241089, "learning_rate": 0.0002, "loss": 1.716, "step": 238490 }, { "epoch": 0.97, "grad_norm": 4.231461048126221, "learning_rate": 0.0002, "loss": 1.5854, "step": 238500 }, { "epoch": 0.97, "grad_norm": 2.710343599319458, "learning_rate": 0.0002, "loss": 1.6306, "step": 238510 }, { "epoch": 0.97, "grad_norm": 3.3974246978759766, "learning_rate": 0.0002, "loss": 1.5923, "step": 238520 }, { "epoch": 0.97, "grad_norm": 3.549532175064087, "learning_rate": 0.0002, "loss": 1.4673, "step": 238530 }, { "epoch": 0.97, "grad_norm": 4.524828910827637, "learning_rate": 0.0002, "loss": 1.519, "step": 238540 }, { "epoch": 0.97, "grad_norm": 2.858703374862671, "learning_rate": 0.0002, "loss": 1.4942, "step": 238550 }, { "epoch": 0.97, "grad_norm": 2.7879693508148193, "learning_rate": 0.0002, "loss": 1.5533, "step": 238560 }, { "epoch": 0.97, "grad_norm": 2.5336005687713623, "learning_rate": 0.0002, "loss": 1.6443, "step": 238570 }, { "epoch": 0.97, "grad_norm": 3.281832218170166, "learning_rate": 0.0002, "loss": 1.4139, "step": 238580 }, { "epoch": 0.97, "grad_norm": 2.548888683319092, "learning_rate": 0.0002, "loss": 1.5425, "step": 238590 }, { "epoch": 0.97, "grad_norm": 3.282914400100708, "learning_rate": 0.0002, "loss": 1.5001, "step": 238600 }, { "epoch": 0.97, "grad_norm": 4.392864227294922, "learning_rate": 0.0002, "loss": 1.6469, "step": 238610 }, { "epoch": 0.97, "grad_norm": 5.656737804412842, "learning_rate": 0.0002, "loss": 1.5548, "step": 238620 }, { "epoch": 0.97, "grad_norm": 3.5617449283599854, "learning_rate": 0.0002, "loss": 1.5197, "step": 238630 }, { "epoch": 0.97, "grad_norm": 1.7062665224075317, "learning_rate": 0.0002, "loss": 1.5075, "step": 238640 }, { "epoch": 0.97, "grad_norm": 2.692099094390869, "learning_rate": 0.0002, "loss": 1.4791, "step": 238650 }, { "epoch": 0.97, "grad_norm": 2.073882818222046, "learning_rate": 0.0002, "loss": 1.5105, "step": 238660 }, { "epoch": 0.97, "grad_norm": 3.0630483627319336, "learning_rate": 0.0002, "loss": 1.7907, "step": 238670 }, { "epoch": 0.97, "grad_norm": 4.116994380950928, "learning_rate": 0.0002, "loss": 1.5875, "step": 238680 }, { "epoch": 0.97, "grad_norm": 2.4806160926818848, "learning_rate": 0.0002, "loss": 1.4057, "step": 238690 }, { "epoch": 0.97, "grad_norm": 4.349560737609863, "learning_rate": 0.0002, "loss": 1.6736, "step": 238700 }, { "epoch": 0.97, "grad_norm": 1.7266407012939453, "learning_rate": 0.0002, "loss": 1.4687, "step": 238710 }, { "epoch": 0.97, "grad_norm": 3.640152931213379, "learning_rate": 0.0002, "loss": 1.4239, "step": 238720 }, { "epoch": 0.97, "grad_norm": 2.392465829849243, "learning_rate": 0.0002, "loss": 1.6253, "step": 238730 }, { "epoch": 0.97, "grad_norm": 1.7948635816574097, "learning_rate": 0.0002, "loss": 1.7084, "step": 238740 }, { "epoch": 0.97, "grad_norm": 3.1592116355895996, "learning_rate": 0.0002, "loss": 1.5072, "step": 238750 }, { "epoch": 0.97, "grad_norm": 4.087779521942139, "learning_rate": 0.0002, "loss": 1.3323, "step": 238760 }, { "epoch": 0.97, "grad_norm": 7.911019802093506, "learning_rate": 0.0002, "loss": 1.4876, "step": 238770 }, { "epoch": 0.97, "grad_norm": 2.094374418258667, "learning_rate": 0.0002, "loss": 1.5773, "step": 238780 }, { "epoch": 0.97, "grad_norm": 3.1840455532073975, "learning_rate": 0.0002, "loss": 1.7312, "step": 238790 }, { "epoch": 0.97, "grad_norm": 3.5921080112457275, "learning_rate": 0.0002, "loss": 1.6917, "step": 238800 }, { "epoch": 0.97, "grad_norm": 2.7953059673309326, "learning_rate": 0.0002, "loss": 1.4872, "step": 238810 }, { "epoch": 0.97, "grad_norm": 3.937063694000244, "learning_rate": 0.0002, "loss": 1.4898, "step": 238820 }, { "epoch": 0.97, "grad_norm": 2.2430906295776367, "learning_rate": 0.0002, "loss": 1.6567, "step": 238830 }, { "epoch": 0.97, "grad_norm": 2.9509775638580322, "learning_rate": 0.0002, "loss": 1.649, "step": 238840 }, { "epoch": 0.97, "grad_norm": 2.494976758956909, "learning_rate": 0.0002, "loss": 1.4841, "step": 238850 }, { "epoch": 0.97, "grad_norm": 2.9601008892059326, "learning_rate": 0.0002, "loss": 1.3879, "step": 238860 }, { "epoch": 0.97, "grad_norm": 2.3375637531280518, "learning_rate": 0.0002, "loss": 1.6103, "step": 238870 }, { "epoch": 0.97, "grad_norm": 2.674032211303711, "learning_rate": 0.0002, "loss": 1.63, "step": 238880 }, { "epoch": 0.97, "grad_norm": 2.9768002033233643, "learning_rate": 0.0002, "loss": 1.6195, "step": 238890 }, { "epoch": 0.97, "grad_norm": 4.704832553863525, "learning_rate": 0.0002, "loss": 1.6699, "step": 238900 }, { "epoch": 0.97, "grad_norm": 1.5171220302581787, "learning_rate": 0.0002, "loss": 1.4108, "step": 238910 }, { "epoch": 0.97, "grad_norm": 1.6674327850341797, "learning_rate": 0.0002, "loss": 1.8706, "step": 238920 }, { "epoch": 0.97, "grad_norm": 1.9701051712036133, "learning_rate": 0.0002, "loss": 1.8626, "step": 238930 }, { "epoch": 0.97, "grad_norm": 1.9504210948944092, "learning_rate": 0.0002, "loss": 1.5995, "step": 238940 }, { "epoch": 0.97, "grad_norm": 2.567662239074707, "learning_rate": 0.0002, "loss": 1.4892, "step": 238950 }, { "epoch": 0.97, "grad_norm": 2.6572372913360596, "learning_rate": 0.0002, "loss": 1.5857, "step": 238960 }, { "epoch": 0.97, "grad_norm": 2.85356068611145, "learning_rate": 0.0002, "loss": 1.6079, "step": 238970 }, { "epoch": 0.97, "grad_norm": 2.4950625896453857, "learning_rate": 0.0002, "loss": 1.4841, "step": 238980 }, { "epoch": 0.97, "grad_norm": 2.532729387283325, "learning_rate": 0.0002, "loss": 1.6406, "step": 238990 }, { "epoch": 0.97, "grad_norm": 1.7049229145050049, "learning_rate": 0.0002, "loss": 1.874, "step": 239000 }, { "epoch": 0.97, "grad_norm": 3.174842357635498, "learning_rate": 0.0002, "loss": 1.4413, "step": 239010 }, { "epoch": 0.97, "grad_norm": 2.707613945007324, "learning_rate": 0.0002, "loss": 1.4377, "step": 239020 }, { "epoch": 0.97, "grad_norm": 2.8154733180999756, "learning_rate": 0.0002, "loss": 1.5179, "step": 239030 }, { "epoch": 0.97, "grad_norm": 3.3125038146972656, "learning_rate": 0.0002, "loss": 1.5771, "step": 239040 }, { "epoch": 0.97, "grad_norm": 2.4770472049713135, "learning_rate": 0.0002, "loss": 1.6395, "step": 239050 }, { "epoch": 0.97, "grad_norm": 2.809269428253174, "learning_rate": 0.0002, "loss": 1.6328, "step": 239060 }, { "epoch": 0.97, "grad_norm": 3.754366874694824, "learning_rate": 0.0002, "loss": 1.4668, "step": 239070 }, { "epoch": 0.97, "grad_norm": 3.034919261932373, "learning_rate": 0.0002, "loss": 1.4909, "step": 239080 }, { "epoch": 0.97, "grad_norm": 3.1213231086730957, "learning_rate": 0.0002, "loss": 1.567, "step": 239090 }, { "epoch": 0.97, "grad_norm": 2.3923180103302, "learning_rate": 0.0002, "loss": 1.6589, "step": 239100 }, { "epoch": 0.97, "grad_norm": 2.5872409343719482, "learning_rate": 0.0002, "loss": 1.7607, "step": 239110 }, { "epoch": 0.97, "grad_norm": 2.7143235206604004, "learning_rate": 0.0002, "loss": 1.6949, "step": 239120 }, { "epoch": 0.97, "grad_norm": 2.06951904296875, "learning_rate": 0.0002, "loss": 1.5788, "step": 239130 }, { "epoch": 0.97, "grad_norm": 3.8054134845733643, "learning_rate": 0.0002, "loss": 1.5702, "step": 239140 }, { "epoch": 0.97, "grad_norm": 2.479682683944702, "learning_rate": 0.0002, "loss": 1.569, "step": 239150 }, { "epoch": 0.97, "grad_norm": 2.5205228328704834, "learning_rate": 0.0002, "loss": 1.6878, "step": 239160 }, { "epoch": 0.97, "grad_norm": 3.0144248008728027, "learning_rate": 0.0002, "loss": 1.6431, "step": 239170 }, { "epoch": 0.97, "grad_norm": 2.620136022567749, "learning_rate": 0.0002, "loss": 1.3499, "step": 239180 }, { "epoch": 0.97, "grad_norm": 3.146423101425171, "learning_rate": 0.0002, "loss": 1.6858, "step": 239190 }, { "epoch": 0.97, "grad_norm": 2.4372901916503906, "learning_rate": 0.0002, "loss": 1.5805, "step": 239200 }, { "epoch": 0.97, "grad_norm": 3.3357644081115723, "learning_rate": 0.0002, "loss": 1.6974, "step": 239210 }, { "epoch": 0.97, "grad_norm": 2.9359211921691895, "learning_rate": 0.0002, "loss": 1.9242, "step": 239220 }, { "epoch": 0.97, "grad_norm": 2.0228962898254395, "learning_rate": 0.0002, "loss": 1.4232, "step": 239230 }, { "epoch": 0.97, "grad_norm": 4.386748313903809, "learning_rate": 0.0002, "loss": 1.7157, "step": 239240 }, { "epoch": 0.97, "grad_norm": 3.3036694526672363, "learning_rate": 0.0002, "loss": 1.7743, "step": 239250 }, { "epoch": 0.97, "grad_norm": 3.1203885078430176, "learning_rate": 0.0002, "loss": 1.5346, "step": 239260 }, { "epoch": 0.97, "grad_norm": 2.7809903621673584, "learning_rate": 0.0002, "loss": 1.4538, "step": 239270 }, { "epoch": 0.97, "grad_norm": 2.1993463039398193, "learning_rate": 0.0002, "loss": 1.4949, "step": 239280 }, { "epoch": 0.97, "grad_norm": 3.0562093257904053, "learning_rate": 0.0002, "loss": 1.6496, "step": 239290 }, { "epoch": 0.97, "grad_norm": 1.4599168300628662, "learning_rate": 0.0002, "loss": 1.4728, "step": 239300 }, { "epoch": 0.97, "grad_norm": 2.129653215408325, "learning_rate": 0.0002, "loss": 1.5472, "step": 239310 }, { "epoch": 0.97, "grad_norm": 6.362358570098877, "learning_rate": 0.0002, "loss": 1.4903, "step": 239320 }, { "epoch": 0.97, "grad_norm": 3.6473581790924072, "learning_rate": 0.0002, "loss": 1.693, "step": 239330 }, { "epoch": 0.97, "grad_norm": 2.2609150409698486, "learning_rate": 0.0002, "loss": 1.6208, "step": 239340 }, { "epoch": 0.97, "grad_norm": 3.738105058670044, "learning_rate": 0.0002, "loss": 1.5276, "step": 239350 }, { "epoch": 0.97, "grad_norm": 3.4287846088409424, "learning_rate": 0.0002, "loss": 1.7518, "step": 239360 }, { "epoch": 0.97, "grad_norm": 5.591020107269287, "learning_rate": 0.0002, "loss": 1.6258, "step": 239370 }, { "epoch": 0.97, "grad_norm": 2.883087158203125, "learning_rate": 0.0002, "loss": 1.3395, "step": 239380 }, { "epoch": 0.97, "grad_norm": 3.0892207622528076, "learning_rate": 0.0002, "loss": 1.4257, "step": 239390 }, { "epoch": 0.97, "grad_norm": 3.868326187133789, "learning_rate": 0.0002, "loss": 1.4946, "step": 239400 }, { "epoch": 0.97, "grad_norm": 3.5179195404052734, "learning_rate": 0.0002, "loss": 1.5954, "step": 239410 }, { "epoch": 0.97, "grad_norm": 2.631157636642456, "learning_rate": 0.0002, "loss": 1.3284, "step": 239420 }, { "epoch": 0.97, "grad_norm": 3.227337121963501, "learning_rate": 0.0002, "loss": 1.2343, "step": 239430 }, { "epoch": 0.97, "grad_norm": 3.1390860080718994, "learning_rate": 0.0002, "loss": 1.7962, "step": 239440 }, { "epoch": 0.97, "grad_norm": 4.08829402923584, "learning_rate": 0.0002, "loss": 1.5948, "step": 239450 }, { "epoch": 0.97, "grad_norm": 3.9729573726654053, "learning_rate": 0.0002, "loss": 1.4123, "step": 239460 }, { "epoch": 0.97, "grad_norm": 3.628187656402588, "learning_rate": 0.0002, "loss": 1.4728, "step": 239470 }, { "epoch": 0.97, "grad_norm": 2.3977839946746826, "learning_rate": 0.0002, "loss": 1.7028, "step": 239480 }, { "epoch": 0.97, "grad_norm": 3.646505117416382, "learning_rate": 0.0002, "loss": 1.4468, "step": 239490 }, { "epoch": 0.97, "grad_norm": 2.0268924236297607, "learning_rate": 0.0002, "loss": 1.5907, "step": 239500 }, { "epoch": 0.98, "grad_norm": 3.5302627086639404, "learning_rate": 0.0002, "loss": 1.8427, "step": 239510 }, { "epoch": 0.98, "grad_norm": 3.8765339851379395, "learning_rate": 0.0002, "loss": 1.6179, "step": 239520 }, { "epoch": 0.98, "grad_norm": 3.080817222595215, "learning_rate": 0.0002, "loss": 1.5798, "step": 239530 }, { "epoch": 0.98, "grad_norm": 3.3908133506774902, "learning_rate": 0.0002, "loss": 1.5099, "step": 239540 }, { "epoch": 0.98, "grad_norm": 3.818009853363037, "learning_rate": 0.0002, "loss": 1.5968, "step": 239550 }, { "epoch": 0.98, "grad_norm": 3.964223861694336, "learning_rate": 0.0002, "loss": 1.5848, "step": 239560 }, { "epoch": 0.98, "grad_norm": 3.360698699951172, "learning_rate": 0.0002, "loss": 1.5406, "step": 239570 }, { "epoch": 0.98, "grad_norm": 3.667036771774292, "learning_rate": 0.0002, "loss": 1.6258, "step": 239580 }, { "epoch": 0.98, "grad_norm": 4.587031841278076, "learning_rate": 0.0002, "loss": 1.8322, "step": 239590 }, { "epoch": 0.98, "grad_norm": 1.6604539155960083, "learning_rate": 0.0002, "loss": 1.7091, "step": 239600 }, { "epoch": 0.98, "grad_norm": 2.6211321353912354, "learning_rate": 0.0002, "loss": 1.357, "step": 239610 }, { "epoch": 0.98, "grad_norm": 2.67390775680542, "learning_rate": 0.0002, "loss": 1.5888, "step": 239620 }, { "epoch": 0.98, "grad_norm": 3.34033203125, "learning_rate": 0.0002, "loss": 1.6374, "step": 239630 }, { "epoch": 0.98, "grad_norm": 1.769157886505127, "learning_rate": 0.0002, "loss": 1.466, "step": 239640 }, { "epoch": 0.98, "grad_norm": 2.1861064434051514, "learning_rate": 0.0002, "loss": 1.5998, "step": 239650 }, { "epoch": 0.98, "grad_norm": 2.9888129234313965, "learning_rate": 0.0002, "loss": 1.5106, "step": 239660 }, { "epoch": 0.98, "grad_norm": 6.333854675292969, "learning_rate": 0.0002, "loss": 1.5661, "step": 239670 }, { "epoch": 0.98, "grad_norm": 2.869142770767212, "learning_rate": 0.0002, "loss": 1.7985, "step": 239680 }, { "epoch": 0.98, "grad_norm": 3.8110597133636475, "learning_rate": 0.0002, "loss": 1.4778, "step": 239690 }, { "epoch": 0.98, "grad_norm": 2.6621453762054443, "learning_rate": 0.0002, "loss": 1.5766, "step": 239700 }, { "epoch": 0.98, "grad_norm": 2.827501058578491, "learning_rate": 0.0002, "loss": 1.6056, "step": 239710 }, { "epoch": 0.98, "grad_norm": 2.148771286010742, "learning_rate": 0.0002, "loss": 1.2316, "step": 239720 }, { "epoch": 0.98, "grad_norm": 3.020883083343506, "learning_rate": 0.0002, "loss": 1.632, "step": 239730 }, { "epoch": 0.98, "grad_norm": 2.563506841659546, "learning_rate": 0.0002, "loss": 1.5504, "step": 239740 }, { "epoch": 0.98, "grad_norm": 3.4508275985717773, "learning_rate": 0.0002, "loss": 1.4744, "step": 239750 }, { "epoch": 0.98, "grad_norm": 3.190737009048462, "learning_rate": 0.0002, "loss": 1.6195, "step": 239760 }, { "epoch": 0.98, "grad_norm": 2.765329599380493, "learning_rate": 0.0002, "loss": 1.5063, "step": 239770 }, { "epoch": 0.98, "grad_norm": 3.2425527572631836, "learning_rate": 0.0002, "loss": 1.5831, "step": 239780 }, { "epoch": 0.98, "grad_norm": 2.1982250213623047, "learning_rate": 0.0002, "loss": 1.4793, "step": 239790 }, { "epoch": 0.98, "grad_norm": 3.2158162593841553, "learning_rate": 0.0002, "loss": 1.4572, "step": 239800 }, { "epoch": 0.98, "grad_norm": 3.258842706680298, "learning_rate": 0.0002, "loss": 1.7142, "step": 239810 }, { "epoch": 0.98, "grad_norm": 3.665863513946533, "learning_rate": 0.0002, "loss": 1.6841, "step": 239820 }, { "epoch": 0.98, "grad_norm": 3.4578394889831543, "learning_rate": 0.0002, "loss": 1.7858, "step": 239830 }, { "epoch": 0.98, "grad_norm": 4.908314228057861, "learning_rate": 0.0002, "loss": 1.5009, "step": 239840 }, { "epoch": 0.98, "grad_norm": 2.7066357135772705, "learning_rate": 0.0002, "loss": 1.5814, "step": 239850 }, { "epoch": 0.98, "grad_norm": 3.0929291248321533, "learning_rate": 0.0002, "loss": 1.6426, "step": 239860 }, { "epoch": 0.98, "grad_norm": 3.116478204727173, "learning_rate": 0.0002, "loss": 1.7943, "step": 239870 }, { "epoch": 0.98, "grad_norm": 3.3291077613830566, "learning_rate": 0.0002, "loss": 1.4308, "step": 239880 }, { "epoch": 0.98, "grad_norm": 4.223422050476074, "learning_rate": 0.0002, "loss": 1.5117, "step": 239890 }, { "epoch": 0.98, "grad_norm": 5.980987071990967, "learning_rate": 0.0002, "loss": 1.4899, "step": 239900 }, { "epoch": 0.98, "grad_norm": 2.8098440170288086, "learning_rate": 0.0002, "loss": 1.493, "step": 239910 }, { "epoch": 0.98, "grad_norm": 3.1033852100372314, "learning_rate": 0.0002, "loss": 1.5987, "step": 239920 }, { "epoch": 0.98, "grad_norm": 2.1803510189056396, "learning_rate": 0.0002, "loss": 1.5478, "step": 239930 }, { "epoch": 0.98, "grad_norm": 3.6110310554504395, "learning_rate": 0.0002, "loss": 1.6962, "step": 239940 }, { "epoch": 0.98, "grad_norm": 3.752997636795044, "learning_rate": 0.0002, "loss": 1.4348, "step": 239950 }, { "epoch": 0.98, "grad_norm": 2.6538453102111816, "learning_rate": 0.0002, "loss": 1.4559, "step": 239960 }, { "epoch": 0.98, "grad_norm": 2.5154762268066406, "learning_rate": 0.0002, "loss": 1.627, "step": 239970 }, { "epoch": 0.98, "grad_norm": 2.4659206867218018, "learning_rate": 0.0002, "loss": 1.5819, "step": 239980 }, { "epoch": 0.98, "grad_norm": 2.6946349143981934, "learning_rate": 0.0002, "loss": 1.5837, "step": 239990 }, { "epoch": 0.98, "grad_norm": 3.596156597137451, "learning_rate": 0.0002, "loss": 1.5553, "step": 240000 }, { "epoch": 0.98, "grad_norm": 3.1126675605773926, "learning_rate": 0.0002, "loss": 1.6701, "step": 240010 }, { "epoch": 0.98, "grad_norm": 3.470020294189453, "learning_rate": 0.0002, "loss": 1.6698, "step": 240020 }, { "epoch": 0.98, "grad_norm": 3.022369861602783, "learning_rate": 0.0002, "loss": 1.5807, "step": 240030 }, { "epoch": 0.98, "grad_norm": 3.2239599227905273, "learning_rate": 0.0002, "loss": 1.4546, "step": 240040 }, { "epoch": 0.98, "grad_norm": 2.9227845668792725, "learning_rate": 0.0002, "loss": 1.6783, "step": 240050 }, { "epoch": 0.98, "grad_norm": 2.3584516048431396, "learning_rate": 0.0002, "loss": 1.5812, "step": 240060 }, { "epoch": 0.98, "grad_norm": 6.774386405944824, "learning_rate": 0.0002, "loss": 1.4729, "step": 240070 }, { "epoch": 0.98, "grad_norm": 2.5584030151367188, "learning_rate": 0.0002, "loss": 1.7242, "step": 240080 }, { "epoch": 0.98, "grad_norm": 2.201906442642212, "learning_rate": 0.0002, "loss": 1.7632, "step": 240090 }, { "epoch": 0.98, "grad_norm": 3.3299758434295654, "learning_rate": 0.0002, "loss": 1.4946, "step": 240100 }, { "epoch": 0.98, "grad_norm": 2.2077338695526123, "learning_rate": 0.0002, "loss": 1.5571, "step": 240110 }, { "epoch": 0.98, "grad_norm": 3.250492572784424, "learning_rate": 0.0002, "loss": 1.6093, "step": 240120 }, { "epoch": 0.98, "grad_norm": 1.328304648399353, "learning_rate": 0.0002, "loss": 1.7588, "step": 240130 }, { "epoch": 0.98, "grad_norm": 4.261114597320557, "learning_rate": 0.0002, "loss": 1.6146, "step": 240140 }, { "epoch": 0.98, "grad_norm": 3.366748809814453, "learning_rate": 0.0002, "loss": 1.659, "step": 240150 }, { "epoch": 0.98, "grad_norm": 2.3055670261383057, "learning_rate": 0.0002, "loss": 1.4992, "step": 240160 }, { "epoch": 0.98, "grad_norm": 2.197514295578003, "learning_rate": 0.0002, "loss": 1.6518, "step": 240170 }, { "epoch": 0.98, "grad_norm": 2.8936755657196045, "learning_rate": 0.0002, "loss": 1.6052, "step": 240180 }, { "epoch": 0.98, "grad_norm": 3.014976739883423, "learning_rate": 0.0002, "loss": 1.7803, "step": 240190 }, { "epoch": 0.98, "grad_norm": 4.702159881591797, "learning_rate": 0.0002, "loss": 1.5778, "step": 240200 }, { "epoch": 0.98, "grad_norm": 1.8735779523849487, "learning_rate": 0.0002, "loss": 1.7833, "step": 240210 }, { "epoch": 0.98, "grad_norm": 2.57700252532959, "learning_rate": 0.0002, "loss": 1.5606, "step": 240220 }, { "epoch": 0.98, "grad_norm": 3.8113644123077393, "learning_rate": 0.0002, "loss": 1.9298, "step": 240230 }, { "epoch": 0.98, "grad_norm": 3.0916688442230225, "learning_rate": 0.0002, "loss": 1.4808, "step": 240240 }, { "epoch": 0.98, "grad_norm": 2.892383337020874, "learning_rate": 0.0002, "loss": 1.5112, "step": 240250 }, { "epoch": 0.98, "grad_norm": 4.445778846740723, "learning_rate": 0.0002, "loss": 1.8283, "step": 240260 }, { "epoch": 0.98, "grad_norm": 3.6561033725738525, "learning_rate": 0.0002, "loss": 1.614, "step": 240270 }, { "epoch": 0.98, "grad_norm": 6.590695381164551, "learning_rate": 0.0002, "loss": 1.5473, "step": 240280 }, { "epoch": 0.98, "grad_norm": 2.346803903579712, "learning_rate": 0.0002, "loss": 1.5874, "step": 240290 }, { "epoch": 0.98, "grad_norm": 3.2802655696868896, "learning_rate": 0.0002, "loss": 1.7473, "step": 240300 }, { "epoch": 0.98, "grad_norm": 3.7636332511901855, "learning_rate": 0.0002, "loss": 1.5114, "step": 240310 }, { "epoch": 0.98, "grad_norm": 1.7874138355255127, "learning_rate": 0.0002, "loss": 1.6908, "step": 240320 }, { "epoch": 0.98, "grad_norm": 2.050246477127075, "learning_rate": 0.0002, "loss": 1.6522, "step": 240330 }, { "epoch": 0.98, "grad_norm": 3.459397792816162, "learning_rate": 0.0002, "loss": 1.5525, "step": 240340 }, { "epoch": 0.98, "grad_norm": 2.857123374938965, "learning_rate": 0.0002, "loss": 1.6285, "step": 240350 }, { "epoch": 0.98, "grad_norm": 2.8087658882141113, "learning_rate": 0.0002, "loss": 1.5257, "step": 240360 }, { "epoch": 0.98, "grad_norm": 3.0229125022888184, "learning_rate": 0.0002, "loss": 1.7773, "step": 240370 }, { "epoch": 0.98, "grad_norm": 4.890194892883301, "learning_rate": 0.0002, "loss": 1.7898, "step": 240380 }, { "epoch": 0.98, "grad_norm": 3.9226880073547363, "learning_rate": 0.0002, "loss": 1.6544, "step": 240390 }, { "epoch": 0.98, "grad_norm": 1.9777042865753174, "learning_rate": 0.0002, "loss": 1.7968, "step": 240400 }, { "epoch": 0.98, "grad_norm": 4.199312210083008, "learning_rate": 0.0002, "loss": 1.5195, "step": 240410 }, { "epoch": 0.98, "grad_norm": 4.742516994476318, "learning_rate": 0.0002, "loss": 1.5678, "step": 240420 }, { "epoch": 0.98, "grad_norm": 1.8993860483169556, "learning_rate": 0.0002, "loss": 1.215, "step": 240430 }, { "epoch": 0.98, "grad_norm": 7.33910608291626, "learning_rate": 0.0002, "loss": 1.5243, "step": 240440 }, { "epoch": 0.98, "grad_norm": 1.4179116487503052, "learning_rate": 0.0002, "loss": 1.626, "step": 240450 }, { "epoch": 0.98, "grad_norm": 3.593554735183716, "learning_rate": 0.0002, "loss": 1.4273, "step": 240460 }, { "epoch": 0.98, "grad_norm": 3.0932552814483643, "learning_rate": 0.0002, "loss": 1.6652, "step": 240470 }, { "epoch": 0.98, "grad_norm": 3.8003320693969727, "learning_rate": 0.0002, "loss": 1.6349, "step": 240480 }, { "epoch": 0.98, "grad_norm": 3.532355308532715, "learning_rate": 0.0002, "loss": 1.7129, "step": 240490 }, { "epoch": 0.98, "grad_norm": 3.0067756175994873, "learning_rate": 0.0002, "loss": 1.4399, "step": 240500 }, { "epoch": 0.98, "grad_norm": 2.7632672786712646, "learning_rate": 0.0002, "loss": 1.6198, "step": 240510 }, { "epoch": 0.98, "grad_norm": 2.4741177558898926, "learning_rate": 0.0002, "loss": 1.5803, "step": 240520 }, { "epoch": 0.98, "grad_norm": 3.8514184951782227, "learning_rate": 0.0002, "loss": 1.5813, "step": 240530 }, { "epoch": 0.98, "grad_norm": 3.4215242862701416, "learning_rate": 0.0002, "loss": 1.5475, "step": 240540 }, { "epoch": 0.98, "grad_norm": 2.827742099761963, "learning_rate": 0.0002, "loss": 1.8088, "step": 240550 }, { "epoch": 0.98, "grad_norm": 1.754846215248108, "learning_rate": 0.0002, "loss": 1.5938, "step": 240560 }, { "epoch": 0.98, "grad_norm": 3.0168440341949463, "learning_rate": 0.0002, "loss": 1.5557, "step": 240570 }, { "epoch": 0.98, "grad_norm": 4.623694896697998, "learning_rate": 0.0002, "loss": 1.6029, "step": 240580 }, { "epoch": 0.98, "grad_norm": 2.1724977493286133, "learning_rate": 0.0002, "loss": 1.4142, "step": 240590 }, { "epoch": 0.98, "grad_norm": 3.645493507385254, "learning_rate": 0.0002, "loss": 1.5489, "step": 240600 }, { "epoch": 0.98, "grad_norm": 2.606588125228882, "learning_rate": 0.0002, "loss": 1.6972, "step": 240610 }, { "epoch": 0.98, "grad_norm": 2.3781650066375732, "learning_rate": 0.0002, "loss": 1.6126, "step": 240620 }, { "epoch": 0.98, "grad_norm": 2.323713541030884, "learning_rate": 0.0002, "loss": 1.708, "step": 240630 }, { "epoch": 0.98, "grad_norm": 3.110398054122925, "learning_rate": 0.0002, "loss": 1.8326, "step": 240640 }, { "epoch": 0.98, "grad_norm": 2.954119920730591, "learning_rate": 0.0002, "loss": 1.9721, "step": 240650 }, { "epoch": 0.98, "grad_norm": 1.6447652578353882, "learning_rate": 0.0002, "loss": 1.6328, "step": 240660 }, { "epoch": 0.98, "grad_norm": 4.326522350311279, "learning_rate": 0.0002, "loss": 1.6343, "step": 240670 }, { "epoch": 0.98, "grad_norm": 2.4146151542663574, "learning_rate": 0.0002, "loss": 1.4804, "step": 240680 }, { "epoch": 0.98, "grad_norm": 4.456214904785156, "learning_rate": 0.0002, "loss": 1.758, "step": 240690 }, { "epoch": 0.98, "grad_norm": 3.104647397994995, "learning_rate": 0.0002, "loss": 1.6629, "step": 240700 }, { "epoch": 0.98, "grad_norm": 4.848875522613525, "learning_rate": 0.0002, "loss": 2.0349, "step": 240710 }, { "epoch": 0.98, "grad_norm": 2.2767913341522217, "learning_rate": 0.0002, "loss": 1.7573, "step": 240720 }, { "epoch": 0.98, "grad_norm": 3.805713653564453, "learning_rate": 0.0002, "loss": 1.2957, "step": 240730 }, { "epoch": 0.98, "grad_norm": 4.98151969909668, "learning_rate": 0.0002, "loss": 1.663, "step": 240740 }, { "epoch": 0.98, "grad_norm": 1.8284399509429932, "learning_rate": 0.0002, "loss": 1.6212, "step": 240750 }, { "epoch": 0.98, "grad_norm": 4.292240142822266, "learning_rate": 0.0002, "loss": 1.4781, "step": 240760 }, { "epoch": 0.98, "grad_norm": 2.6786000728607178, "learning_rate": 0.0002, "loss": 1.6465, "step": 240770 }, { "epoch": 0.98, "grad_norm": 3.7171008586883545, "learning_rate": 0.0002, "loss": 1.2266, "step": 240780 }, { "epoch": 0.98, "grad_norm": 2.7678253650665283, "learning_rate": 0.0002, "loss": 1.5828, "step": 240790 }, { "epoch": 0.98, "grad_norm": 1.326434850692749, "learning_rate": 0.0002, "loss": 1.4329, "step": 240800 }, { "epoch": 0.98, "grad_norm": 1.9608464241027832, "learning_rate": 0.0002, "loss": 1.7575, "step": 240810 }, { "epoch": 0.98, "grad_norm": 1.4349076747894287, "learning_rate": 0.0002, "loss": 1.7179, "step": 240820 }, { "epoch": 0.98, "grad_norm": 2.1162002086639404, "learning_rate": 0.0002, "loss": 1.7519, "step": 240830 }, { "epoch": 0.98, "grad_norm": 3.0591928958892822, "learning_rate": 0.0002, "loss": 1.5058, "step": 240840 }, { "epoch": 0.98, "grad_norm": 3.223264217376709, "learning_rate": 0.0002, "loss": 1.4118, "step": 240850 }, { "epoch": 0.98, "grad_norm": 5.0623273849487305, "learning_rate": 0.0002, "loss": 1.544, "step": 240860 }, { "epoch": 0.98, "grad_norm": 2.408562421798706, "learning_rate": 0.0002, "loss": 1.4996, "step": 240870 }, { "epoch": 0.98, "grad_norm": 2.2757556438446045, "learning_rate": 0.0002, "loss": 1.5161, "step": 240880 }, { "epoch": 0.98, "grad_norm": 1.9236551523208618, "learning_rate": 0.0002, "loss": 1.5903, "step": 240890 }, { "epoch": 0.98, "grad_norm": 2.7510554790496826, "learning_rate": 0.0002, "loss": 1.648, "step": 240900 }, { "epoch": 0.98, "grad_norm": 6.2342915534973145, "learning_rate": 0.0002, "loss": 1.5606, "step": 240910 }, { "epoch": 0.98, "grad_norm": 3.730729579925537, "learning_rate": 0.0002, "loss": 1.5474, "step": 240920 }, { "epoch": 0.98, "grad_norm": 3.472712278366089, "learning_rate": 0.0002, "loss": 1.8619, "step": 240930 }, { "epoch": 0.98, "grad_norm": 1.8874170780181885, "learning_rate": 0.0002, "loss": 1.7946, "step": 240940 }, { "epoch": 0.98, "grad_norm": 2.287616729736328, "learning_rate": 0.0002, "loss": 1.6616, "step": 240950 }, { "epoch": 0.98, "grad_norm": 1.9835660457611084, "learning_rate": 0.0002, "loss": 1.4226, "step": 240960 }, { "epoch": 0.98, "grad_norm": 2.554382085800171, "learning_rate": 0.0002, "loss": 1.4101, "step": 240970 }, { "epoch": 0.98, "grad_norm": 2.0120224952697754, "learning_rate": 0.0002, "loss": 1.6106, "step": 240980 }, { "epoch": 0.98, "grad_norm": 3.5406546592712402, "learning_rate": 0.0002, "loss": 1.6036, "step": 240990 }, { "epoch": 0.98, "grad_norm": 2.9304568767547607, "learning_rate": 0.0002, "loss": 1.4939, "step": 241000 }, { "epoch": 0.98, "grad_norm": 1.59674870967865, "learning_rate": 0.0002, "loss": 1.6723, "step": 241010 }, { "epoch": 0.98, "grad_norm": 3.32049822807312, "learning_rate": 0.0002, "loss": 1.5674, "step": 241020 }, { "epoch": 0.98, "grad_norm": 2.1742348670959473, "learning_rate": 0.0002, "loss": 1.6298, "step": 241030 }, { "epoch": 0.98, "grad_norm": 3.3790953159332275, "learning_rate": 0.0002, "loss": 1.5148, "step": 241040 }, { "epoch": 0.98, "grad_norm": 2.520526647567749, "learning_rate": 0.0002, "loss": 1.5985, "step": 241050 }, { "epoch": 0.98, "grad_norm": 3.747468948364258, "learning_rate": 0.0002, "loss": 1.876, "step": 241060 }, { "epoch": 0.98, "grad_norm": 3.541128396987915, "learning_rate": 0.0002, "loss": 1.9056, "step": 241070 }, { "epoch": 0.98, "grad_norm": 2.7586071491241455, "learning_rate": 0.0002, "loss": 1.3867, "step": 241080 }, { "epoch": 0.98, "grad_norm": 3.8024649620056152, "learning_rate": 0.0002, "loss": 1.3882, "step": 241090 }, { "epoch": 0.98, "grad_norm": 2.7821500301361084, "learning_rate": 0.0002, "loss": 1.5663, "step": 241100 }, { "epoch": 0.98, "grad_norm": 2.59783673286438, "learning_rate": 0.0002, "loss": 1.3925, "step": 241110 }, { "epoch": 0.98, "grad_norm": 3.4425625801086426, "learning_rate": 0.0002, "loss": 1.3707, "step": 241120 }, { "epoch": 0.98, "grad_norm": 2.890097141265869, "learning_rate": 0.0002, "loss": 1.5488, "step": 241130 }, { "epoch": 0.98, "grad_norm": 4.054685115814209, "learning_rate": 0.0002, "loss": 1.4875, "step": 241140 }, { "epoch": 0.98, "grad_norm": 3.9271209239959717, "learning_rate": 0.0002, "loss": 1.6175, "step": 241150 }, { "epoch": 0.98, "grad_norm": 4.595567226409912, "learning_rate": 0.0002, "loss": 1.7867, "step": 241160 }, { "epoch": 0.98, "grad_norm": 6.915510177612305, "learning_rate": 0.0002, "loss": 1.6308, "step": 241170 }, { "epoch": 0.98, "grad_norm": 2.550558090209961, "learning_rate": 0.0002, "loss": 1.7079, "step": 241180 }, { "epoch": 0.98, "grad_norm": 2.472111701965332, "learning_rate": 0.0002, "loss": 1.6107, "step": 241190 }, { "epoch": 0.98, "grad_norm": 3.4706947803497314, "learning_rate": 0.0002, "loss": 1.6508, "step": 241200 }, { "epoch": 0.98, "grad_norm": 2.2458603382110596, "learning_rate": 0.0002, "loss": 1.3102, "step": 241210 }, { "epoch": 0.98, "grad_norm": 3.5422205924987793, "learning_rate": 0.0002, "loss": 1.5151, "step": 241220 }, { "epoch": 0.98, "grad_norm": 3.260195255279541, "learning_rate": 0.0002, "loss": 1.6231, "step": 241230 }, { "epoch": 0.98, "grad_norm": 2.0209455490112305, "learning_rate": 0.0002, "loss": 1.4888, "step": 241240 }, { "epoch": 0.98, "grad_norm": 5.214035511016846, "learning_rate": 0.0002, "loss": 1.5081, "step": 241250 }, { "epoch": 0.98, "grad_norm": 2.8918120861053467, "learning_rate": 0.0002, "loss": 1.4661, "step": 241260 }, { "epoch": 0.98, "grad_norm": 2.293813943862915, "learning_rate": 0.0002, "loss": 1.3675, "step": 241270 }, { "epoch": 0.98, "grad_norm": 2.1530511379241943, "learning_rate": 0.0002, "loss": 1.7274, "step": 241280 }, { "epoch": 0.98, "grad_norm": 3.259718179702759, "learning_rate": 0.0002, "loss": 1.5653, "step": 241290 }, { "epoch": 0.98, "grad_norm": 3.004718780517578, "learning_rate": 0.0002, "loss": 1.6009, "step": 241300 }, { "epoch": 0.98, "grad_norm": 2.302058219909668, "learning_rate": 0.0002, "loss": 1.2314, "step": 241310 }, { "epoch": 0.98, "grad_norm": 1.7793262004852295, "learning_rate": 0.0002, "loss": 1.5703, "step": 241320 }, { "epoch": 0.98, "grad_norm": 3.0534934997558594, "learning_rate": 0.0002, "loss": 1.5619, "step": 241330 }, { "epoch": 0.98, "grad_norm": 3.492202043533325, "learning_rate": 0.0002, "loss": 1.4311, "step": 241340 }, { "epoch": 0.98, "grad_norm": 2.7012412548065186, "learning_rate": 0.0002, "loss": 1.6165, "step": 241350 }, { "epoch": 0.98, "grad_norm": 2.0429060459136963, "learning_rate": 0.0002, "loss": 1.5165, "step": 241360 }, { "epoch": 0.98, "grad_norm": 3.5734944343566895, "learning_rate": 0.0002, "loss": 1.4811, "step": 241370 }, { "epoch": 0.98, "grad_norm": 3.765270471572876, "learning_rate": 0.0002, "loss": 1.7831, "step": 241380 }, { "epoch": 0.98, "grad_norm": 1.8845030069351196, "learning_rate": 0.0002, "loss": 1.4508, "step": 241390 }, { "epoch": 0.98, "grad_norm": 2.4278275966644287, "learning_rate": 0.0002, "loss": 1.529, "step": 241400 }, { "epoch": 0.98, "grad_norm": 4.4445414543151855, "learning_rate": 0.0002, "loss": 1.648, "step": 241410 }, { "epoch": 0.98, "grad_norm": 3.3751165866851807, "learning_rate": 0.0002, "loss": 1.4089, "step": 241420 }, { "epoch": 0.98, "grad_norm": 2.0322890281677246, "learning_rate": 0.0002, "loss": 1.66, "step": 241430 }, { "epoch": 0.98, "grad_norm": 2.094388246536255, "learning_rate": 0.0002, "loss": 1.3731, "step": 241440 }, { "epoch": 0.98, "grad_norm": 2.6084682941436768, "learning_rate": 0.0002, "loss": 1.5306, "step": 241450 }, { "epoch": 0.98, "grad_norm": 1.7757372856140137, "learning_rate": 0.0002, "loss": 1.704, "step": 241460 }, { "epoch": 0.98, "grad_norm": 3.157618522644043, "learning_rate": 0.0002, "loss": 1.4724, "step": 241470 }, { "epoch": 0.98, "grad_norm": 2.923055648803711, "learning_rate": 0.0002, "loss": 1.3446, "step": 241480 }, { "epoch": 0.98, "grad_norm": 1.8258769512176514, "learning_rate": 0.0002, "loss": 1.5123, "step": 241490 }, { "epoch": 0.98, "grad_norm": 2.8666317462921143, "learning_rate": 0.0002, "loss": 1.8457, "step": 241500 }, { "epoch": 0.98, "grad_norm": 5.187094688415527, "learning_rate": 0.0002, "loss": 1.692, "step": 241510 }, { "epoch": 0.98, "grad_norm": 3.524193286895752, "learning_rate": 0.0002, "loss": 1.7341, "step": 241520 }, { "epoch": 0.98, "grad_norm": 3.203374147415161, "learning_rate": 0.0002, "loss": 1.7785, "step": 241530 }, { "epoch": 0.98, "grad_norm": 5.098714828491211, "learning_rate": 0.0002, "loss": 1.765, "step": 241540 }, { "epoch": 0.98, "grad_norm": 2.3304195404052734, "learning_rate": 0.0002, "loss": 1.6299, "step": 241550 }, { "epoch": 0.98, "grad_norm": 3.7921218872070312, "learning_rate": 0.0002, "loss": 1.424, "step": 241560 }, { "epoch": 0.98, "grad_norm": 4.981209754943848, "learning_rate": 0.0002, "loss": 1.5632, "step": 241570 }, { "epoch": 0.98, "grad_norm": 3.239616870880127, "learning_rate": 0.0002, "loss": 1.683, "step": 241580 }, { "epoch": 0.98, "grad_norm": 3.755540370941162, "learning_rate": 0.0002, "loss": 1.6921, "step": 241590 }, { "epoch": 0.98, "grad_norm": 1.6221376657485962, "learning_rate": 0.0002, "loss": 1.2546, "step": 241600 }, { "epoch": 0.98, "grad_norm": 3.133836030960083, "learning_rate": 0.0002, "loss": 1.7436, "step": 241610 }, { "epoch": 0.98, "grad_norm": 3.7163124084472656, "learning_rate": 0.0002, "loss": 1.7742, "step": 241620 }, { "epoch": 0.98, "grad_norm": 2.879243850708008, "learning_rate": 0.0002, "loss": 1.527, "step": 241630 }, { "epoch": 0.98, "grad_norm": 2.801687479019165, "learning_rate": 0.0002, "loss": 1.4901, "step": 241640 }, { "epoch": 0.98, "grad_norm": 5.308520793914795, "learning_rate": 0.0002, "loss": 1.3751, "step": 241650 }, { "epoch": 0.98, "grad_norm": 1.78432297706604, "learning_rate": 0.0002, "loss": 1.6805, "step": 241660 }, { "epoch": 0.98, "grad_norm": 1.722754955291748, "learning_rate": 0.0002, "loss": 1.619, "step": 241670 }, { "epoch": 0.98, "grad_norm": 6.469809532165527, "learning_rate": 0.0002, "loss": 1.714, "step": 241680 }, { "epoch": 0.98, "grad_norm": 3.0374341011047363, "learning_rate": 0.0002, "loss": 1.7267, "step": 241690 }, { "epoch": 0.98, "grad_norm": 2.4444427490234375, "learning_rate": 0.0002, "loss": 1.5912, "step": 241700 }, { "epoch": 0.98, "grad_norm": 2.98122501373291, "learning_rate": 0.0002, "loss": 1.7686, "step": 241710 }, { "epoch": 0.98, "grad_norm": 2.550804853439331, "learning_rate": 0.0002, "loss": 1.843, "step": 241720 }, { "epoch": 0.98, "grad_norm": 1.896756649017334, "learning_rate": 0.0002, "loss": 1.7114, "step": 241730 }, { "epoch": 0.98, "grad_norm": 3.3343539237976074, "learning_rate": 0.0002, "loss": 1.5237, "step": 241740 }, { "epoch": 0.98, "grad_norm": 2.7617151737213135, "learning_rate": 0.0002, "loss": 1.6032, "step": 241750 }, { "epoch": 0.98, "grad_norm": 3.975245714187622, "learning_rate": 0.0002, "loss": 1.626, "step": 241760 }, { "epoch": 0.98, "grad_norm": 3.785371780395508, "learning_rate": 0.0002, "loss": 1.722, "step": 241770 }, { "epoch": 0.98, "grad_norm": 2.1622798442840576, "learning_rate": 0.0002, "loss": 1.7161, "step": 241780 }, { "epoch": 0.98, "grad_norm": 2.4221649169921875, "learning_rate": 0.0002, "loss": 1.6627, "step": 241790 }, { "epoch": 0.98, "grad_norm": 3.4572620391845703, "learning_rate": 0.0002, "loss": 1.1552, "step": 241800 }, { "epoch": 0.98, "grad_norm": 2.164088726043701, "learning_rate": 0.0002, "loss": 1.256, "step": 241810 }, { "epoch": 0.98, "grad_norm": 5.276429653167725, "learning_rate": 0.0002, "loss": 1.5162, "step": 241820 }, { "epoch": 0.98, "grad_norm": 2.9955430030822754, "learning_rate": 0.0002, "loss": 1.367, "step": 241830 }, { "epoch": 0.98, "grad_norm": 2.0558602809906006, "learning_rate": 0.0002, "loss": 1.4552, "step": 241840 }, { "epoch": 0.98, "grad_norm": 2.361178398132324, "learning_rate": 0.0002, "loss": 1.6631, "step": 241850 }, { "epoch": 0.98, "grad_norm": 2.9075262546539307, "learning_rate": 0.0002, "loss": 1.6795, "step": 241860 }, { "epoch": 0.98, "grad_norm": 4.073111534118652, "learning_rate": 0.0002, "loss": 1.6676, "step": 241870 }, { "epoch": 0.98, "grad_norm": 3.1835145950317383, "learning_rate": 0.0002, "loss": 1.633, "step": 241880 }, { "epoch": 0.98, "grad_norm": 4.181859016418457, "learning_rate": 0.0002, "loss": 1.6656, "step": 241890 }, { "epoch": 0.98, "grad_norm": 3.1330690383911133, "learning_rate": 0.0002, "loss": 1.8174, "step": 241900 }, { "epoch": 0.98, "grad_norm": 2.549922227859497, "learning_rate": 0.0002, "loss": 1.8903, "step": 241910 }, { "epoch": 0.98, "grad_norm": 3.1873772144317627, "learning_rate": 0.0002, "loss": 1.6356, "step": 241920 }, { "epoch": 0.98, "grad_norm": 3.265507936477661, "learning_rate": 0.0002, "loss": 1.4423, "step": 241930 }, { "epoch": 0.98, "grad_norm": 2.6946194171905518, "learning_rate": 0.0002, "loss": 1.6319, "step": 241940 }, { "epoch": 0.98, "grad_norm": 3.628329277038574, "learning_rate": 0.0002, "loss": 1.62, "step": 241950 }, { "epoch": 0.99, "grad_norm": 2.707777738571167, "learning_rate": 0.0002, "loss": 1.4976, "step": 241960 }, { "epoch": 0.99, "grad_norm": 1.754411220550537, "learning_rate": 0.0002, "loss": 1.4735, "step": 241970 }, { "epoch": 0.99, "grad_norm": 4.172959804534912, "learning_rate": 0.0002, "loss": 1.709, "step": 241980 }, { "epoch": 0.99, "grad_norm": 2.8998918533325195, "learning_rate": 0.0002, "loss": 1.5765, "step": 241990 }, { "epoch": 0.99, "grad_norm": 2.99112868309021, "learning_rate": 0.0002, "loss": 1.6135, "step": 242000 }, { "epoch": 0.99, "grad_norm": 5.54975700378418, "learning_rate": 0.0002, "loss": 1.6164, "step": 242010 }, { "epoch": 0.99, "grad_norm": 3.4565951824188232, "learning_rate": 0.0002, "loss": 1.6809, "step": 242020 }, { "epoch": 0.99, "grad_norm": 1.7651277780532837, "learning_rate": 0.0002, "loss": 1.5027, "step": 242030 }, { "epoch": 0.99, "grad_norm": 3.9430742263793945, "learning_rate": 0.0002, "loss": 1.5512, "step": 242040 }, { "epoch": 0.99, "grad_norm": 3.701111316680908, "learning_rate": 0.0002, "loss": 1.6604, "step": 242050 }, { "epoch": 0.99, "grad_norm": 2.971637487411499, "learning_rate": 0.0002, "loss": 1.796, "step": 242060 }, { "epoch": 0.99, "grad_norm": 2.067898750305176, "learning_rate": 0.0002, "loss": 1.8021, "step": 242070 }, { "epoch": 0.99, "grad_norm": 2.5629403591156006, "learning_rate": 0.0002, "loss": 1.5504, "step": 242080 }, { "epoch": 0.99, "grad_norm": 2.873114824295044, "learning_rate": 0.0002, "loss": 1.4824, "step": 242090 }, { "epoch": 0.99, "grad_norm": 3.556049108505249, "learning_rate": 0.0002, "loss": 1.7304, "step": 242100 }, { "epoch": 0.99, "grad_norm": 2.380333662033081, "learning_rate": 0.0002, "loss": 1.5338, "step": 242110 }, { "epoch": 0.99, "grad_norm": 3.2447032928466797, "learning_rate": 0.0002, "loss": 1.6563, "step": 242120 }, { "epoch": 0.99, "grad_norm": 2.6548209190368652, "learning_rate": 0.0002, "loss": 1.6117, "step": 242130 }, { "epoch": 0.99, "grad_norm": 2.1819725036621094, "learning_rate": 0.0002, "loss": 1.5333, "step": 242140 }, { "epoch": 0.99, "grad_norm": 2.279979944229126, "learning_rate": 0.0002, "loss": 1.6345, "step": 242150 }, { "epoch": 0.99, "grad_norm": 2.7663464546203613, "learning_rate": 0.0002, "loss": 1.6273, "step": 242160 }, { "epoch": 0.99, "grad_norm": 2.324845552444458, "learning_rate": 0.0002, "loss": 1.7254, "step": 242170 }, { "epoch": 0.99, "grad_norm": 2.33620285987854, "learning_rate": 0.0002, "loss": 1.593, "step": 242180 }, { "epoch": 0.99, "grad_norm": 1.7050621509552002, "learning_rate": 0.0002, "loss": 1.6608, "step": 242190 }, { "epoch": 0.99, "grad_norm": 2.2535526752471924, "learning_rate": 0.0002, "loss": 1.7058, "step": 242200 }, { "epoch": 0.99, "grad_norm": 2.5690178871154785, "learning_rate": 0.0002, "loss": 1.6053, "step": 242210 }, { "epoch": 0.99, "grad_norm": 3.3725972175598145, "learning_rate": 0.0002, "loss": 1.7772, "step": 242220 }, { "epoch": 0.99, "grad_norm": 4.136144638061523, "learning_rate": 0.0002, "loss": 1.8065, "step": 242230 }, { "epoch": 0.99, "grad_norm": 3.7891831398010254, "learning_rate": 0.0002, "loss": 1.7282, "step": 242240 }, { "epoch": 0.99, "grad_norm": 3.0331852436065674, "learning_rate": 0.0002, "loss": 1.7397, "step": 242250 }, { "epoch": 0.99, "grad_norm": 3.844373941421509, "learning_rate": 0.0002, "loss": 1.4046, "step": 242260 }, { "epoch": 0.99, "grad_norm": 3.0050477981567383, "learning_rate": 0.0002, "loss": 1.6009, "step": 242270 }, { "epoch": 0.99, "grad_norm": 3.414177417755127, "learning_rate": 0.0002, "loss": 1.6046, "step": 242280 }, { "epoch": 0.99, "grad_norm": 2.806039810180664, "learning_rate": 0.0002, "loss": 1.4566, "step": 242290 }, { "epoch": 0.99, "grad_norm": 3.2021520137786865, "learning_rate": 0.0002, "loss": 1.7916, "step": 242300 }, { "epoch": 0.99, "grad_norm": 2.1090517044067383, "learning_rate": 0.0002, "loss": 1.4994, "step": 242310 }, { "epoch": 0.99, "grad_norm": 2.6027626991271973, "learning_rate": 0.0002, "loss": 1.4592, "step": 242320 }, { "epoch": 0.99, "grad_norm": 2.6509273052215576, "learning_rate": 0.0002, "loss": 1.6771, "step": 242330 }, { "epoch": 0.99, "grad_norm": 2.1617226600646973, "learning_rate": 0.0002, "loss": 1.831, "step": 242340 }, { "epoch": 0.99, "grad_norm": 3.82883358001709, "learning_rate": 0.0002, "loss": 1.48, "step": 242350 }, { "epoch": 0.99, "grad_norm": 3.3718526363372803, "learning_rate": 0.0002, "loss": 1.6604, "step": 242360 }, { "epoch": 0.99, "grad_norm": 2.3071646690368652, "learning_rate": 0.0002, "loss": 1.5187, "step": 242370 }, { "epoch": 0.99, "grad_norm": 2.897547721862793, "learning_rate": 0.0002, "loss": 1.6438, "step": 242380 }, { "epoch": 0.99, "grad_norm": 3.515476942062378, "learning_rate": 0.0002, "loss": 1.6725, "step": 242390 }, { "epoch": 0.99, "grad_norm": 7.377354621887207, "learning_rate": 0.0002, "loss": 1.7226, "step": 242400 }, { "epoch": 0.99, "grad_norm": 3.9645586013793945, "learning_rate": 0.0002, "loss": 1.6397, "step": 242410 }, { "epoch": 0.99, "grad_norm": 2.6337993144989014, "learning_rate": 0.0002, "loss": 1.5249, "step": 242420 }, { "epoch": 0.99, "grad_norm": 3.2147231101989746, "learning_rate": 0.0002, "loss": 1.6151, "step": 242430 }, { "epoch": 0.99, "grad_norm": 2.5037918090820312, "learning_rate": 0.0002, "loss": 1.6079, "step": 242440 }, { "epoch": 0.99, "grad_norm": 5.141454219818115, "learning_rate": 0.0002, "loss": 1.4946, "step": 242450 }, { "epoch": 0.99, "grad_norm": 3.639397382736206, "learning_rate": 0.0002, "loss": 1.5694, "step": 242460 }, { "epoch": 0.99, "grad_norm": 3.5389068126678467, "learning_rate": 0.0002, "loss": 1.9027, "step": 242470 }, { "epoch": 0.99, "grad_norm": 2.127535343170166, "learning_rate": 0.0002, "loss": 1.7958, "step": 242480 }, { "epoch": 0.99, "grad_norm": 3.0125608444213867, "learning_rate": 0.0002, "loss": 1.8613, "step": 242490 }, { "epoch": 0.99, "grad_norm": 3.795588254928589, "learning_rate": 0.0002, "loss": 1.6307, "step": 242500 }, { "epoch": 0.99, "grad_norm": 3.3367087841033936, "learning_rate": 0.0002, "loss": 1.604, "step": 242510 }, { "epoch": 0.99, "grad_norm": 3.0118839740753174, "learning_rate": 0.0002, "loss": 1.6708, "step": 242520 }, { "epoch": 0.99, "grad_norm": 1.8638392686843872, "learning_rate": 0.0002, "loss": 1.6317, "step": 242530 }, { "epoch": 0.99, "grad_norm": 8.303910255432129, "learning_rate": 0.0002, "loss": 1.5292, "step": 242540 }, { "epoch": 0.99, "grad_norm": 4.43212366104126, "learning_rate": 0.0002, "loss": 1.7121, "step": 242550 }, { "epoch": 0.99, "grad_norm": 3.3131284713745117, "learning_rate": 0.0002, "loss": 1.5912, "step": 242560 }, { "epoch": 0.99, "grad_norm": 2.105621099472046, "learning_rate": 0.0002, "loss": 1.5193, "step": 242570 }, { "epoch": 0.99, "grad_norm": 2.3038623332977295, "learning_rate": 0.0002, "loss": 1.5878, "step": 242580 }, { "epoch": 0.99, "grad_norm": 2.88665771484375, "learning_rate": 0.0002, "loss": 1.5672, "step": 242590 }, { "epoch": 0.99, "grad_norm": 2.528456926345825, "learning_rate": 0.0002, "loss": 1.5526, "step": 242600 }, { "epoch": 0.99, "grad_norm": 3.4715476036071777, "learning_rate": 0.0002, "loss": 1.4774, "step": 242610 }, { "epoch": 0.99, "grad_norm": 2.1462087631225586, "learning_rate": 0.0002, "loss": 1.5148, "step": 242620 }, { "epoch": 0.99, "grad_norm": 3.048832893371582, "learning_rate": 0.0002, "loss": 1.5282, "step": 242630 }, { "epoch": 0.99, "grad_norm": 2.715388298034668, "learning_rate": 0.0002, "loss": 1.4284, "step": 242640 }, { "epoch": 0.99, "grad_norm": 1.8832718133926392, "learning_rate": 0.0002, "loss": 1.3621, "step": 242650 }, { "epoch": 0.99, "grad_norm": 3.169598340988159, "learning_rate": 0.0002, "loss": 1.6774, "step": 242660 }, { "epoch": 0.99, "grad_norm": 2.487675905227661, "learning_rate": 0.0002, "loss": 1.8208, "step": 242670 }, { "epoch": 0.99, "grad_norm": 2.9031543731689453, "learning_rate": 0.0002, "loss": 1.7786, "step": 242680 }, { "epoch": 0.99, "grad_norm": 2.6673035621643066, "learning_rate": 0.0002, "loss": 1.6069, "step": 242690 }, { "epoch": 0.99, "grad_norm": 2.873826742172241, "learning_rate": 0.0002, "loss": 1.5969, "step": 242700 }, { "epoch": 0.99, "grad_norm": 1.4966462850570679, "learning_rate": 0.0002, "loss": 1.6737, "step": 242710 }, { "epoch": 0.99, "grad_norm": 4.450915813446045, "learning_rate": 0.0002, "loss": 1.5646, "step": 242720 }, { "epoch": 0.99, "grad_norm": 3.8002583980560303, "learning_rate": 0.0002, "loss": 1.4525, "step": 242730 }, { "epoch": 0.99, "grad_norm": 2.250065565109253, "learning_rate": 0.0002, "loss": 1.519, "step": 242740 }, { "epoch": 0.99, "grad_norm": 2.3849403858184814, "learning_rate": 0.0002, "loss": 1.8213, "step": 242750 }, { "epoch": 0.99, "grad_norm": 3.7994112968444824, "learning_rate": 0.0002, "loss": 1.7324, "step": 242760 }, { "epoch": 0.99, "grad_norm": 4.175635814666748, "learning_rate": 0.0002, "loss": 1.4698, "step": 242770 }, { "epoch": 0.99, "grad_norm": 3.26520037651062, "learning_rate": 0.0002, "loss": 1.3601, "step": 242780 }, { "epoch": 0.99, "grad_norm": 3.212803363800049, "learning_rate": 0.0002, "loss": 1.5914, "step": 242790 }, { "epoch": 0.99, "grad_norm": 4.312051773071289, "learning_rate": 0.0002, "loss": 1.8255, "step": 242800 }, { "epoch": 0.99, "grad_norm": 2.607015609741211, "learning_rate": 0.0002, "loss": 1.5611, "step": 242810 }, { "epoch": 0.99, "grad_norm": 2.5829803943634033, "learning_rate": 0.0002, "loss": 1.4721, "step": 242820 }, { "epoch": 0.99, "grad_norm": 1.7488620281219482, "learning_rate": 0.0002, "loss": 1.5042, "step": 242830 }, { "epoch": 0.99, "grad_norm": 4.087906837463379, "learning_rate": 0.0002, "loss": 1.4646, "step": 242840 }, { "epoch": 0.99, "grad_norm": 3.9954919815063477, "learning_rate": 0.0002, "loss": 1.5901, "step": 242850 }, { "epoch": 0.99, "grad_norm": 3.535252094268799, "learning_rate": 0.0002, "loss": 1.6627, "step": 242860 }, { "epoch": 0.99, "grad_norm": 2.907944440841675, "learning_rate": 0.0002, "loss": 1.5497, "step": 242870 }, { "epoch": 0.99, "grad_norm": 6.0949907302856445, "learning_rate": 0.0002, "loss": 1.4877, "step": 242880 }, { "epoch": 0.99, "grad_norm": 2.2411231994628906, "learning_rate": 0.0002, "loss": 1.572, "step": 242890 }, { "epoch": 0.99, "grad_norm": 3.044865608215332, "learning_rate": 0.0002, "loss": 1.5082, "step": 242900 }, { "epoch": 0.99, "grad_norm": 1.3409289121627808, "learning_rate": 0.0002, "loss": 1.6321, "step": 242910 }, { "epoch": 0.99, "grad_norm": 3.1982173919677734, "learning_rate": 0.0002, "loss": 1.4552, "step": 242920 }, { "epoch": 0.99, "grad_norm": 3.495957374572754, "learning_rate": 0.0002, "loss": 1.5927, "step": 242930 }, { "epoch": 0.99, "grad_norm": 2.861590623855591, "learning_rate": 0.0002, "loss": 1.656, "step": 242940 }, { "epoch": 0.99, "grad_norm": 2.437804698944092, "learning_rate": 0.0002, "loss": 1.6398, "step": 242950 }, { "epoch": 0.99, "grad_norm": 2.6303415298461914, "learning_rate": 0.0002, "loss": 1.5973, "step": 242960 }, { "epoch": 0.99, "grad_norm": 3.7819435596466064, "learning_rate": 0.0002, "loss": 1.6589, "step": 242970 }, { "epoch": 0.99, "grad_norm": 2.0148890018463135, "learning_rate": 0.0002, "loss": 1.603, "step": 242980 }, { "epoch": 0.99, "grad_norm": 2.476586103439331, "learning_rate": 0.0002, "loss": 1.1202, "step": 242990 }, { "epoch": 0.99, "grad_norm": 3.217297077178955, "learning_rate": 0.0002, "loss": 1.3085, "step": 243000 }, { "epoch": 0.99, "grad_norm": 2.9215877056121826, "learning_rate": 0.0002, "loss": 1.4911, "step": 243010 }, { "epoch": 0.99, "grad_norm": 2.325695037841797, "learning_rate": 0.0002, "loss": 1.464, "step": 243020 }, { "epoch": 0.99, "grad_norm": 2.531381607055664, "learning_rate": 0.0002, "loss": 1.6767, "step": 243030 }, { "epoch": 0.99, "grad_norm": 4.207574367523193, "learning_rate": 0.0002, "loss": 1.4759, "step": 243040 }, { "epoch": 0.99, "grad_norm": 3.098871946334839, "learning_rate": 0.0002, "loss": 1.5253, "step": 243050 }, { "epoch": 0.99, "grad_norm": 2.141728639602661, "learning_rate": 0.0002, "loss": 1.4722, "step": 243060 }, { "epoch": 0.99, "grad_norm": 2.467589855194092, "learning_rate": 0.0002, "loss": 1.5789, "step": 243070 }, { "epoch": 0.99, "grad_norm": 4.660097122192383, "learning_rate": 0.0002, "loss": 1.6073, "step": 243080 }, { "epoch": 0.99, "grad_norm": 2.137152671813965, "learning_rate": 0.0002, "loss": 1.4941, "step": 243090 }, { "epoch": 0.99, "grad_norm": 1.9055447578430176, "learning_rate": 0.0002, "loss": 1.6163, "step": 243100 }, { "epoch": 0.99, "grad_norm": 4.6176371574401855, "learning_rate": 0.0002, "loss": 1.5606, "step": 243110 }, { "epoch": 0.99, "grad_norm": 2.8235888481140137, "learning_rate": 0.0002, "loss": 1.6374, "step": 243120 }, { "epoch": 0.99, "grad_norm": 3.0311014652252197, "learning_rate": 0.0002, "loss": 1.3326, "step": 243130 }, { "epoch": 0.99, "grad_norm": 5.276876926422119, "learning_rate": 0.0002, "loss": 1.75, "step": 243140 }, { "epoch": 0.99, "grad_norm": 4.129922866821289, "learning_rate": 0.0002, "loss": 1.5628, "step": 243150 }, { "epoch": 0.99, "grad_norm": 4.562769412994385, "learning_rate": 0.0002, "loss": 1.5377, "step": 243160 }, { "epoch": 0.99, "grad_norm": 3.6121106147766113, "learning_rate": 0.0002, "loss": 1.5763, "step": 243170 }, { "epoch": 0.99, "grad_norm": 3.908025026321411, "learning_rate": 0.0002, "loss": 1.6549, "step": 243180 }, { "epoch": 0.99, "grad_norm": 1.6850035190582275, "learning_rate": 0.0002, "loss": 1.5133, "step": 243190 }, { "epoch": 0.99, "grad_norm": 3.097562551498413, "learning_rate": 0.0002, "loss": 1.6816, "step": 243200 }, { "epoch": 0.99, "grad_norm": 1.9434291124343872, "learning_rate": 0.0002, "loss": 1.8674, "step": 243210 }, { "epoch": 0.99, "grad_norm": 2.8152289390563965, "learning_rate": 0.0002, "loss": 1.5084, "step": 243220 }, { "epoch": 0.99, "grad_norm": 2.917461395263672, "learning_rate": 0.0002, "loss": 1.6704, "step": 243230 }, { "epoch": 0.99, "grad_norm": 2.497300148010254, "learning_rate": 0.0002, "loss": 1.6227, "step": 243240 }, { "epoch": 0.99, "grad_norm": 2.9195783138275146, "learning_rate": 0.0002, "loss": 1.426, "step": 243250 }, { "epoch": 0.99, "grad_norm": 3.3039398193359375, "learning_rate": 0.0002, "loss": 1.4877, "step": 243260 }, { "epoch": 0.99, "grad_norm": 2.9806177616119385, "learning_rate": 0.0002, "loss": 1.6393, "step": 243270 }, { "epoch": 0.99, "grad_norm": 5.1297783851623535, "learning_rate": 0.0002, "loss": 1.5225, "step": 243280 }, { "epoch": 0.99, "grad_norm": 2.2448647022247314, "learning_rate": 0.0002, "loss": 1.5706, "step": 243290 }, { "epoch": 0.99, "grad_norm": 3.0065512657165527, "learning_rate": 0.0002, "loss": 1.6256, "step": 243300 }, { "epoch": 0.99, "grad_norm": 4.677682876586914, "learning_rate": 0.0002, "loss": 1.382, "step": 243310 }, { "epoch": 0.99, "grad_norm": 2.7042770385742188, "learning_rate": 0.0002, "loss": 1.4919, "step": 243320 }, { "epoch": 0.99, "grad_norm": 2.8300485610961914, "learning_rate": 0.0002, "loss": 1.4778, "step": 243330 }, { "epoch": 0.99, "grad_norm": 2.2365808486938477, "learning_rate": 0.0002, "loss": 1.4189, "step": 243340 }, { "epoch": 0.99, "grad_norm": 3.9218783378601074, "learning_rate": 0.0002, "loss": 1.5827, "step": 243350 }, { "epoch": 0.99, "grad_norm": 3.229163646697998, "learning_rate": 0.0002, "loss": 1.8497, "step": 243360 }, { "epoch": 0.99, "grad_norm": 2.4232962131500244, "learning_rate": 0.0002, "loss": 1.5329, "step": 243370 }, { "epoch": 0.99, "grad_norm": 2.5563716888427734, "learning_rate": 0.0002, "loss": 1.7529, "step": 243380 }, { "epoch": 0.99, "grad_norm": 2.8350696563720703, "learning_rate": 0.0002, "loss": 1.7716, "step": 243390 }, { "epoch": 0.99, "grad_norm": 3.267479658126831, "learning_rate": 0.0002, "loss": 1.7802, "step": 243400 }, { "epoch": 0.99, "grad_norm": 3.4667272567749023, "learning_rate": 0.0002, "loss": 1.5878, "step": 243410 }, { "epoch": 0.99, "grad_norm": 2.9583709239959717, "learning_rate": 0.0002, "loss": 1.6919, "step": 243420 }, { "epoch": 0.99, "grad_norm": 2.402367115020752, "learning_rate": 0.0002, "loss": 1.7764, "step": 243430 }, { "epoch": 0.99, "grad_norm": 2.995816707611084, "learning_rate": 0.0002, "loss": 1.3854, "step": 243440 }, { "epoch": 0.99, "grad_norm": 3.2186672687530518, "learning_rate": 0.0002, "loss": 1.5025, "step": 243450 }, { "epoch": 0.99, "grad_norm": 3.069920301437378, "learning_rate": 0.0002, "loss": 1.5287, "step": 243460 }, { "epoch": 0.99, "grad_norm": 3.735846996307373, "learning_rate": 0.0002, "loss": 1.5199, "step": 243470 }, { "epoch": 0.99, "grad_norm": 2.310061454772949, "learning_rate": 0.0002, "loss": 1.6423, "step": 243480 }, { "epoch": 0.99, "grad_norm": 2.714578151702881, "learning_rate": 0.0002, "loss": 1.6186, "step": 243490 }, { "epoch": 0.99, "grad_norm": 8.868927955627441, "learning_rate": 0.0002, "loss": 1.5885, "step": 243500 }, { "epoch": 0.99, "grad_norm": 3.502875566482544, "learning_rate": 0.0002, "loss": 1.661, "step": 243510 }, { "epoch": 0.99, "grad_norm": 3.3919191360473633, "learning_rate": 0.0002, "loss": 1.4758, "step": 243520 }, { "epoch": 0.99, "grad_norm": 2.8313636779785156, "learning_rate": 0.0002, "loss": 1.5673, "step": 243530 }, { "epoch": 0.99, "grad_norm": 3.3224542140960693, "learning_rate": 0.0002, "loss": 1.7993, "step": 243540 }, { "epoch": 0.99, "grad_norm": 4.385492324829102, "learning_rate": 0.0002, "loss": 1.6975, "step": 243550 }, { "epoch": 0.99, "grad_norm": 2.998079299926758, "learning_rate": 0.0002, "loss": 1.5048, "step": 243560 }, { "epoch": 0.99, "grad_norm": 6.025020122528076, "learning_rate": 0.0002, "loss": 1.5607, "step": 243570 }, { "epoch": 0.99, "grad_norm": 2.370638132095337, "learning_rate": 0.0002, "loss": 1.6492, "step": 243580 }, { "epoch": 0.99, "grad_norm": 2.7499239444732666, "learning_rate": 0.0002, "loss": 1.6051, "step": 243590 }, { "epoch": 0.99, "grad_norm": 3.10481333732605, "learning_rate": 0.0002, "loss": 1.6062, "step": 243600 }, { "epoch": 0.99, "grad_norm": 2.135939836502075, "learning_rate": 0.0002, "loss": 1.551, "step": 243610 }, { "epoch": 0.99, "grad_norm": 3.393120050430298, "learning_rate": 0.0002, "loss": 1.7509, "step": 243620 }, { "epoch": 0.99, "grad_norm": 4.140952110290527, "learning_rate": 0.0002, "loss": 1.5728, "step": 243630 }, { "epoch": 0.99, "grad_norm": 2.7352633476257324, "learning_rate": 0.0002, "loss": 1.3976, "step": 243640 }, { "epoch": 0.99, "grad_norm": 2.349464178085327, "learning_rate": 0.0002, "loss": 1.6576, "step": 243650 }, { "epoch": 0.99, "grad_norm": 2.7892329692840576, "learning_rate": 0.0002, "loss": 1.6113, "step": 243660 }, { "epoch": 0.99, "grad_norm": 2.77640962600708, "learning_rate": 0.0002, "loss": 1.7822, "step": 243670 }, { "epoch": 0.99, "grad_norm": 3.7136383056640625, "learning_rate": 0.0002, "loss": 1.5446, "step": 243680 }, { "epoch": 0.99, "grad_norm": 4.656014442443848, "learning_rate": 0.0002, "loss": 1.4881, "step": 243690 }, { "epoch": 0.99, "grad_norm": 1.6894499063491821, "learning_rate": 0.0002, "loss": 1.7798, "step": 243700 }, { "epoch": 0.99, "grad_norm": 3.0941529273986816, "learning_rate": 0.0002, "loss": 1.5127, "step": 243710 }, { "epoch": 0.99, "grad_norm": 2.6832408905029297, "learning_rate": 0.0002, "loss": 1.6089, "step": 243720 }, { "epoch": 0.99, "grad_norm": 3.6693737506866455, "learning_rate": 0.0002, "loss": 1.611, "step": 243730 }, { "epoch": 0.99, "grad_norm": 1.3258814811706543, "learning_rate": 0.0002, "loss": 1.5459, "step": 243740 }, { "epoch": 0.99, "grad_norm": 3.176663637161255, "learning_rate": 0.0002, "loss": 1.6183, "step": 243750 }, { "epoch": 0.99, "grad_norm": 2.0748307704925537, "learning_rate": 0.0002, "loss": 1.5807, "step": 243760 }, { "epoch": 0.99, "grad_norm": 3.4512152671813965, "learning_rate": 0.0002, "loss": 1.6362, "step": 243770 }, { "epoch": 0.99, "grad_norm": 3.342111110687256, "learning_rate": 0.0002, "loss": 1.3862, "step": 243780 }, { "epoch": 0.99, "grad_norm": 2.713397264480591, "learning_rate": 0.0002, "loss": 1.4526, "step": 243790 }, { "epoch": 0.99, "grad_norm": 2.782285690307617, "learning_rate": 0.0002, "loss": 1.3619, "step": 243800 }, { "epoch": 0.99, "grad_norm": 4.539551734924316, "learning_rate": 0.0002, "loss": 1.5646, "step": 243810 }, { "epoch": 0.99, "grad_norm": 6.3634562492370605, "learning_rate": 0.0002, "loss": 1.4595, "step": 243820 }, { "epoch": 0.99, "grad_norm": 3.0050978660583496, "learning_rate": 0.0002, "loss": 1.7911, "step": 243830 }, { "epoch": 0.99, "grad_norm": 3.259690761566162, "learning_rate": 0.0002, "loss": 1.4788, "step": 243840 }, { "epoch": 0.99, "grad_norm": 3.7107834815979004, "learning_rate": 0.0002, "loss": 1.5104, "step": 243850 }, { "epoch": 0.99, "grad_norm": 2.7128913402557373, "learning_rate": 0.0002, "loss": 1.6701, "step": 243860 }, { "epoch": 0.99, "grad_norm": 1.5099900960922241, "learning_rate": 0.0002, "loss": 1.1706, "step": 243870 }, { "epoch": 0.99, "grad_norm": 4.765144348144531, "learning_rate": 0.0002, "loss": 1.735, "step": 243880 }, { "epoch": 0.99, "grad_norm": 3.0372321605682373, "learning_rate": 0.0002, "loss": 1.371, "step": 243890 }, { "epoch": 0.99, "grad_norm": 3.39506459236145, "learning_rate": 0.0002, "loss": 1.5612, "step": 243900 }, { "epoch": 0.99, "grad_norm": 3.060067653656006, "learning_rate": 0.0002, "loss": 1.573, "step": 243910 }, { "epoch": 0.99, "grad_norm": 3.4179434776306152, "learning_rate": 0.0002, "loss": 1.6027, "step": 243920 }, { "epoch": 0.99, "grad_norm": 3.0578701496124268, "learning_rate": 0.0002, "loss": 1.5234, "step": 243930 }, { "epoch": 0.99, "grad_norm": 3.210442543029785, "learning_rate": 0.0002, "loss": 1.5635, "step": 243940 }, { "epoch": 0.99, "grad_norm": 4.057765483856201, "learning_rate": 0.0002, "loss": 1.6326, "step": 243950 }, { "epoch": 0.99, "grad_norm": 2.622526168823242, "learning_rate": 0.0002, "loss": 1.7195, "step": 243960 }, { "epoch": 0.99, "grad_norm": 2.2562942504882812, "learning_rate": 0.0002, "loss": 1.511, "step": 243970 }, { "epoch": 0.99, "grad_norm": 2.8149964809417725, "learning_rate": 0.0002, "loss": 1.5986, "step": 243980 }, { "epoch": 0.99, "grad_norm": 2.5065343379974365, "learning_rate": 0.0002, "loss": 1.6513, "step": 243990 }, { "epoch": 0.99, "grad_norm": 3.3856372833251953, "learning_rate": 0.0002, "loss": 1.506, "step": 244000 }, { "epoch": 0.99, "grad_norm": 3.3405134677886963, "learning_rate": 0.0002, "loss": 1.6209, "step": 244010 }, { "epoch": 0.99, "grad_norm": 3.6969971656799316, "learning_rate": 0.0002, "loss": 1.4969, "step": 244020 }, { "epoch": 0.99, "grad_norm": 1.4569644927978516, "learning_rate": 0.0002, "loss": 1.7354, "step": 244030 }, { "epoch": 0.99, "grad_norm": 3.601579189300537, "learning_rate": 0.0002, "loss": 1.2433, "step": 244040 }, { "epoch": 0.99, "grad_norm": 3.8458995819091797, "learning_rate": 0.0002, "loss": 1.6562, "step": 244050 }, { "epoch": 0.99, "grad_norm": 2.4081151485443115, "learning_rate": 0.0002, "loss": 1.5734, "step": 244060 }, { "epoch": 0.99, "grad_norm": 1.7969154119491577, "learning_rate": 0.0002, "loss": 1.7507, "step": 244070 }, { "epoch": 0.99, "grad_norm": 2.5974338054656982, "learning_rate": 0.0002, "loss": 1.7515, "step": 244080 }, { "epoch": 0.99, "grad_norm": 2.8836069107055664, "learning_rate": 0.0002, "loss": 1.4667, "step": 244090 }, { "epoch": 0.99, "grad_norm": 2.0392487049102783, "learning_rate": 0.0002, "loss": 1.652, "step": 244100 }, { "epoch": 0.99, "grad_norm": 1.9610720872879028, "learning_rate": 0.0002, "loss": 1.5595, "step": 244110 }, { "epoch": 0.99, "grad_norm": 6.601734638214111, "learning_rate": 0.0002, "loss": 1.6869, "step": 244120 }, { "epoch": 0.99, "grad_norm": 2.8203160762786865, "learning_rate": 0.0002, "loss": 1.6693, "step": 244130 }, { "epoch": 0.99, "grad_norm": 2.562386989593506, "learning_rate": 0.0002, "loss": 1.7045, "step": 244140 }, { "epoch": 0.99, "grad_norm": 2.0646438598632812, "learning_rate": 0.0002, "loss": 1.2954, "step": 244150 }, { "epoch": 0.99, "grad_norm": 3.188357353210449, "learning_rate": 0.0002, "loss": 1.4848, "step": 244160 }, { "epoch": 0.99, "grad_norm": 5.014845848083496, "learning_rate": 0.0002, "loss": 1.8258, "step": 244170 }, { "epoch": 0.99, "grad_norm": 3.2358639240264893, "learning_rate": 0.0002, "loss": 1.5227, "step": 244180 }, { "epoch": 0.99, "grad_norm": 1.8312759399414062, "learning_rate": 0.0002, "loss": 1.6234, "step": 244190 }, { "epoch": 0.99, "grad_norm": 12.33316421508789, "learning_rate": 0.0002, "loss": 1.6661, "step": 244200 }, { "epoch": 0.99, "grad_norm": 1.8505253791809082, "learning_rate": 0.0002, "loss": 1.4393, "step": 244210 }, { "epoch": 0.99, "grad_norm": 3.298473596572876, "learning_rate": 0.0002, "loss": 1.4479, "step": 244220 }, { "epoch": 0.99, "grad_norm": 2.4313206672668457, "learning_rate": 0.0002, "loss": 1.4966, "step": 244230 }, { "epoch": 0.99, "grad_norm": 2.2697815895080566, "learning_rate": 0.0002, "loss": 1.5228, "step": 244240 }, { "epoch": 0.99, "grad_norm": 3.241797924041748, "learning_rate": 0.0002, "loss": 1.551, "step": 244250 }, { "epoch": 0.99, "grad_norm": 5.538398742675781, "learning_rate": 0.0002, "loss": 1.3207, "step": 244260 }, { "epoch": 0.99, "grad_norm": 2.930231809616089, "learning_rate": 0.0002, "loss": 1.5855, "step": 244270 }, { "epoch": 0.99, "grad_norm": 6.693807125091553, "learning_rate": 0.0002, "loss": 1.5383, "step": 244280 }, { "epoch": 0.99, "grad_norm": 3.8378286361694336, "learning_rate": 0.0002, "loss": 1.6694, "step": 244290 }, { "epoch": 0.99, "grad_norm": 2.30263090133667, "learning_rate": 0.0002, "loss": 1.5695, "step": 244300 }, { "epoch": 0.99, "grad_norm": 2.9800243377685547, "learning_rate": 0.0002, "loss": 1.4718, "step": 244310 }, { "epoch": 0.99, "grad_norm": 5.685245513916016, "learning_rate": 0.0002, "loss": 1.2838, "step": 244320 }, { "epoch": 0.99, "grad_norm": 5.42698335647583, "learning_rate": 0.0002, "loss": 1.6004, "step": 244330 }, { "epoch": 0.99, "grad_norm": 2.9139091968536377, "learning_rate": 0.0002, "loss": 1.7159, "step": 244340 }, { "epoch": 0.99, "grad_norm": 3.9798433780670166, "learning_rate": 0.0002, "loss": 1.7783, "step": 244350 }, { "epoch": 0.99, "grad_norm": 3.1908445358276367, "learning_rate": 0.0002, "loss": 1.6016, "step": 244360 }, { "epoch": 0.99, "grad_norm": 2.538459062576294, "learning_rate": 0.0002, "loss": 1.6004, "step": 244370 }, { "epoch": 0.99, "grad_norm": 2.8286943435668945, "learning_rate": 0.0002, "loss": 1.6489, "step": 244380 }, { "epoch": 0.99, "grad_norm": 3.032560348510742, "learning_rate": 0.0002, "loss": 1.7384, "step": 244390 }, { "epoch": 0.99, "grad_norm": 2.682147741317749, "learning_rate": 0.0002, "loss": 1.6677, "step": 244400 }, { "epoch": 0.99, "grad_norm": 1.7346981763839722, "learning_rate": 0.0002, "loss": 1.4556, "step": 244410 }, { "epoch": 1.0, "grad_norm": 2.684082508087158, "learning_rate": 0.0002, "loss": 1.6997, "step": 244420 }, { "epoch": 1.0, "grad_norm": 3.5686070919036865, "learning_rate": 0.0002, "loss": 1.5722, "step": 244430 }, { "epoch": 1.0, "grad_norm": 2.0124404430389404, "learning_rate": 0.0002, "loss": 1.7791, "step": 244440 }, { "epoch": 1.0, "grad_norm": 2.0922632217407227, "learning_rate": 0.0002, "loss": 1.7712, "step": 244450 }, { "epoch": 1.0, "grad_norm": 1.429547905921936, "learning_rate": 0.0002, "loss": 1.4179, "step": 244460 }, { "epoch": 1.0, "grad_norm": 1.9236055612564087, "learning_rate": 0.0002, "loss": 1.4407, "step": 244470 }, { "epoch": 1.0, "grad_norm": 3.865070104598999, "learning_rate": 0.0002, "loss": 1.4887, "step": 244480 }, { "epoch": 1.0, "grad_norm": 3.592982530593872, "learning_rate": 0.0002, "loss": 1.4859, "step": 244490 }, { "epoch": 1.0, "grad_norm": 3.1790852546691895, "learning_rate": 0.0002, "loss": 1.5601, "step": 244500 }, { "epoch": 1.0, "grad_norm": 4.336814880371094, "learning_rate": 0.0002, "loss": 1.6797, "step": 244510 }, { "epoch": 1.0, "grad_norm": 4.405966758728027, "learning_rate": 0.0002, "loss": 1.4241, "step": 244520 }, { "epoch": 1.0, "grad_norm": 3.237424850463867, "learning_rate": 0.0002, "loss": 1.5647, "step": 244530 }, { "epoch": 1.0, "grad_norm": 2.328672409057617, "learning_rate": 0.0002, "loss": 1.4451, "step": 244540 }, { "epoch": 1.0, "grad_norm": 2.9702155590057373, "learning_rate": 0.0002, "loss": 1.4959, "step": 244550 }, { "epoch": 1.0, "grad_norm": 3.468751907348633, "learning_rate": 0.0002, "loss": 1.5561, "step": 244560 }, { "epoch": 1.0, "grad_norm": 3.198326349258423, "learning_rate": 0.0002, "loss": 1.6522, "step": 244570 }, { "epoch": 1.0, "grad_norm": 3.7797622680664062, "learning_rate": 0.0002, "loss": 1.8258, "step": 244580 }, { "epoch": 1.0, "grad_norm": 2.3384509086608887, "learning_rate": 0.0002, "loss": 1.6414, "step": 244590 }, { "epoch": 1.0, "grad_norm": 2.676342248916626, "learning_rate": 0.0002, "loss": 1.5221, "step": 244600 }, { "epoch": 1.0, "grad_norm": 2.24489688873291, "learning_rate": 0.0002, "loss": 1.6593, "step": 244610 }, { "epoch": 1.0, "grad_norm": 3.067528486251831, "learning_rate": 0.0002, "loss": 1.5503, "step": 244620 }, { "epoch": 1.0, "grad_norm": 3.13175368309021, "learning_rate": 0.0002, "loss": 1.4515, "step": 244630 }, { "epoch": 1.0, "grad_norm": 3.50399112701416, "learning_rate": 0.0002, "loss": 1.5954, "step": 244640 }, { "epoch": 1.0, "grad_norm": 3.1509954929351807, "learning_rate": 0.0002, "loss": 1.5476, "step": 244650 }, { "epoch": 1.0, "grad_norm": 2.36095929145813, "learning_rate": 0.0002, "loss": 1.5583, "step": 244660 }, { "epoch": 1.0, "grad_norm": 2.441993474960327, "learning_rate": 0.0002, "loss": 1.5232, "step": 244670 }, { "epoch": 1.0, "grad_norm": 3.3489956855773926, "learning_rate": 0.0002, "loss": 1.9057, "step": 244680 }, { "epoch": 1.0, "grad_norm": 3.410827875137329, "learning_rate": 0.0002, "loss": 1.5175, "step": 244690 }, { "epoch": 1.0, "grad_norm": 2.7174899578094482, "learning_rate": 0.0002, "loss": 1.6268, "step": 244700 }, { "epoch": 1.0, "grad_norm": 2.894526720046997, "learning_rate": 0.0002, "loss": 1.5356, "step": 244710 }, { "epoch": 1.0, "grad_norm": 2.4146316051483154, "learning_rate": 0.0002, "loss": 1.5549, "step": 244720 }, { "epoch": 1.0, "grad_norm": 3.145439386367798, "learning_rate": 0.0002, "loss": 1.6, "step": 244730 }, { "epoch": 1.0, "grad_norm": 4.319880485534668, "learning_rate": 0.0002, "loss": 1.5793, "step": 244740 }, { "epoch": 1.0, "grad_norm": 2.6512832641601562, "learning_rate": 0.0002, "loss": 1.6034, "step": 244750 }, { "epoch": 1.0, "grad_norm": 2.697033405303955, "learning_rate": 0.0002, "loss": 1.6776, "step": 244760 }, { "epoch": 1.0, "grad_norm": 2.353311777114868, "learning_rate": 0.0002, "loss": 1.6301, "step": 244770 }, { "epoch": 1.0, "grad_norm": 3.680372714996338, "learning_rate": 0.0002, "loss": 1.4566, "step": 244780 }, { "epoch": 1.0, "grad_norm": 2.3230230808258057, "learning_rate": 0.0002, "loss": 1.6008, "step": 244790 }, { "epoch": 1.0, "grad_norm": 3.2032322883605957, "learning_rate": 0.0002, "loss": 1.6243, "step": 244800 }, { "epoch": 1.0, "grad_norm": 3.107987403869629, "learning_rate": 0.0002, "loss": 1.3261, "step": 244810 }, { "epoch": 1.0, "grad_norm": 3.7025251388549805, "learning_rate": 0.0002, "loss": 1.4786, "step": 244820 }, { "epoch": 1.0, "grad_norm": 2.6327977180480957, "learning_rate": 0.0002, "loss": 1.4533, "step": 244830 }, { "epoch": 1.0, "grad_norm": 2.6017539501190186, "learning_rate": 0.0002, "loss": 1.7811, "step": 244840 }, { "epoch": 1.0, "grad_norm": 2.3596131801605225, "learning_rate": 0.0002, "loss": 1.6597, "step": 244850 }, { "epoch": 1.0, "grad_norm": 3.4555764198303223, "learning_rate": 0.0002, "loss": 1.7201, "step": 244860 }, { "epoch": 1.0, "grad_norm": 2.4398090839385986, "learning_rate": 0.0002, "loss": 1.6723, "step": 244870 }, { "epoch": 1.0, "grad_norm": 2.734222412109375, "learning_rate": 0.0002, "loss": 1.5525, "step": 244880 }, { "epoch": 1.0, "grad_norm": 1.9581294059753418, "learning_rate": 0.0002, "loss": 1.6944, "step": 244890 }, { "epoch": 1.0, "grad_norm": 2.3875772953033447, "learning_rate": 0.0002, "loss": 1.527, "step": 244900 }, { "epoch": 1.0, "grad_norm": 3.145606279373169, "learning_rate": 0.0002, "loss": 1.49, "step": 244910 }, { "epoch": 1.0, "grad_norm": 2.191987991333008, "learning_rate": 0.0002, "loss": 1.6719, "step": 244920 }, { "epoch": 1.0, "grad_norm": 2.7213590145111084, "learning_rate": 0.0002, "loss": 1.5895, "step": 244930 }, { "epoch": 1.0, "grad_norm": 4.851306438446045, "learning_rate": 0.0002, "loss": 1.6548, "step": 244940 }, { "epoch": 1.0, "grad_norm": 3.406804084777832, "learning_rate": 0.0002, "loss": 1.5254, "step": 244950 }, { "epoch": 1.0, "grad_norm": 2.262359619140625, "learning_rate": 0.0002, "loss": 1.4889, "step": 244960 }, { "epoch": 1.0, "grad_norm": 2.123664379119873, "learning_rate": 0.0002, "loss": 1.8391, "step": 244970 }, { "epoch": 1.0, "grad_norm": 2.5774433612823486, "learning_rate": 0.0002, "loss": 1.6786, "step": 244980 }, { "epoch": 1.0, "grad_norm": 1.921750783920288, "learning_rate": 0.0002, "loss": 1.6017, "step": 244990 }, { "epoch": 1.0, "grad_norm": 3.3813724517822266, "learning_rate": 0.0002, "loss": 1.419, "step": 245000 } ], "logging_steps": 10, "max_steps": 245644, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 2.753606083342762e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }