{ "best_metric": 0.7407407407407407, "best_model_checkpoint": "vvt-gs-rot-flip-wtoken-f198-4.4-h768-t8.16.16\\checkpoint-5500", "epoch": 1.0001818181818183, "eval_steps": 500, "global_step": 5501, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018181818181818182, "grad_norm": 5.903317928314209, "learning_rate": 9.09090909090909e-07, "loss": 1.0691, "step": 10 }, { "epoch": 0.0036363636363636364, "grad_norm": 11.508553504943848, "learning_rate": 1.818181818181818e-06, "loss": 1.1499, "step": 20 }, { "epoch": 0.005454545454545455, "grad_norm": 9.509793281555176, "learning_rate": 2.7272727272727272e-06, "loss": 1.0829, "step": 30 }, { "epoch": 0.007272727272727273, "grad_norm": 18.586177825927734, "learning_rate": 3.636363636363636e-06, "loss": 1.1101, "step": 40 }, { "epoch": 0.00909090909090909, "grad_norm": 10.552851676940918, "learning_rate": 4.5454545454545455e-06, "loss": 1.065, "step": 50 }, { "epoch": 0.01090909090909091, "grad_norm": 10.60718822479248, "learning_rate": 5.4545454545454545e-06, "loss": 1.127, "step": 60 }, { "epoch": 0.012727272727272728, "grad_norm": 5.415429592132568, "learning_rate": 6.363636363636363e-06, "loss": 1.0046, "step": 70 }, { "epoch": 0.014545454545454545, "grad_norm": 7.580716133117676, "learning_rate": 7.272727272727272e-06, "loss": 1.1022, "step": 80 }, { "epoch": 0.016363636363636365, "grad_norm": 34.73521041870117, "learning_rate": 8.181818181818183e-06, "loss": 1.0765, "step": 90 }, { "epoch": 0.01818181818181818, "grad_norm": 18.438007354736328, "learning_rate": 9.090909090909091e-06, "loss": 1.0189, "step": 100 }, { "epoch": 0.02, "grad_norm": 54.06311798095703, "learning_rate": 1e-05, "loss": 1.1794, "step": 110 }, { "epoch": 0.02181818181818182, "grad_norm": 7.959690570831299, "learning_rate": 1.0909090909090909e-05, "loss": 1.1737, "step": 120 }, { "epoch": 0.023636363636363636, "grad_norm": 33.0672492980957, "learning_rate": 1.1818181818181819e-05, "loss": 1.117, "step": 130 }, { "epoch": 0.025454545454545455, "grad_norm": 20.22812271118164, "learning_rate": 1.2727272727272727e-05, "loss": 1.1634, "step": 140 }, { "epoch": 0.02727272727272727, "grad_norm": 27.637243270874023, "learning_rate": 1.3636363636363637e-05, "loss": 1.0183, "step": 150 }, { "epoch": 0.02909090909090909, "grad_norm": 16.066158294677734, "learning_rate": 1.4545454545454545e-05, "loss": 1.2349, "step": 160 }, { "epoch": 0.03090909090909091, "grad_norm": 28.06402015686035, "learning_rate": 1.5454545454545454e-05, "loss": 1.0342, "step": 170 }, { "epoch": 0.03272727272727273, "grad_norm": 11.528419494628906, "learning_rate": 1.6363636363636366e-05, "loss": 1.2609, "step": 180 }, { "epoch": 0.034545454545454546, "grad_norm": 14.250886917114258, "learning_rate": 1.7272727272727274e-05, "loss": 1.1208, "step": 190 }, { "epoch": 0.03636363636363636, "grad_norm": 9.647354125976562, "learning_rate": 1.8181818181818182e-05, "loss": 1.1326, "step": 200 }, { "epoch": 0.038181818181818185, "grad_norm": 9.057283401489258, "learning_rate": 1.9090909090909094e-05, "loss": 1.1233, "step": 210 }, { "epoch": 0.04, "grad_norm": 6.013422966003418, "learning_rate": 2e-05, "loss": 1.0266, "step": 220 }, { "epoch": 0.04018181818181818, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.0405620336532593, "eval_runtime": 193.3467, "eval_samples_per_second": 0.978, "eval_steps_per_second": 0.248, "step": 221 }, { "epoch": 1.0016363636363637, "grad_norm": 35.22421646118164, "learning_rate": 2.090909090909091e-05, "loss": 1.1156, "step": 230 }, { "epoch": 1.0034545454545454, "grad_norm": 11.300332069396973, "learning_rate": 2.1818181818181818e-05, "loss": 1.1326, "step": 240 }, { "epoch": 1.0052727272727273, "grad_norm": 16.684968948364258, "learning_rate": 2.272727272727273e-05, "loss": 1.1189, "step": 250 }, { "epoch": 1.007090909090909, "grad_norm": 16.38736915588379, "learning_rate": 2.3636363636363637e-05, "loss": 1.1015, "step": 260 }, { "epoch": 1.008909090909091, "grad_norm": 22.911460876464844, "learning_rate": 2.4545454545454545e-05, "loss": 1.0275, "step": 270 }, { "epoch": 1.0107272727272727, "grad_norm": 18.65599250793457, "learning_rate": 2.5454545454545454e-05, "loss": 1.1375, "step": 280 }, { "epoch": 1.0125454545454546, "grad_norm": 15.462641716003418, "learning_rate": 2.636363636363636e-05, "loss": 0.9, "step": 290 }, { "epoch": 1.0143636363636364, "grad_norm": 38.87071990966797, "learning_rate": 2.7272727272727273e-05, "loss": 1.226, "step": 300 }, { "epoch": 1.016181818181818, "grad_norm": 27.41946029663086, "learning_rate": 2.818181818181818e-05, "loss": 1.1471, "step": 310 }, { "epoch": 1.018, "grad_norm": 14.629090309143066, "learning_rate": 2.909090909090909e-05, "loss": 1.1017, "step": 320 }, { "epoch": 1.0198181818181817, "grad_norm": 11.047767639160156, "learning_rate": 3e-05, "loss": 1.155, "step": 330 }, { "epoch": 1.0216363636363637, "grad_norm": 48.5820198059082, "learning_rate": 3.090909090909091e-05, "loss": 1.0182, "step": 340 }, { "epoch": 1.0234545454545454, "grad_norm": 29.4666805267334, "learning_rate": 3.181818181818182e-05, "loss": 1.0224, "step": 350 }, { "epoch": 1.0252727272727273, "grad_norm": 23.09134292602539, "learning_rate": 3.272727272727273e-05, "loss": 0.8628, "step": 360 }, { "epoch": 1.027090909090909, "grad_norm": 35.83842849731445, "learning_rate": 3.3636363636363636e-05, "loss": 1.0581, "step": 370 }, { "epoch": 1.028909090909091, "grad_norm": 11.075454711914062, "learning_rate": 3.454545454545455e-05, "loss": 1.1639, "step": 380 }, { "epoch": 1.0307272727272727, "grad_norm": 16.66240119934082, "learning_rate": 3.545454545454546e-05, "loss": 1.1218, "step": 390 }, { "epoch": 1.0325454545454544, "grad_norm": 18.380191802978516, "learning_rate": 3.6363636363636364e-05, "loss": 0.9699, "step": 400 }, { "epoch": 1.0343636363636364, "grad_norm": 105.08155822753906, "learning_rate": 3.7272727272727276e-05, "loss": 1.1986, "step": 410 }, { "epoch": 1.036181818181818, "grad_norm": 15.301504135131836, "learning_rate": 3.818181818181819e-05, "loss": 1.3065, "step": 420 }, { "epoch": 1.038, "grad_norm": 11.040570259094238, "learning_rate": 3.909090909090909e-05, "loss": 1.1796, "step": 430 }, { "epoch": 1.0398181818181818, "grad_norm": 20.411191940307617, "learning_rate": 4e-05, "loss": 1.0928, "step": 440 }, { "epoch": 1.040181818181818, "eval_accuracy": 0.5291005291005291, "eval_loss": 0.9597143530845642, "eval_runtime": 200.8753, "eval_samples_per_second": 0.941, "eval_steps_per_second": 0.239, "step": 442 }, { "epoch": 2.0014545454545454, "grad_norm": 58.89450454711914, "learning_rate": 4.0909090909090915e-05, "loss": 0.9651, "step": 450 }, { "epoch": 2.0032727272727273, "grad_norm": 17.683551788330078, "learning_rate": 4.181818181818182e-05, "loss": 1.1622, "step": 460 }, { "epoch": 2.0050909090909093, "grad_norm": 16.6800537109375, "learning_rate": 4.2727272727272724e-05, "loss": 1.0687, "step": 470 }, { "epoch": 2.0069090909090908, "grad_norm": 19.49321174621582, "learning_rate": 4.3636363636363636e-05, "loss": 1.0215, "step": 480 }, { "epoch": 2.0087272727272727, "grad_norm": 11.021658897399902, "learning_rate": 4.454545454545455e-05, "loss": 1.0232, "step": 490 }, { "epoch": 2.0105454545454546, "grad_norm": 41.21800994873047, "learning_rate": 4.545454545454546e-05, "loss": 1.0014, "step": 500 }, { "epoch": 2.0123636363636366, "grad_norm": 7.544427871704102, "learning_rate": 4.636363636363636e-05, "loss": 0.844, "step": 510 }, { "epoch": 2.014181818181818, "grad_norm": 41.244773864746094, "learning_rate": 4.7272727272727275e-05, "loss": 1.2135, "step": 520 }, { "epoch": 2.016, "grad_norm": 7.405851364135742, "learning_rate": 4.8181818181818186e-05, "loss": 1.1618, "step": 530 }, { "epoch": 2.017818181818182, "grad_norm": 11.462644577026367, "learning_rate": 4.909090909090909e-05, "loss": 1.0918, "step": 540 }, { "epoch": 2.0196363636363635, "grad_norm": 13.532742500305176, "learning_rate": 5e-05, "loss": 1.0293, "step": 550 }, { "epoch": 2.0214545454545454, "grad_norm": 21.01549530029297, "learning_rate": 4.98989898989899e-05, "loss": 1.0992, "step": 560 }, { "epoch": 2.0232727272727273, "grad_norm": 8.411489486694336, "learning_rate": 4.97979797979798e-05, "loss": 1.0189, "step": 570 }, { "epoch": 2.0250909090909093, "grad_norm": 11.137345314025879, "learning_rate": 4.9696969696969694e-05, "loss": 1.0663, "step": 580 }, { "epoch": 2.0269090909090908, "grad_norm": 30.53400230407715, "learning_rate": 4.9595959595959594e-05, "loss": 1.0683, "step": 590 }, { "epoch": 2.0287272727272727, "grad_norm": 47.95043182373047, "learning_rate": 4.94949494949495e-05, "loss": 0.9578, "step": 600 }, { "epoch": 2.0305454545454547, "grad_norm": 13.816718101501465, "learning_rate": 4.93939393939394e-05, "loss": 1.0128, "step": 610 }, { "epoch": 2.032363636363636, "grad_norm": 40.12334442138672, "learning_rate": 4.92929292929293e-05, "loss": 1.084, "step": 620 }, { "epoch": 2.034181818181818, "grad_norm": 48.186866760253906, "learning_rate": 4.919191919191919e-05, "loss": 0.9239, "step": 630 }, { "epoch": 2.036, "grad_norm": 35.39604949951172, "learning_rate": 4.909090909090909e-05, "loss": 1.0707, "step": 640 }, { "epoch": 2.037818181818182, "grad_norm": 16.182966232299805, "learning_rate": 4.898989898989899e-05, "loss": 1.2235, "step": 650 }, { "epoch": 2.0396363636363635, "grad_norm": 9.64621639251709, "learning_rate": 4.888888888888889e-05, "loss": 0.862, "step": 660 }, { "epoch": 2.0401818181818183, "eval_accuracy": 0.4708994708994709, "eval_loss": 0.9646209478378296, "eval_runtime": 201.5162, "eval_samples_per_second": 0.938, "eval_steps_per_second": 0.238, "step": 663 }, { "epoch": 3.001272727272727, "grad_norm": 16.516555786132812, "learning_rate": 4.878787878787879e-05, "loss": 0.9561, "step": 670 }, { "epoch": 3.003090909090909, "grad_norm": 13.819075584411621, "learning_rate": 4.868686868686869e-05, "loss": 1.0881, "step": 680 }, { "epoch": 3.004909090909091, "grad_norm": 10.121333122253418, "learning_rate": 4.858585858585859e-05, "loss": 1.0883, "step": 690 }, { "epoch": 3.006727272727273, "grad_norm": 12.964982032775879, "learning_rate": 4.848484848484849e-05, "loss": 1.0234, "step": 700 }, { "epoch": 3.0085454545454544, "grad_norm": 9.034525871276855, "learning_rate": 4.838383838383839e-05, "loss": 0.8553, "step": 710 }, { "epoch": 3.0103636363636364, "grad_norm": 6.48071813583374, "learning_rate": 4.828282828282829e-05, "loss": 1.3251, "step": 720 }, { "epoch": 3.0121818181818183, "grad_norm": 12.058953285217285, "learning_rate": 4.8181818181818186e-05, "loss": 1.05, "step": 730 }, { "epoch": 3.014, "grad_norm": 16.26006317138672, "learning_rate": 4.808080808080808e-05, "loss": 1.0792, "step": 740 }, { "epoch": 3.0158181818181817, "grad_norm": 63.56451416015625, "learning_rate": 4.797979797979798e-05, "loss": 0.9569, "step": 750 }, { "epoch": 3.0176363636363637, "grad_norm": 27.69994354248047, "learning_rate": 4.787878787878788e-05, "loss": 1.0634, "step": 760 }, { "epoch": 3.0194545454545456, "grad_norm": 20.30846405029297, "learning_rate": 4.7777777777777784e-05, "loss": 1.1572, "step": 770 }, { "epoch": 3.021272727272727, "grad_norm": 16.615659713745117, "learning_rate": 4.7676767676767684e-05, "loss": 1.0451, "step": 780 }, { "epoch": 3.023090909090909, "grad_norm": 4.919287204742432, "learning_rate": 4.7575757575757576e-05, "loss": 1.1037, "step": 790 }, { "epoch": 3.024909090909091, "grad_norm": 8.121920585632324, "learning_rate": 4.7474747474747476e-05, "loss": 0.8158, "step": 800 }, { "epoch": 3.026727272727273, "grad_norm": 22.583158493041992, "learning_rate": 4.7373737373737375e-05, "loss": 0.8648, "step": 810 }, { "epoch": 3.0285454545454544, "grad_norm": 55.47071075439453, "learning_rate": 4.7272727272727275e-05, "loss": 0.8976, "step": 820 }, { "epoch": 3.0303636363636364, "grad_norm": 12.427018165588379, "learning_rate": 4.7171717171717174e-05, "loss": 1.1084, "step": 830 }, { "epoch": 3.0321818181818183, "grad_norm": 7.498060703277588, "learning_rate": 4.7070707070707074e-05, "loss": 0.9779, "step": 840 }, { "epoch": 3.034, "grad_norm": 12.233386039733887, "learning_rate": 4.696969696969697e-05, "loss": 0.9867, "step": 850 }, { "epoch": 3.0358181818181817, "grad_norm": 14.040285110473633, "learning_rate": 4.686868686868687e-05, "loss": 0.9304, "step": 860 }, { "epoch": 3.0376363636363637, "grad_norm": 12.534895896911621, "learning_rate": 4.676767676767677e-05, "loss": 0.8739, "step": 870 }, { "epoch": 3.0394545454545456, "grad_norm": 47.45646286010742, "learning_rate": 4.666666666666667e-05, "loss": 0.9291, "step": 880 }, { "epoch": 3.0401818181818183, "eval_accuracy": 0.48677248677248675, "eval_loss": 0.9938625693321228, "eval_runtime": 203.337, "eval_samples_per_second": 0.929, "eval_steps_per_second": 0.236, "step": 884 }, { "epoch": 4.001090909090909, "grad_norm": 33.868717193603516, "learning_rate": 4.656565656565657e-05, "loss": 1.0813, "step": 890 }, { "epoch": 4.002909090909091, "grad_norm": 8.45543098449707, "learning_rate": 4.6464646464646464e-05, "loss": 1.0566, "step": 900 }, { "epoch": 4.004727272727273, "grad_norm": 13.185288429260254, "learning_rate": 4.636363636363636e-05, "loss": 1.1579, "step": 910 }, { "epoch": 4.006545454545455, "grad_norm": 11.203399658203125, "learning_rate": 4.626262626262626e-05, "loss": 0.8934, "step": 920 }, { "epoch": 4.008363636363637, "grad_norm": 36.20549774169922, "learning_rate": 4.616161616161616e-05, "loss": 1.1601, "step": 930 }, { "epoch": 4.0101818181818185, "grad_norm": 69.12691497802734, "learning_rate": 4.606060606060607e-05, "loss": 0.9967, "step": 940 }, { "epoch": 4.012, "grad_norm": 16.70299530029297, "learning_rate": 4.595959595959596e-05, "loss": 1.0471, "step": 950 }, { "epoch": 4.0138181818181815, "grad_norm": 87.73255157470703, "learning_rate": 4.585858585858586e-05, "loss": 1.1211, "step": 960 }, { "epoch": 4.0156363636363634, "grad_norm": 201.55203247070312, "learning_rate": 4.575757575757576e-05, "loss": 1.1687, "step": 970 }, { "epoch": 4.017454545454545, "grad_norm": 33.13986587524414, "learning_rate": 4.565656565656566e-05, "loss": 0.9516, "step": 980 }, { "epoch": 4.019272727272727, "grad_norm": 9.230415344238281, "learning_rate": 4.555555555555556e-05, "loss": 1.0018, "step": 990 }, { "epoch": 4.021090909090909, "grad_norm": 77.70652770996094, "learning_rate": 4.545454545454546e-05, "loss": 0.9895, "step": 1000 }, { "epoch": 4.022909090909091, "grad_norm": 145.90219116210938, "learning_rate": 4.535353535353535e-05, "loss": 1.2767, "step": 1010 }, { "epoch": 4.024727272727273, "grad_norm": 11.502787590026855, "learning_rate": 4.525252525252526e-05, "loss": 0.8943, "step": 1020 }, { "epoch": 4.026545454545454, "grad_norm": 60.249473571777344, "learning_rate": 4.515151515151516e-05, "loss": 1.2685, "step": 1030 }, { "epoch": 4.028363636363636, "grad_norm": 4.921321868896484, "learning_rate": 4.5050505050505056e-05, "loss": 1.0308, "step": 1040 }, { "epoch": 4.030181818181818, "grad_norm": 38.16105651855469, "learning_rate": 4.494949494949495e-05, "loss": 0.9882, "step": 1050 }, { "epoch": 4.032, "grad_norm": 10.788283348083496, "learning_rate": 4.484848484848485e-05, "loss": 0.9997, "step": 1060 }, { "epoch": 4.033818181818182, "grad_norm": 9.961264610290527, "learning_rate": 4.474747474747475e-05, "loss": 0.8205, "step": 1070 }, { "epoch": 4.035636363636364, "grad_norm": 18.4578914642334, "learning_rate": 4.464646464646465e-05, "loss": 1.0358, "step": 1080 }, { "epoch": 4.037454545454546, "grad_norm": 9.838109970092773, "learning_rate": 4.454545454545455e-05, "loss": 1.0122, "step": 1090 }, { "epoch": 4.039272727272727, "grad_norm": 4.6190996170043945, "learning_rate": 4.4444444444444447e-05, "loss": 0.8705, "step": 1100 }, { "epoch": 4.040181818181818, "eval_accuracy": 0.5502645502645502, "eval_loss": 1.0091381072998047, "eval_runtime": 192.457, "eval_samples_per_second": 0.982, "eval_steps_per_second": 0.249, "step": 1105 }, { "epoch": 5.000909090909091, "grad_norm": 2.803194284439087, "learning_rate": 4.4343434343434346e-05, "loss": 1.1634, "step": 1110 }, { "epoch": 5.002727272727273, "grad_norm": 12.49787712097168, "learning_rate": 4.4242424242424246e-05, "loss": 1.2069, "step": 1120 }, { "epoch": 5.004545454545455, "grad_norm": 34.8055305480957, "learning_rate": 4.4141414141414145e-05, "loss": 1.0508, "step": 1130 }, { "epoch": 5.006363636363637, "grad_norm": 105.75518035888672, "learning_rate": 4.4040404040404044e-05, "loss": 0.9281, "step": 1140 }, { "epoch": 5.008181818181818, "grad_norm": 11.095450401306152, "learning_rate": 4.3939393939393944e-05, "loss": 1.0998, "step": 1150 }, { "epoch": 5.01, "grad_norm": 12.41756534576416, "learning_rate": 4.383838383838384e-05, "loss": 1.0209, "step": 1160 }, { "epoch": 5.011818181818182, "grad_norm": 9.809650421142578, "learning_rate": 4.3737373737373736e-05, "loss": 0.8645, "step": 1170 }, { "epoch": 5.013636363636364, "grad_norm": 4.656230926513672, "learning_rate": 4.3636363636363636e-05, "loss": 0.9903, "step": 1180 }, { "epoch": 5.015454545454546, "grad_norm": 10.687009811401367, "learning_rate": 4.3535353535353535e-05, "loss": 0.9723, "step": 1190 }, { "epoch": 5.0172727272727276, "grad_norm": 55.024658203125, "learning_rate": 4.343434343434344e-05, "loss": 0.8759, "step": 1200 }, { "epoch": 5.0190909090909095, "grad_norm": 27.996280670166016, "learning_rate": 4.3333333333333334e-05, "loss": 0.9792, "step": 1210 }, { "epoch": 5.0209090909090905, "grad_norm": 9.637792587280273, "learning_rate": 4.3232323232323234e-05, "loss": 0.9309, "step": 1220 }, { "epoch": 5.0227272727272725, "grad_norm": 7.562610626220703, "learning_rate": 4.313131313131313e-05, "loss": 0.9194, "step": 1230 }, { "epoch": 5.024545454545454, "grad_norm": 22.949871063232422, "learning_rate": 4.303030303030303e-05, "loss": 1.0565, "step": 1240 }, { "epoch": 5.026363636363636, "grad_norm": 18.67213249206543, "learning_rate": 4.292929292929293e-05, "loss": 0.8991, "step": 1250 }, { "epoch": 5.028181818181818, "grad_norm": 9.366035461425781, "learning_rate": 4.282828282828283e-05, "loss": 0.9728, "step": 1260 }, { "epoch": 5.03, "grad_norm": 21.2210693359375, "learning_rate": 4.2727272727272724e-05, "loss": 1.0556, "step": 1270 }, { "epoch": 5.031818181818182, "grad_norm": 19.24120330810547, "learning_rate": 4.262626262626263e-05, "loss": 0.9918, "step": 1280 }, { "epoch": 5.033636363636363, "grad_norm": 45.28664779663086, "learning_rate": 4.252525252525253e-05, "loss": 1.1574, "step": 1290 }, { "epoch": 5.035454545454545, "grad_norm": 47.121604919433594, "learning_rate": 4.242424242424243e-05, "loss": 0.8701, "step": 1300 }, { "epoch": 5.037272727272727, "grad_norm": 22.343584060668945, "learning_rate": 4.232323232323233e-05, "loss": 0.8461, "step": 1310 }, { "epoch": 5.039090909090909, "grad_norm": 11.974569320678711, "learning_rate": 4.222222222222222e-05, "loss": 0.9667, "step": 1320 }, { "epoch": 5.040181818181818, "eval_accuracy": 0.5555555555555556, "eval_loss": 0.9415688514709473, "eval_runtime": 192.3499, "eval_samples_per_second": 0.983, "eval_steps_per_second": 0.25, "step": 1326 }, { "epoch": 6.000727272727273, "grad_norm": 14.801665306091309, "learning_rate": 4.212121212121212e-05, "loss": 0.9938, "step": 1330 }, { "epoch": 6.002545454545454, "grad_norm": 4.720005989074707, "learning_rate": 4.202020202020202e-05, "loss": 0.9117, "step": 1340 }, { "epoch": 6.004363636363636, "grad_norm": 11.161520957946777, "learning_rate": 4.191919191919192e-05, "loss": 1.1576, "step": 1350 }, { "epoch": 6.006181818181818, "grad_norm": 50.02983474731445, "learning_rate": 4.181818181818182e-05, "loss": 0.9844, "step": 1360 }, { "epoch": 6.008, "grad_norm": 6.945914268493652, "learning_rate": 4.171717171717172e-05, "loss": 0.9107, "step": 1370 }, { "epoch": 6.009818181818182, "grad_norm": 12.48246955871582, "learning_rate": 4.161616161616162e-05, "loss": 0.9041, "step": 1380 }, { "epoch": 6.011636363636364, "grad_norm": 71.0044937133789, "learning_rate": 4.151515151515152e-05, "loss": 0.8598, "step": 1390 }, { "epoch": 6.013454545454546, "grad_norm": 6.990677833557129, "learning_rate": 4.141414141414142e-05, "loss": 1.0659, "step": 1400 }, { "epoch": 6.015272727272727, "grad_norm": 9.843575477600098, "learning_rate": 4.131313131313132e-05, "loss": 0.9873, "step": 1410 }, { "epoch": 6.017090909090909, "grad_norm": 4.533863067626953, "learning_rate": 4.1212121212121216e-05, "loss": 0.9079, "step": 1420 }, { "epoch": 6.018909090909091, "grad_norm": 33.53059005737305, "learning_rate": 4.111111111111111e-05, "loss": 0.7887, "step": 1430 }, { "epoch": 6.020727272727273, "grad_norm": 26.704490661621094, "learning_rate": 4.101010101010101e-05, "loss": 1.0126, "step": 1440 }, { "epoch": 6.022545454545455, "grad_norm": 24.7235107421875, "learning_rate": 4.0909090909090915e-05, "loss": 0.8291, "step": 1450 }, { "epoch": 6.024363636363637, "grad_norm": 13.622713088989258, "learning_rate": 4.0808080808080814e-05, "loss": 1.0642, "step": 1460 }, { "epoch": 6.0261818181818185, "grad_norm": 3.207956314086914, "learning_rate": 4.070707070707071e-05, "loss": 0.8796, "step": 1470 }, { "epoch": 6.028, "grad_norm": 7.998806953430176, "learning_rate": 4.0606060606060606e-05, "loss": 0.8937, "step": 1480 }, { "epoch": 6.0298181818181815, "grad_norm": 8.217198371887207, "learning_rate": 4.0505050505050506e-05, "loss": 0.9417, "step": 1490 }, { "epoch": 6.0316363636363635, "grad_norm": 11.666306495666504, "learning_rate": 4.0404040404040405e-05, "loss": 0.945, "step": 1500 }, { "epoch": 6.033454545454545, "grad_norm": 7.258649826049805, "learning_rate": 4.0303030303030305e-05, "loss": 0.898, "step": 1510 }, { "epoch": 6.035272727272727, "grad_norm": 33.963783264160156, "learning_rate": 4.0202020202020204e-05, "loss": 1.1643, "step": 1520 }, { "epoch": 6.037090909090909, "grad_norm": 20.44380760192871, "learning_rate": 4.01010101010101e-05, "loss": 0.9079, "step": 1530 }, { "epoch": 6.038909090909091, "grad_norm": 47.127071380615234, "learning_rate": 4e-05, "loss": 0.9227, "step": 1540 }, { "epoch": 6.040181818181818, "eval_accuracy": 0.582010582010582, "eval_loss": 0.8646734356880188, "eval_runtime": 188.7776, "eval_samples_per_second": 1.001, "eval_steps_per_second": 0.254, "step": 1547 }, { "epoch": 7.000545454545454, "grad_norm": 23.793865203857422, "learning_rate": 3.98989898989899e-05, "loss": 0.9091, "step": 1550 }, { "epoch": 7.002363636363636, "grad_norm": 97.33353424072266, "learning_rate": 3.97979797979798e-05, "loss": 1.0564, "step": 1560 }, { "epoch": 7.004181818181818, "grad_norm": 15.528971672058105, "learning_rate": 3.96969696969697e-05, "loss": 0.9467, "step": 1570 }, { "epoch": 7.006, "grad_norm": 13.52229118347168, "learning_rate": 3.9595959595959594e-05, "loss": 0.9877, "step": 1580 }, { "epoch": 7.007818181818182, "grad_norm": 9.012044906616211, "learning_rate": 3.9494949494949494e-05, "loss": 0.9771, "step": 1590 }, { "epoch": 7.009636363636363, "grad_norm": 29.697484970092773, "learning_rate": 3.939393939393939e-05, "loss": 0.8836, "step": 1600 }, { "epoch": 7.011454545454545, "grad_norm": 13.41483211517334, "learning_rate": 3.929292929292929e-05, "loss": 1.3449, "step": 1610 }, { "epoch": 7.013272727272727, "grad_norm": 8.07792854309082, "learning_rate": 3.91919191919192e-05, "loss": 0.9811, "step": 1620 }, { "epoch": 7.015090909090909, "grad_norm": 10.059922218322754, "learning_rate": 3.909090909090909e-05, "loss": 0.9815, "step": 1630 }, { "epoch": 7.016909090909091, "grad_norm": 18.550546646118164, "learning_rate": 3.898989898989899e-05, "loss": 0.9835, "step": 1640 }, { "epoch": 7.018727272727273, "grad_norm": 29.54568099975586, "learning_rate": 3.888888888888889e-05, "loss": 0.9226, "step": 1650 }, { "epoch": 7.020545454545455, "grad_norm": 48.8908805847168, "learning_rate": 3.878787878787879e-05, "loss": 0.9583, "step": 1660 }, { "epoch": 7.022363636363636, "grad_norm": 53.94483947753906, "learning_rate": 3.868686868686869e-05, "loss": 0.8708, "step": 1670 }, { "epoch": 7.024181818181818, "grad_norm": 7.722486972808838, "learning_rate": 3.858585858585859e-05, "loss": 0.9358, "step": 1680 }, { "epoch": 7.026, "grad_norm": 23.644926071166992, "learning_rate": 3.848484848484848e-05, "loss": 1.0788, "step": 1690 }, { "epoch": 7.027818181818182, "grad_norm": 7.95550537109375, "learning_rate": 3.838383838383838e-05, "loss": 0.9001, "step": 1700 }, { "epoch": 7.029636363636364, "grad_norm": 32.47743606567383, "learning_rate": 3.828282828282829e-05, "loss": 0.9419, "step": 1710 }, { "epoch": 7.031454545454546, "grad_norm": 28.465147018432617, "learning_rate": 3.818181818181819e-05, "loss": 0.8118, "step": 1720 }, { "epoch": 7.033272727272728, "grad_norm": 27.852588653564453, "learning_rate": 3.8080808080808087e-05, "loss": 0.9924, "step": 1730 }, { "epoch": 7.0350909090909095, "grad_norm": 9.697334289550781, "learning_rate": 3.797979797979798e-05, "loss": 0.9926, "step": 1740 }, { "epoch": 7.036909090909091, "grad_norm": 26.939453125, "learning_rate": 3.787878787878788e-05, "loss": 0.8222, "step": 1750 }, { "epoch": 7.0387272727272725, "grad_norm": 10.071974754333496, "learning_rate": 3.777777777777778e-05, "loss": 1.0569, "step": 1760 }, { "epoch": 7.040181818181818, "eval_accuracy": 0.4126984126984127, "eval_loss": 1.107624888420105, "eval_runtime": 198.1727, "eval_samples_per_second": 0.954, "eval_steps_per_second": 0.242, "step": 1768 }, { "epoch": 8.000363636363636, "grad_norm": 26.684202194213867, "learning_rate": 3.767676767676768e-05, "loss": 0.9988, "step": 1770 }, { "epoch": 8.002181818181818, "grad_norm": 18.15027618408203, "learning_rate": 3.757575757575758e-05, "loss": 1.0715, "step": 1780 }, { "epoch": 8.004, "grad_norm": 28.486724853515625, "learning_rate": 3.747474747474748e-05, "loss": 0.8121, "step": 1790 }, { "epoch": 8.005818181818182, "grad_norm": 303.9952392578125, "learning_rate": 3.7373737373737376e-05, "loss": 0.9878, "step": 1800 }, { "epoch": 8.007636363636363, "grad_norm": 14.011321067810059, "learning_rate": 3.7272727272727276e-05, "loss": 1.1017, "step": 1810 }, { "epoch": 8.009454545454545, "grad_norm": 22.236854553222656, "learning_rate": 3.7171717171717175e-05, "loss": 1.0964, "step": 1820 }, { "epoch": 8.011272727272727, "grad_norm": 10.44234561920166, "learning_rate": 3.7070707070707075e-05, "loss": 0.6585, "step": 1830 }, { "epoch": 8.01309090909091, "grad_norm": 8.875130653381348, "learning_rate": 3.6969696969696974e-05, "loss": 0.8471, "step": 1840 }, { "epoch": 8.014909090909091, "grad_norm": 10.0613374710083, "learning_rate": 3.686868686868687e-05, "loss": 0.9548, "step": 1850 }, { "epoch": 8.016727272727273, "grad_norm": 24.469505310058594, "learning_rate": 3.6767676767676766e-05, "loss": 1.2708, "step": 1860 }, { "epoch": 8.018545454545455, "grad_norm": 14.466684341430664, "learning_rate": 3.6666666666666666e-05, "loss": 1.0558, "step": 1870 }, { "epoch": 8.020363636363637, "grad_norm": 8.436592102050781, "learning_rate": 3.656565656565657e-05, "loss": 1.0655, "step": 1880 }, { "epoch": 8.022181818181819, "grad_norm": 11.807909965515137, "learning_rate": 3.6464646464646465e-05, "loss": 1.0226, "step": 1890 }, { "epoch": 8.024, "grad_norm": 7.324114799499512, "learning_rate": 3.6363636363636364e-05, "loss": 1.0769, "step": 1900 }, { "epoch": 8.025818181818181, "grad_norm": 7.894458293914795, "learning_rate": 3.6262626262626264e-05, "loss": 0.9414, "step": 1910 }, { "epoch": 8.027636363636363, "grad_norm": 9.577462196350098, "learning_rate": 3.616161616161616e-05, "loss": 0.9558, "step": 1920 }, { "epoch": 8.029454545454545, "grad_norm": 13.144161224365234, "learning_rate": 3.606060606060606e-05, "loss": 1.0216, "step": 1930 }, { "epoch": 8.031272727272727, "grad_norm": 6.9937357902526855, "learning_rate": 3.595959595959596e-05, "loss": 0.7935, "step": 1940 }, { "epoch": 8.033090909090909, "grad_norm": 27.100025177001953, "learning_rate": 3.5858585858585855e-05, "loss": 0.8731, "step": 1950 }, { "epoch": 8.03490909090909, "grad_norm": 71.13379669189453, "learning_rate": 3.575757575757576e-05, "loss": 0.9298, "step": 1960 }, { "epoch": 8.036727272727273, "grad_norm": 118.5480728149414, "learning_rate": 3.565656565656566e-05, "loss": 1.3363, "step": 1970 }, { "epoch": 8.038545454545455, "grad_norm": 8.516532897949219, "learning_rate": 3.555555555555556e-05, "loss": 0.9919, "step": 1980 }, { "epoch": 8.040181818181818, "eval_accuracy": 0.5608465608465608, "eval_loss": 0.9307597875595093, "eval_runtime": 199.9286, "eval_samples_per_second": 0.945, "eval_steps_per_second": 0.24, "step": 1989 }, { "epoch": 9.000181818181819, "grad_norm": 18.11604881286621, "learning_rate": 3.545454545454546e-05, "loss": 0.9344, "step": 1990 }, { "epoch": 9.002, "grad_norm": 6.241488456726074, "learning_rate": 3.535353535353535e-05, "loss": 0.9527, "step": 2000 }, { "epoch": 9.003818181818183, "grad_norm": 13.090892791748047, "learning_rate": 3.525252525252525e-05, "loss": 0.6875, "step": 2010 }, { "epoch": 9.005636363636363, "grad_norm": 48.75657653808594, "learning_rate": 3.515151515151515e-05, "loss": 0.9867, "step": 2020 }, { "epoch": 9.007454545454545, "grad_norm": 21.59893226623535, "learning_rate": 3.505050505050505e-05, "loss": 1.3935, "step": 2030 }, { "epoch": 9.009272727272727, "grad_norm": 31.900312423706055, "learning_rate": 3.494949494949495e-05, "loss": 0.7955, "step": 2040 }, { "epoch": 9.011090909090909, "grad_norm": 21.120643615722656, "learning_rate": 3.484848484848485e-05, "loss": 1.1775, "step": 2050 }, { "epoch": 9.01290909090909, "grad_norm": 21.606521606445312, "learning_rate": 3.474747474747475e-05, "loss": 0.7836, "step": 2060 }, { "epoch": 9.014727272727272, "grad_norm": 23.555465698242188, "learning_rate": 3.464646464646465e-05, "loss": 1.0021, "step": 2070 }, { "epoch": 9.016545454545454, "grad_norm": 8.791067123413086, "learning_rate": 3.454545454545455e-05, "loss": 0.8174, "step": 2080 }, { "epoch": 9.018363636363636, "grad_norm": 9.401249885559082, "learning_rate": 3.444444444444445e-05, "loss": 0.9594, "step": 2090 }, { "epoch": 9.020181818181818, "grad_norm": 30.353981018066406, "learning_rate": 3.434343434343435e-05, "loss": 0.9275, "step": 2100 }, { "epoch": 9.022, "grad_norm": 14.411357879638672, "learning_rate": 3.424242424242424e-05, "loss": 0.808, "step": 2110 }, { "epoch": 9.023818181818182, "grad_norm": 27.9307861328125, "learning_rate": 3.414141414141414e-05, "loss": 0.8911, "step": 2120 }, { "epoch": 9.025636363636364, "grad_norm": 48.075042724609375, "learning_rate": 3.4040404040404045e-05, "loss": 1.0519, "step": 2130 }, { "epoch": 9.027454545454546, "grad_norm": 24.737884521484375, "learning_rate": 3.3939393939393945e-05, "loss": 0.8945, "step": 2140 }, { "epoch": 9.029272727272728, "grad_norm": 5.976382255554199, "learning_rate": 3.3838383838383844e-05, "loss": 0.8602, "step": 2150 }, { "epoch": 9.03109090909091, "grad_norm": 54.03303146362305, "learning_rate": 3.373737373737374e-05, "loss": 1.1649, "step": 2160 }, { "epoch": 9.03290909090909, "grad_norm": 12.576547622680664, "learning_rate": 3.3636363636363636e-05, "loss": 1.1853, "step": 2170 }, { "epoch": 9.034727272727272, "grad_norm": 12.94636344909668, "learning_rate": 3.3535353535353536e-05, "loss": 0.9627, "step": 2180 }, { "epoch": 9.036545454545454, "grad_norm": 80.18585968017578, "learning_rate": 3.3434343434343435e-05, "loss": 0.9442, "step": 2190 }, { "epoch": 9.038363636363636, "grad_norm": 34.63486862182617, "learning_rate": 3.3333333333333335e-05, "loss": 1.1025, "step": 2200 }, { "epoch": 9.040181818181818, "grad_norm": 21.734012603759766, "learning_rate": 3.3232323232323234e-05, "loss": 0.7163, "step": 2210 }, { "epoch": 9.040181818181818, "eval_accuracy": 0.5185185185185185, "eval_loss": 0.9721111059188843, "eval_runtime": 188.9985, "eval_samples_per_second": 1.0, "eval_steps_per_second": 0.254, "step": 2210 }, { "epoch": 10.001818181818182, "grad_norm": 22.439699172973633, "learning_rate": 3.3131313131313134e-05, "loss": 1.1736, "step": 2220 }, { "epoch": 10.003636363636364, "grad_norm": 9.710022926330566, "learning_rate": 3.303030303030303e-05, "loss": 0.9365, "step": 2230 }, { "epoch": 10.005454545454546, "grad_norm": 18.069990158081055, "learning_rate": 3.292929292929293e-05, "loss": 0.9009, "step": 2240 }, { "epoch": 10.007272727272728, "grad_norm": 40.750938415527344, "learning_rate": 3.282828282828283e-05, "loss": 1.1381, "step": 2250 }, { "epoch": 10.00909090909091, "grad_norm": 13.775208473205566, "learning_rate": 3.272727272727273e-05, "loss": 1.022, "step": 2260 }, { "epoch": 10.010909090909092, "grad_norm": 14.381951332092285, "learning_rate": 3.2626262626262624e-05, "loss": 0.9232, "step": 2270 }, { "epoch": 10.012727272727274, "grad_norm": 8.365996360778809, "learning_rate": 3.2525252525252524e-05, "loss": 0.9186, "step": 2280 }, { "epoch": 10.014545454545454, "grad_norm": 20.199583053588867, "learning_rate": 3.2424242424242423e-05, "loss": 0.9464, "step": 2290 }, { "epoch": 10.016363636363636, "grad_norm": 17.12706756591797, "learning_rate": 3.232323232323233e-05, "loss": 0.818, "step": 2300 }, { "epoch": 10.018181818181818, "grad_norm": 15.488892555236816, "learning_rate": 3.222222222222223e-05, "loss": 0.9192, "step": 2310 }, { "epoch": 10.02, "grad_norm": 21.11297607421875, "learning_rate": 3.212121212121212e-05, "loss": 0.9366, "step": 2320 }, { "epoch": 10.021818181818182, "grad_norm": 14.04183292388916, "learning_rate": 3.202020202020202e-05, "loss": 0.9159, "step": 2330 }, { "epoch": 10.023636363636363, "grad_norm": 24.148868560791016, "learning_rate": 3.191919191919192e-05, "loss": 0.7416, "step": 2340 }, { "epoch": 10.025454545454545, "grad_norm": 122.17024993896484, "learning_rate": 3.181818181818182e-05, "loss": 1.0963, "step": 2350 }, { "epoch": 10.027272727272727, "grad_norm": 7.737703800201416, "learning_rate": 3.171717171717172e-05, "loss": 0.8124, "step": 2360 }, { "epoch": 10.02909090909091, "grad_norm": 7.881679534912109, "learning_rate": 3.161616161616161e-05, "loss": 0.8641, "step": 2370 }, { "epoch": 10.030909090909091, "grad_norm": 26.089942932128906, "learning_rate": 3.151515151515151e-05, "loss": 0.8965, "step": 2380 }, { "epoch": 10.032727272727273, "grad_norm": 11.208564758300781, "learning_rate": 3.141414141414142e-05, "loss": 0.8305, "step": 2390 }, { "epoch": 10.034545454545455, "grad_norm": 18.578123092651367, "learning_rate": 3.131313131313132e-05, "loss": 1.0483, "step": 2400 }, { "epoch": 10.036363636363637, "grad_norm": 180.40223693847656, "learning_rate": 3.121212121212122e-05, "loss": 0.9592, "step": 2410 }, { "epoch": 10.038181818181819, "grad_norm": 8.062999725341797, "learning_rate": 3.111111111111111e-05, "loss": 0.8835, "step": 2420 }, { "epoch": 10.04, "grad_norm": 27.839271545410156, "learning_rate": 3.101010101010101e-05, "loss": 0.7941, "step": 2430 }, { "epoch": 10.040181818181818, "eval_accuracy": 0.5396825396825397, "eval_loss": 0.9407670497894287, "eval_runtime": 198.3627, "eval_samples_per_second": 0.953, "eval_steps_per_second": 0.242, "step": 2431 }, { "epoch": 11.001636363636363, "grad_norm": 38.953304290771484, "learning_rate": 3.090909090909091e-05, "loss": 0.8673, "step": 2440 }, { "epoch": 11.003454545454545, "grad_norm": 81.81233215332031, "learning_rate": 3.080808080808081e-05, "loss": 1.0405, "step": 2450 }, { "epoch": 11.005272727272727, "grad_norm": 35.105220794677734, "learning_rate": 3.070707070707071e-05, "loss": 0.7739, "step": 2460 }, { "epoch": 11.007090909090909, "grad_norm": 109.05673217773438, "learning_rate": 3.060606060606061e-05, "loss": 1.4651, "step": 2470 }, { "epoch": 11.008909090909091, "grad_norm": 8.228311538696289, "learning_rate": 3.050505050505051e-05, "loss": 0.9487, "step": 2480 }, { "epoch": 11.010727272727273, "grad_norm": 14.025703430175781, "learning_rate": 3.0404040404040406e-05, "loss": 0.9334, "step": 2490 }, { "epoch": 11.012545454545455, "grad_norm": 34.061092376708984, "learning_rate": 3.0303030303030306e-05, "loss": 1.045, "step": 2500 }, { "epoch": 11.014363636363637, "grad_norm": 20.992385864257812, "learning_rate": 3.0202020202020205e-05, "loss": 1.0003, "step": 2510 }, { "epoch": 11.016181818181819, "grad_norm": 28.905166625976562, "learning_rate": 3.01010101010101e-05, "loss": 0.7843, "step": 2520 }, { "epoch": 11.018, "grad_norm": 14.449548721313477, "learning_rate": 3e-05, "loss": 1.1539, "step": 2530 }, { "epoch": 11.019818181818183, "grad_norm": 178.1973876953125, "learning_rate": 2.98989898989899e-05, "loss": 1.1367, "step": 2540 }, { "epoch": 11.021636363636363, "grad_norm": 31.946022033691406, "learning_rate": 2.9797979797979796e-05, "loss": 0.832, "step": 2550 }, { "epoch": 11.023454545454545, "grad_norm": 7.3684468269348145, "learning_rate": 2.96969696969697e-05, "loss": 0.9064, "step": 2560 }, { "epoch": 11.025272727272727, "grad_norm": 29.028446197509766, "learning_rate": 2.95959595959596e-05, "loss": 0.9373, "step": 2570 }, { "epoch": 11.027090909090909, "grad_norm": 27.086463928222656, "learning_rate": 2.9494949494949498e-05, "loss": 0.8377, "step": 2580 }, { "epoch": 11.02890909090909, "grad_norm": 14.71929931640625, "learning_rate": 2.9393939393939394e-05, "loss": 0.9033, "step": 2590 }, { "epoch": 11.030727272727272, "grad_norm": 12.47122859954834, "learning_rate": 2.9292929292929294e-05, "loss": 1.0606, "step": 2600 }, { "epoch": 11.032545454545454, "grad_norm": 16.485416412353516, "learning_rate": 2.9191919191919193e-05, "loss": 1.1198, "step": 2610 }, { "epoch": 11.034363636363636, "grad_norm": 6.706732273101807, "learning_rate": 2.909090909090909e-05, "loss": 0.7835, "step": 2620 }, { "epoch": 11.036181818181818, "grad_norm": 44.958946228027344, "learning_rate": 2.898989898989899e-05, "loss": 0.9495, "step": 2630 }, { "epoch": 11.038, "grad_norm": 11.914873123168945, "learning_rate": 2.8888888888888888e-05, "loss": 0.7911, "step": 2640 }, { "epoch": 11.039818181818182, "grad_norm": 21.188133239746094, "learning_rate": 2.878787878787879e-05, "loss": 0.9386, "step": 2650 }, { "epoch": 11.040181818181818, "eval_accuracy": 0.5925925925925926, "eval_loss": 0.8729740381240845, "eval_runtime": 192.0496, "eval_samples_per_second": 0.984, "eval_steps_per_second": 0.25, "step": 2652 }, { "epoch": 12.001454545454546, "grad_norm": 8.996429443359375, "learning_rate": 2.868686868686869e-05, "loss": 0.7471, "step": 2660 }, { "epoch": 12.003272727272726, "grad_norm": 7.39767599105835, "learning_rate": 2.8585858585858587e-05, "loss": 0.9742, "step": 2670 }, { "epoch": 12.005090909090908, "grad_norm": 23.08453369140625, "learning_rate": 2.8484848484848486e-05, "loss": 1.022, "step": 2680 }, { "epoch": 12.00690909090909, "grad_norm": 22.011754989624023, "learning_rate": 2.8383838383838386e-05, "loss": 0.9812, "step": 2690 }, { "epoch": 12.008727272727272, "grad_norm": 14.558695793151855, "learning_rate": 2.8282828282828282e-05, "loss": 0.8475, "step": 2700 }, { "epoch": 12.010545454545454, "grad_norm": 23.893945693969727, "learning_rate": 2.818181818181818e-05, "loss": 0.7818, "step": 2710 }, { "epoch": 12.012363636363636, "grad_norm": 28.504093170166016, "learning_rate": 2.808080808080808e-05, "loss": 0.8586, "step": 2720 }, { "epoch": 12.014181818181818, "grad_norm": 29.902450561523438, "learning_rate": 2.7979797979797984e-05, "loss": 0.9901, "step": 2730 }, { "epoch": 12.016, "grad_norm": 8.041543960571289, "learning_rate": 2.7878787878787883e-05, "loss": 0.7766, "step": 2740 }, { "epoch": 12.017818181818182, "grad_norm": 19.797578811645508, "learning_rate": 2.777777777777778e-05, "loss": 1.1183, "step": 2750 }, { "epoch": 12.019636363636364, "grad_norm": 24.141660690307617, "learning_rate": 2.767676767676768e-05, "loss": 0.7957, "step": 2760 }, { "epoch": 12.021454545454546, "grad_norm": 14.93819522857666, "learning_rate": 2.7575757575757578e-05, "loss": 0.7143, "step": 2770 }, { "epoch": 12.023272727272728, "grad_norm": 34.424285888671875, "learning_rate": 2.7474747474747474e-05, "loss": 0.8015, "step": 2780 }, { "epoch": 12.02509090909091, "grad_norm": 14.072427749633789, "learning_rate": 2.7373737373737374e-05, "loss": 0.8037, "step": 2790 }, { "epoch": 12.026909090909092, "grad_norm": 42.77705764770508, "learning_rate": 2.7272727272727273e-05, "loss": 1.0436, "step": 2800 }, { "epoch": 12.028727272727274, "grad_norm": 62.84910583496094, "learning_rate": 2.717171717171717e-05, "loss": 0.7561, "step": 2810 }, { "epoch": 12.030545454545454, "grad_norm": 29.13556671142578, "learning_rate": 2.7070707070707075e-05, "loss": 1.0289, "step": 2820 }, { "epoch": 12.032363636363636, "grad_norm": 13.636876106262207, "learning_rate": 2.696969696969697e-05, "loss": 0.8254, "step": 2830 }, { "epoch": 12.034181818181818, "grad_norm": 24.48445701599121, "learning_rate": 2.686868686868687e-05, "loss": 0.7901, "step": 2840 }, { "epoch": 12.036, "grad_norm": 25.308664321899414, "learning_rate": 2.676767676767677e-05, "loss": 0.859, "step": 2850 }, { "epoch": 12.037818181818182, "grad_norm": 30.3497371673584, "learning_rate": 2.6666666666666667e-05, "loss": 1.0236, "step": 2860 }, { "epoch": 12.039636363636363, "grad_norm": 6.4086198806762695, "learning_rate": 2.6565656565656566e-05, "loss": 0.857, "step": 2870 }, { "epoch": 12.040181818181818, "eval_accuracy": 0.656084656084656, "eval_loss": 0.8833221793174744, "eval_runtime": 208.1476, "eval_samples_per_second": 0.908, "eval_steps_per_second": 0.231, "step": 2873 }, { "epoch": 13.001272727272728, "grad_norm": 30.714641571044922, "learning_rate": 2.6464646464646466e-05, "loss": 0.9611, "step": 2880 }, { "epoch": 13.00309090909091, "grad_norm": 6.578027248382568, "learning_rate": 2.636363636363636e-05, "loss": 0.9732, "step": 2890 }, { "epoch": 13.004909090909091, "grad_norm": 31.625444412231445, "learning_rate": 2.6262626262626268e-05, "loss": 0.8233, "step": 2900 }, { "epoch": 13.006727272727273, "grad_norm": 10.502728462219238, "learning_rate": 2.6161616161616164e-05, "loss": 0.8193, "step": 2910 }, { "epoch": 13.008545454545455, "grad_norm": 7.814911842346191, "learning_rate": 2.6060606060606063e-05, "loss": 1.0234, "step": 2920 }, { "epoch": 13.010363636363637, "grad_norm": 7.037054061889648, "learning_rate": 2.5959595959595963e-05, "loss": 1.0141, "step": 2930 }, { "epoch": 13.012181818181817, "grad_norm": 73.73834991455078, "learning_rate": 2.585858585858586e-05, "loss": 1.2231, "step": 2940 }, { "epoch": 13.014, "grad_norm": 47.82062911987305, "learning_rate": 2.575757575757576e-05, "loss": 0.9928, "step": 2950 }, { "epoch": 13.015818181818181, "grad_norm": 10.504758834838867, "learning_rate": 2.5656565656565658e-05, "loss": 0.8383, "step": 2960 }, { "epoch": 13.017636363636363, "grad_norm": 43.19855880737305, "learning_rate": 2.5555555555555554e-05, "loss": 0.7892, "step": 2970 }, { "epoch": 13.019454545454545, "grad_norm": 15.07066822052002, "learning_rate": 2.5454545454545454e-05, "loss": 0.7949, "step": 2980 }, { "epoch": 13.021272727272727, "grad_norm": 13.859105110168457, "learning_rate": 2.5353535353535356e-05, "loss": 0.896, "step": 2990 }, { "epoch": 13.023090909090909, "grad_norm": 6.099465370178223, "learning_rate": 2.5252525252525256e-05, "loss": 0.813, "step": 3000 }, { "epoch": 13.024909090909091, "grad_norm": 25.261594772338867, "learning_rate": 2.5151515151515155e-05, "loss": 0.7597, "step": 3010 }, { "epoch": 13.026727272727273, "grad_norm": 9.865255355834961, "learning_rate": 2.505050505050505e-05, "loss": 0.885, "step": 3020 }, { "epoch": 13.028545454545455, "grad_norm": 22.643348693847656, "learning_rate": 2.494949494949495e-05, "loss": 0.9914, "step": 3030 }, { "epoch": 13.030363636363637, "grad_norm": 23.875263214111328, "learning_rate": 2.4848484848484847e-05, "loss": 0.6312, "step": 3040 }, { "epoch": 13.032181818181819, "grad_norm": 18.725353240966797, "learning_rate": 2.474747474747475e-05, "loss": 0.787, "step": 3050 }, { "epoch": 13.034, "grad_norm": 30.778732299804688, "learning_rate": 2.464646464646465e-05, "loss": 1.0014, "step": 3060 }, { "epoch": 13.035818181818183, "grad_norm": 42.08274459838867, "learning_rate": 2.4545454545454545e-05, "loss": 0.9, "step": 3070 }, { "epoch": 13.037636363636363, "grad_norm": 13.115510940551758, "learning_rate": 2.4444444444444445e-05, "loss": 0.8936, "step": 3080 }, { "epoch": 13.039454545454545, "grad_norm": 40.42136001586914, "learning_rate": 2.4343434343434344e-05, "loss": 0.7059, "step": 3090 }, { "epoch": 13.040181818181818, "eval_accuracy": 0.6402116402116402, "eval_loss": 0.8672856092453003, "eval_runtime": 196.9971, "eval_samples_per_second": 0.959, "eval_steps_per_second": 0.244, "step": 3094 }, { "epoch": 14.001090909090909, "grad_norm": 13.292407989501953, "learning_rate": 2.4242424242424244e-05, "loss": 0.9836, "step": 3100 }, { "epoch": 14.00290909090909, "grad_norm": 34.00928497314453, "learning_rate": 2.4141414141414143e-05, "loss": 0.8808, "step": 3110 }, { "epoch": 14.004727272727273, "grad_norm": 10.223441123962402, "learning_rate": 2.404040404040404e-05, "loss": 0.7724, "step": 3120 }, { "epoch": 14.006545454545455, "grad_norm": 52.84186553955078, "learning_rate": 2.393939393939394e-05, "loss": 0.9767, "step": 3130 }, { "epoch": 14.008363636363637, "grad_norm": 41.73188781738281, "learning_rate": 2.3838383838383842e-05, "loss": 1.1687, "step": 3140 }, { "epoch": 14.010181818181819, "grad_norm": 11.622649192810059, "learning_rate": 2.3737373737373738e-05, "loss": 1.0479, "step": 3150 }, { "epoch": 14.012, "grad_norm": 18.575862884521484, "learning_rate": 2.3636363636363637e-05, "loss": 0.9784, "step": 3160 }, { "epoch": 14.013818181818182, "grad_norm": 4.266345500946045, "learning_rate": 2.3535353535353537e-05, "loss": 0.8186, "step": 3170 }, { "epoch": 14.015636363636364, "grad_norm": 41.91463088989258, "learning_rate": 2.3434343434343436e-05, "loss": 0.821, "step": 3180 }, { "epoch": 14.017454545454546, "grad_norm": 22.115848541259766, "learning_rate": 2.3333333333333336e-05, "loss": 0.8563, "step": 3190 }, { "epoch": 14.019272727272726, "grad_norm": 15.369574546813965, "learning_rate": 2.3232323232323232e-05, "loss": 0.9562, "step": 3200 }, { "epoch": 14.021090909090908, "grad_norm": 35.73291778564453, "learning_rate": 2.313131313131313e-05, "loss": 0.7987, "step": 3210 }, { "epoch": 14.02290909090909, "grad_norm": 13.289374351501465, "learning_rate": 2.3030303030303034e-05, "loss": 0.953, "step": 3220 }, { "epoch": 14.024727272727272, "grad_norm": 9.4387845993042, "learning_rate": 2.292929292929293e-05, "loss": 0.7263, "step": 3230 }, { "epoch": 14.026545454545454, "grad_norm": 19.509065628051758, "learning_rate": 2.282828282828283e-05, "loss": 0.6561, "step": 3240 }, { "epoch": 14.028363636363636, "grad_norm": 11.643479347229004, "learning_rate": 2.272727272727273e-05, "loss": 0.9432, "step": 3250 }, { "epoch": 14.030181818181818, "grad_norm": 30.66929817199707, "learning_rate": 2.262626262626263e-05, "loss": 0.9817, "step": 3260 }, { "epoch": 14.032, "grad_norm": 25.15669059753418, "learning_rate": 2.2525252525252528e-05, "loss": 0.9076, "step": 3270 }, { "epoch": 14.033818181818182, "grad_norm": 15.897071838378906, "learning_rate": 2.2424242424242424e-05, "loss": 0.9879, "step": 3280 }, { "epoch": 14.035636363636364, "grad_norm": 25.583818435668945, "learning_rate": 2.2323232323232324e-05, "loss": 0.8784, "step": 3290 }, { "epoch": 14.037454545454546, "grad_norm": 21.015098571777344, "learning_rate": 2.2222222222222223e-05, "loss": 0.7533, "step": 3300 }, { "epoch": 14.039272727272728, "grad_norm": 23.844053268432617, "learning_rate": 2.2121212121212123e-05, "loss": 0.8322, "step": 3310 }, { "epoch": 14.040181818181818, "eval_accuracy": 0.6296296296296297, "eval_loss": 0.8232609033584595, "eval_runtime": 190.437, "eval_samples_per_second": 0.992, "eval_steps_per_second": 0.252, "step": 3315 }, { "epoch": 15.00090909090909, "grad_norm": 9.514819145202637, "learning_rate": 2.2020202020202022e-05, "loss": 1.0237, "step": 3320 }, { "epoch": 15.002727272727272, "grad_norm": 57.46669387817383, "learning_rate": 2.191919191919192e-05, "loss": 0.8501, "step": 3330 }, { "epoch": 15.004545454545454, "grad_norm": 77.95063018798828, "learning_rate": 2.1818181818181818e-05, "loss": 0.858, "step": 3340 }, { "epoch": 15.006363636363636, "grad_norm": 18.01690673828125, "learning_rate": 2.171717171717172e-05, "loss": 0.8786, "step": 3350 }, { "epoch": 15.008181818181818, "grad_norm": 20.38191795349121, "learning_rate": 2.1616161616161617e-05, "loss": 0.9039, "step": 3360 }, { "epoch": 15.01, "grad_norm": 41.25912857055664, "learning_rate": 2.1515151515151516e-05, "loss": 1.1472, "step": 3370 }, { "epoch": 15.011818181818182, "grad_norm": 11.111079216003418, "learning_rate": 2.1414141414141416e-05, "loss": 0.7277, "step": 3380 }, { "epoch": 15.013636363636364, "grad_norm": 17.063514709472656, "learning_rate": 2.1313131313131315e-05, "loss": 0.7826, "step": 3390 }, { "epoch": 15.015454545454546, "grad_norm": 10.764243125915527, "learning_rate": 2.1212121212121215e-05, "loss": 0.743, "step": 3400 }, { "epoch": 15.017272727272728, "grad_norm": 5.325133800506592, "learning_rate": 2.111111111111111e-05, "loss": 0.6667, "step": 3410 }, { "epoch": 15.01909090909091, "grad_norm": 12.80745792388916, "learning_rate": 2.101010101010101e-05, "loss": 0.8126, "step": 3420 }, { "epoch": 15.020909090909091, "grad_norm": 20.577905654907227, "learning_rate": 2.090909090909091e-05, "loss": 0.8996, "step": 3430 }, { "epoch": 15.022727272727273, "grad_norm": 17.238866806030273, "learning_rate": 2.080808080808081e-05, "loss": 0.7257, "step": 3440 }, { "epoch": 15.024545454545455, "grad_norm": 11.546747207641602, "learning_rate": 2.070707070707071e-05, "loss": 0.8113, "step": 3450 }, { "epoch": 15.026363636363635, "grad_norm": 19.22821807861328, "learning_rate": 2.0606060606060608e-05, "loss": 0.9306, "step": 3460 }, { "epoch": 15.028181818181817, "grad_norm": 8.0140380859375, "learning_rate": 2.0505050505050504e-05, "loss": 0.7893, "step": 3470 }, { "epoch": 15.03, "grad_norm": 159.8179931640625, "learning_rate": 2.0404040404040407e-05, "loss": 0.6769, "step": 3480 }, { "epoch": 15.031818181818181, "grad_norm": 57.692176818847656, "learning_rate": 2.0303030303030303e-05, "loss": 0.8511, "step": 3490 }, { "epoch": 15.033636363636363, "grad_norm": 17.908693313598633, "learning_rate": 2.0202020202020203e-05, "loss": 1.0586, "step": 3500 }, { "epoch": 15.035454545454545, "grad_norm": 7.324565410614014, "learning_rate": 2.0101010101010102e-05, "loss": 0.7331, "step": 3510 }, { "epoch": 15.037272727272727, "grad_norm": 18.01776123046875, "learning_rate": 2e-05, "loss": 0.8703, "step": 3520 }, { "epoch": 15.039090909090909, "grad_norm": 14.08636474609375, "learning_rate": 1.98989898989899e-05, "loss": 0.8574, "step": 3530 }, { "epoch": 15.040181818181818, "eval_accuracy": 0.6825396825396826, "eval_loss": 0.7642742395401001, "eval_runtime": 195.2966, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.246, "step": 3536 }, { "epoch": 16.00072727272727, "grad_norm": 8.01131820678711, "learning_rate": 1.9797979797979797e-05, "loss": 0.6872, "step": 3540 }, { "epoch": 16.002545454545455, "grad_norm": 83.17420196533203, "learning_rate": 1.9696969696969697e-05, "loss": 0.9136, "step": 3550 }, { "epoch": 16.004363636363635, "grad_norm": 16.785919189453125, "learning_rate": 1.95959595959596e-05, "loss": 1.0454, "step": 3560 }, { "epoch": 16.00618181818182, "grad_norm": 14.56745719909668, "learning_rate": 1.9494949494949496e-05, "loss": 0.8476, "step": 3570 }, { "epoch": 16.008, "grad_norm": 15.242620468139648, "learning_rate": 1.9393939393939395e-05, "loss": 0.8815, "step": 3580 }, { "epoch": 16.009818181818183, "grad_norm": 13.342121124267578, "learning_rate": 1.9292929292929295e-05, "loss": 1.018, "step": 3590 }, { "epoch": 16.011636363636363, "grad_norm": 16.53753662109375, "learning_rate": 1.919191919191919e-05, "loss": 0.8287, "step": 3600 }, { "epoch": 16.013454545454547, "grad_norm": 13.803275108337402, "learning_rate": 1.9090909090909094e-05, "loss": 1.0207, "step": 3610 }, { "epoch": 16.015272727272727, "grad_norm": 17.9733943939209, "learning_rate": 1.898989898989899e-05, "loss": 0.7543, "step": 3620 }, { "epoch": 16.01709090909091, "grad_norm": 24.71270179748535, "learning_rate": 1.888888888888889e-05, "loss": 0.7426, "step": 3630 }, { "epoch": 16.01890909090909, "grad_norm": 74.74566650390625, "learning_rate": 1.878787878787879e-05, "loss": 0.6937, "step": 3640 }, { "epoch": 16.020727272727274, "grad_norm": 16.248302459716797, "learning_rate": 1.8686868686868688e-05, "loss": 0.8434, "step": 3650 }, { "epoch": 16.022545454545455, "grad_norm": 21.447010040283203, "learning_rate": 1.8585858585858588e-05, "loss": 0.6861, "step": 3660 }, { "epoch": 16.024363636363635, "grad_norm": 18.33003807067871, "learning_rate": 1.8484848484848487e-05, "loss": 0.8178, "step": 3670 }, { "epoch": 16.02618181818182, "grad_norm": 11.963517189025879, "learning_rate": 1.8383838383838383e-05, "loss": 0.7212, "step": 3680 }, { "epoch": 16.028, "grad_norm": 48.92180633544922, "learning_rate": 1.8282828282828286e-05, "loss": 0.8028, "step": 3690 }, { "epoch": 16.029818181818182, "grad_norm": 82.1492919921875, "learning_rate": 1.8181818181818182e-05, "loss": 0.827, "step": 3700 }, { "epoch": 16.031636363636363, "grad_norm": 22.397035598754883, "learning_rate": 1.808080808080808e-05, "loss": 0.9913, "step": 3710 }, { "epoch": 16.033454545454546, "grad_norm": 92.46865844726562, "learning_rate": 1.797979797979798e-05, "loss": 0.7714, "step": 3720 }, { "epoch": 16.035272727272726, "grad_norm": 44.989501953125, "learning_rate": 1.787878787878788e-05, "loss": 0.7027, "step": 3730 }, { "epoch": 16.03709090909091, "grad_norm": 61.908939361572266, "learning_rate": 1.777777777777778e-05, "loss": 0.7927, "step": 3740 }, { "epoch": 16.03890909090909, "grad_norm": 124.97834014892578, "learning_rate": 1.7676767676767676e-05, "loss": 0.7092, "step": 3750 }, { "epoch": 16.040181818181818, "eval_accuracy": 0.671957671957672, "eval_loss": 0.7971976399421692, "eval_runtime": 193.9592, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.247, "step": 3757 }, { "epoch": 17.000545454545456, "grad_norm": 40.8574104309082, "learning_rate": 1.7575757575757576e-05, "loss": 0.7822, "step": 3760 }, { "epoch": 17.002363636363636, "grad_norm": 41.2142448425293, "learning_rate": 1.7474747474747475e-05, "loss": 0.7753, "step": 3770 }, { "epoch": 17.004181818181817, "grad_norm": 24.647171020507812, "learning_rate": 1.7373737373737375e-05, "loss": 0.5532, "step": 3780 }, { "epoch": 17.006, "grad_norm": 95.03191375732422, "learning_rate": 1.7272727272727274e-05, "loss": 0.9703, "step": 3790 }, { "epoch": 17.00781818181818, "grad_norm": 23.023130416870117, "learning_rate": 1.7171717171717173e-05, "loss": 0.959, "step": 3800 }, { "epoch": 17.009636363636364, "grad_norm": 11.591606140136719, "learning_rate": 1.707070707070707e-05, "loss": 0.7913, "step": 3810 }, { "epoch": 17.011454545454544, "grad_norm": 9.28178882598877, "learning_rate": 1.6969696969696972e-05, "loss": 0.8629, "step": 3820 }, { "epoch": 17.013272727272728, "grad_norm": 8.856142044067383, "learning_rate": 1.686868686868687e-05, "loss": 0.7767, "step": 3830 }, { "epoch": 17.015090909090908, "grad_norm": 32.30220413208008, "learning_rate": 1.6767676767676768e-05, "loss": 0.7585, "step": 3840 }, { "epoch": 17.016909090909092, "grad_norm": 30.16693878173828, "learning_rate": 1.6666666666666667e-05, "loss": 0.7905, "step": 3850 }, { "epoch": 17.018727272727272, "grad_norm": 10.049819946289062, "learning_rate": 1.6565656565656567e-05, "loss": 0.8439, "step": 3860 }, { "epoch": 17.020545454545456, "grad_norm": 27.414104461669922, "learning_rate": 1.6464646464646466e-05, "loss": 0.8226, "step": 3870 }, { "epoch": 17.022363636363636, "grad_norm": 37.73158645629883, "learning_rate": 1.6363636363636366e-05, "loss": 0.8542, "step": 3880 }, { "epoch": 17.02418181818182, "grad_norm": 24.757152557373047, "learning_rate": 1.6262626262626262e-05, "loss": 0.7458, "step": 3890 }, { "epoch": 17.026, "grad_norm": 33.96831512451172, "learning_rate": 1.6161616161616165e-05, "loss": 0.751, "step": 3900 }, { "epoch": 17.027818181818184, "grad_norm": 39.2791633605957, "learning_rate": 1.606060606060606e-05, "loss": 0.7804, "step": 3910 }, { "epoch": 17.029636363636364, "grad_norm": 13.831995010375977, "learning_rate": 1.595959595959596e-05, "loss": 0.7447, "step": 3920 }, { "epoch": 17.031454545454544, "grad_norm": 29.68831443786621, "learning_rate": 1.585858585858586e-05, "loss": 0.8449, "step": 3930 }, { "epoch": 17.033272727272728, "grad_norm": 89.53331756591797, "learning_rate": 1.5757575757575756e-05, "loss": 0.6897, "step": 3940 }, { "epoch": 17.035090909090908, "grad_norm": 31.637697219848633, "learning_rate": 1.565656565656566e-05, "loss": 0.8963, "step": 3950 }, { "epoch": 17.03690909090909, "grad_norm": 8.320121765136719, "learning_rate": 1.5555555555555555e-05, "loss": 0.4999, "step": 3960 }, { "epoch": 17.03872727272727, "grad_norm": 20.025047302246094, "learning_rate": 1.5454545454545454e-05, "loss": 0.6816, "step": 3970 }, { "epoch": 17.040181818181818, "eval_accuracy": 0.708994708994709, "eval_loss": 0.712218165397644, "eval_runtime": 190.8911, "eval_samples_per_second": 0.99, "eval_steps_per_second": 0.251, "step": 3978 }, { "epoch": 18.000363636363637, "grad_norm": 11.93933391571045, "learning_rate": 1.5353535353535354e-05, "loss": 0.788, "step": 3980 }, { "epoch": 18.002181818181818, "grad_norm": 57.62703323364258, "learning_rate": 1.5252525252525255e-05, "loss": 0.9024, "step": 3990 }, { "epoch": 18.004, "grad_norm": 16.53204917907715, "learning_rate": 1.5151515151515153e-05, "loss": 0.7909, "step": 4000 }, { "epoch": 18.00581818181818, "grad_norm": 4.855262279510498, "learning_rate": 1.505050505050505e-05, "loss": 0.7213, "step": 4010 }, { "epoch": 18.007636363636365, "grad_norm": 7.463796138763428, "learning_rate": 1.494949494949495e-05, "loss": 0.9467, "step": 4020 }, { "epoch": 18.009454545454545, "grad_norm": 13.145658493041992, "learning_rate": 1.484848484848485e-05, "loss": 0.7383, "step": 4030 }, { "epoch": 18.011272727272726, "grad_norm": 29.503515243530273, "learning_rate": 1.4747474747474749e-05, "loss": 0.9726, "step": 4040 }, { "epoch": 18.01309090909091, "grad_norm": 52.793174743652344, "learning_rate": 1.4646464646464647e-05, "loss": 0.7689, "step": 4050 }, { "epoch": 18.01490909090909, "grad_norm": 43.86151123046875, "learning_rate": 1.4545454545454545e-05, "loss": 0.8847, "step": 4060 }, { "epoch": 18.016727272727273, "grad_norm": 5.855788707733154, "learning_rate": 1.4444444444444444e-05, "loss": 0.653, "step": 4070 }, { "epoch": 18.018545454545453, "grad_norm": 50.7757453918457, "learning_rate": 1.4343434343434345e-05, "loss": 0.74, "step": 4080 }, { "epoch": 18.020363636363637, "grad_norm": 119.08275604248047, "learning_rate": 1.4242424242424243e-05, "loss": 0.9263, "step": 4090 }, { "epoch": 18.022181818181817, "grad_norm": 18.412038803100586, "learning_rate": 1.4141414141414141e-05, "loss": 0.638, "step": 4100 }, { "epoch": 18.024, "grad_norm": 5.919767379760742, "learning_rate": 1.404040404040404e-05, "loss": 0.7176, "step": 4110 }, { "epoch": 18.02581818181818, "grad_norm": 22.25209617614746, "learning_rate": 1.3939393939393942e-05, "loss": 0.7239, "step": 4120 }, { "epoch": 18.027636363636365, "grad_norm": 18.18853187561035, "learning_rate": 1.383838383838384e-05, "loss": 0.7939, "step": 4130 }, { "epoch": 18.029454545454545, "grad_norm": 25.958633422851562, "learning_rate": 1.3737373737373737e-05, "loss": 0.7458, "step": 4140 }, { "epoch": 18.03127272727273, "grad_norm": 49.500728607177734, "learning_rate": 1.3636363636363637e-05, "loss": 0.8482, "step": 4150 }, { "epoch": 18.03309090909091, "grad_norm": 50.96277618408203, "learning_rate": 1.3535353535353538e-05, "loss": 0.7981, "step": 4160 }, { "epoch": 18.034909090909093, "grad_norm": 48.261512756347656, "learning_rate": 1.3434343434343436e-05, "loss": 0.9629, "step": 4170 }, { "epoch": 18.036727272727273, "grad_norm": 70.20667266845703, "learning_rate": 1.3333333333333333e-05, "loss": 0.9913, "step": 4180 }, { "epoch": 18.038545454545453, "grad_norm": 18.857158660888672, "learning_rate": 1.3232323232323233e-05, "loss": 0.839, "step": 4190 }, { "epoch": 18.040181818181818, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.7404036521911621, "eval_runtime": 194.8175, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.246, "step": 4199 }, { "epoch": 19.00018181818182, "grad_norm": 13.861733436584473, "learning_rate": 1.3131313131313134e-05, "loss": 0.9389, "step": 4200 }, { "epoch": 19.002, "grad_norm": 85.8123550415039, "learning_rate": 1.3030303030303032e-05, "loss": 0.7941, "step": 4210 }, { "epoch": 19.003818181818183, "grad_norm": 28.504823684692383, "learning_rate": 1.292929292929293e-05, "loss": 0.7037, "step": 4220 }, { "epoch": 19.005636363636363, "grad_norm": 45.82936096191406, "learning_rate": 1.2828282828282829e-05, "loss": 0.7746, "step": 4230 }, { "epoch": 19.007454545454546, "grad_norm": 31.655263900756836, "learning_rate": 1.2727272727272727e-05, "loss": 0.7592, "step": 4240 }, { "epoch": 19.009272727272727, "grad_norm": 15.960184097290039, "learning_rate": 1.2626262626262628e-05, "loss": 0.6311, "step": 4250 }, { "epoch": 19.01109090909091, "grad_norm": 6.695078372955322, "learning_rate": 1.2525252525252526e-05, "loss": 0.7749, "step": 4260 }, { "epoch": 19.01290909090909, "grad_norm": 24.639005661010742, "learning_rate": 1.2424242424242424e-05, "loss": 0.827, "step": 4270 }, { "epoch": 19.014727272727274, "grad_norm": 16.702285766601562, "learning_rate": 1.2323232323232325e-05, "loss": 0.9035, "step": 4280 }, { "epoch": 19.016545454545454, "grad_norm": 27.27808952331543, "learning_rate": 1.2222222222222222e-05, "loss": 0.9385, "step": 4290 }, { "epoch": 19.018363636363638, "grad_norm": 13.049288749694824, "learning_rate": 1.2121212121212122e-05, "loss": 0.9155, "step": 4300 }, { "epoch": 19.02018181818182, "grad_norm": 12.153098106384277, "learning_rate": 1.202020202020202e-05, "loss": 0.7098, "step": 4310 }, { "epoch": 19.022, "grad_norm": 56.02737808227539, "learning_rate": 1.1919191919191921e-05, "loss": 0.7314, "step": 4320 }, { "epoch": 19.023818181818182, "grad_norm": 25.432979583740234, "learning_rate": 1.1818181818181819e-05, "loss": 0.8002, "step": 4330 }, { "epoch": 19.025636363636362, "grad_norm": 10.923587799072266, "learning_rate": 1.1717171717171718e-05, "loss": 0.889, "step": 4340 }, { "epoch": 19.027454545454546, "grad_norm": 30.052419662475586, "learning_rate": 1.1616161616161616e-05, "loss": 0.7825, "step": 4350 }, { "epoch": 19.029272727272726, "grad_norm": 13.3062744140625, "learning_rate": 1.1515151515151517e-05, "loss": 0.7734, "step": 4360 }, { "epoch": 19.03109090909091, "grad_norm": 81.07504272460938, "learning_rate": 1.1414141414141415e-05, "loss": 0.7578, "step": 4370 }, { "epoch": 19.03290909090909, "grad_norm": 97.549072265625, "learning_rate": 1.1313131313131314e-05, "loss": 0.9045, "step": 4380 }, { "epoch": 19.034727272727274, "grad_norm": 57.642093658447266, "learning_rate": 1.1212121212121212e-05, "loss": 0.5685, "step": 4390 }, { "epoch": 19.036545454545454, "grad_norm": 66.42289733886719, "learning_rate": 1.1111111111111112e-05, "loss": 0.5724, "step": 4400 }, { "epoch": 19.038363636363638, "grad_norm": 10.334883689880371, "learning_rate": 1.1010101010101011e-05, "loss": 0.6384, "step": 4410 }, { "epoch": 19.040181818181818, "grad_norm": 4.034684181213379, "learning_rate": 1.0909090909090909e-05, "loss": 0.4672, "step": 4420 }, { "epoch": 19.040181818181818, "eval_accuracy": 0.7195767195767195, "eval_loss": 0.7093486189842224, "eval_runtime": 195.9324, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.245, "step": 4420 }, { "epoch": 20.00181818181818, "grad_norm": 38.80495834350586, "learning_rate": 1.0808080808080808e-05, "loss": 0.7484, "step": 4430 }, { "epoch": 20.003636363636364, "grad_norm": 96.9532241821289, "learning_rate": 1.0707070707070708e-05, "loss": 0.8687, "step": 4440 }, { "epoch": 20.005454545454544, "grad_norm": 31.927587509155273, "learning_rate": 1.0606060606060607e-05, "loss": 0.6293, "step": 4450 }, { "epoch": 20.007272727272728, "grad_norm": 31.61720848083496, "learning_rate": 1.0505050505050505e-05, "loss": 0.8344, "step": 4460 }, { "epoch": 20.009090909090908, "grad_norm": 17.091989517211914, "learning_rate": 1.0404040404040405e-05, "loss": 0.8526, "step": 4470 }, { "epoch": 20.01090909090909, "grad_norm": 11.5504150390625, "learning_rate": 1.0303030303030304e-05, "loss": 1.0841, "step": 4480 }, { "epoch": 20.012727272727272, "grad_norm": 17.285606384277344, "learning_rate": 1.0202020202020204e-05, "loss": 0.6781, "step": 4490 }, { "epoch": 20.014545454545456, "grad_norm": 7.431727409362793, "learning_rate": 1.0101010101010101e-05, "loss": 0.6636, "step": 4500 }, { "epoch": 20.016363636363636, "grad_norm": 64.10966491699219, "learning_rate": 1e-05, "loss": 0.7486, "step": 4510 }, { "epoch": 20.01818181818182, "grad_norm": 37.64710235595703, "learning_rate": 9.898989898989899e-06, "loss": 0.7398, "step": 4520 }, { "epoch": 20.02, "grad_norm": 69.10208892822266, "learning_rate": 9.7979797979798e-06, "loss": 1.0162, "step": 4530 }, { "epoch": 20.021818181818183, "grad_norm": 27.21761703491211, "learning_rate": 9.696969696969698e-06, "loss": 0.7102, "step": 4540 }, { "epoch": 20.023636363636363, "grad_norm": 28.60508155822754, "learning_rate": 9.595959595959595e-06, "loss": 0.5108, "step": 4550 }, { "epoch": 20.025454545454547, "grad_norm": 124.87145233154297, "learning_rate": 9.494949494949495e-06, "loss": 0.7203, "step": 4560 }, { "epoch": 20.027272727272727, "grad_norm": 40.59431838989258, "learning_rate": 9.393939393939394e-06, "loss": 0.9909, "step": 4570 }, { "epoch": 20.029090909090908, "grad_norm": 18.73350715637207, "learning_rate": 9.292929292929294e-06, "loss": 0.6461, "step": 4580 }, { "epoch": 20.03090909090909, "grad_norm": 20.62631607055664, "learning_rate": 9.191919191919192e-06, "loss": 0.774, "step": 4590 }, { "epoch": 20.03272727272727, "grad_norm": 86.15074157714844, "learning_rate": 9.090909090909091e-06, "loss": 0.8109, "step": 4600 }, { "epoch": 20.034545454545455, "grad_norm": 43.705196380615234, "learning_rate": 8.98989898989899e-06, "loss": 0.6988, "step": 4610 }, { "epoch": 20.036363636363635, "grad_norm": 9.670613288879395, "learning_rate": 8.88888888888889e-06, "loss": 0.8493, "step": 4620 }, { "epoch": 20.03818181818182, "grad_norm": 36.182037353515625, "learning_rate": 8.787878787878788e-06, "loss": 0.6715, "step": 4630 }, { "epoch": 20.04, "grad_norm": 22.325069427490234, "learning_rate": 8.686868686868687e-06, "loss": 0.5119, "step": 4640 }, { "epoch": 20.040181818181818, "eval_accuracy": 0.6825396825396826, "eval_loss": 0.7170515656471252, "eval_runtime": 194.357, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.247, "step": 4641 }, { "epoch": 21.001636363636365, "grad_norm": 30.531394958496094, "learning_rate": 8.585858585858587e-06, "loss": 0.5961, "step": 4650 }, { "epoch": 21.003454545454545, "grad_norm": 24.909801483154297, "learning_rate": 8.484848484848486e-06, "loss": 0.8621, "step": 4660 }, { "epoch": 21.00527272727273, "grad_norm": 84.33313751220703, "learning_rate": 8.383838383838384e-06, "loss": 0.7983, "step": 4670 }, { "epoch": 21.00709090909091, "grad_norm": 33.05415344238281, "learning_rate": 8.282828282828283e-06, "loss": 0.8508, "step": 4680 }, { "epoch": 21.00890909090909, "grad_norm": 55.02461242675781, "learning_rate": 8.181818181818183e-06, "loss": 0.7546, "step": 4690 }, { "epoch": 21.010727272727273, "grad_norm": 19.233810424804688, "learning_rate": 8.080808080808082e-06, "loss": 0.58, "step": 4700 }, { "epoch": 21.012545454545453, "grad_norm": 8.127425193786621, "learning_rate": 7.97979797979798e-06, "loss": 0.7661, "step": 4710 }, { "epoch": 21.014363636363637, "grad_norm": 28.833446502685547, "learning_rate": 7.878787878787878e-06, "loss": 0.653, "step": 4720 }, { "epoch": 21.016181818181817, "grad_norm": 24.907699584960938, "learning_rate": 7.777777777777777e-06, "loss": 0.6454, "step": 4730 }, { "epoch": 21.018, "grad_norm": 20.222206115722656, "learning_rate": 7.676767676767677e-06, "loss": 0.847, "step": 4740 }, { "epoch": 21.01981818181818, "grad_norm": 7.958954811096191, "learning_rate": 7.5757575757575764e-06, "loss": 0.8589, "step": 4750 }, { "epoch": 21.021636363636365, "grad_norm": 75.67617797851562, "learning_rate": 7.474747474747475e-06, "loss": 0.6666, "step": 4760 }, { "epoch": 21.023454545454545, "grad_norm": 2.3386125564575195, "learning_rate": 7.3737373737373745e-06, "loss": 0.8145, "step": 4770 }, { "epoch": 21.02527272727273, "grad_norm": 69.61566162109375, "learning_rate": 7.272727272727272e-06, "loss": 0.4374, "step": 4780 }, { "epoch": 21.02709090909091, "grad_norm": 26.695165634155273, "learning_rate": 7.171717171717173e-06, "loss": 0.6145, "step": 4790 }, { "epoch": 21.028909090909092, "grad_norm": 19.140888214111328, "learning_rate": 7.0707070707070704e-06, "loss": 0.6424, "step": 4800 }, { "epoch": 21.030727272727272, "grad_norm": 8.267212867736816, "learning_rate": 6.969696969696971e-06, "loss": 0.7443, "step": 4810 }, { "epoch": 21.032545454545456, "grad_norm": 23.349206924438477, "learning_rate": 6.8686868686868685e-06, "loss": 0.5858, "step": 4820 }, { "epoch": 21.034363636363636, "grad_norm": 24.537132263183594, "learning_rate": 6.767676767676769e-06, "loss": 0.7272, "step": 4830 }, { "epoch": 21.036181818181817, "grad_norm": 27.91595458984375, "learning_rate": 6.666666666666667e-06, "loss": 0.9845, "step": 4840 }, { "epoch": 21.038, "grad_norm": 20.382572174072266, "learning_rate": 6.565656565656567e-06, "loss": 0.629, "step": 4850 }, { "epoch": 21.03981818181818, "grad_norm": 49.914493560791016, "learning_rate": 6.464646464646465e-06, "loss": 0.7769, "step": 4860 }, { "epoch": 21.040181818181818, "eval_accuracy": 0.6931216931216931, "eval_loss": 0.7071664929389954, "eval_runtime": 196.3708, "eval_samples_per_second": 0.962, "eval_steps_per_second": 0.244, "step": 4862 }, { "epoch": 22.001454545454546, "grad_norm": 47.80362319946289, "learning_rate": 6.363636363636363e-06, "loss": 0.9012, "step": 4870 }, { "epoch": 22.003272727272726, "grad_norm": 41.31081771850586, "learning_rate": 6.262626262626263e-06, "loss": 0.7611, "step": 4880 }, { "epoch": 22.00509090909091, "grad_norm": 24.996232986450195, "learning_rate": 6.161616161616162e-06, "loss": 0.479, "step": 4890 }, { "epoch": 22.00690909090909, "grad_norm": 18.948902130126953, "learning_rate": 6.060606060606061e-06, "loss": 0.6003, "step": 4900 }, { "epoch": 22.008727272727274, "grad_norm": 59.650230407714844, "learning_rate": 5.9595959595959605e-06, "loss": 0.5891, "step": 4910 }, { "epoch": 22.010545454545454, "grad_norm": 96.68114471435547, "learning_rate": 5.858585858585859e-06, "loss": 0.8473, "step": 4920 }, { "epoch": 22.012363636363638, "grad_norm": 29.867095947265625, "learning_rate": 5.7575757575757586e-06, "loss": 0.6998, "step": 4930 }, { "epoch": 22.014181818181818, "grad_norm": 25.995758056640625, "learning_rate": 5.656565656565657e-06, "loss": 0.6509, "step": 4940 }, { "epoch": 22.016, "grad_norm": 54.34788131713867, "learning_rate": 5.555555555555556e-06, "loss": 0.5927, "step": 4950 }, { "epoch": 22.017818181818182, "grad_norm": 42.82137680053711, "learning_rate": 5.4545454545454545e-06, "loss": 0.7703, "step": 4960 }, { "epoch": 22.019636363636362, "grad_norm": 34.68387222290039, "learning_rate": 5.353535353535354e-06, "loss": 0.7322, "step": 4970 }, { "epoch": 22.021454545454546, "grad_norm": 27.881155014038086, "learning_rate": 5.2525252525252526e-06, "loss": 0.5349, "step": 4980 }, { "epoch": 22.023272727272726, "grad_norm": 22.914897918701172, "learning_rate": 5.151515151515152e-06, "loss": 0.8067, "step": 4990 }, { "epoch": 22.02509090909091, "grad_norm": 18.1747989654541, "learning_rate": 5.050505050505051e-06, "loss": 0.7394, "step": 5000 }, { "epoch": 22.02690909090909, "grad_norm": 18.75357437133789, "learning_rate": 4.949494949494949e-06, "loss": 0.7725, "step": 5010 }, { "epoch": 22.028727272727274, "grad_norm": 88.3263931274414, "learning_rate": 4.848484848484849e-06, "loss": 0.6074, "step": 5020 }, { "epoch": 22.030545454545454, "grad_norm": 34.32087326049805, "learning_rate": 4.747474747474747e-06, "loss": 0.7811, "step": 5030 }, { "epoch": 22.032363636363637, "grad_norm": 32.58205032348633, "learning_rate": 4.646464646464647e-06, "loss": 0.8519, "step": 5040 }, { "epoch": 22.034181818181818, "grad_norm": 28.88677215576172, "learning_rate": 4.5454545454545455e-06, "loss": 0.6139, "step": 5050 }, { "epoch": 22.036, "grad_norm": 14.495834350585938, "learning_rate": 4.444444444444445e-06, "loss": 0.5717, "step": 5060 }, { "epoch": 22.03781818181818, "grad_norm": 42.042579650878906, "learning_rate": 4.343434343434344e-06, "loss": 0.6286, "step": 5070 }, { "epoch": 22.039636363636365, "grad_norm": 102.42108917236328, "learning_rate": 4.242424242424243e-06, "loss": 0.6635, "step": 5080 }, { "epoch": 22.040181818181818, "eval_accuracy": 0.7195767195767195, "eval_loss": 0.7095865607261658, "eval_runtime": 197.7202, "eval_samples_per_second": 0.956, "eval_steps_per_second": 0.243, "step": 5083 }, { "epoch": 23.001272727272728, "grad_norm": 71.68008422851562, "learning_rate": 4.141414141414142e-06, "loss": 0.6267, "step": 5090 }, { "epoch": 23.003090909090908, "grad_norm": 50.31315612792969, "learning_rate": 4.040404040404041e-06, "loss": 0.7405, "step": 5100 }, { "epoch": 23.00490909090909, "grad_norm": 40.68128204345703, "learning_rate": 3.939393939393939e-06, "loss": 0.7911, "step": 5110 }, { "epoch": 23.00672727272727, "grad_norm": 35.65066146850586, "learning_rate": 3.8383838383838385e-06, "loss": 0.8023, "step": 5120 }, { "epoch": 23.008545454545455, "grad_norm": 24.422685623168945, "learning_rate": 3.7373737373737375e-06, "loss": 0.5818, "step": 5130 }, { "epoch": 23.010363636363635, "grad_norm": 19.126468658447266, "learning_rate": 3.636363636363636e-06, "loss": 0.6642, "step": 5140 }, { "epoch": 23.01218181818182, "grad_norm": 19.357858657836914, "learning_rate": 3.5353535353535352e-06, "loss": 0.6839, "step": 5150 }, { "epoch": 23.014, "grad_norm": 38.63173294067383, "learning_rate": 3.4343434343434343e-06, "loss": 0.5163, "step": 5160 }, { "epoch": 23.015818181818183, "grad_norm": 17.257484436035156, "learning_rate": 3.3333333333333333e-06, "loss": 0.7778, "step": 5170 }, { "epoch": 23.017636363636363, "grad_norm": 38.906246185302734, "learning_rate": 3.2323232323232324e-06, "loss": 0.6887, "step": 5180 }, { "epoch": 23.019454545454547, "grad_norm": 103.85768127441406, "learning_rate": 3.1313131313131314e-06, "loss": 0.7222, "step": 5190 }, { "epoch": 23.021272727272727, "grad_norm": 28.443326950073242, "learning_rate": 3.0303030303030305e-06, "loss": 0.7608, "step": 5200 }, { "epoch": 23.02309090909091, "grad_norm": 38.45210647583008, "learning_rate": 2.9292929292929295e-06, "loss": 0.782, "step": 5210 }, { "epoch": 23.02490909090909, "grad_norm": 28.65218734741211, "learning_rate": 2.8282828282828286e-06, "loss": 0.4031, "step": 5220 }, { "epoch": 23.02672727272727, "grad_norm": 59.21070098876953, "learning_rate": 2.7272727272727272e-06, "loss": 0.6224, "step": 5230 }, { "epoch": 23.028545454545455, "grad_norm": 136.05392456054688, "learning_rate": 2.6262626262626263e-06, "loss": 0.7196, "step": 5240 }, { "epoch": 23.030363636363635, "grad_norm": 183.45257568359375, "learning_rate": 2.5252525252525253e-06, "loss": 1.0722, "step": 5250 }, { "epoch": 23.03218181818182, "grad_norm": 17.261632919311523, "learning_rate": 2.4242424242424244e-06, "loss": 0.5863, "step": 5260 }, { "epoch": 23.034, "grad_norm": 100.2103500366211, "learning_rate": 2.3232323232323234e-06, "loss": 0.4809, "step": 5270 }, { "epoch": 23.035818181818183, "grad_norm": 32.065818786621094, "learning_rate": 2.2222222222222225e-06, "loss": 0.5746, "step": 5280 }, { "epoch": 23.037636363636363, "grad_norm": 9.926220893859863, "learning_rate": 2.1212121212121216e-06, "loss": 0.451, "step": 5290 }, { "epoch": 23.039454545454547, "grad_norm": 27.379253387451172, "learning_rate": 2.0202020202020206e-06, "loss": 0.8027, "step": 5300 }, { "epoch": 23.040181818181818, "eval_accuracy": 0.7037037037037037, "eval_loss": 0.6887486577033997, "eval_runtime": 194.3489, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.247, "step": 5304 }, { "epoch": 24.00109090909091, "grad_norm": 71.3526840209961, "learning_rate": 1.9191919191919192e-06, "loss": 0.5815, "step": 5310 }, { "epoch": 24.002909090909093, "grad_norm": 4.591304779052734, "learning_rate": 1.818181818181818e-06, "loss": 0.8352, "step": 5320 }, { "epoch": 24.004727272727273, "grad_norm": 7.72051477432251, "learning_rate": 1.7171717171717171e-06, "loss": 0.8424, "step": 5330 }, { "epoch": 24.006545454545453, "grad_norm": 71.4774169921875, "learning_rate": 1.6161616161616162e-06, "loss": 0.9185, "step": 5340 }, { "epoch": 24.008363636363637, "grad_norm": 25.500089645385742, "learning_rate": 1.5151515151515152e-06, "loss": 0.5875, "step": 5350 }, { "epoch": 24.010181818181817, "grad_norm": 80.47840118408203, "learning_rate": 1.4141414141414143e-06, "loss": 0.9027, "step": 5360 }, { "epoch": 24.012, "grad_norm": 15.391636848449707, "learning_rate": 1.3131313131313131e-06, "loss": 0.4817, "step": 5370 }, { "epoch": 24.01381818181818, "grad_norm": 29.841554641723633, "learning_rate": 1.2121212121212122e-06, "loss": 0.7169, "step": 5380 }, { "epoch": 24.015636363636364, "grad_norm": 50.79997634887695, "learning_rate": 1.1111111111111112e-06, "loss": 0.7159, "step": 5390 }, { "epoch": 24.017454545454545, "grad_norm": 108.86124420166016, "learning_rate": 1.0101010101010103e-06, "loss": 1.0093, "step": 5400 }, { "epoch": 24.019272727272728, "grad_norm": 29.645301818847656, "learning_rate": 9.09090909090909e-07, "loss": 0.6033, "step": 5410 }, { "epoch": 24.02109090909091, "grad_norm": 28.632356643676758, "learning_rate": 8.080808080808081e-07, "loss": 0.5634, "step": 5420 }, { "epoch": 24.022909090909092, "grad_norm": 24.16314697265625, "learning_rate": 7.070707070707071e-07, "loss": 0.5789, "step": 5430 }, { "epoch": 24.024727272727272, "grad_norm": 69.10330200195312, "learning_rate": 6.060606060606061e-07, "loss": 0.523, "step": 5440 }, { "epoch": 24.026545454545456, "grad_norm": 153.1522216796875, "learning_rate": 5.050505050505052e-07, "loss": 0.7189, "step": 5450 }, { "epoch": 24.028363636363636, "grad_norm": 29.681365966796875, "learning_rate": 4.0404040404040405e-07, "loss": 0.4212, "step": 5460 }, { "epoch": 24.03018181818182, "grad_norm": 51.57491683959961, "learning_rate": 3.0303030303030305e-07, "loss": 0.724, "step": 5470 }, { "epoch": 24.032, "grad_norm": 19.3199520111084, "learning_rate": 2.0202020202020202e-07, "loss": 0.5203, "step": 5480 }, { "epoch": 24.03381818181818, "grad_norm": 138.87806701660156, "learning_rate": 1.0101010101010101e-07, "loss": 0.9596, "step": 5490 }, { "epoch": 24.035636363636364, "grad_norm": 38.64838790893555, "learning_rate": 0.0, "loss": 0.4765, "step": 5500 }, { "epoch": 24.035636363636364, "eval_accuracy": 0.7407407407407407, "eval_loss": 0.6834621429443359, "eval_runtime": 207.2262, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.232, "step": 5500 }, { "epoch": 1.0001818181818183, "eval_accuracy": 0.671957671957672, "eval_loss": 0.7627605199813843, "eval_runtime": 237.6048, "eval_samples_per_second": 0.795, "eval_steps_per_second": 0.202, "step": 5501 } ], "logging_steps": 10, "max_steps": 5500, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.389253736670403e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }